mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-21 19:35:36 -05:00
950 lines
30 KiB
C++
950 lines
30 KiB
C++
// Late-stage instruction combination pass.
|
|
// Copyright (C) 2023-2026 Free Software Foundation, Inc.
|
|
//
|
|
// This file is part of GCC.
|
|
//
|
|
// GCC is free software; you can redistribute it and/or modify it under
|
|
// the terms of the GNU General Public License as published by the Free
|
|
// Software Foundation; either version 3, or (at your option) any later
|
|
// version.
|
|
//
|
|
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
// for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with GCC; see the file COPYING3. If not see
|
|
// <http://www.gnu.org/licenses/>.
|
|
|
|
// This pass currently has two purposes:
|
|
//
|
|
// - to substitute definitions into all uses, so that the definition
|
|
// can be removed.
|
|
//
|
|
// - to try to parallelise sets of condition-code registers with a
|
|
// related instruction (see combine_cc_setter for details).
|
|
//
|
|
// However, it could be extended to handle other combination-related
|
|
// optimizations in future.
|
|
//
|
|
// The pass can run before or after register allocation. When running
|
|
// before register allocation, it tries to avoid cases that are likely
|
|
// to increase register pressure. For the same reason, it avoids moving
|
|
// instructions around, even if doing so would allow an optimization to
|
|
// succeed. These limitations are removed when running after register
|
|
// allocation.
|
|
|
|
#define INCLUDE_ALGORITHM
|
|
#define INCLUDE_FUNCTIONAL
|
|
#define INCLUDE_ARRAY
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "backend.h"
|
|
#include "rtl.h"
|
|
#include "df.h"
|
|
#include "rtl-ssa.h"
|
|
#include "print-rtl.h"
|
|
#include "tree-pass.h"
|
|
#include "cfgcleanup.h"
|
|
#include "target.h"
|
|
#include "dbgcnt.h"
|
|
|
|
using namespace rtl_ssa;
|
|
|
|
namespace {
|
|
const pass_data pass_data_late_combine =
|
|
{
|
|
RTL_PASS, // type
|
|
"late_combine", // name
|
|
OPTGROUP_NONE, // optinfo_flags
|
|
TV_LATE_COMBINE, // tv_id
|
|
0, // properties_required
|
|
0, // properties_provided
|
|
0, // properties_destroyed
|
|
0, // todo_flags_start
|
|
TODO_df_finish, // todo_flags_finish
|
|
};
|
|
|
|
// Represents an attempt to substitute a single-set definition into all
|
|
// uses of the definition.
|
|
class insn_combination
|
|
{
|
|
public:
|
|
insn_combination (set_info *, rtx, rtx);
|
|
bool run ();
|
|
array_slice<insn_change *const> use_changes () const;
|
|
|
|
private:
|
|
use_array get_new_uses (use_info *);
|
|
bool substitute_nondebug_use (use_info *);
|
|
bool substitute_nondebug_uses (set_info *);
|
|
bool try_to_preserve_debug_info (insn_change &, use_info *);
|
|
void substitute_debug_use (use_info *);
|
|
bool substitute_note (insn_info *, rtx, bool);
|
|
void substitute_notes (insn_info *, bool);
|
|
void substitute_note_uses (use_info *);
|
|
void substitute_optional_uses (set_info *);
|
|
|
|
// Represents the state of the function's RTL at the start of this
|
|
// combination attempt.
|
|
insn_change_watermark m_rtl_watermark;
|
|
|
|
// Represents the rtl-ssa state at the start of this combination attempt.
|
|
obstack_watermark m_attempt;
|
|
|
|
// The instruction that contains the definition, and that we're trying
|
|
// to delete.
|
|
insn_info *m_def_insn;
|
|
|
|
// The definition itself.
|
|
set_info *m_def;
|
|
|
|
// The destination and source of the single set that defines m_def.
|
|
// The destination is known to be a plain REG.
|
|
rtx m_dest;
|
|
rtx m_src;
|
|
|
|
// Contains the full list of changes that we want to make, in reverse
|
|
// postorder.
|
|
auto_vec<insn_change *> m_nondebug_changes;
|
|
};
|
|
|
|
// Class that represents one run of the pass.
|
|
class late_combine
|
|
{
|
|
public:
|
|
unsigned int execute (function *);
|
|
|
|
private:
|
|
rtx optimizable_set (set_info *);
|
|
bool check_register_pressure (insn_info *, rtx);
|
|
bool check_uses (set_info *, rtx);
|
|
bool combine_into_uses (set_info *, rtx, insn_info *);
|
|
bool parallelize_insns (set_info *, rtx, set_info *, rtx);
|
|
bool combine_cc_setter (set_info *, rtx);
|
|
bool combine_insn (insn_info *, insn_info *);
|
|
|
|
auto_vec<insn_info *> m_worklist;
|
|
|
|
// A spare PARALLEL rtx, or null if none.
|
|
rtx m_parallel = NULL_RTX;
|
|
};
|
|
|
|
insn_combination::insn_combination (set_info *def, rtx dest, rtx src)
|
|
: m_rtl_watermark (),
|
|
m_attempt (crtl->ssa->new_change_attempt ()),
|
|
m_def_insn (def->insn ()),
|
|
m_def (def),
|
|
m_dest (dest),
|
|
m_src (src),
|
|
m_nondebug_changes ()
|
|
{
|
|
}
|
|
|
|
array_slice<insn_change *const>
|
|
insn_combination::use_changes () const
|
|
{
|
|
return { m_nondebug_changes.address () + 1,
|
|
m_nondebug_changes.length () - 1 };
|
|
}
|
|
|
|
// USE is a direct or indirect use of m_def. Return the list of uses
|
|
// that would be needed after substituting m_def into the instruction.
|
|
// The returned list is marked as invalid if USE's insn and m_def_insn
|
|
// use different definitions for the same resource (register or memory).
|
|
use_array
|
|
insn_combination::get_new_uses (use_info *use)
|
|
{
|
|
auto *def = use->def ();
|
|
auto *use_insn = use->insn ();
|
|
|
|
use_array new_uses = use_insn->uses ();
|
|
new_uses = remove_uses_of_def (m_attempt, new_uses, def);
|
|
new_uses = merge_access_arrays (m_attempt, m_def_insn->uses (), new_uses);
|
|
if (new_uses.is_valid () && use->ebb () != m_def->ebb ())
|
|
new_uses = crtl->ssa->make_uses_available (m_attempt, new_uses, use->bb (),
|
|
use_insn->is_debug_insn ());
|
|
return new_uses;
|
|
}
|
|
|
|
// Start the process of trying to replace USE by substitution, given that
|
|
// USE occurs in a non-debug instruction. Check:
|
|
//
|
|
// - that the substitution can be represented in RTL
|
|
//
|
|
// - that each use of a resource (register or memory) within the new
|
|
// instruction has a consistent definition
|
|
//
|
|
// - that the new instruction is a recognized pattern
|
|
//
|
|
// - that the instruction can be placed somewhere that makes all definitions
|
|
// and uses valid, and that permits any new hard-register clobbers added
|
|
// during the recognition process
|
|
//
|
|
// Return true on success.
|
|
bool
|
|
insn_combination::substitute_nondebug_use (use_info *use)
|
|
{
|
|
insn_info *use_insn = use->insn ();
|
|
rtx_insn *use_rtl = use_insn->rtl ();
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
dump_insn_slim (dump_file, use->insn ()->rtl ());
|
|
|
|
// Reject second and subsequent uses if the target does not allow
|
|
// the defining instruction to be copied.
|
|
if (targetm.cannot_copy_insn_p
|
|
&& m_nondebug_changes.length () >= 2
|
|
&& targetm.cannot_copy_insn_p (m_def_insn->rtl ()))
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "-- The target does not allow multiple"
|
|
" copies of insn %d\n", m_def_insn->uid ());
|
|
return false;
|
|
}
|
|
|
|
// Check that we can change the instruction pattern. Leave recognition
|
|
// of the result till later.
|
|
insn_propagation prop (use_rtl, m_dest, m_src);
|
|
if (!prop.apply_to_pattern (&PATTERN (use_rtl))
|
|
|| prop.num_replacements == 0)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "-- RTL substitution failed\n");
|
|
return false;
|
|
}
|
|
|
|
use_array new_uses = get_new_uses (use);
|
|
if (!new_uses.is_valid ())
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "-- could not prove that all sources"
|
|
" are available\n");
|
|
return false;
|
|
}
|
|
|
|
// Create a tentative change for the use.
|
|
auto *where = XOBNEW (m_attempt, insn_change);
|
|
auto *use_change = new (where) insn_change (use_insn);
|
|
m_nondebug_changes.safe_push (use_change);
|
|
use_change->new_uses = new_uses;
|
|
|
|
struct local_ignore : ignore_nothing
|
|
{
|
|
local_ignore (const set_info *def, const insn_info *use_insn)
|
|
: m_def (def), m_use_insn (use_insn) {}
|
|
|
|
// We don't limit the number of insns per optimization, so ignoring all
|
|
// insns for all insns would lead to quadratic complexity. Just ignore
|
|
// the use and definition, which should be enough for most purposes.
|
|
bool
|
|
should_ignore_insn (const insn_info *insn)
|
|
{
|
|
return insn == m_def->insn () || insn == m_use_insn;
|
|
}
|
|
|
|
// Ignore the definition that we're removing, and all uses of it.
|
|
bool should_ignore_def (const def_info *def) { return def == m_def; }
|
|
|
|
const set_info *m_def;
|
|
const insn_info *m_use_insn;
|
|
};
|
|
|
|
auto ignore = local_ignore (m_def, use_insn);
|
|
|
|
// Moving instructions before register allocation could increase
|
|
// register pressure. Only try moving them after RA.
|
|
if (reload_completed && can_move_insn_p (use_insn))
|
|
use_change->move_range = { use_insn->bb ()->head_insn (),
|
|
use_insn->ebb ()->last_bb ()->end_insn () };
|
|
if (!restrict_movement (*use_change, ignore))
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "-- cannot satisfy all definitions and uses"
|
|
" in insn %d\n", INSN_UID (use_insn->rtl ()));
|
|
return false;
|
|
}
|
|
|
|
if (!recog (m_attempt, *use_change, ignore))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Apply substitute_nondebug_use to all direct and indirect uses of DEF.
|
|
// There will be at most one level of indirection.
|
|
bool
|
|
insn_combination::substitute_nondebug_uses (set_info *def)
|
|
{
|
|
for (use_info *use : def->nondebug_insn_uses ())
|
|
if (!use->is_live_out_use ()
|
|
&& !use->only_occurs_in_notes ()
|
|
&& !substitute_nondebug_use (use))
|
|
return false;
|
|
|
|
for (use_info *use : def->phi_uses ())
|
|
if (!substitute_nondebug_uses (use->phi ()))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
// USE_CHANGE.insn () is a debug instruction that uses m_def. Try to
|
|
// substitute the definition into the instruction and try to describe
|
|
// the result in USE_CHANGE. Return true on success. Failure means that
|
|
// the instruction must be reset instead.
|
|
bool
|
|
insn_combination::try_to_preserve_debug_info (insn_change &use_change,
|
|
use_info *use)
|
|
{
|
|
// Punt on unsimplified subregs of hard registers. In that case,
|
|
// propagation can succeed and create a wider reg than the one we
|
|
// started with.
|
|
if (HARD_REGISTER_NUM_P (use->regno ())
|
|
&& use->includes_subregs ())
|
|
return false;
|
|
|
|
insn_info *use_insn = use_change.insn ();
|
|
rtx_insn *use_rtl = use_insn->rtl ();
|
|
|
|
use_change.new_uses = get_new_uses (use);
|
|
if (!use_change.new_uses.is_valid ()
|
|
|| !restrict_movement (use_change))
|
|
return false;
|
|
|
|
insn_propagation prop (use_rtl, m_dest, m_src);
|
|
return prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl));
|
|
}
|
|
|
|
// USE_INSN is a debug instruction that uses m_def. Update it to reflect
|
|
// the fact that m_def is going to disappear. Try to preserve the source
|
|
// value if possible, but reset the instruction if not.
|
|
void
|
|
insn_combination::substitute_debug_use (use_info *use)
|
|
{
|
|
auto *use_insn = use->insn ();
|
|
rtx_insn *use_rtl = use_insn->rtl ();
|
|
|
|
auto use_change = insn_change (use_insn);
|
|
if (!try_to_preserve_debug_info (use_change, use))
|
|
{
|
|
use_change.new_uses = {};
|
|
use_change.move_range = use_change.insn ();
|
|
INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
|
|
}
|
|
insn_change *changes[] = { &use_change };
|
|
crtl->ssa->change_insns (changes);
|
|
}
|
|
|
|
// NOTE is a reg note of USE_INSN, which previously used m_def. Update
|
|
// the note to reflect the fact that m_def is going to disappear. Return
|
|
// true on success, or false if the note must be deleted.
|
|
//
|
|
// CAN_PROPAGATE is true if m_dest can be replaced with m_use.
|
|
bool
|
|
insn_combination::substitute_note (insn_info *use_insn, rtx note,
|
|
bool can_propagate)
|
|
{
|
|
if (REG_NOTE_KIND (note) == REG_EQUAL
|
|
|| REG_NOTE_KIND (note) == REG_EQUIV)
|
|
{
|
|
insn_propagation prop (use_insn->rtl (), m_dest, m_src);
|
|
return (prop.apply_to_note (&XEXP (note, 0))
|
|
&& (can_propagate || prop.num_replacements == 0));
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Update USE_INSN's notes after deciding to go ahead with the optimization.
|
|
// CAN_PROPAGATE is true if m_dest can be replaced with m_use.
|
|
void
|
|
insn_combination::substitute_notes (insn_info *use_insn, bool can_propagate)
|
|
{
|
|
rtx_insn *use_rtl = use_insn->rtl ();
|
|
rtx *ptr = ®_NOTES (use_rtl);
|
|
while (rtx note = *ptr)
|
|
{
|
|
if (substitute_note (use_insn, note, can_propagate))
|
|
ptr = &XEXP (note, 1);
|
|
else
|
|
*ptr = XEXP (note, 1);
|
|
}
|
|
}
|
|
|
|
// We've decided to go ahead with the substitution. Update all REG_NOTES
|
|
// involving USE.
|
|
void
|
|
insn_combination::substitute_note_uses (use_info *use)
|
|
{
|
|
insn_info *use_insn = use->insn ();
|
|
|
|
bool can_propagate = true;
|
|
if (use->only_occurs_in_notes ())
|
|
{
|
|
// The only uses are in notes. Try to keep the note if we can,
|
|
// but removing it is better than aborting the optimization.
|
|
insn_change use_change (use_insn);
|
|
use_change.new_uses = get_new_uses (use);
|
|
if (!use_change.new_uses.is_valid ()
|
|
|| !restrict_movement (use_change))
|
|
{
|
|
use_change.move_range = use_insn;
|
|
use_change.new_uses = remove_uses_of_def (m_attempt,
|
|
use_insn->uses (),
|
|
use->def ());
|
|
can_propagate = false;
|
|
}
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "%s notes in:\n",
|
|
can_propagate ? "updating" : "removing");
|
|
dump_insn_slim (dump_file, use_insn->rtl ());
|
|
}
|
|
substitute_notes (use_insn, can_propagate);
|
|
insn_change *changes[] = { &use_change };
|
|
crtl->ssa->change_insns (changes);
|
|
}
|
|
else
|
|
// We've already decided to update the insn's pattern and know that m_src
|
|
// will be available at the insn's new location. Now update its notes.
|
|
substitute_notes (use_insn, can_propagate);
|
|
}
|
|
|
|
// We've decided to go ahead with the substitution and we've dealt with
|
|
// all uses that occur in the patterns of non-debug insns. Update all
|
|
// other uses for the fact that m_def is about to disappear.
|
|
void
|
|
insn_combination::substitute_optional_uses (set_info *def)
|
|
{
|
|
if (auto insn_uses = def->all_insn_uses ())
|
|
{
|
|
use_info *use = *insn_uses.begin ();
|
|
while (use)
|
|
{
|
|
use_info *next_use = use->next_any_insn_use ();
|
|
if (use->is_in_debug_insn ())
|
|
substitute_debug_use (use);
|
|
else if (!use->is_live_out_use ())
|
|
substitute_note_uses (use);
|
|
use = next_use;
|
|
}
|
|
}
|
|
for (use_info *use : def->phi_uses ())
|
|
substitute_optional_uses (use->phi ());
|
|
}
|
|
|
|
// Try to perform the substitution. Return true on success.
|
|
bool
|
|
insn_combination::run ()
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "\ntrying to combine definition of r%d in:\n",
|
|
m_def->regno ());
|
|
dump_insn_slim (dump_file, m_def_insn->rtl ());
|
|
fprintf (dump_file, "into:\n");
|
|
}
|
|
|
|
auto def_change = insn_change::delete_insn (m_def_insn);
|
|
m_nondebug_changes.safe_push (&def_change);
|
|
|
|
if (!substitute_nondebug_uses (m_def)
|
|
|| !changes_are_worthwhile (m_nondebug_changes)
|
|
|| !crtl->ssa->verify_insn_changes (m_nondebug_changes))
|
|
return false;
|
|
|
|
// We've now decided that the optimization is valid and profitable.
|
|
// Allow it to be suppressed for bisection purposes.
|
|
if (!dbg_cnt (::late_combine))
|
|
return false;
|
|
|
|
substitute_optional_uses (m_def);
|
|
|
|
confirm_change_group ();
|
|
crtl->ssa->change_insns (m_nondebug_changes);
|
|
return true;
|
|
}
|
|
|
|
// DEF is the result of calling single_set_info on its instruction.
|
|
// See whether that instruction is a single_set that we can optimize.
|
|
// Return the set if so, otherwise return null.
|
|
rtx
|
|
late_combine::optimizable_set (set_info *def)
|
|
{
|
|
// For simplicity, don't try to handle sets of multiple hard registers.
|
|
// And for correctness, don't remove any assignments to the stack or
|
|
// frame pointers, since that would implicitly change the set of valid
|
|
// memory locations between this assignment and the next.
|
|
//
|
|
// Removing assignments to the hard frame pointer would invalidate
|
|
// backtraces.
|
|
if (!def->is_reg ()
|
|
|| def->regno () == STACK_POINTER_REGNUM
|
|
|| def->regno () == FRAME_POINTER_REGNUM
|
|
|| def->regno () == HARD_FRAME_POINTER_REGNUM)
|
|
return NULL_RTX;
|
|
|
|
auto *insn = def->insn ();
|
|
if (!insn->can_be_optimized ()
|
|
|| insn->is_asm ()
|
|
|| insn->is_call ()
|
|
|| insn->has_volatile_refs ()
|
|
|| insn->has_pre_post_modify ()
|
|
|| !can_move_insn_p (insn))
|
|
return NULL_RTX;
|
|
|
|
rtx set = single_set (insn->rtl ());
|
|
if (!set)
|
|
return NULL_RTX;
|
|
|
|
// For simplicity, don't try to handle subreg destinations.
|
|
rtx dest = SET_DEST (set);
|
|
if (!REG_P (dest) || REG_NREGS (dest) != 1 || def->regno () != REGNO (dest))
|
|
return NULL_RTX;
|
|
|
|
return set;
|
|
}
|
|
|
|
// Suppose that we can replace all uses of SET_DEST (SET) with SET_SRC (SET),
|
|
// where SET occurs in INSN. Return true if doing so is not likely to
|
|
// increase register pressure.
|
|
bool
|
|
late_combine::check_register_pressure (insn_info *insn, rtx set)
|
|
{
|
|
// Plain register-to-register moves do not establish a register class
|
|
// preference and have no well-defined effect on the register allocator.
|
|
// If changes in register class are needed, the register allocator is
|
|
// in the best position to place those changes. If no change in
|
|
// register class is needed, then the optimization reduces register
|
|
// pressure if SET_SRC (set) was already live at uses, otherwise the
|
|
// optimization is pressure-neutral.
|
|
rtx src = SET_SRC (set);
|
|
if (REG_P (src))
|
|
return true;
|
|
|
|
// On the same basis, substituting a SET_SRC that contains a single
|
|
// pseudo register either reduces pressure or is pressure-neutral,
|
|
// subject to the constraints below. We would need to do more
|
|
// analysis for SET_SRCs that use more than one pseudo register.
|
|
unsigned int nregs = 0;
|
|
for (auto *use : insn->uses ())
|
|
if (use->is_reg ()
|
|
&& !HARD_REGISTER_NUM_P (use->regno ())
|
|
&& !use->only_occurs_in_notes ())
|
|
if (++nregs > 1)
|
|
return false;
|
|
|
|
// If there are no pseudo registers in SET_SRC then the optimization
|
|
// should improve register pressure.
|
|
if (nregs == 0)
|
|
return true;
|
|
|
|
// We'd be substituting (set (reg R1) SRC) where SRC is known to
|
|
// contain a single pseudo register R2. Assume for simplicity that
|
|
// each new use of R2 would need to be in the same class C as the
|
|
// current use of R2. If, for a realistic allocation, C is a
|
|
// non-strict superset of the R1's register class, the effect on
|
|
// register pressure should be positive or neutral. If instead
|
|
// R1 occupies a different register class from R2, or if R1 has
|
|
// more allocation freedom than R2, then there's a higher risk that
|
|
// the effect on register pressure could be negative.
|
|
//
|
|
// First use constrain_operands to get the most likely choice of
|
|
// alternative. For simplicity, just handle the case where the
|
|
// output operand is operand 0.
|
|
extract_insn (insn->rtl ());
|
|
rtx dest = SET_DEST (set);
|
|
if (recog_data.n_operands == 0
|
|
|| recog_data.operand[0] != dest)
|
|
return false;
|
|
|
|
if (!constrain_operands (0, get_enabled_alternatives (insn->rtl ())))
|
|
return false;
|
|
|
|
preprocess_constraints (insn->rtl ());
|
|
auto *alt = which_op_alt ();
|
|
auto dest_class = alt[0].cl;
|
|
|
|
// Check operands 1 and above.
|
|
auto check_src = [&] (unsigned int i)
|
|
{
|
|
if (recog_data.is_operator[i])
|
|
return true;
|
|
|
|
rtx op = recog_data.operand[i];
|
|
if (CONSTANT_P (op))
|
|
return true;
|
|
|
|
if (SUBREG_P (op))
|
|
op = SUBREG_REG (op);
|
|
if (REG_P (op))
|
|
{
|
|
// Ignore hard registers. We've already rejected uses of non-fixed
|
|
// hard registers in the SET_SRC.
|
|
if (HARD_REGISTER_P (op))
|
|
return true;
|
|
|
|
// Make sure that the source operand's class is at least as
|
|
// permissive as the destination operand's class.
|
|
auto src_class = alternative_class (alt, i);
|
|
if (dest_class != src_class)
|
|
{
|
|
auto extra_dest_regs = (reg_class_contents[dest_class]
|
|
& ~reg_class_contents[src_class]
|
|
& ~fixed_reg_set);
|
|
if (!hard_reg_set_empty_p (extra_dest_regs))
|
|
return false;
|
|
}
|
|
|
|
// Make sure that the source operand occupies no more hard
|
|
// registers than the destination operand. This mostly matters
|
|
// for subregs.
|
|
if (targetm.class_max_nregs (dest_class, GET_MODE (dest))
|
|
< targetm.class_max_nregs (src_class, GET_MODE (op)))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
for (int i = 1; i < recog_data.n_operands; ++i)
|
|
if (recog_data.operand_type[i] != OP_OUT && !check_src (i))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Check uses of DEF to see whether there is anything obvious that
|
|
// prevents the substitution of SET into uses of DEF.
|
|
bool
|
|
late_combine::check_uses (set_info *def, rtx set)
|
|
{
|
|
use_info *prev_use = nullptr;
|
|
for (use_info *use : def->nondebug_insn_uses ())
|
|
{
|
|
insn_info *use_insn = use->insn ();
|
|
|
|
if (use->is_live_out_use ())
|
|
continue;
|
|
if (use->only_occurs_in_notes ())
|
|
continue;
|
|
|
|
// We cannot replace all uses if the value is live on exit.
|
|
if (use->is_artificial ())
|
|
return false;
|
|
|
|
// Avoid increasing the complexity of instructions that
|
|
// reference allocatable hard registers.
|
|
if (!REG_P (SET_SRC (set))
|
|
&& !reload_completed
|
|
&& (accesses_include_nonfixed_hard_registers (use_insn->uses ())
|
|
|| accesses_include_nonfixed_hard_registers (use_insn->defs ())))
|
|
return false;
|
|
|
|
// Don't substitute into a non-local goto, since it can then be
|
|
// treated as a jump to local label, e.g. in shorten_branches.
|
|
// ??? But this shouldn't be necessary.
|
|
if (use_insn->is_jump ()
|
|
&& find_reg_note (use_insn->rtl (), REG_NON_LOCAL_GOTO, NULL_RTX))
|
|
return false;
|
|
|
|
// Reject cases where one of the uses is a function argument.
|
|
// The combine attempt should fail anyway, but this is a common
|
|
// case that is easy to check early.
|
|
if (use_insn->is_call ()
|
|
&& HARD_REGISTER_P (SET_DEST (set))
|
|
&& find_reg_fusage (use_insn->rtl (), USE, SET_DEST (set)))
|
|
return false;
|
|
|
|
// We'll keep the uses in their original order, even if we move
|
|
// them relative to other instructions. Make sure that non-final
|
|
// uses do not change any values that occur in the SET_SRC.
|
|
if (prev_use && prev_use->ebb () == use->ebb ())
|
|
{
|
|
def_info *ultimate_def = look_through_degenerate_phi (def);
|
|
if (insn_clobbers_resources (prev_use->insn (),
|
|
ultimate_def->insn ()->uses ()))
|
|
return false;
|
|
}
|
|
|
|
prev_use = use;
|
|
}
|
|
|
|
for (use_info *use : def->phi_uses ())
|
|
if (!use->phi ()->is_degenerate ()
|
|
|| !check_uses (use->phi (), set))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Try to remove DEF's instruction by substituting DEF into all uses.
|
|
// SET is the rtx set associated with DEF. If the optimization moves any
|
|
// instructions before CURSOR, add those instructions to the end of m_worklist.
|
|
bool
|
|
late_combine::combine_into_uses (set_info *def, rtx set, insn_info *cursor)
|
|
{
|
|
auto *insn = def->insn ();
|
|
|
|
// Don't prolong the live ranges of allocatable hard registers, or put
|
|
// them into more complicated instructions. Failing to prevent this
|
|
// could lead to spill failures, or at least to worst register allocation.
|
|
if (!reload_completed
|
|
&& accesses_include_nonfixed_hard_registers (insn->uses ()))
|
|
return false;
|
|
|
|
if (!reload_completed && !check_register_pressure (insn, set))
|
|
return false;
|
|
|
|
if (!check_uses (def, set))
|
|
return false;
|
|
|
|
insn_combination combination (def, SET_DEST (set), SET_SRC (set));
|
|
if (!combination.run ())
|
|
return false;
|
|
|
|
for (auto *use_change : combination.use_changes ())
|
|
if (*use_change->insn () < *cursor)
|
|
m_worklist.safe_push (use_change->insn ());
|
|
else
|
|
break;
|
|
return true;
|
|
}
|
|
|
|
// CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
|
|
// a single_set that sets (only) OTHER_DEF. CC_SET is known to set a
|
|
// condition-code register and the instruction that contains CC_SET is
|
|
// known to use OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
|
|
bool
|
|
late_combine::parallelize_insns (set_info *cc_def, rtx cc_set,
|
|
set_info *other_def, rtx other_set)
|
|
{
|
|
auto attempt = crtl->ssa->new_change_attempt ();
|
|
|
|
insn_info *cc_insn = cc_def->insn ();
|
|
insn_info *other_insn = other_def->insn ();
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
|
|
other_insn->uid (), cc_insn->uid ());
|
|
|
|
// Try to substitute OTHER_SET into CC_INSN.
|
|
insn_change_watermark rtl_watermark;
|
|
rtx_insn *cc_rtl = cc_insn->rtl ();
|
|
insn_propagation prop (cc_rtl, SET_DEST (other_set), SET_SRC (other_set));
|
|
if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
|
|
|| prop.num_replacements == 0)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
|
|
other_def->regno ());
|
|
return false;
|
|
}
|
|
|
|
// Restrict the uses to those outside notes.
|
|
use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
|
|
use_array other_set_uses = remove_note_accesses (attempt,
|
|
other_insn->uses ());
|
|
|
|
// Remove the use of the substituted value.
|
|
cc_uses = remove_uses_of_def (attempt, cc_uses, other_def);
|
|
|
|
// Get the list of uses for the new instruction.
|
|
insn_change cc_change (cc_insn);
|
|
cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
|
|
if (!cc_change.new_uses.is_valid ())
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "-- cannot merge uses\n");
|
|
return false;
|
|
}
|
|
|
|
// The instruction initially defines just two registers. recog can add
|
|
// extra clobbers if necessary.
|
|
auto_vec<access_info *, 2> new_defs;
|
|
new_defs.quick_push (cc_def);
|
|
new_defs.quick_push (other_def);
|
|
sort_accesses (new_defs);
|
|
cc_change.new_defs = def_array (access_array (new_defs));
|
|
|
|
// Make sure there is somewhere that the new instruction could live.
|
|
auto other_change = insn_change::delete_insn (other_insn);
|
|
insn_change *changes[] = { &other_change, &cc_change };
|
|
cc_change.move_range = cc_insn->ebb ()->insn_range ();
|
|
if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
|
|
return false;
|
|
}
|
|
|
|
// Tentatively install the new pattern. By convention, the CC set
|
|
// must be first.
|
|
if (m_parallel)
|
|
{
|
|
XVECEXP (m_parallel, 0, 0) = cc_set;
|
|
XVECEXP (m_parallel, 0, 1) = other_set;
|
|
}
|
|
else
|
|
{
|
|
rtvec vec = gen_rtvec (2, cc_set, other_set);
|
|
m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
|
|
}
|
|
validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
|
|
|
|
// These routines report failures themselves.
|
|
if (!recog (attempt, cc_change, ignore_changing_insns (changes))
|
|
|| !changes_are_worthwhile (changes)
|
|
|| !crtl->ssa->verify_insn_changes (changes))
|
|
return false;
|
|
|
|
remove_reg_equal_equiv_notes (cc_rtl);
|
|
confirm_change_group ();
|
|
crtl->ssa->change_insns (changes);
|
|
m_parallel = NULL_RTX;
|
|
return true;
|
|
}
|
|
|
|
// CC_SET is a single_set that sets (only) CC_DEF. See whether CC_DEF
|
|
// is a definition of a condition-code register and try to optimize it
|
|
// with related instructions if so. Return true if something changed.
|
|
//
|
|
// This function looks for sequences of the form:
|
|
//
|
|
// A: (set (reg R1) X1)
|
|
// B: ...instructions that might change the value of X1...
|
|
// C: (set (reg CC) X2) // X2 uses R1
|
|
//
|
|
// and tries to change them to:
|
|
//
|
|
// C': [(set (reg CC) X2')
|
|
// (set (reg R1) X1)]
|
|
// B: ...instructions that might change the value of X1...
|
|
//
|
|
// where X2' is the result of replacing R1 with X1 in X2.
|
|
//
|
|
// Combine can handle this optimization if B doesn't exist and if A and
|
|
// C are in the same BB. This pass instead handles cases where B does
|
|
// exist and cases where A and C are in different BBs of the same EBB.
|
|
bool
|
|
late_combine::combine_cc_setter (set_info *cc_def, rtx cc_set)
|
|
{
|
|
// Check for a set of a CC register. This isn't needed for correctness;
|
|
// it's just a way of narrowing the search space. It could be relaxed if
|
|
// there are other situations that would benefit from the same optimization.
|
|
if (!HARD_REGISTER_NUM_P (cc_def->regno ())
|
|
|| GET_MODE_CLASS (cc_def->mode()) != MODE_CC)
|
|
return false;
|
|
|
|
// Search the registers used by the CC setter for an easily-substitutable
|
|
// def-use chain.
|
|
for (use_info *other_use : cc_def->insn ()->uses ())
|
|
if (auto *other_def = other_use->def ())
|
|
if (other_use->regno () != cc_def->regno ()
|
|
&& other_def->ebb () == cc_def->ebb ()
|
|
&& other_def == single_set_info (other_def->insn ()))
|
|
if (rtx other_set = optimizable_set (other_def))
|
|
if (parallelize_insns (cc_def, cc_set, other_def, other_set))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
// Try to optimize INSN in some way. If the optimization moves any
|
|
// instructions before CURSOR, and if further optimizations might be
|
|
// possible on those instructions, add them to the end of m_worklist.
|
|
bool
|
|
late_combine::combine_insn (insn_info *insn, insn_info *cursor)
|
|
{
|
|
if (set_info *def = single_set_info (insn))
|
|
if (rtx set = optimizable_set (def))
|
|
return (combine_into_uses (def, set, cursor)
|
|
|| combine_cc_setter (def, set));
|
|
|
|
return false;
|
|
}
|
|
|
|
// Run the pass on function FN.
|
|
unsigned int
|
|
late_combine::execute (function *fn)
|
|
{
|
|
// Initialization.
|
|
calculate_dominance_info (CDI_DOMINATORS);
|
|
df_analyze ();
|
|
crtl->ssa = new rtl_ssa::function_info (fn);
|
|
// Don't allow memory_operand to match volatile MEMs.
|
|
init_recog_no_volatile ();
|
|
|
|
insn_info *insn = *crtl->ssa->nondebug_insns ().begin ();
|
|
while (insn)
|
|
{
|
|
if (!insn->is_artificial ())
|
|
{
|
|
insn_info *prev = insn->prev_nondebug_insn ();
|
|
if (combine_insn (insn, prev))
|
|
{
|
|
// Any instructions that get added to the worklist were
|
|
// previously after PREV. Thus if we were able to move
|
|
// an instruction X before PREV during one combination,
|
|
// X cannot depend on any instructions that we move before
|
|
// PREV during subsequent combinations. This means that
|
|
// the worklist should be free of backwards dependencies,
|
|
// even if it isn't necessarily in RPO.
|
|
for (unsigned int i = 0; i < m_worklist.length (); ++i)
|
|
combine_insn (m_worklist[i], prev);
|
|
m_worklist.truncate (0);
|
|
insn = prev;
|
|
}
|
|
}
|
|
insn = insn->next_nondebug_insn ();
|
|
}
|
|
|
|
// Finalization.
|
|
if (crtl->ssa->perform_pending_updates ())
|
|
cleanup_cfg (0);
|
|
|
|
delete crtl->ssa;
|
|
crtl->ssa = nullptr;
|
|
|
|
// Make the recognizer allow volatile MEMs again.
|
|
init_recog ();
|
|
free_dominance_info (CDI_DOMINATORS);
|
|
return 0;
|
|
}
|
|
|
|
class pass_late_combine : public rtl_opt_pass
|
|
{
|
|
public:
|
|
pass_late_combine (gcc::context *ctxt)
|
|
: rtl_opt_pass (pass_data_late_combine, ctxt)
|
|
{}
|
|
|
|
// opt_pass methods:
|
|
opt_pass *clone () override { return new pass_late_combine (m_ctxt); }
|
|
bool gate (function *) override;
|
|
unsigned int execute (function *) override;
|
|
};
|
|
|
|
bool
|
|
pass_late_combine::gate (function *)
|
|
{
|
|
return optimize > 0 && flag_late_combine_instructions;
|
|
}
|
|
|
|
unsigned int
|
|
pass_late_combine::execute (function *fn)
|
|
{
|
|
return late_combine ().execute (fn);
|
|
}
|
|
|
|
} // end namespace
|
|
|
|
// Create a new CC fusion pass instance.
|
|
|
|
rtl_opt_pass *
|
|
make_pass_late_combine (gcc::context *ctxt)
|
|
{
|
|
return new pass_late_combine (ctxt);
|
|
}
|