/* RTL dead zero/sign extension (code) elimination.
   Copyright (C) 2000-2026 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "backend.h"
|
|
#include "rtl.h"
|
|
#include "tree.h"
|
|
#include "memmodel.h"
|
|
#include "insn-config.h"
|
|
#include "emit-rtl.h"
|
|
#include "expr.h"
|
|
#include "recog.h"
|
|
#include "cfganal.h"
|
|
#include "tree-pass.h"
|
|
#include "cfgrtl.h"
|
|
#include "rtl-iter.h"
|
|
#include "df.h"
|
|
#include "print-rtl.h"
|
|
#include "dbgcnt.h"
|
|
#include "diagnostic-core.h"
|
|
#include "target.h"
|
|
|
|
/* These should probably move into a C++ class.  */
static vec<bitmap_head> livein;
static bitmap all_blocks;
static bitmap livenow;
static bitmap changed_pseudos;
static bool modify;

/* We consider four bit groups for liveness:
   bit 0..7   (least significant byte)
   bit 8..15  (second least significant byte)
   bit 16..31
   bit 32..BITS_PER_WORD-1  */
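
/* Liveness of register R is therefore tracked with four bitmap bits,
   4*R + 0 through 4*R + 3.  For example, bit 4*R + 1 set in LIVENOW
   means bits 8..15 of register R are live.  */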

/* For the given REG, return the number of bit groups implied by the
   size of the REG's mode, up to a maximum of 4 (number of bit groups
   tracked by this pass).

   For partial integer and variable sized modes also return 4.  This
   could possibly be refined for something like PSI mode, but it
   does not seem worth the effort.  */
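
/* For example, QImode (1 byte) yields 1 group, HImode 2, SImode 3
   and DImode 4, while a 16-byte TImode REG is capped at 4 groups.  */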

static int
group_limit (const_rtx reg)
{
  machine_mode mode = GET_MODE (reg);

  if (!GET_MODE_BITSIZE (mode).is_constant ())
    return 4;

  int size = GET_MODE_SIZE (mode).to_constant ();

  size = exact_log2 (size);

  if (size < 0)
    return 4;

  size++;
  return (size > 4 ? 4 : size);
}

/* Make all bit groups live for REGNO in bitmap BMAP.  For hard regs,
   we assume all groups are live.  For a pseudo we consider the size
   of the pseudo to avoid creating unnecessarily live chunks of data.  */

static void
make_reg_live (bitmap bmap, int regno)
{
  int limit;

  /* For pseudos we can use the mode to limit how many bit groups
     are marked as live since a pseudo only has one mode.  Hard
     registers have to be handled more conservatively.  */
  if (regno >= FIRST_PSEUDO_REGISTER)
    {
      rtx reg = regno_reg_rtx[regno];
      limit = group_limit (reg);
    }
  else
    limit = 4;

  for (int i = 0; i < limit; i++)
    bitmap_set_bit (bmap, regno * 4 + i);
}

/* Note this pass could be used to narrow memory loads too.  It's
   not clear if that's profitable or not in general.  */

#define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)

/* If we know the destination of CODE only uses some low bits
   (say just the QI bits of an SI operation), then return true
   if we can propagate the need for just the subset of bits
   from the destination to the sources.

   FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
   operand 0.  Thus it likely would need some special casing to handle.  */

static bool
safe_for_live_propagation (rtx_code code)
{
  /* First handle rtx classes which as a whole are known to
     be either safe or unsafe.  */
  switch (GET_RTX_CLASS (code))
    {
    case RTX_OBJ:
    case RTX_CONST_OBJ:
      return true;
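
    /* Any input bit can affect the result of a comparison (and the
       selector of something like an IF_THEN_ELSE), so we must not
       narrow liveness through these codes.  */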
    case RTX_COMPARE:
    case RTX_COMM_COMPARE:
    case RTX_TERNARY:
      return false;

    default:
      break;
    }

  /* What's left are specific codes.  We only need to identify those
     which are safe.  */
  switch (code)
    {
    /* These are trivially safe.  */
    case SUBREG:
    case NOT:
    case ZERO_EXTEND:
    case SIGN_EXTEND:
    case TRUNCATE:
    case PLUS:
    case MINUS:
    case MULT:
    case SMUL_HIGHPART:
    case UMUL_HIGHPART:
    case AND:
    case IOR:
    case XOR:
      return true;

    /* We can propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
    case SS_ASHIFT:
    case US_ASHIFT:
      return true;

    /* There may be other safe codes.  If so they can be added
       individually when discovered.  */
    default:
      return false;
    }
}

/* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
   set/clobbered by OBJ contained in INSN.

   Conceptually it is always safe to ignore a particular destination
   here as that will result in more chunks of data being considered
   live.  That's what happens when we "continue" the main loop when
   we see something we don't know how to handle such as a vector
   mode destination.

   The more accurate we are in identifying what objects (and chunks
   within an object) are set by INSN, the more aggressive the
   optimization phase during use handling will be.  */

static bool
ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
{
  bool skipped_dest = false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
    {
      const_rtx x = *iter;

      /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
      if (x == NULL_RTX)
	continue;

      if (UNSPEC_P (x))
	continue;

      if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
	{
	  unsigned bit = 0;
	  x = SET_DEST (x);

	  /* We don't support vector destinations or destinations
	     wider than DImode.  */
	  scalar_mode outer_mode;
	  if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
	      || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
	    {
	      /* Skip the subrtxs of this destination.  There is
		 little value in iterating into the subobjects, so
		 just skip them for a bit of efficiency.  */
	      skipped_dest = true;
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* We could have (strict_low_part (subreg ...)).  We can not just
	     strip the STRICT_LOW_PART as that would result in clearing
	     some bits in LIVENOW that are still live.  So process the
	     STRICT_LOW_PART specially.  */
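	  /* For example, (set (strict_low_part (subreg:QI (reg:SI X) 0)) ...)
	     writes only bits 0..7 of X; the remaining bit groups of X
	     must stay live.  */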
	  if (GET_CODE (x) == STRICT_LOW_PART)
	    {
	      x = XEXP (x, 0);

	      /* The only valid operand of a STRICT_LOW_PART is a non
		 paradoxical SUBREG.  */
	      gcc_assert (SUBREG_P (x)
			  && !paradoxical_subreg_p (x)
			  && SUBREG_BYTE (x).is_constant ());

	      /* I think we should always see a REG here.  But let's
		 be sure.  */
	      gcc_assert (REG_P (SUBREG_REG (x)));

	      /* The inner mode might be larger, just punt for
		 that case.  Remember, we can not just continue to process
		 the inner RTXs due to the STRICT_LOW_PART.  */
	      if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
		  || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
		{
		  /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
		     process them because it'll set objects as no longer
		     live when they are in fact still live.  */
		  skipped_dest = true;
		  iter.skip_subrtxes ();
		  continue;
		}

	      /* LIVE_TMP contains the set groups that are live-out and set in
		 this insn.  It is used to narrow the groups live-in for the
		 inputs of this insn.

		 The simple thing to do is mark all the groups as live, but
		 that will significantly inhibit optimization.

		 We also need to be careful in the case where we have an in-out
		 operand.  If we're not careful we'd clear LIVE_TMP
		 incorrectly.  */
	      HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
	      int limit = group_limit (SUBREG_REG (x));
	      for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
		if (bitmap_bit_p (livenow, i))
		  bitmap_set_bit (live_tmp, i);

	      if (bitmap_empty_p (live_tmp))
		make_reg_live (live_tmp, rn);

	      /* The mode of the SUBREG tells us how many bits we can
		 clear.  */
	      machine_mode mode = GET_MODE (x);
	      HOST_WIDE_INT size
		= exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
	      bitmap_clear_range (livenow, 4 * rn, size);

	      /* We have fully processed this destination.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* Phase one of destination handling.  First remove any wrapper
	     such as SUBREG or ZERO_EXTRACT.  */
	  unsigned HOST_WIDE_INT mask
	    = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
	  if (SUBREG_P (x))
	    {
	      /* If we have a SUBREG destination that is too wide, just
		 skip the destination rather than continuing this iterator.
		 While continuing would be better, we'd need to strip the
		 subreg and restart within the SET processing rather than
		 the top of the loop which just complicates the flow even
		 more.  */
	      if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
		  || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
		{
		  skipped_dest = true;
		  iter.skip_subrtxes ();
		  continue;
		}

	      /* We can safely strip a paradoxical subreg.  The inner mode will
		 be narrower than the outer mode.  We'll clear fewer bits in
		 LIVENOW than we'd like, but that's always safe.  */
	      if (paradoxical_subreg_p (x))
		x = XEXP (x, 0);
	      else if (SUBREG_BYTE (x).is_constant ())
		{
		  bit = subreg_lsb (x).to_constant ();
		  mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
		  gcc_assert (mask);
		  x = SUBREG_REG (x);
		}
	      else
		gcc_unreachable ();
	    }

	  if (GET_CODE (x) == ZERO_EXTRACT)
	    {
	      /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
		 modifies the bits referenced in the ZERO_EXTRACT, the rest
		 remain the same.  Thus we can not continue here, we must
		 either figure out what part of the destination is modified
		 or skip the sub-rtxs.  */
	      skipped_dest = true;
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* BIT >= 64 indicates something went horribly wrong.  */
	  gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);

	  /* Now handle the actual object that was changed.  */
	  if (REG_P (x))
	    {
	      /* LIVE_TMP contains the set groups that are live-out and set in
		 this insn.  It is used to narrow the groups live-in for the
		 inputs of this insn.

		 The simple thing to do is mark all the groups as live, but
		 that will significantly inhibit optimization.

		 We also need to be careful in the case where we have an in-out
		 operand.  If we're not careful we'd clear LIVE_TMP
		 incorrectly.  */
	      HOST_WIDE_INT rn = REGNO (x);
	      int limit = group_limit (x);
	      for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
		if (bitmap_bit_p (livenow, i))
		  bitmap_set_bit (live_tmp, i);

	      if (bitmap_empty_p (live_tmp))
		make_reg_live (live_tmp, rn);

	      /* Now clear the bits known written by this instruction.
		 Note that BIT need not be a power of two, consider a
		 ZERO_EXTRACT destination.  */
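	      /* For example, a QImode write at bit 0 yields START = 0 and
		 END = 1 (clearing only group 0), while an SImode write at
		 bit 0 yields START = 0 and END = 3, clearing groups 0
		 through 2.  */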
	      int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
	      int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
			 : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
			 : (mask & 0xff00) ? 2 : 1);
	      bitmap_clear_range (livenow, 4 * rn + start, end - start);
	    }
	  /* Some ports generate (clobber (const_int)).  */
	  else if (CONST_INT_P (x))
	    continue;
	  else
	    gcc_assert (CALL_P (insn)
			|| MEM_P (x)
			|| x == pc_rtx
			|| GET_CODE (x) == SCRATCH);

	  iter.skip_subrtxes ();
	}
      else if (GET_CODE (x) == COND_EXEC)
	{
	  /* This isn't ideal, but may not be so bad in practice.  */
	  skipped_dest = true;
	  iter.skip_subrtxes ();
	}
    }
  return skipped_dest;
}

/* INSN is a right shift and the second insn in a shift pair that is a
   sign or zero extension (SET is the single set associated with INSN).

   Replace the source of SET with NEW_SRC which is a source register
   from NEW_SRC_INSN (the left shift in the pair).  This is effectively
   the same as the replacement we do for ZERO/SIGN extends on targets
   that support those insns.  */
static void
ext_dce_try_optimize_rshift (rtx_insn *insn, rtx set, rtx new_src,
			     rtx_insn *new_src_insn)
{
  /* If the modes are not the same or one is a hard register, then
     conservatively do nothing.  */
  if (GET_MODE (SET_SRC (set)) != GET_MODE (new_src)
      || !REG_P (XEXP (SET_SRC (set), 0))
      || !REG_P (new_src)
      || REGNO (XEXP (SET_SRC (set), 0)) < FIRST_PSEUDO_REGISTER
      || REGNO (new_src) < FIRST_PSEUDO_REGISTER)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Processing insn:\n");
      dump_insn_slim (dump_file, insn);
      fprintf (dump_file, "Trying to simplify pattern:\n");
      print_rtl_single (dump_file, SET_SRC (set));
    }

  /* We decided to do the optimization but allow it to be rejected for
     bisection purposes.  */
  if (!dbg_cnt (::ext_dce))
    {
      if (dump_file)
	fprintf (dump_file, "Rejected due to debug counter.\n");
      return;
    }

  /* We're going to generate a fresh insn for the move, so put it
     into a sequence that we can emit after the current insn.  */
  start_sequence ();
  emit_move_insn (SET_DEST (set), new_src);
  rtx_insn *seq = end_sequence ();
  emit_insn_after (seq, insn);

  /* Mark the destination as changed.  */
  rtx x = SET_DEST (set);
  while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
    x = XEXP (x, 0);
  gcc_assert (REG_P (x));
  bitmap_set_bit (changed_pseudos, REGNO (x));

  if (dump_file)
    {
      fprintf (dump_file, "Successfully transformed to:\n");
      print_rtl_single (dump_file, PATTERN (seq));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);

  /* If NEW_SRC died in its prior location, then we need to remove the
     death note and move it to the new location.  */
  rtx note = find_regno_note (new_src_insn, REG_DEAD, REGNO (new_src));
  if (note)
    {
      remove_note (new_src_insn, note);
      add_reg_note (insn, REG_DEAD, new_src);
    }
}

/* INSN has a sign/zero extended source inside SET that we will
   try to turn into a SUBREG.  */
static void
ext_dce_try_optimize_extension (rtx_insn *insn, rtx set)
{
  rtx src = SET_SRC (set);
  rtx inner = XEXP (src, 0);

  /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     not useful for this optimization.  */
  if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
    return;

  rtx new_pattern;
  if (dump_file)
    {
      fprintf (dump_file, "Processing insn:\n");
      dump_insn_slim (dump_file, insn);
      fprintf (dump_file, "Trying to simplify pattern:\n");
      print_rtl_single (dump_file, SET_SRC (set));
    }

  /* We decided to do the optimization but allow it to be rejected for
     bisection purposes.  */
  if (!dbg_cnt (::ext_dce))
    {
      if (dump_file)
	fprintf (dump_file, "Rejected due to debug counter.\n");
      return;
    }

  new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
				     GET_MODE (inner), 0);
  /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     We must not pass that as a replacement pattern to validate_change.  */
  if (new_pattern)
    {
      int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);

      rtx x = SET_DEST (set);
      while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
	x = XEXP (x, 0);

      gcc_assert (REG_P (x));
      if (ok)
	bitmap_set_bit (changed_pseudos, REGNO (x));

      if (dump_file)
	{
	  if (ok)
	    fprintf (dump_file, "Successfully transformed to:\n");
	  else
	    fprintf (dump_file, "Failed transformation to:\n");

	  print_rtl_single (dump_file, new_pattern);
	  fprintf (dump_file, "\n");
	}

      /* INSN may have a REG_EQUAL note indicating that the value was
	 sign or zero extended.  That note is no longer valid since we've
	 just removed the extension.  Just wipe the notes.  */
      remove_reg_equal_equiv_notes (insn, false);
    }
  else
    {
      if (dump_file)
	fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
    }
}

/* Some operators imply that their second operand is fully live,
   regardless of how many bits in the output are live.  An example
   would be the shift count on a target without SHIFT_COUNT_TRUNCATED
   defined.

   Return TRUE if CODE is such an operator.  FALSE otherwise.  */

static bool
binop_implies_op2_fully_live (rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
    case ROTATE:
    case ROTATERT:
    case SS_ASHIFT:
    case US_ASHIFT:
      return !SHIFT_COUNT_TRUNCATED;

    default:
      return false;
    }
}

/* X, with code CODE, is an operation for which safe_for_live_propagation
   holds true, and bits set in MASK are live in the result.  Compute a
   mask of (potentially) live bits in the non-constant inputs.  In case of
   binop_implies_op2_fully_live (e.g. shifts), the computed mask may
   exclusively pertain to the first operand.

   This looks wrong as we may have some important operations embedded as
   operands of another operation.  For example, we might have an extension
   wrapping a shift.  It really feels like this needs to be recursing down
   into operands much more often.  */

unsigned HOST_WIDE_INT
carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
{
  if (mask == 0)
    return 0;

  enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
  unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);

  /* While we don't try to optimize operations on types larger
     than 64 bits, we do want to make sure not to invoke undefined
     behavior when presented with such operations during use
     processing.  The safe thing to do is to just return mmask
     for that scenario indicating every possible chunk is live.  */
  scalar_int_mode smode;
  if (!is_a <scalar_int_mode> (mode, &smode)
      || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
    return mmask;

  switch (code)
    {
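    /* Carries propagate from less significant bits toward more
       significant bits, so if bit B of the result is live, then
       bits 0 .. B of the inputs may matter.  E.g. a live result
       mask of 0x80 yields an input mask of 0xff.  */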
    case PLUS:
    case MINUS:
    case MULT:
      return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
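    /* E.g. for (ashift:SI X (const_int 8)) with live result bits
       0xff00, only bits 0xff (0xff00 >> 8) of X are needed.  */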
    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
      return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case LSHIFTRT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	return mmask & (mask << INTVAL (XEXP (x, 1)));
      return mmask;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case ASHIFTRT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	{
	  HOST_WIDE_INT sign = 0;
	  if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
	      > GET_MODE_BITSIZE (smode))
	    sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
	  return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
	}
      return mmask;

    case SMUL_HIGHPART:
    case UMUL_HIGHPART:
      if (XEXP (x, 1) == const0_rtx)
	return 0;
      if (XEXP (x, 1) == const1_rtx)
	return mmask;
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  if (pow2p_hwi (INTVAL (XEXP (x, 1))))
	    return mmask & (mask << (GET_MODE_BITSIZE (smode)
				     - exact_log2 (INTVAL (XEXP (x, 1)))));

	  int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
		      - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
	  if (bits < GET_MODE_BITSIZE (smode))
	    return (HOST_WIDE_INT_1U << bits) - 1;
	}
      return mmask;

    case SIGN_EXTEND:
      if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
	  || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
	return -1;

      /* We want the mode of the inner object.  We need to ensure its
	 sign bit is on in MASK.  */
      mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
      if (mask & ~GET_MODE_MASK (mode))
	mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
				     - 1);

      /* Recurse into the operand.  */
      return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));

    case ZERO_EXTEND:
      if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
	  || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
	return -1;

      /* Recurse into the operand.  */
      return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case SS_ASHIFT:
    case US_ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	{
	  return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
			     >> (INTVAL (XEXP (x, 1))
				 + (XEXP (x, 1) != const0_rtx
				    && code == SS_ASHIFT))))
		  | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
	}
      return mmask;

    default:
      return mask;
    }
}

/* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
   for any chunks of pseudos that become live, potentially filtering using
   bits from LIVE_TMP.

   If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
   the extended bits are never read and mark pseudos which had extensions
   eliminated in CHANGED_PSEUDOS.  */

static void
ext_dce_process_uses (rtx_insn *insn, rtx obj,
		      bitmap live_tmp, bool skipped_dest)
{
  subrtx_var_iterator::array_type array_var;
  FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
    {
      /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
      rtx x = *iter;
      if (x == NULL_RTX)
	continue;

      /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
	 handle SETs explicitly, possibly propagating live information
	 into the uses.

	 We may continue the loop at various points which will cause
	 iteration into the next level of RTL.  Breaking from the loop
	 is never safe as it can lead us to fail to process some of the
	 RTL and thus not make objects live when necessary.  */
      enum rtx_code xcode = GET_CODE (x);
      if (xcode == SET)
	{
	  const_rtx dst = SET_DEST (x);
	  rtx src = SET_SRC (x);
	  const_rtx y;
	  unsigned HOST_WIDE_INT bit = 0;

	  /* The code of the RHS of a SET.  */
	  enum rtx_code code = GET_CODE (src);

	  /* ?!? How much of this should mirror SET handling, potentially
	     being shared?  */
	  if (SUBREG_P (dst) && subreg_lsb (dst).is_constant (&bit))
	    {
	      if (bit >= HOST_BITS_PER_WIDE_INT)
		bit = HOST_BITS_PER_WIDE_INT - 1;
	      dst = SUBREG_REG (dst);
	    }
	  else if (GET_CODE (dst) == STRICT_LOW_PART)
	    dst = XEXP (dst, 0);

	  /* Main processing of the uses.  Two major goals here.

	     First, we want to try and propagate liveness (or the lack
	     thereof) from the destination register to the source
	     register(s).

	     Second, if the source is an extension, try to optimize
	     it into a SUBREG.  The SUBREG form indicates we don't
	     care about the upper bits and will usually be copy
	     propagated away.

	     If we fail to handle something in here, the expectation
	     is the iterator will dive into the sub-components and
	     mark all the chunks in any found REGs as live.  */
	  if (REG_P (dst) && safe_for_live_propagation (code))
	    {
	      /* Create a mask representing the bits of this output
		 operand that are live after this insn.  We can use
		 this information to refine the live in state of
		 inputs to this insn in many cases.

		 We have to do this on a per SET basis, we might have
		 an INSN with multiple SETS, some of which can narrow
		 the source operand liveness, some of which may not.  */
	      unsigned HOST_WIDE_INT dst_mask = 0;
	      HOST_WIDE_INT rn = REGNO (dst);
	      unsigned HOST_WIDE_INT mask_array[]
		= { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
		    -HOST_WIDE_INT_UC (0x100000000) };
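	      /* Note -HOST_WIDE_INT_UC (0x100000000) is the two's
		 complement encoding of 0xffffffff00000000, the mask
		 for group 3 (bits 32..63).  */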
	      for (int i = 0; i < 4; i++)
		if (bitmap_bit_p (live_tmp, 4 * rn + i))
		  dst_mask |= mask_array[i];
	      dst_mask >>= bit;

	      /* If we ignored a destination during set processing, then
		 consider all the bits live.  */
	      if (skipped_dest)
		dst_mask = -1;

	      dst_mask = carry_backpropagate (dst_mask, code, src);

	      /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
		 of the source specially to improve optimization.  */
	      if (code == SIGN_EXTEND || code == ZERO_EXTEND)
		{
		  rtx inner = XEXP (src, 0);
		  unsigned HOST_WIDE_INT src_mask
		    = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));

		  /* DST_MASK could be zero if we had something in the SET
		     that we couldn't handle.  */
		  if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
		    ext_dce_try_optimize_extension (insn, x);

		  /* Stripping the extension here just seems wrong on multiple
		     levels.  It's source side handling, so it seems like it
		     belongs in the loop below.  Stripping here also makes it
		     harder than necessary to properly handle live bit groups
		     for (ANY_EXTEND (SUBREG)) where the SUBREG has
		     SUBREG_PROMOTED state.  */
		  dst_mask &= src_mask;
		  src = XEXP (src, 0);
		  code = GET_CODE (src);
		}

	      /* Special case for (sub)targets that do not have extension
		 insns (and thus use shifts).  We want to detect when we have
		 a shift pair and treat the pair as-if it was an extension.

		 Key on the right shift and use (for now) simplistic tests
		 to find the corresponding left shift.  */
	      scalar_mode outer_mode;
	      if ((code == LSHIFTRT || code == ASHIFTRT)
		  && CONST_INT_P (XEXP (src, 1))
		  && (INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 8
		      || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 16
		      || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 32)
		  && is_a <scalar_mode> (GET_MODE (src), &outer_mode)
		  && GET_MODE_BITSIZE (outer_mode) <= HOST_BITS_PER_WIDE_INT)
		{
		  /* So we have a right shift that could correspond to
		     the second in a pair implementing a QI, HI or SI -> DI
		     extension.  See if we can find the left shift.  For
		     now, just look one real instruction back.  */
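		  /* For example with BITS_PER_WORD == 64,
		       (set (reg:DI T) (ashift:DI (reg:DI X) (const_int 48)))
		       (set (reg:DI D) (ashiftrt:DI (reg:DI T) (const_int 48)))
		     together implement a HImode sign extension of X.  */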
		  rtx_insn *prev_insn = prev_nonnote_nondebug_insn_bb (insn);

		  /* The previous insn must be a left shift by the same
		     amount.  */
		  rtx prev_set;
		  if (prev_insn
		      && (prev_set = single_set (prev_insn))
		      /* The destination of the left shift must be the
			 source of the right shift.  */
		      && SET_DEST (prev_set) == XEXP (src, 0)
		      && GET_CODE (SET_SRC (prev_set)) == ASHIFT
		      && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
		      /* The counts must match.  */
		      && (INTVAL (XEXP (src, 1))
			  == INTVAL (XEXP (SET_SRC (prev_set), 1))))
		    {
		      unsigned HOST_WIDE_INT src_mask
			= GET_MODE_BITSIZE (GET_MODE (src)).to_constant ();
		      src_mask -= INTVAL (XEXP (src, 1));
		      src_mask = (HOST_WIDE_INT_1U << src_mask) - 1;

		      /* DST_MASK has been adjusted for INSN.  We need
			 its original value.  */
		      unsigned HOST_WIDE_INT tmp_mask = 0;
		      for (int i = 0; i < 4; i++)
			if (bitmap_bit_p (live_tmp, 4 * rn + i))
			  tmp_mask |= mask_array[i];
		      tmp_mask >>= bit;

		      if (modify && !skipped_dest
			  && (tmp_mask & ~src_mask) == 0)
			{
			  ext_dce_try_optimize_rshift
			    (insn, x, XEXP (SET_SRC (prev_set), 0),
			     prev_insn);

			  /* These may not strictly be necessary, but we
			     might as well try and be as accurate as
			     possible.  The RHS is now a simple REG.  */
			  dst_mask = src_mask;
			  src = XEXP (SET_SRC (prev_set), 0);
			  code = GET_CODE (src);
			}
		    }
		}

	      /* Optimization is done at this point.  We just want to make
		 sure everything that should get marked as live is marked
		 from here onward.  */

	      /* We will handle the other operand of a binary operator
		 at the bottom of the loop by resetting Y.  */
	      if (BINARY_P (src))
		y = XEXP (src, 0);
	      else
		y = src;

	      /* We're inside a SET and want to process the source operands
		 making things live.  Breaking from this loop will cause
		 the iterator to work on sub-rtxs, so it is safe to break
		 if we see something we don't know how to handle.

		 This code is just hokey as it really just handles trivial
		 unary and binary cases.  Otherwise the loop exits and we
		 continue iterating on sub-rtxs, but outside the set
		 context.  */
	      unsigned HOST_WIDE_INT save_mask = dst_mask;
	      for (;;)
		{
		  /* In general we want to restore DST_MASK before each loop
		     iteration.  The exception is when the opcode implies that
		     the other operand is fully live.  That's handled by
		     changing SAVE_MASK below.  */
		  dst_mask = save_mask;
		  /* Strip an outer paradoxical subreg.  The bits outside
		     the inner mode are don't cares.  So we can just strip
		     and process the inner object.  */
		  if (paradoxical_subreg_p (y))
		    y = XEXP (y, 0);
		  else if (SUBREG_P (y) && subreg_lsb (y).is_constant (&bit))
		    {
		      /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
			 change performed by Y would normally need to be a
			 TRUNCATE rather than a SUBREG.  It is probably the
			 guarantee provided by SUBREG_PROMOTED_VAR_P that
			 allows the SUBREG in Y as an exception.  We must
			 therefore preserve that guarantee and treat the
			 upper bits of the inner register as live
			 regardless of the outer code.  See PR 120050.  */
		      if (!REG_P (SUBREG_REG (y))
			  || (SUBREG_PROMOTED_VAR_P (y)
			      && (!TRULY_NOOP_TRUNCATION_MODES_P (
				    GET_MODE (y),
				    GET_MODE (SUBREG_REG (y))))))
			break;

		      /* If this is a wide object (more bits than we can fit
			 in a HOST_WIDE_INT), then just break from the SET
			 context.  That will cause the iterator to walk down
			 into the subrtx and if we land on a REG we'll mark
			 the whole thing live.  */
		      if (bit >= HOST_BITS_PER_WIDE_INT)
			break;

		      /* The SUBREG's mode determines the live width.  */
		      if (dst_mask)
			{
			  dst_mask <<= bit;
			  if (!dst_mask)
			    dst_mask = -HOST_WIDE_INT_UC (0x100000000);
			}
		      y = SUBREG_REG (y);
		    }

		  if (REG_P (y))
		    {
		      /* We have found the use of a register.  We need to mark
			 the appropriate chunks of the register live.  The mode
			 of the REG is a starting point.  We may refine that
			 based on what chunks in the output were live.  */
		      rn = 4 * REGNO (y);
		      unsigned HOST_WIDE_INT tmp_mask = dst_mask;

		      /* If the RTX code for the SET_SRC is not one we can
			 propagate destination liveness through, then just
			 set the mask to the mode's mask.  */
		      if (!safe_for_live_propagation (code))
			tmp_mask
			  = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));

		      if (tmp_mask & 0xff)
			bitmap_set_bit (livenow, rn);
		      if (tmp_mask & 0xff00)
			bitmap_set_bit (livenow, rn + 1);
		      if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
			bitmap_set_bit (livenow, rn + 2);
		      if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
			bitmap_set_bit (livenow, rn + 3);
		    }
		  else if (!CONSTANT_P (y))
		    break;

		  /* We might have (ashift (const_int 1) (reg...))
		     By setting dst_mask we can continue iterating on the
		     next operand and it will be considered fully live.

		     Note that since we restore DST_MASK from SAVE_MASK at the
		     top of the loop, we have to change SAVE_MASK to get the
		     semantics we want.  */
		  if (binop_implies_op2_fully_live (GET_CODE (src)))
		    save_mask = -1;

		  /* If this was anything but a binary operand, break the inner
		     loop.  This is conservatively correct as it will cause the
		     iterator to look at the sub-rtxs outside the SET
		     context.  */
		  if (!BINARY_P (src))
		    break;

		  /* We processed the first operand of a binary operator.  Now
		     handle the second.  */
		  y = XEXP (src, 1), src = pc_rtx;
		}

	      /* These are leaf nodes, no need to iterate down into them.  */
	      if (REG_P (y) || CONSTANT_P (y))
		iter.skip_subrtxes ();
	    }
	}
      /* If we are reading the low part of a SUBREG, then we can
	 refine liveness of the input register, otherwise let the
	 iterator continue into SUBREG_REG.  */
      else if (SUBREG_P (x)
	       && REG_P (SUBREG_REG (x))
	       && !paradoxical_subreg_p (x)
	       && subreg_lowpart_p (x)
	       && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
	       && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
	{
	  HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
	  HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));

	  /* If this is a promoted subreg, then more of it may be live than
	     is otherwise obvious.  */
	  if (SUBREG_PROMOTED_VAR_P (x))
	    size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();

	  bitmap_set_bit (livenow, rn);
	  if (size > 8)
	    bitmap_set_bit (livenow, rn + 1);
	  if (size > 16)
	    bitmap_set_bit (livenow, rn + 2);
	  if (size >= 32)
	    bitmap_set_bit (livenow, rn + 3);
	  iter.skip_subrtxes ();
	}
      /* If we have a register reference that is not otherwise handled,
	 just assume all the chunks are live.  */
      else if (REG_P (x))
	bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
    }
}

/* Process a single basic block BB with current liveness information
   in LIVENOW, returning updated liveness information.

   If MODIFY is true, then this is the last pass and unnecessary
   extensions should be eliminated when possible.  If an extension
   is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */

static void
ext_dce_process_bb (basic_block bb)
{
  rtx_insn *insn;

  FOR_BB_INSNS_REVERSE (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Live-out state of the destination of this insn.  We can
	 use this to refine the live-in state of the sources of
	 this insn in many cases.  */
      bitmap live_tmp = BITMAP_ALLOC (NULL);

      /* First process any sets/clobbers in INSN.  */
      bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn),
						live_tmp);

      /* CALL_INSNs need processing their fusage data.  */
      if (CALL_P (insn))
	skipped_dest |= ext_dce_process_sets (insn,
					      CALL_INSN_FUNCTION_USAGE (insn),
					      live_tmp);

      /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
      ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);

      /* A nonlocal goto implicitly uses the frame pointer.  */
      if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
	{
	  bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
	  if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
	    bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
	}

      /* And process fusage data for the use as well.  */
      if (CALL_P (insn))
	{
	  if (!FAKE_CALL_P (insn))
	    bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);

	  /* If this is not a call to a const function, then assume it
	     can read any global register.  */
	  if (!RTL_CONST_CALL_P (insn))
	    for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	      if (global_regs[i])
		bitmap_set_range (livenow, i * 4, 4);

	  ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn),
				live_tmp, false);
	}

      BITMAP_FREE (live_tmp);
    }
}

/* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
   is usually helpful.  However, in some cases setting the value when
   it is not strictly needed can cause this pass to miss optimizations.

   Specifically consider (set (mem) (subreg (reg))).  If set in that
   case it will cause more bit groups to be live for REG than would
   be strictly necessary which in turn can inhibit extension removal.

   So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
   is obviously not needed.  */

static void
maybe_clear_subreg_promoted_p (void)
{
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      rtx set = single_set (insn);
      if (!set)
	continue;

      /* There may be other cases where we should clear, but for
	 now, this is the only known case where it causes problems.  */
      if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
	  && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
	SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
    }
}

/* Walk the IL and build the transitive closure of all the REGs tied
   together by copies where either the source or destination is
   marked in CHANGED_PSEUDOS.  */

static void
expand_changed_pseudos (void)
{
  /* Build a vector of registers related by a copy.  This is meant to
     speed up the next step by avoiding full IL walks.  */
  struct copy_pair { rtx first; rtx second; };
  auto_vec<copy_pair> pairs;
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      rtx pat = PATTERN (insn);

      /* Simple copies to a REG from another REG or SUBREG of a REG.  */
      if (GET_CODE (pat) == SET
	  && REG_P (SET_DEST (pat))
	  && (REG_P (SET_SRC (pat))
	      || (SUBREG_P (SET_SRC (pat))
		  && REG_P (SUBREG_REG (SET_SRC (pat))))))
	{
	  rtx src = (REG_P (SET_SRC (pat))
		     ? SET_SRC (pat)
		     : SUBREG_REG (SET_SRC (pat)));
	  pairs.safe_push ({ SET_DEST (pat), src });
	}

      /* Simple copies to a REG from another REG or SUBREG of a REG
	 held inside a PARALLEL.  */
      if (GET_CODE (pat) == PARALLEL)
	{
	  for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	    {
	      rtx elem = XVECEXP (pat, 0, i);

	      if (GET_CODE (elem) == SET
		  && REG_P (SET_DEST (elem))
		  && (REG_P (SET_SRC (elem))
		      || (SUBREG_P (SET_SRC (elem))
			  && REG_P (SUBREG_REG (SET_SRC (elem))))))
		{
		  rtx src = (REG_P (SET_SRC (elem))
			     ? SET_SRC (elem)
			     : SUBREG_REG (SET_SRC (elem)));
		  pairs.safe_push ({ SET_DEST (elem), src });
		}
	    }
	  continue;
	}
    }

  /* Now we have a vector with copy pairs.  Iterate over that list
     updating CHANGED_PSEUDOS as we go.  Eliminate copies from the
     list as we go as they don't need further processing.  */
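  /* Note bitmap_set_bit returns true only if the bit was previously
     clear, so each pair can fire at most once; together with removing
     pairs from the vector this guarantees the loop terminates.  */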
  bool changed = true;
  while (changed)
    {
      changed = false;
      unsigned int i;
      copy_pair *p;
      FOR_EACH_VEC_ELT (pairs, i, p)
	{
	  if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
	      && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
	    {
	      pairs.unordered_remove (i);
	      changed = true;
	    }
	}
    }
}

/* We optimize away sign/zero extensions in this pass and replace
   them with SUBREGs indicating certain bits are don't cares.

   This changes the SUBREG_PROMOTED_VAR_P state of the object.
   It is fairly painful to fix this on the fly, so we have
   recorded which pseudos are affected and we look for SUBREGs
   of those pseudos and fix them up.  */

static void
reset_subreg_promoted_p (void)
{
  /* This pass eliminates zero/sign extensions on pseudo regs found
     in CHANGED_PSEUDOS.  Elimination of those extensions changes if
     the pseudos are known to hold values extended to wider modes
     via SUBREG_PROMOTED_VAR.  So we wipe the SUBREG_PROMOTED_VAR
     state on all affected pseudos.

     But that is insufficient.  We might have a copy from one REG
     to another (possibly with the source register wrapped with a
     SUBREG).  We need to wipe SUBREG_PROMOTED_VAR on the transitive
     closure of the original CHANGED_PSEUDOS and registers they're
     connected to via copies.  So expand the set.  */
  expand_changed_pseudos ();

  /* If we removed an extension, that changed the promoted state
     of the destination of that extension.  Thus we need to go
     find any SUBREGs that reference that pseudo and adjust their
     SUBREG_PROMOTED_P state.  */
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      rtx pat = PATTERN (insn);
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
	{
	  rtx sub = *iter;

	  /* We only care about SUBREGs.  */
	  if (GET_CODE (sub) != SUBREG)
	    continue;

	  const_rtx x = SUBREG_REG (sub);

	  /* We only care if the inner object is a REG.  */
	  if (!REG_P (x))
	    continue;

	  /* And only if the SUBREG is a promoted var.  */
	  if (!SUBREG_PROMOTED_VAR_P (sub))
	    continue;

	  if (bitmap_bit_p (changed_pseudos, REGNO (x)))
	    SUBREG_PROMOTED_VAR_P (sub) = 0;
	}
    }
}

/* Initialization of the ext-dce pass.  Primarily this means
   setting up the various bitmaps we utilize.  */

static void
ext_dce_init (void)
{
  livein.create (last_basic_block_for_fn (cfun));
  livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
  for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    bitmap_initialize (&livein[i], &bitmap_default_obstack);

  auto_bitmap refs (&bitmap_default_obstack);
  df_get_exit_block_use_set (refs);

  unsigned i;
  bitmap_iterator bi;
  EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
    make_reg_live (&livein[EXIT_BLOCK], i);

  livenow = BITMAP_ALLOC (NULL);
  all_blocks = BITMAP_ALLOC (NULL);
  changed_pseudos = BITMAP_ALLOC (NULL);

  for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
      bitmap_set_bit (all_blocks, i);

  modify = false;
}

/* Finalization of the ext-dce pass.  Primarily this means
   releasing the various bitmaps we utilize.  */

static void
ext_dce_finish (void)
{
  for (unsigned i = 0; i < livein.length (); i++)
    bitmap_clear (&livein[i]);
  livein.release ();

  BITMAP_FREE (livenow);
  BITMAP_FREE (changed_pseudos);
  BITMAP_FREE (all_blocks);
}

/* Process block number BB_INDEX as part of the backward
   simple dataflow analysis.  Return TRUE if something in
   this block changed or FALSE otherwise.  */

static bool
ext_dce_rd_transfer_n (int bb_index)
{
  /* The ENTRY/EXIT blocks never change.  */
  if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
    return false;

  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);

  /* Make everything live that's live in the successors.  */
  bitmap_clear (livenow);
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, bb->succs)
    bitmap_ior_into (livenow, &livein[e->dest->index]);

  ext_dce_process_bb (bb);

  /* We only allow widening the set of objects live at the start
     of a block.  Otherwise we run the risk of not converging.  */
  return bitmap_ior_into (&livein[bb_index], livenow);
}

/* Dummy function for the df_simple_dataflow API.  */
static bool ext_dce_rd_confluence_n (edge) { return true; }

/* Use lifetime analysis to identify extensions that set bits that
   are never read.  Turn such extensions into SUBREGs instead which
   can often be propagated away.  */

void
ext_dce_execute (void)
{
  /* Limit the amount of memory we use for livein, with 4 bits per
     reg per basic-block including overhead that maps to one byte
     per reg per basic-block.  */
  uint64_t memory_request
    = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
  if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
    {
      warning (OPT_Wdisabled_optimization,
	       "ext-dce disabled: %d basic blocks and %d registers; "
	       "increase %<--param max-gcse-memory%> above %wu",
	       n_basic_blocks_for_fn (cfun), max_reg_num (),
	       memory_request / 1024);
      return;
    }

  /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
     to this pass.  Clear it for those cases.  */
  maybe_clear_subreg_promoted_p ();
  df_analyze ();
  ext_dce_init ();
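
  /* The dataflow problem is solved twice: first with MODIFY false to
     reach a liveness fixed point, then once more with MODIFY true to
     actually rewrite extensions whose extended bits are dead.  */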
  do
    {
      df_simple_dataflow (DF_BACKWARD, NULL, NULL,
			  ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
			  all_blocks, df_get_postorder (DF_BACKWARD),
			  df_get_n_blocks (DF_BACKWARD));
      modify = !modify;
    }
  while (modify);

  reset_subreg_promoted_p ();

  ext_dce_finish ();
}


namespace {

const pass_data pass_data_ext_dce =
{
  RTL_PASS, /* type */
  "ext_dce", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_EXT_DCE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_ext_dce : public rtl_opt_pass
{
public:
  pass_ext_dce (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_ext_dce, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
  virtual unsigned int execute (function *)
    {
      ext_dce_execute ();
      return 0;
    }

}; // class pass_ext_dce

} // anon namespace

rtl_opt_pass *
make_pass_ext_dce (gcc::context *ctxt)
{
  return new pass_ext_dce (ctxt);
}