mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 03:47:02 -05:00
[RISC-V][PR target/120811] Improving address reloads in LRA
In PR120811, we have cases where GCC emits an extra addi instruction instead of using the 12-bit signed immediate of ld:

    addi t1, t1, 1
    ld t1, 0(t1)

This problem occurs when fp -> sp+offset elimination results in an out-of-range constant and we generate an address reload in LRA using the addsi/adddi expanders. We've already adjusted the expanders to widen the set of valid operands to allow more constants for the 2nd input operand. These expanders, rather than constructing the constant in a register and using an add instruction, will generate two addi instructions (or shNadd) during initial RTL generation.

We define a new pattern for cases where we need to access the current frame and the offsets are too large. This gets reasonable code out of LRA in a form fold-mem-offsets can handle, rather than having to wait for sched2 to do the height reduction transformation and leaving the unnecessary add instruction in the RTL stream.

To avoid the two addi instructions being squashed back together in the post-reload combine, we remove the adddi3_const_sum_of_two_s12 pattern.

We are seeing about 100 billion dynamic instructions saved, which is about 5% on cactuBSSN, and a 2% improvement in performance on the BPI.

	PR target/120811

gcc/
	* config/riscv/riscv.cc (synthesize_add): Exchange constant terms when generating addi pairs.
	(synthesize_add_extended): Similarly.
	* config/riscv/riscv.md (addptr<mode>3): New define_expand.
	(*add<mode>3_const_sum_of_two_s12): Remove pattern.

gcc/testsuite/
	* gcc.target/riscv/add-synthesis-1.c: Adjust const to fit in range.
	* gcc.target/riscv/pr120811.c: Add new test case.
	* gcc.target/riscv/sum-of-two-s12-const-1.c: Adjust const to fit in range.
This commit is contained in:
committed by
Jeff Law
parent
a92369a6be
commit
304d7359a6
@@ -15440,9 +15440,13 @@ synthesize_add (rtx operands[3])
|
||||
|
||||
ival -= saturated;
|
||||
|
||||
rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
|
||||
/* The first add may be an FP relative address during reload. FP
|
||||
may be replaced with (sp + C). We don't want that to already
|
||||
be saturated as (sp + C) would then exceed a simm12 field. So
|
||||
emit the smaller offset first and the saturated constant last. */
|
||||
rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (ival));
|
||||
emit_insn (gen_rtx_SET (operands[0], x));
|
||||
rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival));
|
||||
rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (saturated));
|
||||
emit_insn (gen_rtx_SET (operands[0], output));
|
||||
return true;
|
||||
}
|
||||
@@ -15539,14 +15543,18 @@ synthesize_add_extended (rtx operands[3])
|
||||
|
||||
ival -= saturated;
|
||||
|
||||
/* The first add may be an FP relative address during reload. FP
|
||||
may be replaced with (sp + C). We don't want that to already
|
||||
be saturated as (sp + C) would then exceed a simm12 field. So
|
||||
emit the smaller offset first and the saturated constant last. */
|
||||
rtx temp = gen_reg_rtx (DImode);
|
||||
emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (saturated)));
|
||||
emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (ival)));
|
||||
temp = gen_lowpart (SImode, temp);
|
||||
SUBREG_PROMOTED_VAR_P (temp) = 1;
|
||||
SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
|
||||
emit_insn (gen_rtx_SET (operands[0], temp));
|
||||
rtx t = gen_reg_rtx (DImode);
|
||||
emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival)));
|
||||
emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (saturated)));
|
||||
t = gen_lowpart (SImode, t);
|
||||
SUBREG_PROMOTED_VAR_P (t) = 1;
|
||||
SUBREG_PROMOTED_SET (t, SRP_SIGNED);
|
||||
|
||||
@@ -702,6 +702,22 @@
|
||||
[(set_attr "type" "fadd")
|
||||
(set_attr "mode" "<UNITMODE>")])
|
||||
|
||||
(define_expand "addptr<mode>3"
|
||||
[(set (match_operand:X 0 "register_operand")
|
||||
(plus:X (match_operand:X 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")))]
|
||||
""
|
||||
{
|
||||
gcc_assert (CONST_INT_P (operands[2]));
|
||||
bool status = synthesize_add (operands);
|
||||
|
||||
if (!SMALL_OPERAND (INTVAL (operands[2])))
|
||||
{
|
||||
gcc_assert (status);
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn "*addsi3"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r,r")
|
||||
(plus:SI (match_operand:SI 1 "register_operand" " r,r")
|
||||
@@ -759,46 +775,6 @@
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
;; Special case of adding a reg and constant if latter is sum of two S12
|
||||
;; values (in range -2048 to 2047). Avoid materializing the const and fuse
|
||||
;; into the add (with an additional add for 2nd value). Makes a 3 insn
|
||||
;; sequence into 2 insn.
|
||||
|
||||
(define_insn_and_split "*add<mode>3_const_sum_of_two_s12"
|
||||
[(set (match_operand:P 0 "register_operand" "=r,r")
|
||||
(plus:P (match_operand:P 1 "register_operand" " r,r")
|
||||
(match_operand:P 2 "const_two_s12" " MiG,r")))]
|
||||
"!riscv_reg_frame_related (operands[0])"
|
||||
{
|
||||
/* operand matching MiG constraint is always meant to be split. */
|
||||
if (which_alternative == 0)
|
||||
return "#";
|
||||
else
|
||||
return "add %0,%1,%2";
|
||||
}
|
||||
""
|
||||
[(set (match_dup 0)
|
||||
(plus:P (match_dup 1) (match_dup 3)))
|
||||
(set (match_dup 0)
|
||||
(plus:P (match_dup 0) (match_dup 4)))]
|
||||
{
|
||||
int val = INTVAL (operands[2]);
|
||||
if (SUM_OF_TWO_S12_P (val))
|
||||
{
|
||||
operands[3] = GEN_INT (2047);
|
||||
operands[4] = GEN_INT (val - 2047);
|
||||
}
|
||||
else if (SUM_OF_TWO_S12_N (val))
|
||||
{
|
||||
operands[3] = GEN_INT (-2048);
|
||||
operands[4] = GEN_INT (val + 2048);
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "mode" "<P:MODE>")])
|
||||
|
||||
(define_expand "addv<mode>4"
|
||||
[(set (match_operand:GPR 0 "register_operand" "=r,r")
|
||||
(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
|
||||
|
||||
@@ -25,7 +25,7 @@ T (4100)
|
||||
T (8200)
|
||||
|
||||
TM (2049)
|
||||
TM (4096)
|
||||
TM (4094)
|
||||
TM (4100)
|
||||
TM (8200)
|
||||
|
||||
|
||||
@@ -38,5 +38,4 @@ void q() {
|
||||
}
|
||||
/* { dg-final { scan-rtl-dump-not "const_sum_of_two_s12" "reload" } } */
|
||||
/* { dg-final { scan-rtl-dump-not "const_sum_of_two_s12" "late_combine2" } } */
|
||||
/* { dg-final { scan-assembler "addi.*sp,2047\n\tl\[dw\]\t.*,1\(.*\).*" } } */
|
||||
|
||||
/* { dg-final { scan-assembler "addi.*\[ats\]\[0-9\]*,sp,\[0-9\]*\n\tld\t.*,2047\(.*\).*" } } */
|
||||
|
||||
@@ -26,7 +26,7 @@ plus3(unsigned long i)
|
||||
long
|
||||
minus1(unsigned long i)
|
||||
{
|
||||
return i - 4096;
|
||||
return i - 4094;
|
||||
}
|
||||
|
||||
long
|
||||
|
||||
Reference in New Issue
Block a user