mirror of
https://gcc.gnu.org/git/gcc.git
synced 2026-02-21 19:35:28 -05:00
Revert changes
This commit is contained in:
@@ -107,9 +107,6 @@
|
||||
(match_test "TARGET_P8_VECTOR")
|
||||
(match_operand 0 "s5bit_cint_operand")))
|
||||
|
||||
(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
|
||||
"Accumulator register.")
|
||||
|
||||
(define_constraint "wE"
|
||||
"@internal Vector constant that can be loaded with the XXSPLTIB instruction."
|
||||
(match_test "xxspltib_constant_nosplit (op, mode)"))
|
||||
|
||||
@@ -90,12 +90,6 @@
|
||||
UNSPEC_MMA_XVI8GER4SPP
|
||||
UNSPEC_MMA_XXMFACC
|
||||
UNSPEC_MMA_XXMTACC
|
||||
UNSPEC_MMA_DMSETDMRZ
|
||||
UNSPEC_DM_INSERT512_UPPER
|
||||
UNSPEC_DM_INSERT512_LOWER
|
||||
UNSPEC_DM_EXTRACT512
|
||||
UNSPEC_DM_RELOAD_FROM_MEMORY
|
||||
UNSPEC_DM_RELOAD_TO_MEMORY
|
||||
])
|
||||
|
||||
(define_c_enum "unspecv"
|
||||
@@ -319,7 +313,7 @@
|
||||
(set_attr "length" "*,*,8")])
|
||||
|
||||
|
||||
;; Vector quad support.
|
||||
;; Vector quad support. XOmode can only live in FPRs.
|
||||
(define_expand "movxo"
|
||||
[(set (match_operand:XO 0 "nonimmediate_operand")
|
||||
(match_operand:XO 1 "input_operand"))]
|
||||
@@ -344,13 +338,10 @@
|
||||
gcc_assert (false);
|
||||
})
|
||||
|
||||
;; If we do not have dense math registers, XOmode can only live in FPR
|
||||
;; registers (0..31).
|
||||
|
||||
(define_insn_and_split "*movxo_nodm"
|
||||
(define_insn_and_split "*movxo"
|
||||
[(set (match_operand:XO 0 "nonimmediate_operand" "=d,ZwO,d")
|
||||
(match_operand:XO 1 "input_operand" "ZwO,d,d"))]
|
||||
"TARGET_MMA && !TARGET_DENSE_MATH
|
||||
"TARGET_MMA
|
||||
&& (gpc_reg_operand (operands[0], XOmode)
|
||||
|| gpc_reg_operand (operands[1], XOmode))"
|
||||
"@
|
||||
@@ -367,34 +358,6 @@
|
||||
(set_attr "length" "*,*,16")
|
||||
(set_attr "max_prefixed_insns" "2,2,*")])
|
||||
|
||||
;; If dense math registers are available, XOmode can live in either VSX
|
||||
;; registers (0..63) or dense math registers.
|
||||
|
||||
(define_insn_and_split "*movxo_dm"
|
||||
[(set (match_operand:XO 0 "nonimmediate_operand" "=wa,ZwO,wa,wD,wD,wa")
|
||||
(match_operand:XO 1 "input_operand" "ZwO,wa, wa,wa,wD,wD"))]
|
||||
"TARGET_DENSE_MATH
|
||||
&& (gpc_reg_operand (operands[0], XOmode)
|
||||
|| gpc_reg_operand (operands[1], XOmode))"
|
||||
"@
|
||||
#
|
||||
#
|
||||
#
|
||||
dmxxinstdmr512 %0,%1,%Y1,0
|
||||
dmmr %0,%1
|
||||
dmxxextfdmr512 %0,%Y0,%1,0"
|
||||
"&& reload_completed
|
||||
&& !dense_math_operand (operands[0], XOmode)
|
||||
&& !dense_math_operand (operands[1], XOmode)"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rs6000_split_multireg_move (operands[0], operands[1]);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma")
|
||||
(set_attr "length" "*,*,16,*,*,*")
|
||||
(set_attr "max_prefixed_insns" "2,2,*,*,*,*")])
|
||||
|
||||
(define_expand "vsx_assemble_pair"
|
||||
[(match_operand:OO 0 "vsx_register_operand")
|
||||
(match_operand:V16QI 1 "mma_assemble_input_operand")
|
||||
@@ -493,68 +456,31 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; If dense math registers are not available, MMA instructions that do
|
||||
;; not use their accumulators that overlap with FPR registers as an
|
||||
;; input, still must not allow their vector operands to overlap the
|
||||
;; registers used by the accumulator. We enforce this by marking the
|
||||
;; output as early clobber. The prime and de-prime instructions are
|
||||
;; not needed on systems with dense math registers.
|
||||
;; MMA instructions that do not use their accumulators as an input, still
|
||||
;; must not allow their vector operands to overlap the registers used by
|
||||
;; the accumulator. We enforce this by marking the output as early clobber.
|
||||
|
||||
(define_insn "mma_<acc>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
|
||||
MMA_ACC))]
|
||||
"TARGET_MMA && !TARGET_DENSE_MATH"
|
||||
"TARGET_MMA"
|
||||
"<acc> %A0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; We can't have integer constants in XOmode so we wrap this in an
|
||||
;; UNSPEC_VOLATILE. If we have dense math registers, we can just use a normal
|
||||
;; UNSPEC instead of UNSPEC_VOLATILE.
|
||||
;; UNSPEC_VOLATILE.
|
||||
|
||||
(define_expand "mma_xxsetaccz"
|
||||
[(set (match_operand:XO 0 "accumulator_operand")
|
||||
(unspec_volatile:XO [(const_int 0)]
|
||||
UNSPECV_MMA_XXSETACCZ))]
|
||||
"TARGET_MMA"
|
||||
{
|
||||
if (TARGET_DENSE_MATH)
|
||||
{
|
||||
emit_insn (gen_mma_xxsetaccz_dm (operands[0]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
;; Clear accumulator without dense math registers
|
||||
(define_insn "*mma_xxsetaccz_nodm"
|
||||
(define_insn "mma_xxsetaccz"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=d")
|
||||
(unspec_volatile:XO [(const_int 0)]
|
||||
UNSPECV_MMA_XXSETACCZ))]
|
||||
"TARGET_MMA && !TARGET_DENSE_MATH"
|
||||
"TARGET_MMA"
|
||||
"xxsetaccz %A0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Clear accumulator when dense math registers are available.
|
||||
(define_insn "mma_xxsetaccz_dm"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=wD")
|
||||
(unspec [(const_int 0)]
|
||||
UNSPEC_MMA_DMSETDMRZ))]
|
||||
"TARGET_DENSE_MATH"
|
||||
"dmsetdmrz %A0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
|
||||
;; MMA operations below. If dense math registers are available, these
|
||||
;; operations will use the 8 accumultors which are separate registers.
|
||||
;; If dense math registers are not available, these operations will use
|
||||
;; accumulators that are overlaid on top of the FPR registers.
|
||||
|
||||
;; Instructions:
|
||||
;; xvi4ger8 xvi8ger4 xvi16ger2 xvi16ger2s xvf16ger2
|
||||
;; xvbf16ger2 xvf32ger
|
||||
|
||||
(define_insn "mma_<vv>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
|
||||
MMA_VV))]
|
||||
@@ -562,15 +488,9 @@
|
||||
"<vv> %A0,%x1,%x2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Instructions:
|
||||
;; xvi4ger8pp xvi8ger4pp xvi8ger4spp xvi16ger2pp xvi16ger2spp
|
||||
;; xvf16ger2pp xvf16ger2pn xvf16ger2np xvf16ger2nn xvbf16ger2pp
|
||||
;; xvbf16ger2pn xvbf16ger2np xvbf16ger2nn xvf32gerpp xvf32gerpn
|
||||
;; xvf32gernp xvf32gernn
|
||||
|
||||
(define_insn "mma_<avv>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
|
||||
MMA_AVV))]
|
||||
@@ -578,10 +498,8 @@
|
||||
"<avv> %A0,%x2,%x3"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Instruction: xvf64ger
|
||||
|
||||
(define_insn "mma_<pv>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
|
||||
MMA_PV))]
|
||||
@@ -589,11 +507,9 @@
|
||||
"<pv> %A0,%x1,%x2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Instructions: xvf64gerpp xvf64gerpn xvf64gernp xvf64gernn
|
||||
|
||||
(define_insn "mma_<apv>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
|
||||
MMA_APV))]
|
||||
@@ -601,10 +517,8 @@
|
||||
"<apv> %A0,%x2,%x3"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Instruction: pmxvi4ger8
|
||||
|
||||
(define_insn "mma_<vvi4i4i8>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -616,11 +530,9 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instruction: pmxvi4ger8pp
|
||||
|
||||
(define_insn "mma_<avvi4i4i8>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -632,11 +544,8 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions:
|
||||
;; pmxvi16ger2 pmxvi16ger2s pmxvf16ger2 pmxvbf16ger2
|
||||
|
||||
(define_insn "mma_<vvi4i4i2>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -648,14 +557,9 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions:
|
||||
;; pmxvi16ger2pp pmxvi16ger2spp pmxvf16ger2pp pmxvf16ger2pn
|
||||
;; pmxvf16ger2np pmxvf16ger2nn pmxvbf16ger2pp pmxvbf16ger2pn
|
||||
;; pmxvbf16ger2np pmxvbf16ger2nn
|
||||
|
||||
(define_insn "mma_<avvi4i4i2>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -667,10 +571,8 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instruction: pmxvf32ger
|
||||
|
||||
(define_insn "mma_<vvi4i4>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -681,11 +583,9 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions: pmxvf32gerpp pmxvf32gerpn pmxvf32gernp pmxvf32gernn
|
||||
|
||||
(define_insn "mma_<avvi4i4>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -696,10 +596,8 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instruction: pmxvf64ger
|
||||
|
||||
(define_insn "mma_<pvi4i2>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -710,11 +608,9 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions: pmxvf64gerpp pmxvf64gerpn pmxvf64gernp pmxvf64gernn
|
||||
|
||||
(define_insn "mma_<apvi4i2>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -725,10 +621,8 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instruction: pmxvi8ger4
|
||||
|
||||
(define_insn "mma_<vvi4i4i4>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -740,11 +634,9 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions: pmxvi8ger4pp pmxvi8ger4spp
|
||||
|
||||
(define_insn "mma_<avvi4i4i4>"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -755,153 +647,3 @@
|
||||
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; TDOmode (__dmf keyword for 1,024 bit registers).
|
||||
(define_expand "movtdo"
|
||||
[(set (match_operand:TDO 0 "nonimmediate_operand")
|
||||
(match_operand:TDO 1 "input_operand"))]
|
||||
"TARGET_DENSE_MATH"
|
||||
{
|
||||
rs6000_emit_move (operands[0], operands[1], TDOmode);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*movtdo"
|
||||
[(set (match_operand:TDO 0 "nonimmediate_operand" "=wa,m,wa,wD,wD,wa")
|
||||
(match_operand:TDO 1 "input_operand" "m,wa,wa,wa,wD,wD"))]
|
||||
"TARGET_DENSE_MATH
|
||||
&& (gpc_reg_operand (operands[0], TDOmode)
|
||||
|| gpc_reg_operand (operands[1], TDOmode))"
|
||||
"@
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
dmmr %0,%1
|
||||
#"
|
||||
"&& reload_completed
|
||||
&& (!dense_math_operand (operands[0], TDOmode)
|
||||
|| !dense_math_operand (operands[1], TDOmode))"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rtx op0 = operands[0];
|
||||
rtx op1 = operands[1];
|
||||
|
||||
if (REG_P (op0) && REG_P (op1))
|
||||
{
|
||||
int regno0 = REGNO (op0);
|
||||
int regno1 = REGNO (op1);
|
||||
|
||||
if (DM_REGNO_P (regno0) && VSX_REGNO_P (regno1))
|
||||
{
|
||||
rtx op1_upper = gen_rtx_REG (XOmode, regno1);
|
||||
rtx op1_lower = gen_rtx_REG (XOmode, regno1 + 4);
|
||||
emit_insn (gen_movtdo_insert512_upper (op0, op1_upper));
|
||||
emit_insn (gen_movtdo_insert512_lower (op0, op0, op1_lower));
|
||||
DONE;
|
||||
}
|
||||
|
||||
else if (VSX_REGNO_P (regno0) && DM_REGNO_P (regno1))
|
||||
{
|
||||
rtx op0_upper = gen_rtx_REG (XOmode, regno0);
|
||||
rtx op0_lower = gen_rtx_REG (XOmode, regno0 + 4);
|
||||
emit_insn (gen_movtdo_extract512 (op0_upper, op1, const0_rtx));
|
||||
emit_insn (gen_movtdo_extract512 (op0_lower, op1, const1_rtx));
|
||||
DONE;
|
||||
}
|
||||
|
||||
else
|
||||
gcc_assert (VSX_REGNO_P (regno0) && VSX_REGNO_P (regno1));
|
||||
}
|
||||
|
||||
rs6000_split_multireg_move (operands[0], operands[1]);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "vecload,vecstore,vecmove,vecmove,vecmove,vecmove")
|
||||
(set_attr "length" "*,*,32,8,*,8")
|
||||
(set_attr "max_prefixed_insns" "4,4,*,*,*,*")])
|
||||
|
||||
;; Move from VSX registers to dense math registers via two insert 512 bit
|
||||
;; instructions.
|
||||
(define_insn "movtdo_insert512_upper"
|
||||
[(set (match_operand:TDO 0 "dense_math_operand" "=wD")
|
||||
(unspec:TDO [(match_operand:XO 1 "vsx_register_operand" "wa")]
|
||||
UNSPEC_DM_INSERT512_UPPER))]
|
||||
"TARGET_DENSE_MATH"
|
||||
"dmxxinstdmr512 %0,%1,%Y1,0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
(define_insn "movtdo_insert512_lower"
|
||||
[(set (match_operand:TDO 0 "dense_math_operand" "=wD")
|
||||
(unspec:TDO [(match_operand:TDO 1 "dense_math_operand" "0")
|
||||
(match_operand:XO 2 "vsx_register_operand" "wa")]
|
||||
UNSPEC_DM_INSERT512_LOWER))]
|
||||
"TARGET_DENSE_MATH"
|
||||
"dmxxinstdmr512 %0,%2,%Y2,1"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Move from dense math registers to VSX registers via two extract 512 bit
|
||||
;; instructions.
|
||||
(define_insn "movtdo_extract512"
|
||||
[(set (match_operand:XO 0 "vsx_register_operand" "=wa")
|
||||
(unspec:XO [(match_operand:TDO 1 "dense_math_operand" "wD")
|
||||
(match_operand 2 "const_0_to_1_operand" "n")]
|
||||
UNSPEC_DM_EXTRACT512))]
|
||||
"TARGET_DENSE_MATH"
|
||||
"dmxxextfdmr512 %0,%Y0,%1,%2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Reload dense math registers from memory.
|
||||
(define_insn_and_split "reload_tdo_from_memory"
|
||||
[(set (match_operand:TDO 0 "dense_math_operand" "=wD")
|
||||
(unspec:TDO [(match_operand:TDO 1 "memory_operand" "m")]
|
||||
UNSPEC_DM_RELOAD_FROM_MEMORY))
|
||||
(clobber (match_operand:XO 2 "vsx_register_operand" "=wa"))]
|
||||
"TARGET_DENSE_MATH"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rtx dest = operands[0];
|
||||
rtx src = operands[1];
|
||||
rtx tmp = operands[2];
|
||||
rtx mem_upper = adjust_address (src, XOmode, BYTES_BIG_ENDIAN ? 0 : 64);
|
||||
rtx mem_lower = adjust_address (src, XOmode, BYTES_BIG_ENDIAN ? 64 : 0);
|
||||
|
||||
emit_move_insn (tmp, mem_upper);
|
||||
emit_insn (gen_movtdo_insert512_upper (dest, tmp));
|
||||
|
||||
emit_move_insn (tmp, mem_lower);
|
||||
emit_insn (gen_movtdo_insert512_lower (dest, dest, tmp));
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "length" "16")
|
||||
(set_attr "max_prefixed_insns" "2")
|
||||
(set_attr "type" "vecload")])
|
||||
|
||||
;; Reload dense math registers to memory
|
||||
(define_insn_and_split "reload_tdo_to_memory"
|
||||
[(set (match_operand:TDO 0 "memory_operand" "=m")
|
||||
(unspec:TDO [(match_operand:TDO 1 "dense_math_operand" "wD")]
|
||||
UNSPEC_DM_RELOAD_TO_MEMORY))
|
||||
(clobber (match_operand:XO 2 "vsx_register_operand" "=wa"))]
|
||||
"TARGET_DENSE_MATH"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rtx dest = operands[0];
|
||||
rtx src = operands[1];
|
||||
rtx tmp = operands[2];
|
||||
rtx mem_upper = adjust_address (dest, XOmode, BYTES_BIG_ENDIAN ? 0 : 64);
|
||||
rtx mem_lower = adjust_address (dest, XOmode, BYTES_BIG_ENDIAN ? 64 : 0);
|
||||
|
||||
emit_insn (gen_movtdo_extract512 (tmp, src, const0_rtx));
|
||||
emit_move_insn (mem_upper, tmp);
|
||||
|
||||
emit_insn (gen_movtdo_extract512 (tmp, src, const1_rtx));
|
||||
emit_move_insn (mem_lower, tmp);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "length" "16")
|
||||
(set_attr "max_prefixed_insns" "2")])
|
||||
|
||||
@@ -186,44 +186,6 @@
|
||||
return VLOGICAL_REGNO_P (REGNO (op));
|
||||
})
|
||||
|
||||
;; Return 1 if op is a dense math register
|
||||
(define_predicate "dense_math_operand"
|
||||
(match_operand 0 "register_operand")
|
||||
{
|
||||
if (!REG_P (op))
|
||||
return 0;
|
||||
|
||||
if (!HARD_REGISTER_P (op))
|
||||
return 1;
|
||||
|
||||
return DM_REGNO_P (REGNO (op));
|
||||
})
|
||||
|
||||
;; Return 1 if op is an accumulator.
|
||||
;;
|
||||
;; On power10 and power11 systems, the accumulators overlap with the
|
||||
;; FPRs and the register must be divisible by 4.
|
||||
;;
|
||||
;; On systems with dense math registers, the accumulators are separate
|
||||
;; registers and do not overlap with the FPR registers.
|
||||
(define_predicate "accumulator_operand"
|
||||
(match_operand 0 "register_operand")
|
||||
{
|
||||
if (SUBREG_P (op))
|
||||
op = SUBREG_REG (op);
|
||||
|
||||
if (!REG_P (op))
|
||||
return 0;
|
||||
|
||||
if (!HARD_REGISTER_P (op))
|
||||
return 1;
|
||||
|
||||
int r = REGNO (op);
|
||||
return (TARGET_DENSE_MATH
|
||||
? DM_REGNO_P (r)
|
||||
: FP_REGNO_P (r) && (r & 3) == 0);
|
||||
})
|
||||
|
||||
;; Return 1 if op is the carry register.
|
||||
(define_predicate "ca_operand"
|
||||
(match_operand 0 "register_operand")
|
||||
|
||||
@@ -495,8 +495,6 @@ const char *rs6000_type_string (tree type_node)
|
||||
return "__vector_pair";
|
||||
else if (type_node == vector_quad_type_node)
|
||||
return "__vector_quad";
|
||||
else if (type_node == dm1024_type_node)
|
||||
return "__dm1024";
|
||||
|
||||
return "unknown";
|
||||
}
|
||||
@@ -783,21 +781,6 @@ rs6000_init_builtins (void)
|
||||
t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST);
|
||||
ptr_vector_quad_type_node = build_pointer_type (t);
|
||||
|
||||
/* For TDOmode (1,024 bit dense math accumulators), don't use an alignment of
|
||||
1,024, use 512. TDOmode loads and stores are always broken up into 2
|
||||
vector pair loads or stores. In addition, we don't have support for
|
||||
aligning the stack to 1,024 bits. */
|
||||
dm1024_type_node = make_node (OPAQUE_TYPE);
|
||||
SET_TYPE_MODE (dm1024_type_node, TDOmode);
|
||||
TYPE_SIZE (dm1024_type_node) = bitsize_int (GET_MODE_BITSIZE (TDOmode));
|
||||
TYPE_PRECISION (dm1024_type_node) = GET_MODE_BITSIZE (TDOmode);
|
||||
TYPE_SIZE_UNIT (dm1024_type_node) = size_int (GET_MODE_SIZE (TDOmode));
|
||||
SET_TYPE_ALIGN (dm1024_type_node, 512);
|
||||
TYPE_USER_ALIGN (dm1024_type_node) = 0;
|
||||
lang_hooks.types.register_builtin_type (dm1024_type_node, "__dm1024");
|
||||
t = build_qualified_type (dm1024_type_node, TYPE_QUAL_CONST);
|
||||
ptr_dm1024_type_node = build_pointer_type (t);
|
||||
|
||||
tdecl = add_builtin_type ("__bool char", bool_char_type_node);
|
||||
TYPE_NAME (bool_char_type_node) = tdecl;
|
||||
|
||||
@@ -1142,9 +1125,8 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
|
||||
}
|
||||
|
||||
/* If we're disassembling an accumulator into a different type, we need
|
||||
to emit a xxmfacc instruction now, since we cannot do it later. If we
|
||||
have dense math registers, we don't need to do this. */
|
||||
if (fncode == RS6000_BIF_DISASSEMBLE_ACC && !TARGET_DENSE_MATH)
|
||||
to emit a xxmfacc instruction now, since we cannot do it later. */
|
||||
if (fncode == RS6000_BIF_DISASSEMBLE_ACC)
|
||||
{
|
||||
new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
|
||||
new_call = gimple_build_call (new_decl, 1, src);
|
||||
|
||||
@@ -590,10 +590,6 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
|
||||
/* Tell the user if we support the MMA instructions. */
|
||||
if ((flags & OPTION_MASK_MMA) != 0)
|
||||
rs6000_define_or_undefine_macro (define_p, "__MMA__");
|
||||
/* Tell the user if we support the dense math registers for use with MMA and
|
||||
cryptography. */
|
||||
if ((flags & OPTION_MASK_DENSE_MATH) != 0)
|
||||
rs6000_define_or_undefine_macro (define_p, "__DENSE_MATH__");
|
||||
/* Whether pc-relative code is being generated. */
|
||||
if ((flags & OPTION_MASK_PCREL) != 0)
|
||||
rs6000_define_or_undefine_macro (define_p, "__PCREL__");
|
||||
|
||||
@@ -437,15 +437,14 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
||||
if (cfun
|
||||
&& !cfun->machine->mma_return_type_error
|
||||
&& TREE_TYPE (cfun->decl) == fntype
|
||||
&& OPAQUE_MODE_P (TYPE_MODE (type)))
|
||||
&& (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
|
||||
{
|
||||
/* Record we have now handled function CFUN, so the next time we
|
||||
are called, we do not re-report the same error. */
|
||||
cfun->machine->mma_return_type_error = true;
|
||||
if (TYPE_CANONICAL (type) != NULL_TREE)
|
||||
type = TYPE_CANONICAL (type);
|
||||
error ("invalid use of %s type %qs as a function return value",
|
||||
(TYPE_MODE (type) == TDOmode) ? "dense math" : "MMA",
|
||||
error ("invalid use of MMA type %qs as a function return value",
|
||||
IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
|
||||
}
|
||||
|
||||
@@ -1633,12 +1632,11 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
|
||||
int n_elts;
|
||||
|
||||
/* We do not allow MMA types being used as function arguments. */
|
||||
if (OPAQUE_MODE_P (mode))
|
||||
if (mode == OOmode || mode == XOmode)
|
||||
{
|
||||
if (TYPE_CANONICAL (type) != NULL_TREE)
|
||||
type = TYPE_CANONICAL (type);
|
||||
error ("invalid use of %s operand of type %qs as a function parameter",
|
||||
(mode == TDOmode) ? "dense math" : "MMA",
|
||||
error ("invalid use of MMA operand of type %qs as a function parameter",
|
||||
IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
@@ -91,7 +91,6 @@
|
||||
will be fixed in potential future machines. */
|
||||
#define FUTURE_MASKS_SERVER (POWER11_MASKS_SERVER \
|
||||
| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \
|
||||
| OPTION_MASK_DENSE_MATH \
|
||||
| OPTION_MASK_FUTURE)
|
||||
|
||||
/* Flags that need to be turned off if -mno-vsx. */
|
||||
@@ -125,7 +124,6 @@
|
||||
| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \
|
||||
| OPTION_MASK_CMPB \
|
||||
| OPTION_MASK_CRYPTO \
|
||||
| OPTION_MASK_DENSE_MATH \
|
||||
| OPTION_MASK_DFP \
|
||||
| OPTION_MASK_DLMZB \
|
||||
| OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
|
||||
|
||||
@@ -79,7 +79,3 @@ PARTIAL_INT_MODE (TI, 128, PTI);
|
||||
/* Modes used by __vector_pair and __vector_quad. */
|
||||
OPAQUE_MODE (OO, 32);
|
||||
OPAQUE_MODE (XO, 64);
|
||||
|
||||
/* Mode used by __dmf. */
|
||||
OPAQUE_MODE (TDO, 128);
|
||||
|
||||
|
||||
@@ -292,8 +292,7 @@ enum rs6000_reg_type {
|
||||
ALTIVEC_REG_TYPE,
|
||||
FPR_REG_TYPE,
|
||||
SPR_REG_TYPE,
|
||||
CR_REG_TYPE,
|
||||
DM_REG_TYPE
|
||||
CR_REG_TYPE
|
||||
};
|
||||
|
||||
/* Map register class to register type. */
|
||||
@@ -307,24 +306,22 @@ static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
|
||||
|
||||
|
||||
/* Register classes we care about in secondary reload or go if legitimate
|
||||
address. We only need to worry about GPR, FPR, Altivec, and dense math
|
||||
registers here, along an ANY field that is the OR of the 4 register
|
||||
classes. */
|
||||
address. We only need to worry about GPR, FPR, and Altivec registers here,
|
||||
along an ANY field that is the OR of the 3 register classes. */
|
||||
|
||||
enum rs6000_reload_reg_type {
|
||||
RELOAD_REG_GPR, /* General purpose registers. */
|
||||
RELOAD_REG_FPR, /* Traditional floating point regs. */
|
||||
RELOAD_REG_VMX, /* Altivec (VMX) registers. */
|
||||
RELOAD_REG_DMR, /* Dense math registers. */
|
||||
RELOAD_REG_ANY, /* OR of GPR/FPR/VMX/DMR masks. */
|
||||
RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
|
||||
N_RELOAD_REG
|
||||
};
|
||||
|
||||
/* For setting up register classes, loop through the 4 register classes mapping
|
||||
/* For setting up register classes, loop through the 3 register classes mapping
|
||||
into real registers, and skip the ANY class, which is just an OR of the
|
||||
bits. */
|
||||
#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
|
||||
#define LAST_RELOAD_REG_CLASS RELOAD_REG_DMR
|
||||
#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
|
||||
|
||||
/* Map reload register type to a register in the register class. */
|
||||
struct reload_reg_map_type {
|
||||
@@ -336,7 +333,6 @@ static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
|
||||
{ "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
|
||||
{ "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
|
||||
{ "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
|
||||
{ "Dmr", FIRST_DM_REGNO }, /* RELOAD_REG_DMR. */
|
||||
{ "Any", -1 }, /* RELOAD_REG_ANY. */
|
||||
};
|
||||
|
||||
@@ -1230,8 +1226,6 @@ char rs6000_reg_names[][8] =
|
||||
"0", "1", "2", "3", "4", "5", "6", "7",
|
||||
/* vrsave vscr sfp */
|
||||
"vrsave", "vscr", "sfp",
|
||||
/* dense math registers. */
|
||||
"0", "1", "2", "3", "4", "5", "6", "7",
|
||||
};
|
||||
|
||||
#ifdef TARGET_REGNAMES
|
||||
@@ -1258,8 +1252,6 @@ static const char alt_reg_names[][8] =
|
||||
"%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
|
||||
/* vrsave vscr sfp */
|
||||
"vrsave", "vscr", "sfp",
|
||||
/* dense math registers. */
|
||||
"%dmr0", "%dmr1", "%dmr2", "%dmr3", "%dmr4", "%dmr5", "%dmr6", "%dmr7",
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -1843,17 +1835,13 @@ rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
|
||||
128-bit floating point that can go in vector registers, which has VSX
|
||||
memory addressing. */
|
||||
if (FP_REGNO_P (regno))
|
||||
reg_size = (VECTOR_MEM_VSX_P (mode)
|
||||
|| VECTOR_ALIGNMENT_P (mode)
|
||||
reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
|
||||
? UNITS_PER_VSX_WORD
|
||||
: UNITS_PER_FP_WORD);
|
||||
|
||||
else if (ALTIVEC_REGNO_P (regno))
|
||||
reg_size = UNITS_PER_ALTIVEC_WORD;
|
||||
|
||||
else if (DM_REGNO_P (regno))
|
||||
reg_size = UNITS_PER_DM_WORD;
|
||||
|
||||
else
|
||||
reg_size = UNITS_PER_WORD;
|
||||
|
||||
@@ -1875,47 +1863,9 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
|
||||
if (mode == OOmode)
|
||||
return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
|
||||
|
||||
/* On ISA 3.1 (power10), MMA accumulator modes need FPR registers divisible
|
||||
by 4.
|
||||
|
||||
If dense math registers are enabled, we can allow all VSX registers plus
|
||||
the dense math registers. VSX registers are used to load and store the
|
||||
registers as the accumulator registers do not have load and store
|
||||
instructions. Because we just use the VSX registers for load/store
|
||||
operations, we just need to make sure load vector pair and store vector
|
||||
pair instructions can be used. */
|
||||
/* MMA accumulator modes need FPR registers divisible by 4. */
|
||||
if (mode == XOmode)
|
||||
{
|
||||
if (!TARGET_DENSE_MATH)
|
||||
return (FP_REGNO_P (regno) && (regno & 3) == 0);
|
||||
|
||||
else if (DM_REGNO_P (regno))
|
||||
return 1;
|
||||
|
||||
else
|
||||
return (VSX_REGNO_P (regno)
|
||||
&& VSX_REGNO_P (last_regno)
|
||||
&& (regno & 1) == 0);
|
||||
}
|
||||
|
||||
if (mode == TDOmode)
|
||||
{
|
||||
if (!TARGET_DENSE_MATH)
|
||||
return 0;
|
||||
|
||||
if (DM_REGNO_P (regno))
|
||||
return 1;
|
||||
|
||||
else
|
||||
return (VSX_REGNO_P (regno)
|
||||
&& VSX_REGNO_P (last_regno)
|
||||
&& (regno & 1) == 0);
|
||||
}
|
||||
|
||||
/* No other types other than XOmode or TDOmode can go in dense math
|
||||
registers. */
|
||||
if (DM_REGNO_P (regno))
|
||||
return 0;
|
||||
return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
|
||||
|
||||
/* PTImode can only go in GPRs. Quad word memory operations require even/odd
|
||||
register combinations, and use PTImode where we need to deal with quad
|
||||
@@ -2021,11 +1971,9 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|
||||
GPR registers, and TImode can go in any GPR as well as VSX registers (PR
|
||||
57744).
|
||||
|
||||
Similarly, don't allow OOmode (vector pair), XOmode (vector quad), or
|
||||
TDOmode (dense math register) to pair with anything else. Vector pairs are
|
||||
restricted to even/odd VSX registers. Without dense math, vector quads are
|
||||
limited to FPR registers divisible by 4. With dense math, vector quads are
|
||||
limited to even VSX registers or dense math registers.
|
||||
Similarly, don't allow OOmode (vector pair, restricted to even VSX
|
||||
registers) or XOmode (vector quad, restricted to FPR registers divisible
|
||||
by 4) to tie with other modes.
|
||||
|
||||
Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
|
||||
128-bit floating point on VSX systems ties with other vectors. */
|
||||
@@ -2034,8 +1982,7 @@ static bool
|
||||
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
|
||||
{
|
||||
if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
|
||||
|| mode1 == TDOmode || mode2 == PTImode || mode2 == OOmode
|
||||
|| mode2 == XOmode || mode2 == TDOmode)
|
||||
|| mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
|
||||
return mode1 == mode2;
|
||||
|
||||
if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
|
||||
@@ -2326,7 +2273,6 @@ rs6000_debug_reg_global (void)
|
||||
V4DFmode,
|
||||
OOmode,
|
||||
XOmode,
|
||||
TDOmode,
|
||||
CCmode,
|
||||
CCUNSmode,
|
||||
CCEQmode,
|
||||
@@ -2362,7 +2308,6 @@ rs6000_debug_reg_global (void)
|
||||
rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
|
||||
LAST_ALTIVEC_REGNO,
|
||||
"vs");
|
||||
rs6000_debug_reg_print (FIRST_DM_REGNO, LAST_DM_REGNO, "dense_math");
|
||||
rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
|
||||
rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
|
||||
rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
|
||||
@@ -2383,7 +2328,6 @@ rs6000_debug_reg_global (void)
|
||||
"wr reg_class = %s\n"
|
||||
"wx reg_class = %s\n"
|
||||
"wA reg_class = %s\n"
|
||||
"wD reg_class = %s\n"
|
||||
"\n",
|
||||
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
|
||||
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
|
||||
@@ -2391,8 +2335,7 @@ rs6000_debug_reg_global (void)
|
||||
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
|
||||
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
|
||||
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
|
||||
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
|
||||
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
|
||||
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
|
||||
|
||||
nl = "\n";
|
||||
for (m = 0; m < NUM_MACHINE_MODES; ++m)
|
||||
@@ -2689,21 +2632,6 @@ rs6000_setup_reg_addr_masks (void)
|
||||
addr_mask = 0;
|
||||
reg = reload_reg_map[rc].reg;
|
||||
|
||||
/* Special case dense math registers. */
|
||||
if (rc == RELOAD_REG_DMR)
|
||||
{
|
||||
if (TARGET_DENSE_MATH && (m2 == XOmode || m2 == TDOmode))
|
||||
{
|
||||
addr_mask = RELOAD_REG_VALID;
|
||||
reg_addr[m].addr_mask[rc] = addr_mask;
|
||||
any_addr_mask |= addr_mask;
|
||||
}
|
||||
else
|
||||
reg_addr[m].addr_mask[rc] = 0;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Can mode values go in the GPR/FPR/Altivec registers? */
|
||||
if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
|
||||
{
|
||||
@@ -2799,10 +2727,10 @@ rs6000_setup_reg_addr_masks (void)
|
||||
|
||||
/* Vector pairs can do both indexed and offset loads if the
|
||||
instructions are enabled, otherwise they can only do offset loads
|
||||
since it will be broken into two vector moves. Vector quads and
|
||||
dense math types can only do offset loads. */
|
||||
since it will be broken into two vector moves. Vector quads can
|
||||
only do offset loads. */
|
||||
else if ((addr_mask != 0) && TARGET_MMA
|
||||
&& (m2 == OOmode || m2 == XOmode || m2 == TDOmode))
|
||||
&& (m2 == OOmode || m2 == XOmode))
|
||||
{
|
||||
addr_mask |= RELOAD_REG_OFFSET;
|
||||
if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
|
||||
@@ -2854,9 +2782,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
|
||||
for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
|
||||
rs6000_regno_regclass[r] = CR_REGS;
|
||||
|
||||
for (r = FIRST_DM_REGNO; r <= LAST_DM_REGNO; ++r)
|
||||
rs6000_regno_regclass[r] = DM_REGS;
|
||||
|
||||
rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
|
||||
rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
|
||||
rs6000_regno_regclass[CA_REGNO] = NO_REGS;
|
||||
@@ -2881,7 +2806,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
|
||||
reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
|
||||
reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
|
||||
reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
|
||||
reg_class_to_reg_type[(int)DM_REGS] = DM_REG_TYPE;
|
||||
|
||||
if (TARGET_VSX)
|
||||
{
|
||||
@@ -3030,14 +2954,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
|
||||
rs6000_vector_align[XOmode] = 512;
|
||||
}
|
||||
|
||||
/* Add support for 1,024 bit dense math registers. */
|
||||
if (TARGET_DENSE_MATH)
|
||||
{
|
||||
rs6000_vector_unit[TDOmode] = VECTOR_NONE;
|
||||
rs6000_vector_mem[TDOmode] = VECTOR_VSX;
|
||||
rs6000_vector_align[TDOmode] = 512;
|
||||
}
|
||||
|
||||
/* Register class constraints for the constraints that depend on compile
|
||||
switches. When the VSX code was added, different constraints were added
|
||||
based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
|
||||
@@ -3076,12 +2992,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
|
||||
if (TARGET_DIRECT_MOVE_128)
|
||||
rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
|
||||
|
||||
/* Support for the accumulator registers, either FPR registers (aka original
|
||||
mma) or dense math registers. */
|
||||
if (TARGET_MMA)
|
||||
rs6000_constraints[RS6000_CONSTRAINT_wD]
|
||||
= TARGET_DENSE_MATH ? DM_REGS : FLOAT_REGS;
|
||||
|
||||
/* Set up the reload helper and direct move functions. */
|
||||
if (TARGET_VSX || TARGET_ALTIVEC)
|
||||
{
|
||||
@@ -3250,12 +3160,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
|
||||
}
|
||||
}
|
||||
|
||||
if (TARGET_DENSE_MATH)
|
||||
{
|
||||
reg_addr[TDOmode].reload_load = CODE_FOR_reload_tdo_from_memory;
|
||||
reg_addr[TDOmode].reload_store = CODE_FOR_reload_tdo_to_memory;
|
||||
}
|
||||
|
||||
/* Precalculate HARD_REGNO_NREGS. */
|
||||
for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
|
||||
for (m = 0; m < NUM_MACHINE_MODES; ++m)
|
||||
@@ -4501,15 +4405,6 @@ rs6000_option_override_internal (bool global_init_p)
|
||||
if (!TARGET_PCREL && TARGET_PCREL_OPT)
|
||||
rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
|
||||
|
||||
/* Turn off dense math register support on non-future systems. */
|
||||
if (TARGET_DENSE_MATH && !TARGET_FUTURE)
|
||||
{
|
||||
if ((rs6000_isa_flags_explicit & OPTION_MASK_DENSE_MATH) != 0)
|
||||
error ("%qs requires %qs", "-mdense-math", "-mcpu=future");
|
||||
|
||||
rs6000_isa_flags &= ~OPTION_MASK_DENSE_MATH;
|
||||
}
|
||||
|
||||
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
|
||||
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
|
||||
|
||||
@@ -8769,15 +8664,12 @@ reg_offset_addressing_ok_p (machine_mode mode)
|
||||
return mode_supports_dq_form (mode);
|
||||
break;
|
||||
|
||||
/* The vector pair/quad types and the dense math types support offset
|
||||
addressing if the underlying vectors support offset addressing. */
|
||||
/* The vector pair/quad types support offset addressing if the
|
||||
underlying vectors support offset addressing. */
|
||||
case E_OOmode:
|
||||
case E_XOmode:
|
||||
return TARGET_MMA;
|
||||
|
||||
case E_TDOmode:
|
||||
return TARGET_DENSE_MATH;
|
||||
|
||||
case E_SDmode:
|
||||
/* If we can do direct load/stores of SDmode, restrict it to reg+reg
|
||||
addressing for the LFIWZX and STFIWX instructions. */
|
||||
@@ -11331,12 +11223,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
|
||||
(mode == OOmode) ? "__vector_pair" : "__vector_quad");
|
||||
break;
|
||||
|
||||
case E_TDOmode:
|
||||
if (CONST_INT_P (operands[1]))
|
||||
error ("%qs is an opaque type, and you cannot set it to constants",
|
||||
"__dm1024");
|
||||
break;
|
||||
|
||||
case E_SImode:
|
||||
case E_DImode:
|
||||
/* Use default pattern for address of ELF small data */
|
||||
@@ -12465,11 +12351,6 @@ rs6000_secondary_reload_memory (rtx addr,
|
||||
addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
|
||||
& ~RELOAD_REG_AND_M16);
|
||||
|
||||
/* Dense math registers use VSX registers for memory operations, and need to
|
||||
generate some extra instructions. */
|
||||
else if (rclass == DM_REGS)
|
||||
return 2;
|
||||
|
||||
/* If the register allocator hasn't made up its mind yet on the register
|
||||
class to use, settle on defaults to use. */
|
||||
else if (rclass == NO_REGS)
|
||||
@@ -12798,13 +12679,6 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
|
||||
|| (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
|
||||
return true;
|
||||
|
||||
/* We can transfer between VSX registers and dense math registers without
|
||||
needing extra registers. */
|
||||
if (TARGET_DENSE_MATH && (mode == XOmode || mode == TDOmode)
|
||||
&& ((to_type == DM_REG_TYPE && from_type == VSX_REG_TYPE)
|
||||
|| (to_type == VSX_REG_TYPE && from_type == DM_REG_TYPE)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -13499,10 +13373,6 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
|
||||
machine_mode mode = GET_MODE (x);
|
||||
bool is_constant = CONSTANT_P (x);
|
||||
|
||||
/* Dense math registers can't be loaded or stored. */
|
||||
if (rclass == DM_REGS)
|
||||
return NO_REGS;
|
||||
|
||||
/* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
|
||||
reload class for it. */
|
||||
if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
|
||||
@@ -13599,10 +13469,7 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
|
||||
return VSX_REGS;
|
||||
|
||||
if (mode == XOmode)
|
||||
return TARGET_DENSE_MATH ? VSX_REGS : FLOAT_REGS;
|
||||
|
||||
if (mode == TDOmode)
|
||||
return VSX_REGS;
|
||||
return FLOAT_REGS;
|
||||
|
||||
if (GET_MODE_CLASS (mode) == MODE_INT)
|
||||
return GENERAL_REGS;
|
||||
@@ -13727,11 +13594,6 @@ rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
|
||||
else
|
||||
regno = -1;
|
||||
|
||||
/* Dense math registers don't have loads or stores. We have to go through
|
||||
the VSX registers to load XOmode (vector quad). */
|
||||
if (TARGET_DENSE_MATH && rclass == DM_REGS)
|
||||
return VSX_REGS;
|
||||
|
||||
/* If we have VSX register moves, prefer moving scalar values between
|
||||
Altivec registers and GPR by going via an FPR (and then via memory)
|
||||
instead of reloading the secondary memory address for Altivec moves. */
|
||||
@@ -14263,14 +14125,8 @@ print_operand (FILE *file, rtx x, int code)
|
||||
output_operand. */
|
||||
|
||||
case 'A':
|
||||
/* Write the MMA accumulator number associated with VSX register X. On
|
||||
dense math systems, only allow dense math accumulators, not
|
||||
accumulators overlapping with the FPR registers. */
|
||||
if (!REG_P (x))
|
||||
output_operand_lossage ("invalid %%A value");
|
||||
else if (TARGET_DENSE_MATH && DM_REGNO_P (REGNO (x)))
|
||||
fprintf (file, "%d", REGNO (x) - FIRST_DM_REGNO);
|
||||
else if (!FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
|
||||
/* Write the MMA accumulator number associated with VSX register X. */
|
||||
if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
|
||||
output_operand_lossage ("invalid %%A value");
|
||||
else
|
||||
fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
|
||||
@@ -20778,8 +20634,6 @@ rs6000_mangle_type (const_tree type)
|
||||
return "u13__vector_pair";
|
||||
if (type == vector_quad_type_node)
|
||||
return "u13__vector_quad";
|
||||
if (type == dm1024_type_node)
|
||||
return "u8__dm1024";
|
||||
|
||||
/* For all other types, use the default mangling. */
|
||||
return NULL;
|
||||
@@ -22892,35 +22746,6 @@ rs6000_debug_address_cost (rtx x, machine_mode mode,
|
||||
}
|
||||
|
||||
|
||||
/* Subroutine to determine the move cost of dense math registers. If we are
|
||||
moving to/from VSX_REGISTER registers, the cost is either 1 move (for
|
||||
512-bit accumulators) or 2 moves (for 1,024-bit dense math registers). If we are
|
||||
moving to anything else like GPR registers, make the cost very high. */
|
||||
|
||||
static int
|
||||
rs6000_dense_math_register_move_cost (machine_mode mode, reg_class_t rclass)
|
||||
{
|
||||
const int reg_move_base = 2;
|
||||
HARD_REG_SET vsx_set = (reg_class_contents[rclass]
|
||||
& reg_class_contents[VSX_REGS]);
|
||||
|
||||
if (TARGET_DENSE_MATH && !hard_reg_set_empty_p (vsx_set))
|
||||
{
|
||||
/* __vector_quad (i.e. XOmode) is transferred in 1 instruction. */
|
||||
if (mode == XOmode)
|
||||
return reg_move_base;
|
||||
|
||||
/* __dm1024 (i.e. TDOmode) is transferred in 2 instructions. */
|
||||
else if (mode == TDOmode)
|
||||
return reg_move_base * 2;
|
||||
|
||||
else
|
||||
return reg_move_base * 2 * hard_regno_nregs (FIRST_DM_REGNO, mode);
|
||||
}
|
||||
|
||||
return 1000 * 2 * hard_regno_nregs (FIRST_DM_REGNO, mode);
|
||||
}
|
||||
|
||||
/* A C expression returning the cost of moving data from a register of class
|
||||
CLASS1 to one of CLASS2. */
|
||||
|
||||
@@ -22934,28 +22759,17 @@ rs6000_register_move_cost (machine_mode mode,
|
||||
if (TARGET_DEBUG_COST)
|
||||
dbg_cost_ctrl++;
|
||||
|
||||
HARD_REG_SET to_vsx, from_vsx;
|
||||
to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
|
||||
from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
|
||||
|
||||
/* Special case dense math registers, that can only move to/from VSX registers. */
|
||||
if (from == DM_REGS && to == DM_REGS)
|
||||
ret = 2 * hard_regno_nregs (FIRST_DM_REGNO, mode);
|
||||
|
||||
else if (from == DM_REGS)
|
||||
ret = rs6000_dense_math_register_move_cost (mode, to);
|
||||
|
||||
else if (to == DM_REGS)
|
||||
ret = rs6000_dense_math_register_move_cost (mode, from);
|
||||
|
||||
/* If we have VSX, we can easily move between FPR or Altivec registers,
|
||||
otherwise we can only easily move within classes.
|
||||
Do this first so we give best-case answers for union classes
|
||||
containing both gprs and vsx regs. */
|
||||
else if (!hard_reg_set_empty_p (to_vsx)
|
||||
&& !hard_reg_set_empty_p (from_vsx)
|
||||
&& (TARGET_VSX
|
||||
|| hard_reg_set_intersect_p (to_vsx, from_vsx)))
|
||||
HARD_REG_SET to_vsx, from_vsx;
|
||||
to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
|
||||
from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
|
||||
if (!hard_reg_set_empty_p (to_vsx)
|
||||
&& !hard_reg_set_empty_p (from_vsx)
|
||||
&& (TARGET_VSX
|
||||
|| hard_reg_set_intersect_p (to_vsx, from_vsx)))
|
||||
{
|
||||
int reg = FIRST_FPR_REGNO;
|
||||
if (TARGET_VSX
|
||||
@@ -23051,9 +22865,6 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
|
||||
ret = 4 * hard_regno_nregs (32, mode);
|
||||
else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
|
||||
ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
|
||||
else if (reg_classes_intersect_p (rclass, DM_REGS))
|
||||
ret = (rs6000_dense_math_register_move_cost (mode, VSX_REGS)
|
||||
+ rs6000_memory_move_cost (mode, VSX_REGS, false));
|
||||
else
|
||||
ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
|
||||
|
||||
@@ -24262,8 +24073,6 @@ rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
|
||||
if (TARGET_HARD_FLOAT)
|
||||
pressure_classes[n++] = FLOAT_REGS;
|
||||
}
|
||||
if (TARGET_DENSE_MATH)
|
||||
pressure_classes[n++] = DM_REGS;
|
||||
pressure_classes[n++] = CR_REGS;
|
||||
pressure_classes[n++] = SPECIAL_REGS;
|
||||
|
||||
@@ -24428,10 +24237,6 @@ rs6000_debugger_regno (unsigned int regno, unsigned int format)
|
||||
return 67;
|
||||
if (regno == 64)
|
||||
return 64;
|
||||
/* XXX: This is a guess. The GCC register number for FIRST_DM_REGNO is 111,
|
||||
but the frame pointer regnum uses that. */
|
||||
if (DM_REGNO_P (regno))
|
||||
return regno - FIRST_DM_REGNO + 112;
|
||||
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@@ -24653,7 +24458,6 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
|
||||
false, true },
|
||||
{ "cmpb", OPTION_MASK_CMPB, false, true },
|
||||
{ "crypto", OPTION_MASK_CRYPTO, false, true },
|
||||
{ "dense-math", OPTION_MASK_DENSE_MATH, false, true },
|
||||
{ "direct-move", 0, false, true },
|
||||
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
|
||||
{ "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
|
||||
@@ -27600,10 +27404,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
mode = GET_MODE (dst);
|
||||
nregs = hard_regno_nregs (reg, mode);
|
||||
|
||||
/* If we have a vector quad register for MMA or dense math register
|
||||
and this is a load or store, see if we can use vector paired
|
||||
load/stores. */
|
||||
if ((mode == XOmode || mode == TDOmode) && TARGET_MMA
|
||||
/* If we have a vector quad register for MMA, and this is a load or store,
|
||||
see if we can use vector paired load/stores. */
|
||||
if (mode == XOmode && TARGET_MMA
|
||||
&& (MEM_P (dst) || MEM_P (src)))
|
||||
{
|
||||
reg_mode = OOmode;
|
||||
@@ -27611,7 +27414,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
}
|
||||
/* If we have a vector pair/quad mode, split it into two/four separate
|
||||
vectors. */
|
||||
else if (mode == OOmode || mode == XOmode || mode == TDOmode)
|
||||
else if (mode == OOmode || mode == XOmode)
|
||||
reg_mode = V1TImode;
|
||||
else if (FP_REGNO_P (reg))
|
||||
reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
|
||||
@@ -27657,13 +27460,13 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
return;
|
||||
}
|
||||
|
||||
/* The __vector_pair, __vector_quad, and __dm1024 modes are multi-register
|
||||
modes, so if we have to load or store the registers, we have to be careful
|
||||
to properly swap them if we're in little endian mode below. This means
|
||||
the last register gets the first memory location. We also need to be
|
||||
careful of using the right register numbers if we are splitting XO to
|
||||
OO. */
|
||||
if (mode == OOmode || mode == XOmode || mode == TDOmode)
|
||||
/* The __vector_pair and __vector_quad modes are multi-register
|
||||
modes, so if we have to load or store the registers, we have to be
|
||||
careful to properly swap them if we're in little endian mode
|
||||
below. This means the last register gets the first memory
|
||||
location. We also need to be careful of using the right register
|
||||
numbers if we are splitting XO to OO. */
|
||||
if (mode == OOmode || mode == XOmode)
|
||||
{
|
||||
nregs = hard_regno_nregs (reg, mode);
|
||||
int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
|
||||
@@ -27672,9 +27475,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
unsigned offset = 0;
|
||||
unsigned size = GET_MODE_SIZE (reg_mode);
|
||||
|
||||
/* If we are reading an accumulator register, we have to deprime it
|
||||
before we can access it unless we have dense math registers. */
|
||||
if (TARGET_MMA && !TARGET_DENSE_MATH
|
||||
/* If we are reading an accumulator register, we have to
|
||||
deprime it before we can access it. */
|
||||
if (TARGET_MMA
|
||||
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
|
||||
emit_insn (gen_mma_xxmfacc (src, src));
|
||||
|
||||
@@ -27706,9 +27509,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
emit_insn (gen_rtx_SET (dst2, src2));
|
||||
}
|
||||
|
||||
/* If we are writing an accumulator register, we have to prime it
|
||||
after we've written it unless we have dense math registers. */
|
||||
if (TARGET_MMA && !TARGET_DENSE_MATH
|
||||
/* If we are writing an accumulator register, we have to
|
||||
prime it after we've written it. */
|
||||
if (TARGET_MMA
|
||||
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
|
||||
emit_insn (gen_mma_xxmtacc (dst, dst));
|
||||
|
||||
@@ -27722,9 +27525,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
|| XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
|
||||
gcc_assert (REG_P (dst));
|
||||
if (GET_MODE (src) == XOmode)
|
||||
gcc_assert ((TARGET_DENSE_MATH
|
||||
? VSX_REGNO_P (REGNO (dst))
|
||||
: FP_REGNO_P (REGNO (dst))));
|
||||
gcc_assert (FP_REGNO_P (REGNO (dst)));
|
||||
if (GET_MODE (src) == OOmode)
|
||||
gcc_assert (VSX_REGNO_P (REGNO (dst)));
|
||||
|
||||
@@ -27777,9 +27578,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
emit_insn (gen_rtx_SET (dst_i, op));
|
||||
}
|
||||
|
||||
/* We are writing an accumulator register, so we have to prime it
|
||||
after we've written it unless we have dense math registers. */
|
||||
if (GET_MODE (src) == XOmode && !TARGET_DENSE_MATH)
|
||||
/* We are writing an accumulator register, so we have to
|
||||
prime it after we've written it. */
|
||||
if (GET_MODE (src) == XOmode)
|
||||
emit_insn (gen_mma_xxmtacc (dst, dst));
|
||||
|
||||
return;
|
||||
@@ -27790,9 +27591,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
|
||||
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
|
||||
{
|
||||
/* If we are reading an accumulator register, we have to deprime it
|
||||
before we can access it unless we have dense math registers. */
|
||||
if (TARGET_MMA && !TARGET_DENSE_MATH
|
||||
/* If we are reading an accumulator register, we have to
|
||||
deprime it before we can access it. */
|
||||
if (TARGET_MMA
|
||||
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
|
||||
emit_insn (gen_mma_xxmfacc (src, src));
|
||||
|
||||
@@ -27800,7 +27601,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
overlap. */
|
||||
int i;
|
||||
/* XO/OO are opaque so cannot use subregs. */
|
||||
if (mode == OOmode || mode == XOmode || mode == TDOmode)
|
||||
if (mode == OOmode || mode == XOmode )
|
||||
{
|
||||
for (i = nregs - 1; i >= 0; i--)
|
||||
{
|
||||
@@ -27818,9 +27619,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
i * reg_mode_size)));
|
||||
}
|
||||
|
||||
/* If we are writing an accumulator register, we have to prime it after
|
||||
we've written it unless we have dense math registers. */
|
||||
if (TARGET_MMA && !TARGET_DENSE_MATH
|
||||
/* If we are writing an accumulator register, we have to
|
||||
prime it after we've written it. */
|
||||
if (TARGET_MMA
|
||||
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
|
||||
emit_insn (gen_mma_xxmtacc (dst, dst));
|
||||
}
|
||||
@@ -27955,9 +27756,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
|
||||
}
|
||||
|
||||
/* If we are reading an accumulator register, we have to deprime it
|
||||
before we can access it unless we have dense math registers. */
|
||||
if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (src)
|
||||
/* If we are reading an accumulator register, we have to
|
||||
deprime it before we can access it. */
|
||||
if (TARGET_MMA && REG_P (src)
|
||||
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
|
||||
emit_insn (gen_mma_xxmfacc (src, src));
|
||||
|
||||
@@ -27974,7 +27775,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
continue;
|
||||
|
||||
/* XO/OO are opaque so cannot use subregs. */
|
||||
if (mode == OOmode || mode == XOmode || mode == TDOmode)
|
||||
if (mode == OOmode || mode == XOmode )
|
||||
{
|
||||
rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
|
||||
rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
|
||||
@@ -27987,9 +27788,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
||||
j * reg_mode_size)));
|
||||
}
|
||||
|
||||
/* If we are writing an accumulator register, we have to prime it after
|
||||
we've written it unless we have dense math registers. */
|
||||
if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (dst)
|
||||
/* If we are writing an accumulator register, we have to
|
||||
prime it after we've written it. */
|
||||
if (TARGET_MMA && REG_P (dst)
|
||||
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
|
||||
emit_insn (gen_mma_xxmtacc (dst, dst));
|
||||
|
||||
@@ -29002,8 +28803,7 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
|
||||
|
||||
if (frommode != tomode)
|
||||
{
|
||||
/* Do not allow conversions to/from XOmode, OOmode, and TDOmode
|
||||
types. */
|
||||
/* Do not allow conversions to/from XOmode and OOmode types. */
|
||||
if (frommode == XOmode)
|
||||
return N_("invalid conversion from type %<__vector_quad%>");
|
||||
if (tomode == XOmode)
|
||||
@@ -29012,10 +28812,6 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
|
||||
return N_("invalid conversion from type %<__vector_pair%>");
|
||||
if (tomode == OOmode)
|
||||
return N_("invalid conversion to type %<__vector_pair%>");
|
||||
if (frommode == TDOmode)
|
||||
return N_("invalid conversion from type %<__dm1024%>");
|
||||
if (tomode == TDOmode)
|
||||
return N_("invalid conversion to type %<__dm1024%>");
|
||||
}
|
||||
|
||||
/* Conversion allowed. */
|
||||
|
||||
@@ -653,7 +653,6 @@ extern unsigned char rs6000_recip_bits[];
|
||||
#define UNITS_PER_FP_WORD 8
|
||||
#define UNITS_PER_ALTIVEC_WORD 16
|
||||
#define UNITS_PER_VSX_WORD 16
|
||||
#define UNITS_PER_DM_WORD 128
|
||||
|
||||
/* Type used for ptrdiff_t, as a string used in a declaration. */
|
||||
#define PTRDIFF_TYPE "int"
|
||||
@@ -767,7 +766,7 @@ enum data_align { align_abi, align_opt, align_both };
|
||||
Another pseudo (not included in DWARF_FRAME_REGISTERS) is soft frame
|
||||
pointer, which is eventually eliminated in favor of SP or FP. */
|
||||
|
||||
#define FIRST_PSEUDO_REGISTER 119
|
||||
#define FIRST_PSEUDO_REGISTER 111
|
||||
|
||||
/* Use standard DWARF numbering for DWARF debugging information. */
|
||||
#define DEBUGGER_REGNO(REGNO) rs6000_debugger_regno ((REGNO), 0)
|
||||
@@ -804,9 +803,7 @@ enum data_align { align_abi, align_opt, align_both };
|
||||
/* cr0..cr7 */ \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/* vrsave vscr sfp */ \
|
||||
1, 1, 1, \
|
||||
/* Dense math registers. */ \
|
||||
0, 0, 0, 0, 0, 0, 0, 0 \
|
||||
1, 1, 1 \
|
||||
}
|
||||
|
||||
/* Like `CALL_USED_REGISTERS' except this macro doesn't require that
|
||||
@@ -830,9 +827,7 @@ enum data_align { align_abi, align_opt, align_both };
|
||||
/* cr0..cr7 */ \
|
||||
1, 1, 0, 0, 0, 1, 1, 1, \
|
||||
/* vrsave vscr sfp */ \
|
||||
0, 0, 0, \
|
||||
/* Dense math registers. */ \
|
||||
0, 0, 0, 0, 0, 0, 0, 0 \
|
||||
0, 0, 0 \
|
||||
}
|
||||
|
||||
#define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1)
|
||||
@@ -869,7 +864,6 @@ enum data_align { align_abi, align_opt, align_both };
|
||||
v2 (not saved; incoming vector arg reg; return value)
|
||||
v19 - v14 (not saved or used for anything)
|
||||
v31 - v20 (saved; order given to save least number)
|
||||
dmr0 - dmr7 (not saved)
|
||||
vrsave, vscr (fixed)
|
||||
sfp (fixed)
|
||||
*/
|
||||
@@ -912,9 +906,6 @@ enum data_align { align_abi, align_opt, align_both };
|
||||
66, \
|
||||
83, 82, 81, 80, 79, 78, \
|
||||
95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, \
|
||||
/* Dense math registers. */ \
|
||||
111, 112, 113, 114, 115, 116, 117, 118, \
|
||||
/* Vrsave, vscr, sfp. */ \
|
||||
108, 109, \
|
||||
110 \
|
||||
}
|
||||
@@ -941,9 +932,6 @@ enum data_align { align_abi, align_opt, align_both };
|
||||
/* True if register is a VSX register. */
|
||||
#define VSX_REGNO_P(N) (FP_REGNO_P (N) || ALTIVEC_REGNO_P (N))
|
||||
|
||||
/* True if register is a Dense math register. */
|
||||
#define DM_REGNO_P(N) ((N) >= FIRST_DM_REGNO && (N) <= LAST_DM_REGNO)
|
||||
|
||||
/* Alternate name for any vector register supporting floating point, no matter
|
||||
which instruction set(s) are available. */
|
||||
#define VFLOAT_REGNO_P(N) \
|
||||
@@ -983,7 +971,7 @@ enum data_align { align_abi, align_opt, align_both };
|
||||
/* Modes that are not vectors, but require vector alignment. Treat these like
|
||||
vectors in terms of loads and stores. */
|
||||
#define VECTOR_ALIGNMENT_P(MODE) \
|
||||
(FLOAT128_VECTOR_P (MODE) || OPAQUE_MODE_P (MODE))
|
||||
(FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
|
||||
|
||||
#define ALTIVEC_VECTOR_MODE(MODE) \
|
||||
((MODE) == V16QImode \
|
||||
@@ -1081,7 +1069,6 @@ enum reg_class
|
||||
FLOAT_REGS,
|
||||
ALTIVEC_REGS,
|
||||
VSX_REGS,
|
||||
DM_REGS,
|
||||
VRSAVE_REGS,
|
||||
VSCR_REGS,
|
||||
GEN_OR_FLOAT_REGS,
|
||||
@@ -1111,7 +1098,6 @@ enum reg_class
|
||||
"FLOAT_REGS", \
|
||||
"ALTIVEC_REGS", \
|
||||
"VSX_REGS", \
|
||||
"DM_REGS", \
|
||||
"VRSAVE_REGS", \
|
||||
"VSCR_REGS", \
|
||||
"GEN_OR_FLOAT_REGS", \
|
||||
@@ -1146,8 +1132,6 @@ enum reg_class
|
||||
{ 0x00000000, 0x00000000, 0xffffffff, 0x00000000 }, \
|
||||
/* VSX_REGS. */ \
|
||||
{ 0x00000000, 0xffffffff, 0xffffffff, 0x00000000 }, \
|
||||
/* DM_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x007f8000 }, \
|
||||
/* VRSAVE_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00001000 }, \
|
||||
/* VSCR_REGS. */ \
|
||||
@@ -1175,7 +1159,7 @@ enum reg_class
|
||||
/* CA_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00000004 }, \
|
||||
/* ALL_REGS. */ \
|
||||
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x007fffff } \
|
||||
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x00007fff } \
|
||||
}
|
||||
|
||||
/* The same information, inverted:
|
||||
@@ -1199,7 +1183,6 @@ enum r6000_reg_class_enum {
|
||||
RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */
|
||||
RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */
|
||||
RS6000_CONSTRAINT_wA, /* BASE_REGS if 64-bit. */
|
||||
RS6000_CONSTRAINT_wD, /* Accumulator regs if MMA/Dense Math. */
|
||||
RS6000_CONSTRAINT_MAX
|
||||
};
|
||||
|
||||
@@ -2076,16 +2059,7 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */
|
||||
&rs6000_reg_names[108][0], /* vrsave */ \
|
||||
&rs6000_reg_names[109][0], /* vscr */ \
|
||||
\
|
||||
&rs6000_reg_names[110][0], /* sfp */ \
|
||||
\
|
||||
&rs6000_reg_names[111][0], /* dmr0 */ \
|
||||
&rs6000_reg_names[112][0], /* dmr1 */ \
|
||||
&rs6000_reg_names[113][0], /* dmr2 */ \
|
||||
&rs6000_reg_names[114][0], /* dmr3 */ \
|
||||
&rs6000_reg_names[115][0], /* dmr4 */ \
|
||||
&rs6000_reg_names[116][0], /* dmr5 */ \
|
||||
&rs6000_reg_names[117][0], /* dmr6 */ \
|
||||
&rs6000_reg_names[118][0], /* dmr7 */ \
|
||||
&rs6000_reg_names[110][0] /* sfp */ \
|
||||
}
|
||||
|
||||
/* Table of additional register names to use in user input. */
|
||||
@@ -2139,8 +2113,6 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */
|
||||
{"vs52", 84}, {"vs53", 85}, {"vs54", 86}, {"vs55", 87}, \
|
||||
{"vs56", 88}, {"vs57", 89}, {"vs58", 90}, {"vs59", 91}, \
|
||||
{"vs60", 92}, {"vs61", 93}, {"vs62", 94}, {"vs63", 95}, \
|
||||
{"dmr0", 111}, {"dmr1", 112}, {"dmr2", 113}, {"dmr3", 114}, \
|
||||
{"dmr4", 115}, {"dmr5", 116}, {"dmr6", 117}, {"dmr7", 118}, \
|
||||
}
|
||||
|
||||
/* This is how to output an element of a case-vector that is relative. */
|
||||
@@ -2274,7 +2246,6 @@ enum rs6000_builtin_type_index
|
||||
RS6000_BTI_const_str, /* pointer to const char * */
|
||||
RS6000_BTI_vector_pair, /* unsigned 256-bit types (vector pair). */
|
||||
RS6000_BTI_vector_quad, /* unsigned 512-bit types (vector quad). */
|
||||
RS6000_BTI_dm1024, /* unsigned 1,024-bit types (dmf). */
|
||||
RS6000_BTI_const_ptr_void, /* const pointer to void */
|
||||
RS6000_BTI_ptr_V16QI,
|
||||
RS6000_BTI_ptr_V1TI,
|
||||
@@ -2313,7 +2284,6 @@ enum rs6000_builtin_type_index
|
||||
RS6000_BTI_ptr_dfloat128,
|
||||
RS6000_BTI_ptr_vector_pair,
|
||||
RS6000_BTI_ptr_vector_quad,
|
||||
RS6000_BTI_ptr_dm1024,
|
||||
RS6000_BTI_ptr_long_long,
|
||||
RS6000_BTI_ptr_long_long_unsigned,
|
||||
RS6000_BTI_MAX
|
||||
@@ -2371,7 +2341,6 @@ enum rs6000_builtin_type_index
|
||||
#define const_str_type_node (rs6000_builtin_types[RS6000_BTI_const_str])
|
||||
#define vector_pair_type_node (rs6000_builtin_types[RS6000_BTI_vector_pair])
|
||||
#define vector_quad_type_node (rs6000_builtin_types[RS6000_BTI_vector_quad])
|
||||
#define dm1024_type_node (rs6000_builtin_types[RS6000_BTI_dm1024])
|
||||
#define pcvoid_type_node (rs6000_builtin_types[RS6000_BTI_const_ptr_void])
|
||||
#define ptr_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_ptr_V16QI])
|
||||
#define ptr_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_ptr_V1TI])
|
||||
@@ -2410,7 +2379,6 @@ enum rs6000_builtin_type_index
|
||||
#define ptr_dfloat128_type_node (rs6000_builtin_types[RS6000_BTI_ptr_dfloat128])
|
||||
#define ptr_vector_pair_type_node (rs6000_builtin_types[RS6000_BTI_ptr_vector_pair])
|
||||
#define ptr_vector_quad_type_node (rs6000_builtin_types[RS6000_BTI_ptr_vector_quad])
|
||||
#define ptr_dm1024_type_node (rs6000_builtin_types[RS6000_BTI_ptr_dm1024])
|
||||
#define ptr_long_long_integer_type_node (rs6000_builtin_types[RS6000_BTI_ptr_long_long])
|
||||
#define ptr_long_long_unsigned_type_node (rs6000_builtin_types[RS6000_BTI_ptr_long_long_unsigned])
|
||||
|
||||
|
||||
@@ -51,8 +51,6 @@
|
||||
(VRSAVE_REGNO 108)
|
||||
(VSCR_REGNO 109)
|
||||
(FRAME_POINTER_REGNUM 110)
|
||||
(FIRST_DM_REGNO 111)
|
||||
(LAST_DM_REGNO 118)
|
||||
])
|
||||
|
||||
;;
|
||||
|
||||
@@ -639,10 +639,6 @@ mieee128-constant
|
||||
Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
|
||||
Generate (do not generate) code that uses the LXVKQ instruction.
|
||||
|
||||
mdense-math
|
||||
Target Mask(DENSE_MATH) Var(rs6000_isa_flags)
|
||||
Generate (do not generate) instructions that use dense math registers.
|
||||
|
||||
; Documented parameters
|
||||
|
||||
-param=rs6000-vect-unroll-limit=
|
||||
|
||||
@@ -32674,13 +32674,6 @@ This option is enabled by default.
|
||||
Enable or disable warnings about deprecated @samp{vector long ...} Altivec
|
||||
type usage. This option is enabled by default.
|
||||
|
||||
@opindex mdense-math
|
||||
@opindex mno-dense-math
|
||||
@item -mdense-math
|
||||
@itemx -mno-dense-math
|
||||
Generate (do not generate) code that uses the dense math registers.
|
||||
This option is enabled by default.
|
||||
|
||||
@end table
|
||||
|
||||
@node RX Options
|
||||
|
||||
@@ -3415,11 +3415,6 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
|
||||
@item wA
|
||||
Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
|
||||
|
||||
@item wD
|
||||
Accumulator register if @option{-mma} is used; otherwise,
|
||||
@code{NO_REGS}. For @option{-mcpu=power10} the accumulator registers
|
||||
overlap with VSX vector registers 0..31.
|
||||
|
||||
@item wB
|
||||
Signed 5-bit constant integer that can be loaded into an Altivec register.
|
||||
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_dense_math_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=future -O2" } */
|
||||
|
||||
/* Test basic load/store for __dm1024 type. */
|
||||
|
||||
#ifndef CONSTRAINT
|
||||
#if defined(USE_D)
|
||||
#define CONSTRAINT "d"
|
||||
|
||||
#elif defined(USE_V)
|
||||
#define CONSTRAINT "v"
|
||||
|
||||
#elif defined(USE_WA)
|
||||
#define CONSTRAINT "wa"
|
||||
|
||||
#else
|
||||
#define CONSTRAINT "wD"
|
||||
#endif
|
||||
#endif
|
||||
const char constraint[] = CONSTRAINT;
|
||||
|
||||
void foo_mem_asm (__dm1024 *p, __dm1024 *q)
|
||||
{
|
||||
/* 2 LXVP instructions. */
|
||||
__dm1024 vq = *p;
|
||||
|
||||
/* 2 DMXXINSTDMR512 instructions to transfer VSX to dense math register. */
|
||||
__asm__ ("# foo (" CONSTRAINT ") %A0" : "+" CONSTRAINT (vq));
|
||||
/* 2 DMXXEXTFDMR512 instructions to transfer dense math register to VSX. */
|
||||
|
||||
/* 2 STXVP instructions. */
|
||||
*q = vq;
|
||||
}
|
||||
|
||||
void foo_mem_asm2 (__dm1024 *p, __dm1024 *q)
|
||||
{
|
||||
/* 2 LXVP instructions. */
|
||||
__dm1024 vq = *p;
|
||||
__dm1024 vq2;
|
||||
__dm1024 vq3;
|
||||
|
||||
/* 2 DMXXINSTDMR512 instructions to transfer VSX to dense math register. */
|
||||
__asm__ ("# foo1 (" CONSTRAINT ") %A0" : "+" CONSTRAINT (vq));
|
||||
/* 2 DMXXEXTFDMR512 instructions to transfer dense math register to VSX. */
|
||||
|
||||
vq2 = vq;
|
||||
__asm__ ("# foo2 (wa) %0" : "+wa" (vq2));
|
||||
|
||||
/* 2 STXVP instructions. */
|
||||
*q = vq2;
|
||||
}
|
||||
|
||||
void foo_mem (__dm1024 *p, __dm1024 *q)
|
||||
{
|
||||
/* 2 LXVP, 2 STXVP instructions, no dense math transfer. */
|
||||
*q = *p;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mdmxxinstdmr512\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxvp\M} 12 } } */
|
||||
@@ -1,67 +0,0 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_dense_math_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=future -O2" } */
|
||||
|
||||
/* Test basic dense math support for MMA. */
|
||||
|
||||
void
|
||||
move_simple (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
move_constraint_d (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
__asm__ (" # %x0 (d constraint)" : "+d" (c));
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
move_constraint_wD (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, dmxxinstdmr512, dmxxextfdmr512, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
__asm__ (" # %A0 (wD constraint)" : "+wD" (c));
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
clear_simple (__vector_quad *a)
|
||||
{
|
||||
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (a);
|
||||
}
|
||||
|
||||
void
|
||||
clear_constraint_d (__vector_quad *a)
|
||||
{
|
||||
__vector_quad z;
|
||||
|
||||
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (&z);
|
||||
__asm__ (" # %x0 (d constraint)" : "+d" (z));
|
||||
*a = z;
|
||||
}
|
||||
|
||||
void
|
||||
clear_constraint_wD (__vector_quad *a)
|
||||
{
|
||||
__vector_quad z;
|
||||
|
||||
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (&z);
|
||||
__asm__ (" # %A0 (d constraint)" : "+wD" (z));
|
||||
*a = z;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mdmsetdmrz\M} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mdmxxinstdmr512\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-not {\mxxmfacc\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mxxmtacc\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mxxsetaccz\M} } } */
|
||||
@@ -1,67 +0,0 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_dense_math_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -mno-dense-math -O2" } */
|
||||
|
||||
/* Test basic MMA support when dense math is disabled (-mno-dense-math). */
|
||||
|
||||
void
|
||||
move_simple (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
move_constraint_d (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
__asm__ (" # %x0 (d constraint)" : "+d" (c));
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
move_constraint_wD (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
__asm__ (" # %A0 (wD constraint)" : "+wD" (c));
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
clear_simple (__vector_quad *a)
|
||||
{
|
||||
/* xxsetaccz, xxmfacc, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (a);
|
||||
}
|
||||
|
||||
void
|
||||
clear_constraint_d (__vector_quad *a)
|
||||
{
|
||||
__vector_quad z;
|
||||
|
||||
/* xxsetaccz, xxmfacc, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (&z);
|
||||
__asm__ (" # %x0 (d constraint)" : "+d" (z));
|
||||
*a = z;
|
||||
}
|
||||
|
||||
void
|
||||
clear_constraint_wD (__vector_quad *a)
|
||||
{
|
||||
__vector_quad z;
|
||||
|
||||
/* xxsetaccz, xxmfacc, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (&z);
|
||||
__asm__ (" # %A0 (d constraint)" : "+wD" (z));
|
||||
*a = z;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not {\mdmsetdmrz\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mdmxxextfdmr512\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mdmxxinstdmr512\M} } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 6 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 3 } } */
|
||||
@@ -7989,25 +7989,6 @@ proc check_effective_target_power10_ok { } {
|
||||
}
|
||||
}
|
||||
|
||||
# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
|
||||
# the dense math operations.
|
||||
proc check_effective_target_powerpc_dense_math_ok { } {
|
||||
if { ([istarget powerpc*-*-*]) } {
|
||||
return [check_no_compiler_messages powerpc_dense_math_ok object {
|
||||
__vector_quad vq;
|
||||
int main (void) {
|
||||
/* Make sure we have dense math support. */
|
||||
__vector_quad dmr;
|
||||
__asm__ ("dmsetaccz %A0" : "=wD" (dmr));
|
||||
vq = dmr;
|
||||
return 0;
|
||||
}
|
||||
} "-mcpu=future"]
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
# Return 1 if this is a PowerPC target supporting -mfloat128 via either
|
||||
# software emulation on power7/power8 systems or hardware support on power9.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user