Revert changes

Michael Meissner
2026-02-20 16:21:05 -05:00
parent 4af564e032
commit 4192a68f65
18 changed files with 108 additions and 907 deletions

View File

@@ -107,9 +107,6 @@
(match_test "TARGET_P8_VECTOR")
(match_operand 0 "s5bit_cint_operand")))
(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]"
"Accumulator register.")
(define_constraint "wE"
"@internal Vector constant that can be loaded with the XXSPLTIB instruction."
(match_test "xxspltib_constant_nosplit (op, mode)"))

View File

@@ -90,12 +90,6 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
UNSPEC_DM_INSERT512_UPPER
UNSPEC_DM_INSERT512_LOWER
UNSPEC_DM_EXTRACT512
UNSPEC_DM_RELOAD_FROM_MEMORY
UNSPEC_DM_RELOAD_TO_MEMORY
])
(define_c_enum "unspecv"
@@ -319,7 +313,7 @@
(set_attr "length" "*,*,8")])
;; Vector quad support.
;; Vector quad support. XOmode can only live in FPRs.
(define_expand "movxo"
[(set (match_operand:XO 0 "nonimmediate_operand")
(match_operand:XO 1 "input_operand"))]
@@ -344,13 +338,10 @@
gcc_assert (false);
})
;; If we do not have dense math registers, XOmode can only live in FPR
;; registers (0..31).
(define_insn_and_split "*movxo_nodm"
(define_insn_and_split "*movxo"
[(set (match_operand:XO 0 "nonimmediate_operand" "=d,ZwO,d")
(match_operand:XO 1 "input_operand" "ZwO,d,d"))]
"TARGET_MMA && !TARGET_DENSE_MATH
"TARGET_MMA
&& (gpc_reg_operand (operands[0], XOmode)
|| gpc_reg_operand (operands[1], XOmode))"
"@
@@ -367,34 +358,6 @@
(set_attr "length" "*,*,16")
(set_attr "max_prefixed_insns" "2,2,*")])
;; If dense math registers are available, XOmode can live in either VSX
;; registers (0..63) or dense math registers.
(define_insn_and_split "*movxo_dm"
[(set (match_operand:XO 0 "nonimmediate_operand" "=wa,ZwO,wa,wD,wD,wa")
(match_operand:XO 1 "input_operand" "ZwO,wa, wa,wa,wD,wD"))]
"TARGET_DENSE_MATH
&& (gpc_reg_operand (operands[0], XOmode)
|| gpc_reg_operand (operands[1], XOmode))"
"@
#
#
#
dmxxinstdmr512 %0,%1,%Y1,0
dmmr %0,%1
dmxxextfdmr512 %0,%Y0,%1,0"
"&& reload_completed
&& !dense_math_operand (operands[0], XOmode)
&& !dense_math_operand (operands[1], XOmode)"
[(const_int 0)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
DONE;
}
[(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma")
(set_attr "length" "*,*,16,*,*,*")
(set_attr "max_prefixed_insns" "2,2,*,*,*,*")])
(define_expand "vsx_assemble_pair"
[(match_operand:OO 0 "vsx_register_operand")
(match_operand:V16QI 1 "mma_assemble_input_operand")
@@ -493,68 +456,31 @@
DONE;
})
;; If dense math registers are not available, MMA instructions that do
;; not use their accumulators that overlap with FPR registers as an
;; input, still must not allow their vector operands to overlap the
;; registers used by the accumulator. We enforce this by marking the
;; output as early clobber. The prime and de-prime instructions are
;; not needed on systems with dense math registers.
;; MMA instructions that do not use their accumulators as an input, still
;; must not allow their vector operands to overlap the registers used by
;; the accumulator. We enforce this by marking the output as early clobber.
(define_insn "mma_<acc>"
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
MMA_ACC))]
"TARGET_MMA && !TARGET_DENSE_MATH"
"TARGET_MMA"
"<acc> %A0"
[(set_attr "type" "mma")])
;; We can't have integer constants in XOmode so we wrap this in an
;; UNSPEC_VOLATILE. If we have dense math registers, we can just use a normal
;; UNSPEC instead of UNSPEC_VOLATILE.
;; UNSPEC_VOLATILE.
(define_expand "mma_xxsetaccz"
[(set (match_operand:XO 0 "accumulator_operand")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
"TARGET_MMA"
{
if (TARGET_DENSE_MATH)
{
emit_insn (gen_mma_xxsetaccz_dm (operands[0]));
DONE;
}
})
;; Clear accumulator without dense math registers
(define_insn "*mma_xxsetaccz_nodm"
(define_insn "mma_xxsetaccz"
[(set (match_operand:XO 0 "fpr_reg_operand" "=d")
(unspec_volatile:XO [(const_int 0)]
UNSPECV_MMA_XXSETACCZ))]
"TARGET_MMA && !TARGET_DENSE_MATH"
"TARGET_MMA"
"xxsetaccz %A0"
[(set_attr "type" "mma")])
;; Clear accumulator when dense math registers are available.
(define_insn "mma_xxsetaccz_dm"
[(set (match_operand:XO 0 "accumulator_operand" "=wD")
(unspec [(const_int 0)]
UNSPEC_MMA_DMSETDMRZ))]
"TARGET_DENSE_MATH"
"dmsetdmrz %A0"
[(set_attr "type" "mma")])
;; MMA operations below. If dense math registers are available, these
;; operations will use the 8 accumulators, which are separate registers.
;; If dense math registers are not available, these operations will use
;; accumulators that are overlaid on top of the FPR registers.
;; Instructions:
;; xvi4ger8 xvi8ger4 xvi16ger2 xvi16ger2s xvf16ger2
;; xvbf16ger2 xvf32ger
(define_insn "mma_<vv>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -562,15 +488,9 @@
"<vv> %A0,%x1,%x2"
[(set_attr "type" "mma")])
;; Instructions:
;; xvi4ger8pp xvi8ger4pp xvi8ger4spp xvi16ger2pp xvi16ger2spp
;; xvf16ger2pp xvf16ger2pn xvf16ger2np xvf16ger2nn xvbf16ger2pp
;; xvbf16ger2pn xvbf16ger2np xvbf16ger2nn xvf32gerpp xvf32gerpn
;; xvf32gernp xvf32gernn
(define_insn "mma_<avv>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -578,10 +498,8 @@
"<avv> %A0,%x2,%x3"
[(set_attr "type" "mma")])
;; Instruction: xvf64ger
(define_insn "mma_<pv>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -589,11 +507,9 @@
"<pv> %A0,%x1,%x2"
[(set_attr "type" "mma")])
;; Instructions: xvf64gerpp xvf64gerpn xvf64gernp xvf64gernn
(define_insn "mma_<apv>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -601,10 +517,8 @@
"<apv> %A0,%x2,%x3"
[(set_attr "type" "mma")])
;; Instruction: pmxvi4ger8
(define_insn "mma_<vvi4i4i8>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -616,11 +530,9 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instruction: pmxvi4ger8pp
(define_insn "mma_<avvi4i4i8>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -632,11 +544,8 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instructions:
;; pmxvi16ger2 pmxvi16ger2s pmxvf16ger2 pmxvbf16ger2
(define_insn "mma_<vvi4i4i2>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -648,14 +557,9 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instructions:
;; pmxvi16ger2pp pmxvi16ger2spp pmxvf16ger2pp pmxvf16ger2pn
;; pmxvf16ger2np pmxvf16ger2nn pmxvbf16ger2pp pmxvbf16ger2pn
;; pmxvbf16ger2np pmxvbf16ger2nn
(define_insn "mma_<avvi4i4i2>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -667,10 +571,8 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instruction: pmxvf32ger
(define_insn "mma_<vvi4i4>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -681,11 +583,9 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instructions: pmxvf32gerpp pmxvf32gerpn pmxvf32gernp pmxvf32gernn
(define_insn "mma_<avvi4i4>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -696,10 +596,8 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instruction: pmxvf64ger
(define_insn "mma_<pvi4i2>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -710,11 +608,9 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instructions: pmxvf64gerpp pmxvf64gerpn pmxvf64gernp pmxvf64gernn
(define_insn "mma_<apvi4i2>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -725,10 +621,8 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instruction: pmxvi8ger4
(define_insn "mma_<vvi4i4i4>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -740,11 +634,9 @@
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; Instructions: pmxvi8ger4pp pmxvi8ger4spp
(define_insn "mma_<avvi4i4i4>"
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -755,153 +647,3 @@
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
;; TDOmode (__dmf keyword for 1,024 bit registers).
(define_expand "movtdo"
[(set (match_operand:TDO 0 "nonimmediate_operand")
(match_operand:TDO 1 "input_operand"))]
"TARGET_DENSE_MATH"
{
rs6000_emit_move (operands[0], operands[1], TDOmode);
DONE;
})
(define_insn_and_split "*movtdo"
[(set (match_operand:TDO 0 "nonimmediate_operand" "=wa,m,wa,wD,wD,wa")
(match_operand:TDO 1 "input_operand" "m,wa,wa,wa,wD,wD"))]
"TARGET_DENSE_MATH
&& (gpc_reg_operand (operands[0], TDOmode)
|| gpc_reg_operand (operands[1], TDOmode))"
"@
#
#
#
#
dmmr %0,%1
#"
"&& reload_completed
&& (!dense_math_operand (operands[0], TDOmode)
|| !dense_math_operand (operands[1], TDOmode))"
[(const_int 0)]
{
rtx op0 = operands[0];
rtx op1 = operands[1];
if (REG_P (op0) && REG_P (op1))
{
int regno0 = REGNO (op0);
int regno1 = REGNO (op1);
if (DM_REGNO_P (regno0) && VSX_REGNO_P (regno1))
{
rtx op1_upper = gen_rtx_REG (XOmode, regno1);
rtx op1_lower = gen_rtx_REG (XOmode, regno1 + 4);
emit_insn (gen_movtdo_insert512_upper (op0, op1_upper));
emit_insn (gen_movtdo_insert512_lower (op0, op0, op1_lower));
DONE;
}
else if (VSX_REGNO_P (regno0) && DM_REGNO_P (regno1))
{
rtx op0_upper = gen_rtx_REG (XOmode, regno0);
rtx op0_lower = gen_rtx_REG (XOmode, regno0 + 4);
emit_insn (gen_movtdo_extract512 (op0_upper, op1, const0_rtx));
emit_insn (gen_movtdo_extract512 (op0_lower, op1, const1_rtx));
DONE;
}
else
gcc_assert (VSX_REGNO_P (regno0) && VSX_REGNO_P (regno1));
}
rs6000_split_multireg_move (operands[0], operands[1]);
DONE;
}
[(set_attr "type" "vecload,vecstore,vecmove,vecmove,vecmove,vecmove")
(set_attr "length" "*,*,32,8,*,8")
(set_attr "max_prefixed_insns" "4,4,*,*,*,*")])
;; Move from VSX registers to dense math registers via two insert 512 bit
;; instructions.
(define_insn "movtdo_insert512_upper"
[(set (match_operand:TDO 0 "dense_math_operand" "=wD")
(unspec:TDO [(match_operand:XO 1 "vsx_register_operand" "wa")]
UNSPEC_DM_INSERT512_UPPER))]
"TARGET_DENSE_MATH"
"dmxxinstdmr512 %0,%1,%Y1,0"
[(set_attr "type" "mma")])
(define_insn "movtdo_insert512_lower"
[(set (match_operand:TDO 0 "dense_math_operand" "=wD")
(unspec:TDO [(match_operand:TDO 1 "dense_math_operand" "0")
(match_operand:XO 2 "vsx_register_operand" "wa")]
UNSPEC_DM_INSERT512_LOWER))]
"TARGET_DENSE_MATH"
"dmxxinstdmr512 %0,%2,%Y2,1"
[(set_attr "type" "mma")])
;; Move from dense math registers to VSX registers via two extract 512 bit
;; instructions.
(define_insn "movtdo_extract512"
[(set (match_operand:XO 0 "vsx_register_operand" "=wa")
(unspec:XO [(match_operand:TDO 1 "dense_math_operand" "wD")
(match_operand 2 "const_0_to_1_operand" "n")]
UNSPEC_DM_EXTRACT512))]
"TARGET_DENSE_MATH"
"dmxxextfdmr512 %0,%Y0,%1,%2"
[(set_attr "type" "mma")])
;; Reload dense math registers from memory.
(define_insn_and_split "reload_tdo_from_memory"
[(set (match_operand:TDO 0 "dense_math_operand" "=wD")
(unspec:TDO [(match_operand:TDO 1 "memory_operand" "m")]
UNSPEC_DM_RELOAD_FROM_MEMORY))
(clobber (match_operand:XO 2 "vsx_register_operand" "=wa"))]
"TARGET_DENSE_MATH"
"#"
"&& reload_completed"
[(const_int 0)]
{
rtx dest = operands[0];
rtx src = operands[1];
rtx tmp = operands[2];
rtx mem_upper = adjust_address (src, XOmode, BYTES_BIG_ENDIAN ? 0 : 64);
rtx mem_lower = adjust_address (src, XOmode, BYTES_BIG_ENDIAN ? 64 : 0);
emit_move_insn (tmp, mem_upper);
emit_insn (gen_movtdo_insert512_upper (dest, tmp));
emit_move_insn (tmp, mem_lower);
emit_insn (gen_movtdo_insert512_lower (dest, dest, tmp));
DONE;
}
[(set_attr "length" "16")
(set_attr "max_prefixed_insns" "2")
(set_attr "type" "vecload")])
;; Reload dense math registers to memory
(define_insn_and_split "reload_tdo_to_memory"
[(set (match_operand:TDO 0 "memory_operand" "=m")
(unspec:TDO [(match_operand:TDO 1 "dense_math_operand" "wD")]
UNSPEC_DM_RELOAD_TO_MEMORY))
(clobber (match_operand:XO 2 "vsx_register_operand" "=wa"))]
"TARGET_DENSE_MATH"
"#"
"&& reload_completed"
[(const_int 0)]
{
rtx dest = operands[0];
rtx src = operands[1];
rtx tmp = operands[2];
rtx mem_upper = adjust_address (dest, XOmode, BYTES_BIG_ENDIAN ? 0 : 64);
rtx mem_lower = adjust_address (dest, XOmode, BYTES_BIG_ENDIAN ? 64 : 0);
emit_insn (gen_movtdo_extract512 (tmp, src, const0_rtx));
emit_move_insn (mem_upper, tmp);
emit_insn (gen_movtdo_extract512 (tmp, src, const1_rtx));
emit_move_insn (mem_lower, tmp);
DONE;
}
[(set_attr "length" "16")
(set_attr "max_prefixed_insns" "2")])

View File

@@ -186,44 +186,6 @@
return VLOGICAL_REGNO_P (REGNO (op));
})
;; Return 1 if op is a dense math register
(define_predicate "dense_math_operand"
(match_operand 0 "register_operand")
{
if (!REG_P (op))
return 0;
if (!HARD_REGISTER_P (op))
return 1;
return DM_REGNO_P (REGNO (op));
})
;; Return 1 if op is an accumulator.
;;
;; On power10 and power11 systems, the accumulators overlap with the
;; FPRs and the register must be divisible by 4.
;;
;; On systems with dense math registers, the accumulators are separate
;; registers and do not overlap with the FPR registers.
(define_predicate "accumulator_operand"
(match_operand 0 "register_operand")
{
if (SUBREG_P (op))
op = SUBREG_REG (op);
if (!REG_P (op))
return 0;
if (!HARD_REGISTER_P (op))
return 1;
int r = REGNO (op);
return (TARGET_DENSE_MATH
? DM_REGNO_P (r)
: FP_REGNO_P (r) && (r & 3) == 0);
})
;; Return 1 if op is the carry register.
(define_predicate "ca_operand"
(match_operand 0 "register_operand")

View File

@@ -495,8 +495,6 @@ const char *rs6000_type_string (tree type_node)
return "__vector_pair";
else if (type_node == vector_quad_type_node)
return "__vector_quad";
else if (type_node == dm1024_type_node)
return "__dm1024";
return "unknown";
}
@@ -783,21 +781,6 @@ rs6000_init_builtins (void)
t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST);
ptr_vector_quad_type_node = build_pointer_type (t);
/* For TDOmode (1,024 bit dense math accumulators), don't use an alignment of
1,024, use 512. TDOmode loads and stores are always broken up into 2
vector pair loads or stores. In addition, we don't have support for
aligning the stack to 1,024 bits. */
dm1024_type_node = make_node (OPAQUE_TYPE);
SET_TYPE_MODE (dm1024_type_node, TDOmode);
TYPE_SIZE (dm1024_type_node) = bitsize_int (GET_MODE_BITSIZE (TDOmode));
TYPE_PRECISION (dm1024_type_node) = GET_MODE_BITSIZE (TDOmode);
TYPE_SIZE_UNIT (dm1024_type_node) = size_int (GET_MODE_SIZE (TDOmode));
SET_TYPE_ALIGN (dm1024_type_node, 512);
TYPE_USER_ALIGN (dm1024_type_node) = 0;
lang_hooks.types.register_builtin_type (dm1024_type_node, "__dm1024");
t = build_qualified_type (dm1024_type_node, TYPE_QUAL_CONST);
ptr_dm1024_type_node = build_pointer_type (t);
tdecl = add_builtin_type ("__bool char", bool_char_type_node);
TYPE_NAME (bool_char_type_node) = tdecl;
@@ -1142,9 +1125,8 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
}
/* If we're disassembling an accumulator into a different type, we need
to emit a xxmfacc instruction now, since we cannot do it later. If we
have dense math registers, we don't need to do this. */
if (fncode == RS6000_BIF_DISASSEMBLE_ACC && !TARGET_DENSE_MATH)
to emit a xxmfacc instruction now, since we cannot do it later. */
if (fncode == RS6000_BIF_DISASSEMBLE_ACC)
{
new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
new_call = gimple_build_call (new_decl, 1, src);
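The fold above fires for code like the following sketch; because the built-in disassembles an FPR-overlaid accumulator, the xxmfacc (deprime) has to be emitted at gimple-fold time:

typedef unsigned char vec_t __attribute__ ((vector_size (16)));

void
extract_rows (__vector_quad *acc, vec_t rows[4])
{
  /* RS6000_BIF_DISASSEMBLE_ACC: the deprime (xxmfacc) is emitted here
     before the four 128-bit rows are copied out.  */
  __builtin_mma_disassemble_acc (rows, acc);
}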

View File

@@ -590,10 +590,6 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags)
/* Tell the user if we support the MMA instructions. */
if ((flags & OPTION_MASK_MMA) != 0)
rs6000_define_or_undefine_macro (define_p, "__MMA__");
/* Tell the user if we support the dense math registers for use with MMA and
cryptography. */
if ((flags & OPTION_MASK_DENSE_MATH) != 0)
rs6000_define_or_undefine_macro (define_p, "__DENSE_MATH__");
/* Whether pc-relative code is being generated. */
if ((flags & OPTION_MASK_PCREL) != 0)
rs6000_define_or_undefine_macro (define_p, "__PCREL__");
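A minimal sketch of how these predefines are consumed in user code; after this revert only __MMA__ (and the other macros kept above) remain:

/* Compile-time feature checks.  */
#if defined (__MMA__)
#  define HAVE_MMA 1
#else
#  define HAVE_MMA 0
#endif

#if defined (__DENSE_MATH__)     /* no longer predefined after this revert */
#  define HAVE_DENSE_MATH 1
#else
#  define HAVE_DENSE_MATH 0
#endif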

View File

@@ -437,15 +437,14 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
if (cfun
&& !cfun->machine->mma_return_type_error
&& TREE_TYPE (cfun->decl) == fntype
&& OPAQUE_MODE_P (TYPE_MODE (type)))
&& (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
{
/* Record we have now handled function CFUN, so the next time we
are called, we do not re-report the same error. */
cfun->machine->mma_return_type_error = true;
if (TYPE_CANONICAL (type) != NULL_TREE)
type = TYPE_CANONICAL (type);
error ("invalid use of %s type %qs as a function return value",
(TYPE_MODE (type) == TDOmode) ? "dense math" : "MMA",
error ("invalid use of MMA type %qs as a function return value",
IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
}
@@ -1633,12 +1632,11 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
int n_elts;
/* We do not allow MMA types being used as function arguments. */
if (OPAQUE_MODE_P (mode))
if (mode == OOmode || mode == XOmode)
{
if (TYPE_CANONICAL (type) != NULL_TREE)
type = TYPE_CANONICAL (type);
error ("invalid use of %s operand of type %qs as a function parameter",
(mode == TDOmode) ? "dense math" : "MMA",
error ("invalid use of MMA operand of type %qs as a function parameter",
IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
return NULL_RTX;
}
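For context, the two diagnostics above reject code like this sketch; passing the opaque types by pointer, as the testsuite does, is the supported idiom:

__vector_quad global_acc;

/* error: invalid use of MMA type '__vector_quad' as a function return value */
__vector_quad
bad_return (void)
{
  return global_acc;
}

/* error: invalid use of MMA operand of type '__vector_quad' as a function
   parameter */
void
bad_param (__vector_quad vq)
{
  global_acc = vq;
}

/* OK: pass by pointer instead.  */
void
good_param (__vector_quad *vq)
{
  global_acc = *vq;
}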

View File

@@ -91,7 +91,6 @@
will be fixed in potential future machines. */
#define FUTURE_MASKS_SERVER (POWER11_MASKS_SERVER \
| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \
| OPTION_MASK_DENSE_MATH \
| OPTION_MASK_FUTURE)
/* Flags that need to be turned off if -mno-vsx. */
@@ -125,7 +124,6 @@
| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \
| OPTION_MASK_CMPB \
| OPTION_MASK_CRYPTO \
| OPTION_MASK_DENSE_MATH \
| OPTION_MASK_DFP \
| OPTION_MASK_DLMZB \
| OPTION_MASK_EFFICIENT_UNALIGNED_VSX \

View File

@@ -79,7 +79,3 @@ PARTIAL_INT_MODE (TI, 128, PTI);
/* Modes used by __vector_pair and __vector_quad. */
OPAQUE_MODE (OO, 32);
OPAQUE_MODE (XO, 64);
/* Mode used by __dmf. */
OPAQUE_MODE (TDO, 128);

View File

@@ -292,8 +292,7 @@ enum rs6000_reg_type {
ALTIVEC_REG_TYPE,
FPR_REG_TYPE,
SPR_REG_TYPE,
CR_REG_TYPE,
DM_REG_TYPE
CR_REG_TYPE
};
/* Map register class to register type. */
@@ -307,24 +306,22 @@ static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
/* Register classes we care about in secondary reload or go if legitimate
address. We only need to worry about GPR, FPR, Altivec, and dense math
registers here, along an ANY field that is the OR of the 4 register
classes. */
address. We only need to worry about GPR, FPR, and Altivec registers here,
along an ANY field that is the OR of the 3 register classes. */
enum rs6000_reload_reg_type {
RELOAD_REG_GPR, /* General purpose registers. */
RELOAD_REG_FPR, /* Traditional floating point regs. */
RELOAD_REG_VMX, /* Altivec (VMX) registers. */
RELOAD_REG_DMR, /* Dense math registers. */
RELOAD_REG_ANY, /* OR of GPR/FPR/VMX/DMR masks. */
RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
N_RELOAD_REG
};
/* For setting up register classes, loop through the 4 register classes mapping
/* For setting up register classes, loop through the 3 register classes mapping
into real registers, and skip the ANY class, which is just an OR of the
bits. */
#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS RELOAD_REG_DMR
#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
/* Map reload register type to a register in the register class. */
struct reload_reg_map_type {
@@ -336,7 +333,6 @@ static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
{ "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
{ "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
{ "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
{ "Dmr", FIRST_DM_REGNO }, /* RELOAD_REG_DMR. */
{ "Any", -1 }, /* RELOAD_REG_ANY. */
};
@@ -1230,8 +1226,6 @@ char rs6000_reg_names[][8] =
"0", "1", "2", "3", "4", "5", "6", "7",
/* vrsave vscr sfp */
"vrsave", "vscr", "sfp",
/* dense math registers. */
"0", "1", "2", "3", "4", "5", "6", "7",
};
#ifdef TARGET_REGNAMES
@@ -1258,8 +1252,6 @@ static const char alt_reg_names[][8] =
"%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
/* vrsave vscr sfp */
"vrsave", "vscr", "sfp",
/* dense math registers. */
"%dmr0", "%dmr1", "%dmr2", "%dmr3", "%dmr4", "%dmr5", "%dmr6", "%dmr7",
};
#endif
@@ -1843,17 +1835,13 @@ rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
128-bit floating point that can go in vector registers, which has VSX
memory addressing. */
if (FP_REGNO_P (regno))
reg_size = (VECTOR_MEM_VSX_P (mode)
|| VECTOR_ALIGNMENT_P (mode)
reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
? UNITS_PER_VSX_WORD
: UNITS_PER_FP_WORD);
else if (ALTIVEC_REGNO_P (regno))
reg_size = UNITS_PER_ALTIVEC_WORD;
else if (DM_REGNO_P (regno))
reg_size = UNITS_PER_DM_WORD;
else
reg_size = UNITS_PER_WORD;
@@ -1875,47 +1863,9 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
if (mode == OOmode)
return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
/* On ISA 3.1 (power10), MMA accumulator modes need FPR registers divisible
by 4.
If dense math registers are enabled, we can allow all VSX registers plus
the dense math registers. VSX registers are used to load and store the
registers as the accumulator registers do not have load and store
instructions. Because we just use the VSX registers for load/store
operations, we just need to make sure load vector pair and store vector
pair instructions can be used. */
/* MMA accumulator modes need FPR registers divisible by 4. */
if (mode == XOmode)
{
if (!TARGET_DENSE_MATH)
return (FP_REGNO_P (regno) && (regno & 3) == 0);
else if (DM_REGNO_P (regno))
return 1;
else
return (VSX_REGNO_P (regno)
&& VSX_REGNO_P (last_regno)
&& (regno & 1) == 0);
}
if (mode == TDOmode)
{
if (!TARGET_DENSE_MATH)
return 0;
if (DM_REGNO_P (regno))
return 1;
else
return (VSX_REGNO_P (regno)
&& VSX_REGNO_P (last_regno)
&& (regno & 1) == 0);
}
/* No other types other than XOmode or TDOmode can go in dense math
registers. */
if (DM_REGNO_P (regno))
return 0;
return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
/* PTImode can only go in GPRs. Quad word memory operations require even/odd
register combinations, and use PTImode where we need to deal with quad
@@ -2021,11 +1971,9 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
GPR registers, and TImode can go in any GPR as well as VSX registers (PR
57744).
Similarly, don't allow OOmode (vector pair), XOmode (vector quad), or
TDOmode (dense math register) to pair with anything else. Vector pairs are
restricted to even/odd VSX registers. Without dense math, vector quads are
limited to FPR registers divisible by 4. With dense math, vector quads are
limited to even VSX registers or dense math registers.
Similarly, don't allow OOmode (vector pair, restricted to even VSX
registers) or XOmode (vector quad, restricted to FPR registers divisible
by 4) to tie with other modes.
Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
128-bit floating point on VSX systems ties with other vectors. */
@@ -2034,8 +1982,7 @@ static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
|| mode1 == TDOmode || mode2 == PTImode || mode2 == OOmode
|| mode2 == XOmode || mode2 == TDOmode)
|| mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
return mode1 == mode2;
if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2326,7 +2273,6 @@ rs6000_debug_reg_global (void)
V4DFmode,
OOmode,
XOmode,
TDOmode,
CCmode,
CCUNSmode,
CCEQmode,
@@ -2362,7 +2308,6 @@ rs6000_debug_reg_global (void)
rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
LAST_ALTIVEC_REGNO,
"vs");
rs6000_debug_reg_print (FIRST_DM_REGNO, LAST_DM_REGNO, "dense_math");
rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
@@ -2383,7 +2328,6 @@ rs6000_debug_reg_global (void)
"wr reg_class = %s\n"
"wx reg_class = %s\n"
"wA reg_class = %s\n"
"wD reg_class = %s\n"
"\n",
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
@@ -2391,8 +2335,7 @@ rs6000_debug_reg_global (void)
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
nl = "\n";
for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2689,21 +2632,6 @@ rs6000_setup_reg_addr_masks (void)
addr_mask = 0;
reg = reload_reg_map[rc].reg;
/* Special case dense math registers. */
if (rc == RELOAD_REG_DMR)
{
if (TARGET_DENSE_MATH && (m2 == XOmode || m2 == TDOmode))
{
addr_mask = RELOAD_REG_VALID;
reg_addr[m].addr_mask[rc] = addr_mask;
any_addr_mask |= addr_mask;
}
else
reg_addr[m].addr_mask[rc] = 0;
continue;
}
/* Can mode values go in the GPR/FPR/Altivec registers? */
if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
{
@@ -2799,10 +2727,10 @@ rs6000_setup_reg_addr_masks (void)
/* Vector pairs can do both indexed and offset loads if the
instructions are enabled, otherwise they can only do offset loads
since it will be broken into two vector moves. Vector quads and
dense math types can only do offset loads. */
since it will be broken into two vector moves. Vector quads can
only do offset loads. */
else if ((addr_mask != 0) && TARGET_MMA
&& (m2 == OOmode || m2 == XOmode || m2 == TDOmode))
&& (m2 == OOmode || m2 == XOmode))
{
addr_mask |= RELOAD_REG_OFFSET;
if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
@@ -2854,9 +2782,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
rs6000_regno_regclass[r] = CR_REGS;
for (r = FIRST_DM_REGNO; r <= LAST_DM_REGNO; ++r)
rs6000_regno_regclass[r] = DM_REGS;
rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
rs6000_regno_regclass[CA_REGNO] = NO_REGS;
@@ -2881,7 +2806,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
reg_class_to_reg_type[(int)DM_REGS] = DM_REG_TYPE;
if (TARGET_VSX)
{
@@ -3030,14 +2954,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_vector_align[XOmode] = 512;
}
/* Add support for 1,024 bit dense math registers. */
if (TARGET_DENSE_MATH)
{
rs6000_vector_unit[TDOmode] = VECTOR_NONE;
rs6000_vector_mem[TDOmode] = VECTOR_VSX;
rs6000_vector_align[TDOmode] = 512;
}
/* Register class constraints for the constraints that depend on compile
switches. When the VSX code was added, different constraints were added
based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
@@ -3076,12 +2992,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
if (TARGET_DIRECT_MOVE_128)
rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
/* Support for the accumulator registers, either FPR registers (aka original
mma) or dense math registers. */
if (TARGET_MMA)
rs6000_constraints[RS6000_CONSTRAINT_wD]
= TARGET_DENSE_MATH ? DM_REGS : FLOAT_REGS;
/* Set up the reload helper and direct move functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
{
@@ -3250,12 +3160,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
}
}
if (TARGET_DENSE_MATH)
{
reg_addr[TDOmode].reload_load = CODE_FOR_reload_tdo_from_memory;
reg_addr[TDOmode].reload_store = CODE_FOR_reload_tdo_to_memory;
}
/* Precalculate HARD_REGNO_NREGS. */
for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -4501,15 +4405,6 @@ rs6000_option_override_internal (bool global_init_p)
if (!TARGET_PCREL && TARGET_PCREL_OPT)
rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
/* Turn off dense math register support on non-future systems. */
if (TARGET_DENSE_MATH && !TARGET_FUTURE)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_DENSE_MATH) != 0)
error ("%qs requires %qs", "-mdense-math", "-mcpu=future");
rs6000_isa_flags &= ~OPTION_MASK_DENSE_MATH;
}
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
@@ -8769,15 +8664,12 @@ reg_offset_addressing_ok_p (machine_mode mode)
return mode_supports_dq_form (mode);
break;
/* The vector pair/quad types and the dense math types support offset
addressing if the underlying vectors support offset addressing. */
/* The vector pair/quad types support offset addressing if the
underlying vectors support offset addressing. */
case E_OOmode:
case E_XOmode:
return TARGET_MMA;
case E_TDOmode:
return TARGET_DENSE_MATH;
case E_SDmode:
/* If we can do direct load/stores of SDmode, restrict it to reg+reg
addressing for the LFIWZX and STFIWX instructions. */
@@ -11331,12 +11223,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
(mode == OOmode) ? "__vector_pair" : "__vector_quad");
break;
case E_TDOmode:
if (CONST_INT_P (operands[1]))
error ("%qs is an opaque type, and you cannot set it to constants",
"__dm1024");
break;
case E_SImode:
case E_DImode:
/* Use default pattern for address of ELF small data */
@@ -12465,11 +12351,6 @@ rs6000_secondary_reload_memory (rtx addr,
addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
& ~RELOAD_REG_AND_M16);
/* Dense math registers use VSX registers for memory operations, and need to
generate some extra instructions. */
else if (rclass == DM_REGS)
return 2;
/* If the register allocator hasn't made up its mind yet on the register
class to use, settle on defaults to use. */
else if (rclass == NO_REGS)
@@ -12798,13 +12679,6 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
|| (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
return true;
/* We can transfer between VSX registers and dense math registers without
needing extra registers. */
if (TARGET_DENSE_MATH && (mode == XOmode || mode == TDOmode)
&& ((to_type == DM_REG_TYPE && from_type == VSX_REG_TYPE)
|| (to_type == VSX_REG_TYPE && from_type == DM_REG_TYPE)))
return true;
return false;
}
@@ -13499,10 +13373,6 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
machine_mode mode = GET_MODE (x);
bool is_constant = CONSTANT_P (x);
/* Dense math registers can't be loaded or stored. */
if (rclass == DM_REGS)
return NO_REGS;
/* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
reload class for it. */
if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
@@ -13599,10 +13469,7 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
return VSX_REGS;
if (mode == XOmode)
return TARGET_DENSE_MATH ? VSX_REGS : FLOAT_REGS;
if (mode == TDOmode)
return VSX_REGS;
return FLOAT_REGS;
if (GET_MODE_CLASS (mode) == MODE_INT)
return GENERAL_REGS;
@@ -13727,11 +13594,6 @@ rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
else
regno = -1;
/* Dense math registers don't have loads or stores. We have to go through
the VSX registers to load XOmode (vector quad). */
if (TARGET_DENSE_MATH && rclass == DM_REGS)
return VSX_REGS;
/* If we have VSX register moves, prefer moving scalar values between
Altivec registers and GPR by going via an FPR (and then via memory)
instead of reloading the secondary memory address for Altivec moves. */
@@ -14263,14 +14125,8 @@ print_operand (FILE *file, rtx x, int code)
output_operand. */
case 'A':
/* Write the MMA accumulator number associated with VSX register X. On
dense math systems, only allow dense math accumulators, not
accumulators overlapping with the FPR registers. */
if (!REG_P (x))
output_operand_lossage ("invalid %%A value");
else if (TARGET_DENSE_MATH && DM_REGNO_P (REGNO (x)))
fprintf (file, "%d", REGNO (x) - FIRST_DM_REGNO);
else if (!FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
/* Write the MMA accumulator number associated with VSX register X. */
if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
output_operand_lossage ("invalid %%A value");
else
fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
@@ -20778,8 +20634,6 @@ rs6000_mangle_type (const_tree type)
return "u13__vector_pair";
if (type == vector_quad_type_node)
return "u13__vector_quad";
if (type == dm1024_type_node)
return "u8__dm1024";
/* For all other types, use the default mangling. */
return NULL;
@@ -22892,35 +22746,6 @@ rs6000_debug_address_cost (rtx x, machine_mode mode,
}
/* Subroutine to determine the move cost of dense math registers. If we are
   moving to/from VSX registers, the cost is either 1 move (for
   512-bit accumulators) or 2 moves (for 1,024-bit dense math registers). If we are
moving to anything else like GPR registers, make the cost very high. */
static int
rs6000_dense_math_register_move_cost (machine_mode mode, reg_class_t rclass)
{
const int reg_move_base = 2;
HARD_REG_SET vsx_set = (reg_class_contents[rclass]
& reg_class_contents[VSX_REGS]);
if (TARGET_DENSE_MATH && !hard_reg_set_empty_p (vsx_set))
{
/* __vector_quad (i.e. XOmode) is transferred in 1 instruction. */
if (mode == XOmode)
return reg_move_base;
/* __dm1024 (i.e. TDOmode) is transferred in 2 instructions. */
else if (mode == TDOmode)
return reg_move_base * 2;
else
return reg_move_base * 2 * hard_regno_nregs (FIRST_DM_REGNO, mode);
}
return 1000 * 2 * hard_regno_nregs (FIRST_DM_REGNO, mode);
}
/* A C expression returning the cost of moving data from a register of class
CLASS1 to one of CLASS2. */
@@ -22934,28 +22759,17 @@ rs6000_register_move_cost (machine_mode mode,
if (TARGET_DEBUG_COST)
dbg_cost_ctrl++;
HARD_REG_SET to_vsx, from_vsx;
to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
/* Special case dense math registers, that can only move to/from VSX registers. */
if (from == DM_REGS && to == DM_REGS)
ret = 2 * hard_regno_nregs (FIRST_DM_REGNO, mode);
else if (from == DM_REGS)
ret = rs6000_dense_math_register_move_cost (mode, to);
else if (to == DM_REGS)
ret = rs6000_dense_math_register_move_cost (mode, from);
/* If we have VSX, we can easily move between FPR or Altivec registers,
otherwise we can only easily move within classes.
Do this first so we give best-case answers for union classes
containing both gprs and vsx regs. */
else if (!hard_reg_set_empty_p (to_vsx)
&& !hard_reg_set_empty_p (from_vsx)
&& (TARGET_VSX
|| hard_reg_set_intersect_p (to_vsx, from_vsx)))
HARD_REG_SET to_vsx, from_vsx;
to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
if (!hard_reg_set_empty_p (to_vsx)
&& !hard_reg_set_empty_p (from_vsx)
&& (TARGET_VSX
|| hard_reg_set_intersect_p (to_vsx, from_vsx)))
{
int reg = FIRST_FPR_REGNO;
if (TARGET_VSX
@@ -23051,9 +22865,6 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
ret = 4 * hard_regno_nregs (32, mode);
else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
else if (reg_classes_intersect_p (rclass, DM_REGS))
ret = (rs6000_dense_math_register_move_cost (mode, VSX_REGS)
+ rs6000_memory_move_cost (mode, VSX_REGS, false));
else
ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
@@ -24262,8 +24073,6 @@ rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
if (TARGET_HARD_FLOAT)
pressure_classes[n++] = FLOAT_REGS;
}
if (TARGET_DENSE_MATH)
pressure_classes[n++] = DM_REGS;
pressure_classes[n++] = CR_REGS;
pressure_classes[n++] = SPECIAL_REGS;
@@ -24428,10 +24237,6 @@ rs6000_debugger_regno (unsigned int regno, unsigned int format)
return 67;
if (regno == 64)
return 64;
/* XXX: This is a guess. The GCC register number for FIRST_DM_REGNO is 111,
but the frame pointer regnum uses that. */
if (DM_REGNO_P (regno))
return regno - FIRST_DM_REGNO + 112;
gcc_unreachable ();
}
@@ -24653,7 +24458,6 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
false, true },
{ "cmpb", OPTION_MASK_CMPB, false, true },
{ "crypto", OPTION_MASK_CRYPTO, false, true },
{ "dense-math", OPTION_MASK_DENSE_MATH, false, true },
{ "direct-move", 0, false, true },
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
{ "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
@@ -27600,10 +27404,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
mode = GET_MODE (dst);
nregs = hard_regno_nregs (reg, mode);
/* If we have a vector quad register for MMA or dense math register
and this is a load or store, see if we can use vector paired
load/stores. */
if ((mode == XOmode || mode == TDOmode) && TARGET_MMA
/* If we have a vector quad register for MMA, and this is a load or store,
see if we can use vector paired load/stores. */
if (mode == XOmode && TARGET_MMA
&& (MEM_P (dst) || MEM_P (src)))
{
reg_mode = OOmode;
@@ -27611,7 +27414,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
}
/* If we have a vector pair/quad mode, split it into two/four separate
vectors. */
else if (mode == OOmode || mode == XOmode || mode == TDOmode)
else if (mode == OOmode || mode == XOmode)
reg_mode = V1TImode;
else if (FP_REGNO_P (reg))
reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -27657,13 +27460,13 @@ rs6000_split_multireg_move (rtx dst, rtx src)
return;
}
/* The __vector_pair, __vector_quad, and __dm1024 modes are multi-register
modes, so if we have to load or store the registers, we have to be careful
to properly swap them if we're in little endian mode below. This means
the last register gets the first memory location. We also need to be
careful of using the right register numbers if we are splitting XO to
OO. */
if (mode == OOmode || mode == XOmode || mode == TDOmode)
/* The __vector_pair and __vector_quad modes are multi-register
modes, so if we have to load or store the registers, we have to be
careful to properly swap them if we're in little endian mode
below. This means the last register gets the first memory
location. We also need to be careful of using the right register
numbers if we are splitting XO to OO. */
if (mode == OOmode || mode == XOmode)
{
nregs = hard_regno_nregs (reg, mode);
int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
@@ -27672,9 +27475,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
unsigned offset = 0;
unsigned size = GET_MODE_SIZE (reg_mode);
/* If we are reading an accumulator register, we have to deprime it
before we can access it unless we have dense math registers. */
if (TARGET_MMA && !TARGET_DENSE_MATH
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
if (TARGET_MMA
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27706,9 +27509,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
emit_insn (gen_rtx_SET (dst2, src2));
}
/* If we are writing an accumulator register, we have to prime it
after we've written it unless we have dense math registers. */
if (TARGET_MMA && !TARGET_DENSE_MATH
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
if (TARGET_MMA
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
@@ -27722,9 +27525,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|| XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
gcc_assert (REG_P (dst));
if (GET_MODE (src) == XOmode)
gcc_assert ((TARGET_DENSE_MATH
? VSX_REGNO_P (REGNO (dst))
: FP_REGNO_P (REGNO (dst))));
gcc_assert (FP_REGNO_P (REGNO (dst)));
if (GET_MODE (src) == OOmode)
gcc_assert (VSX_REGNO_P (REGNO (dst)));
@@ -27777,9 +27578,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
emit_insn (gen_rtx_SET (dst_i, op));
}
/* We are writing an accumulator register, so we have to prime it
after we've written it unless we have dense math registers. */
if (GET_MODE (src) == XOmode && !TARGET_DENSE_MATH)
/* We are writing an accumulator register, so we have to
prime it after we've written it. */
if (GET_MODE (src) == XOmode)
emit_insn (gen_mma_xxmtacc (dst, dst));
return;
@@ -27790,9 +27591,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
{
/* If we are reading an accumulator register, we have to deprime it
before we can access it unless we have dense math registers. */
if (TARGET_MMA && !TARGET_DENSE_MATH
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
if (TARGET_MMA
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27800,7 +27601,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
overlap. */
int i;
/* XO/OO are opaque so cannot use subregs. */
if (mode == OOmode || mode == XOmode || mode == TDOmode)
if (mode == OOmode || mode == XOmode)
{
for (i = nregs - 1; i >= 0; i--)
{
@@ -27818,9 +27619,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
i * reg_mode_size)));
}
/* If we are writing an accumulator register, we have to prime it after
we've written it unless we have dense math registers. */
if (TARGET_MMA && !TARGET_DENSE_MATH
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
if (TARGET_MMA
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
}
@@ -27955,9 +27756,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
}
/* If we are reading an accumulator register, we have to deprime it
before we can access it unless we have dense math registers. */
if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (src)
/* If we are reading an accumulator register, we have to
deprime it before we can access it. */
if (TARGET_MMA && REG_P (src)
&& GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
emit_insn (gen_mma_xxmfacc (src, src));
@@ -27974,7 +27775,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
continue;
/* XO/OO are opaque so cannot use subregs. */
if (mode == OOmode || mode == XOmode || mode == TDOmode)
if (mode == OOmode || mode == XOmode)
{
rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
@@ -27987,9 +27788,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
j * reg_mode_size)));
}
/* If we are writing an accumulator register, we have to prime it after
we've written it unless we have dense math registers. */
if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (dst)
/* If we are writing an accumulator register, we have to
prime it after we've written it. */
if (TARGET_MMA && REG_P (dst)
&& GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
emit_insn (gen_mma_xxmtacc (dst, dst));
@@ -29002,8 +28803,7 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
if (frommode != tomode)
{
/* Do not allow conversions to/from XOmode, OOmode, and TDOmode
types. */
/* Do not allow conversions to/from XOmode and OOmode types. */
if (frommode == XOmode)
return N_("invalid conversion from type %<__vector_quad%>");
if (tomode == XOmode)
@@ -29012,10 +28812,6 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
return N_("invalid conversion from type %<__vector_pair%>");
if (tomode == OOmode)
return N_("invalid conversion to type %<__vector_pair%>");
if (frommode == TDOmode)
return N_("invalid conversion from type %<__dm1024%>");
if (tomode == TDOmode)
return N_("invalid conversion to type %<__dm1024%>");
}
/* Conversion allowed. */
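The hook above rejects direct conversions between the opaque MMA types, as in this sketch (only the first matching diagnostic is emitted); pointer casts are how the types are normally moved around:

void
bad_conversion (__vector_pair *pp, __vector_quad *qp)
{
  /* error: invalid conversion to type '__vector_quad' */
  *qp = (__vector_quad) *pp;
}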

View File

@@ -653,7 +653,6 @@ extern unsigned char rs6000_recip_bits[];
#define UNITS_PER_FP_WORD 8
#define UNITS_PER_ALTIVEC_WORD 16
#define UNITS_PER_VSX_WORD 16
#define UNITS_PER_DM_WORD 128
/* Type used for ptrdiff_t, as a string used in a declaration. */
#define PTRDIFF_TYPE "int"
@@ -767,7 +766,7 @@ enum data_align { align_abi, align_opt, align_both };
Another pseudo (not included in DWARF_FRAME_REGISTERS) is soft frame
pointer, which is eventually eliminated in favor of SP or FP. */
#define FIRST_PSEUDO_REGISTER 119
#define FIRST_PSEUDO_REGISTER 111
/* Use standard DWARF numbering for DWARF debugging information. */
#define DEBUGGER_REGNO(REGNO) rs6000_debugger_regno ((REGNO), 0)
@@ -804,9 +803,7 @@ enum data_align { align_abi, align_opt, align_both };
/* cr0..cr7 */ \
0, 0, 0, 0, 0, 0, 0, 0, \
/* vrsave vscr sfp */ \
1, 1, 1, \
/* Dense math registers. */ \
0, 0, 0, 0, 0, 0, 0, 0 \
1, 1, 1 \
}
/* Like `CALL_USED_REGISTERS' except this macro doesn't require that
@@ -830,9 +827,7 @@ enum data_align { align_abi, align_opt, align_both };
/* cr0..cr7 */ \
1, 1, 0, 0, 0, 1, 1, 1, \
/* vrsave vscr sfp */ \
0, 0, 0, \
/* Dense math registers. */ \
0, 0, 0, 0, 0, 0, 0, 0 \
0, 0, 0 \
}
#define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1)
@@ -869,7 +864,6 @@ enum data_align { align_abi, align_opt, align_both };
v2 (not saved; incoming vector arg reg; return value)
v19 - v14 (not saved or used for anything)
v31 - v20 (saved; order given to save least number)
dmr0 - dmr7 (not saved)
vrsave, vscr (fixed)
sfp (fixed)
*/
@@ -912,9 +906,6 @@ enum data_align { align_abi, align_opt, align_both };
66, \
83, 82, 81, 80, 79, 78, \
95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, \
/* Dense math registers. */ \
111, 112, 113, 114, 115, 116, 117, 118, \
/* Vrsave, vscr, sfp. */ \
108, 109, \
110 \
}
@@ -941,9 +932,6 @@ enum data_align { align_abi, align_opt, align_both };
/* True if register is a VSX register. */
#define VSX_REGNO_P(N) (FP_REGNO_P (N) || ALTIVEC_REGNO_P (N))
/* True if register is a Dense math register. */
#define DM_REGNO_P(N) ((N) >= FIRST_DM_REGNO && (N) <= LAST_DM_REGNO)
/* Alternate name for any vector register supporting floating point, no matter
which instruction set(s) are available. */
#define VFLOAT_REGNO_P(N) \
@@ -983,7 +971,7 @@ enum data_align { align_abi, align_opt, align_both };
/* Modes that are not vectors, but require vector alignment. Treat these like
vectors in terms of loads and stores. */
#define VECTOR_ALIGNMENT_P(MODE) \
(FLOAT128_VECTOR_P (MODE) || OPAQUE_MODE_P (MODE))
(FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
#define ALTIVEC_VECTOR_MODE(MODE) \
((MODE) == V16QImode \
@@ -1081,7 +1069,6 @@ enum reg_class
FLOAT_REGS,
ALTIVEC_REGS,
VSX_REGS,
DM_REGS,
VRSAVE_REGS,
VSCR_REGS,
GEN_OR_FLOAT_REGS,
@@ -1111,7 +1098,6 @@ enum reg_class
"FLOAT_REGS", \
"ALTIVEC_REGS", \
"VSX_REGS", \
"DM_REGS", \
"VRSAVE_REGS", \
"VSCR_REGS", \
"GEN_OR_FLOAT_REGS", \
@@ -1146,8 +1132,6 @@ enum reg_class
{ 0x00000000, 0x00000000, 0xffffffff, 0x00000000 }, \
/* VSX_REGS. */ \
{ 0x00000000, 0xffffffff, 0xffffffff, 0x00000000 }, \
/* DM_REGS. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x007f8000 }, \
/* VRSAVE_REGS. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00001000 }, \
/* VSCR_REGS. */ \
@@ -1175,7 +1159,7 @@ enum reg_class
/* CA_REGS. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00000004 }, \
/* ALL_REGS. */ \
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x007fffff } \
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x00007fff } \
}
/* The same information, inverted:
@@ -1199,7 +1183,6 @@ enum r6000_reg_class_enum {
RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */
RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */
RS6000_CONSTRAINT_wA, /* BASE_REGS if 64-bit. */
RS6000_CONSTRAINT_wD, /* Accumulator regs if MMA/Dense Math. */
RS6000_CONSTRAINT_MAX
};
@@ -2076,16 +2059,7 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */
&rs6000_reg_names[108][0], /* vrsave */ \
&rs6000_reg_names[109][0], /* vscr */ \
\
&rs6000_reg_names[110][0], /* sfp */ \
\
&rs6000_reg_names[111][0], /* dmr0 */ \
&rs6000_reg_names[112][0], /* dmr1 */ \
&rs6000_reg_names[113][0], /* dmr2 */ \
&rs6000_reg_names[114][0], /* dmr3 */ \
&rs6000_reg_names[115][0], /* dmr4 */ \
&rs6000_reg_names[116][0], /* dmr5 */ \
&rs6000_reg_names[117][0], /* dmr6 */ \
&rs6000_reg_names[118][0], /* dmr7 */ \
&rs6000_reg_names[110][0] /* sfp */ \
}
/* Table of additional register names to use in user input. */
@@ -2139,8 +2113,6 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */
{"vs52", 84}, {"vs53", 85}, {"vs54", 86}, {"vs55", 87}, \
{"vs56", 88}, {"vs57", 89}, {"vs58", 90}, {"vs59", 91}, \
{"vs60", 92}, {"vs61", 93}, {"vs62", 94}, {"vs63", 95}, \
{"dmr0", 111}, {"dmr1", 112}, {"dmr2", 113}, {"dmr3", 114}, \
{"dmr4", 115}, {"dmr5", 116}, {"dmr6", 117}, {"dmr7", 118}, \
}
/* This is how to output an element of a case-vector that is relative. */
@@ -2274,7 +2246,6 @@ enum rs6000_builtin_type_index
RS6000_BTI_const_str, /* pointer to const char * */
RS6000_BTI_vector_pair, /* unsigned 256-bit types (vector pair). */
RS6000_BTI_vector_quad, /* unsigned 512-bit types (vector quad). */
RS6000_BTI_dm1024, /* unsigned 1,024-bit types (dmf). */
RS6000_BTI_const_ptr_void, /* const pointer to void */
RS6000_BTI_ptr_V16QI,
RS6000_BTI_ptr_V1TI,
@@ -2313,7 +2284,6 @@ enum rs6000_builtin_type_index
RS6000_BTI_ptr_dfloat128,
RS6000_BTI_ptr_vector_pair,
RS6000_BTI_ptr_vector_quad,
RS6000_BTI_ptr_dm1024,
RS6000_BTI_ptr_long_long,
RS6000_BTI_ptr_long_long_unsigned,
RS6000_BTI_MAX
@@ -2371,7 +2341,6 @@ enum rs6000_builtin_type_index
#define const_str_type_node (rs6000_builtin_types[RS6000_BTI_const_str])
#define vector_pair_type_node (rs6000_builtin_types[RS6000_BTI_vector_pair])
#define vector_quad_type_node (rs6000_builtin_types[RS6000_BTI_vector_quad])
#define dm1024_type_node (rs6000_builtin_types[RS6000_BTI_dm1024])
#define pcvoid_type_node (rs6000_builtin_types[RS6000_BTI_const_ptr_void])
#define ptr_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_ptr_V16QI])
#define ptr_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_ptr_V1TI])
@@ -2410,7 +2379,6 @@ enum rs6000_builtin_type_index
#define ptr_dfloat128_type_node (rs6000_builtin_types[RS6000_BTI_ptr_dfloat128])
#define ptr_vector_pair_type_node (rs6000_builtin_types[RS6000_BTI_ptr_vector_pair])
#define ptr_vector_quad_type_node (rs6000_builtin_types[RS6000_BTI_ptr_vector_quad])
#define ptr_dm1024_type_node (rs6000_builtin_types[RS6000_BTI_ptr_dm1024])
#define ptr_long_long_integer_type_node (rs6000_builtin_types[RS6000_BTI_ptr_long_long])
#define ptr_long_long_unsigned_type_node (rs6000_builtin_types[RS6000_BTI_ptr_long_long_unsigned])

View File

@@ -51,8 +51,6 @@
(VRSAVE_REGNO 108)
(VSCR_REGNO 109)
(FRAME_POINTER_REGNUM 110)
(FIRST_DM_REGNO 111)
(LAST_DM_REGNO 118)
])
;;

View File

@@ -639,10 +639,6 @@ mieee128-constant
Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the LXVKQ instruction.
mdense-math
Target Mask(DENSE_MATH) Var(rs6000_isa_flags)
Generate (do not generate) instructions that use dense math registers.
; Documented parameters
-param=rs6000-vect-unroll-limit=

View File

@@ -32674,13 +32674,6 @@ This option is enabled by default.
Enable or disable warnings about deprecated @samp{vector long ...} Altivec
type usage. This option is enabled by default.
@opindex mdense-math
@opindex mno-dense-math
@item -mdense-math
@itemx -mno-dense-math
Generate (do not generate) code that uses the dense math registers.
This option is enabled by default.
@end table
@node RX Options

View File

@@ -3415,11 +3415,6 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.
@item wA
Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.
@item wD
Accumulator register if @option{-mma} is used; otherwise,
@code{NO_REGS}. For @option{-mcpu=power10} the accumulator registers
overlap with VSX vector registers 0..31.
@item wB
Signed 5-bit constant integer that can be loaded into an Altivec register.

View File

@@ -1,63 +0,0 @@
/* { dg-do compile } */
/* { dg-require-effective-target powerpc_dense_math_ok } */
/* { dg-options "-mdejagnu-cpu=future -O2" } */
/* Test basic load/store for __dm1024 type. */
#ifndef CONSTRAINT
#if defined(USE_D)
#define CONSTRAINT "d"
#elif defined(USE_V)
#define CONSTRAINT "v"
#elif defined(USE_WA)
#define CONSTRAINT "wa"
#else
#define CONSTRAINT "wD"
#endif
#endif
const char constraint[] = CONSTRAINT;
void foo_mem_asm (__dm1024 *p, __dm1024 *q)
{
/* 2 LXVP instructions. */
__dm1024 vq = *p;
/* 2 DMXXINSTDMR512 instructions to transfer VSX to dense math register. */
__asm__ ("# foo (" CONSTRAINT ") %A0" : "+" CONSTRAINT (vq));
/* 2 DMXXEXTFDMR512 instructions to transfer dense math register to VSX. */
/* 2 STXVP instructions. */
*q = vq;
}
void foo_mem_asm2 (__dm1024 *p, __dm1024 *q)
{
/* 2 LXVP instructions. */
__dm1024 vq = *p;
__dm1024 vq2;
__dm1024 vq3;
/* 2 DMXXINSTDMR512 instructions to transfer VSX to dense math register. */
__asm__ ("# foo1 (" CONSTRAINT ") %A0" : "+" CONSTRAINT (vq));
/* 2 DMXXEXTFDMR512 instructions to transfer dense math register to VSX. */
vq2 = vq;
__asm__ ("# foo2 (wa) %0" : "+wa" (vq2));
/* 2 STXVP instructions. */
*q = vq2;
}
void foo_mem (__dm1024 *p, __dm1024 *q)
{
/* 2 LXVP, 2 STXVP instructions, no dense math transfer. */
*q = *p;
}
/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 4 } } */
/* { dg-final { scan-assembler-times {\mdmxxinstdmr512\M} 4 } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 12 } } */

View File

@@ -1,67 +0,0 @@
/* { dg-do compile } */
/* { dg-require-effective-target powerpc_dense_math_ok } */
/* { dg-options "-mdejagnu-cpu=future -O2" } */
/* Test basic dense math support for MMA. */
void
move_simple (__vector_quad *a, __vector_quad *b)
{
/* 2 lxvp, 2 stxvp. */
__vector_quad c = *a;
*b = c;
}
void
move_constraint_d (__vector_quad *a, __vector_quad *b)
{
/* 2 lxvp, 2 stxvp. */
__vector_quad c = *a;
__asm__ (" # %x0 (d constraint)" : "+d" (c));
*b = c;
}
void
move_constraint_wD (__vector_quad *a, __vector_quad *b)
{
/* 2 lxvp, dmxxinstdmr512, dmxxextfdmr512, 2 stxvp. */
__vector_quad c = *a;
__asm__ (" # %A0 (wD constraint)" : "+wD" (c));
*b = c;
}
void
clear_simple (__vector_quad *a)
{
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
__builtin_mma_xxsetaccz (a);
}
void
clear_constraint_d (__vector_quad *a)
{
__vector_quad z;
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
__builtin_mma_xxsetaccz (&z);
__asm__ (" # %x0 (d constraint)" : "+d" (z));
*a = z;
}
void
clear_constraint_wD (__vector_quad *a)
{
__vector_quad z;
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
__builtin_mma_xxsetaccz (&z);
__asm__ (" # %A0 (d constraint)" : "+wD" (z));
*a = z;
}
/* { dg-final { scan-assembler-times {\mdmsetdmrz\M} 3 } } */
/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 4 } } */
/* { dg-final { scan-assembler-times {\mdmxxinstdmr512\M} 1 } } */
/* { dg-final { scan-assembler-not {\mxxmfacc\M} } } */
/* { dg-final { scan-assembler-not {\mxxmtacc\M} } } */
/* { dg-final { scan-assembler-not {\mxxsetaccz\M} } } */

View File

@@ -1,67 +0,0 @@
/* { dg-do compile } */
/* { dg-require-effective-target powerpc_dense_math_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -mno-dense-math -O2" } */
/* Test basic dense math support for MMA. */
void
move_simple (__vector_quad *a, __vector_quad *b)
{
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
__vector_quad c = *a;
*b = c;
}
void
move_constraint_d (__vector_quad *a, __vector_quad *b)
{
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
__vector_quad c = *a;
__asm__ (" # %x0 (d constraint)" : "+d" (c));
*b = c;
}
void
move_constraint_wD (__vector_quad *a, __vector_quad *b)
{
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
__vector_quad c = *a;
__asm__ (" # %A0 (wD constraint)" : "+wD" (c));
*b = c;
}
void
clear_simple (__vector_quad *a)
{
/* xxsetaccz, xxmfacc, 2 stxvp. */
__builtin_mma_xxsetaccz (a);
}
void
clear_constraint_d (__vector_quad *a)
{
__vector_quad z;
/* xxsetaccz, xxmfacc, 2 stxvp. */
__builtin_mma_xxsetaccz (&z);
__asm__ (" # %x0 (d constraint)" : "+d" (z));
*a = z;
}
void
clear_constraint_wD (__vector_quad *a)
{
__vector_quad z;
/* xxsetaccz, xxmfacc, 2 stxvp. */
__builtin_mma_xxsetaccz (&z);
__asm__ (" # %A0 (d constraint)" : "+wD" (z));
*a = z;
}
/* { dg-final { scan-assembler-not {\mdmsetdmrz\M} } } */
/* { dg-final { scan-assembler-not {\mdmxxextfdmr512\M} } } */
/* { dg-final { scan-assembler-not {\mdmxxinstdmr512\M} } } */
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 6 } } */
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 3 } } */
/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 3 } } */

View File

@@ -7989,25 +7989,6 @@ proc check_effective_target_power10_ok { } {
}
}
# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
# the dense math operations.
proc check_effective_target_powerpc_dense_math_ok { } {
if { ([istarget powerpc*-*-*]) } {
return [check_no_compiler_messages powerpc_dense_math_ok object {
__vector_quad vq;
int main (void) {
/* Make sure we have dense math support. */
__vector_quad dmr;
__asm__ ("dmsetaccz %A0" : "=wD" (dmr));
vq = dmr;
return 0;
}
} "-mcpu=future"]
} else {
return 0;
}
}
# Return 1 if this is a PowerPC target supporting -mfloat128 via either
# software emulation on power7/power8 systems or hardware support on power9.
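For reference, the deleted powerpc_dense_math_ok effective-target check above was consumed from C tests with a header like this sketch, mirroring the deleted testcases earlier in the commit:

/* { dg-do compile } */
/* { dg-require-effective-target powerpc_dense_math_ok } */
/* { dg-options "-mdejagnu-cpu=future -O2" } */

__vector_quad acc;

void
clear_acc (void)
{
  __builtin_mma_xxsetaccz (&acc);
}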