mirror of
https://gcc.gnu.org/git/gcc.git
synced 2026-02-21 19:35:28 -05:00
Make the MMA instructions support -mdense-math.
This patch completes support for the dense math registers with 512-bit types. The MMA insns have been modified to use the 'wD' constraint and the accumulator_operand predicate. The insn (mma_xxsetaccz) that clears accumulators has been changed to be a normal unspec when -mdense-math is in effect. If -mno-dense-math is in effect, the insn remains an unspec_volatile due to register constraints and the need to issue a de-prime operation. I added a comment in front of each insn to say which instructions are generated by the insns. I set -mcpu=future to turn on -mdense-math. I added 2 tests to the testsuite for -mdense-math support. A future patch will add support for 1,024-bit dense registers. The patches have been tested on both little and big endian systems. Can I check it into the master branch? This is version 4 of the patches. The previous patches were: * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707452.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707453.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707454.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707455.html * https://gcc.gnu.org/pipermail/gcc-patches/2026-February/707456.html gcc/ 2026-02-20 Michael Meissner <meissner@linux.ibm.com> * config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec. (mma_xxsetaccz): Convert to being a define_expand that can handle both the original MMA support without dense math registers, and MMA support with dense math registers. (mma_xxsetaccz_nodm): Rename original mma_xxsetaccz, and restrict this to when we do not have dense math registers. (mma_xxsetaccz_dm): New insn for clearing dense math registers. (mma_<acc>): Add support for dense registers. Document which instructions are generated by each insn. (mma_<vv>): Likewise. (mma_<avv>): Likewise. (mma_<pv>): Likewise. (mma_<apv>): Likewise. (mma_<vvi4i4i8>): Likewise. (mma_<avvi4i4i8>): Likewise. (mma_<vvi4i4i2>): Likewise. (mma_<avvi4i4i2>): Likewise. 
(mma_<vvi4i4>): Likewise. (mma_<avvi4i4>): Likewise. (mma_<pvi4i2>): Likewise. (mma_<apvi4i2>): Likewise. (mma_<vvi4i4i4>): Likewise. (mma_<avvi4i4i4>): Likewise. * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): Do not issue a xxmfacc instruction if we support dense math registers. * config/rs6000/rs6000-cpu.def (FUTURE_MASKS_SERVER): If -mcpu=future, turn on -mdense-math. (POWERPC_MASKS): Mark -mdense-math as being set by -mcpu=<xxx> options. gcc/testsuite/ 2026-02-20 Michael Meissner <meissner@linux.ibm.com> * gcc.target/powerpc/mma-dm-1.c: New test. * gcc.target/powerpc/mma-dm-2.c: Likewise. * lib/target-supports.exp (check_effective_target_powerpc_dense_math_ok): New powerpc target support.
This commit is contained in:
@@ -90,6 +90,7 @@
|
||||
UNSPEC_MMA_XVI8GER4SPP
|
||||
UNSPEC_MMA_XXMFACC
|
||||
UNSPEC_MMA_XXMTACC
|
||||
UNSPEC_MMA_DMSETDMRZ
|
||||
])
|
||||
|
||||
(define_c_enum "unspecv"
|
||||
@@ -487,31 +488,68 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; MMA instructions that do not use their accumulators as an input, still
|
||||
;; must not allow their vector operands to overlap the registers used by
|
||||
;; the accumulator. We enforce this by marking the output as early clobber.
|
||||
;; If dense math registers are not available, MMA instructions that do
|
||||
;; not use their accumulators that overlap with FPR registers as an
|
||||
;; input, still must not allow their vector operands to overlap the
|
||||
;; registers used by the accumulator. We enforce this by marking the
|
||||
;; output as early clobber. The prime and de-prime instructions are
|
||||
;; not needed on systems with dense math registers.
|
||||
|
||||
(define_insn "mma_<acc>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
|
||||
MMA_ACC))]
|
||||
"TARGET_MMA"
|
||||
"TARGET_MMA && !TARGET_DENSE_MATH"
|
||||
"<acc> %A0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; We can't have integer constants in XOmode so we wrap this in an
|
||||
;; UNSPEC_VOLATILE.
|
||||
;; UNSPEC_VOLATILE. If we have dense math registers, we can just use a normal
|
||||
;; UNSPEC instead of UNSPEC_VOLATILE.
|
||||
|
||||
(define_insn "mma_xxsetaccz"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=d")
|
||||
(define_expand "mma_xxsetaccz"
|
||||
[(set (match_operand:XO 0 "accumulator_operand")
|
||||
(unspec_volatile:XO [(const_int 0)]
|
||||
UNSPECV_MMA_XXSETACCZ))]
|
||||
"TARGET_MMA"
|
||||
{
|
||||
if (TARGET_DENSE_MATH)
|
||||
{
|
||||
emit_insn (gen_mma_xxsetaccz_dm (operands[0]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
;; Clear accumulator without dense math registers
|
||||
(define_insn "*mma_xxsetaccz_nodm"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=d")
|
||||
(unspec_volatile:XO [(const_int 0)]
|
||||
UNSPECV_MMA_XXSETACCZ))]
|
||||
"TARGET_MMA && !TARGET_DENSE_MATH"
|
||||
"xxsetaccz %A0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Clear accumulator when dense math registers are available.
|
||||
(define_insn "mma_xxsetaccz_dm"
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=wD")
|
||||
(unspec [(const_int 0)]
|
||||
UNSPEC_MMA_DMSETDMRZ))]
|
||||
"TARGET_DENSE_MATH"
|
||||
"dmsetdmrz %A0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
|
||||
;; MMA operations below. If dense math registers are available, these
|
||||
;; operations will use the 8 accumulators which are separate registers.
|
||||
;; If dense math registers are not available, these operations will use
|
||||
;; accumulators that are overlaid on top of the FPR registers.
|
||||
|
||||
;; Instructions:
|
||||
;; xvi4ger8 xvi8ger4 xvi16ger2 xvi16ger2s xvf16ger2
|
||||
;; xvbf16ger2 xvf32ger
|
||||
|
||||
(define_insn "mma_<vv>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
|
||||
MMA_VV))]
|
||||
@@ -519,9 +557,15 @@
|
||||
"<vv> %A0,%x1,%x2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Instructions:
|
||||
;; xvi4ger8pp xvi8ger4pp xvi8ger4spp xvi16ger2pp xvi16ger2spp
|
||||
;; xvf16ger2pp xvf16ger2pn xvf16ger2np xvf16ger2nn xvbf16ger2pp
|
||||
;; xvbf16ger2pn xvbf16ger2np xvbf16ger2nn xvf32gerpp xvf32gerpn
|
||||
;; xvf32gernp xvf32gernn
|
||||
|
||||
(define_insn "mma_<avv>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
|
||||
MMA_AVV))]
|
||||
@@ -529,8 +573,10 @@
|
||||
"<avv> %A0,%x2,%x3"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Instruction: xvf64ger
|
||||
|
||||
(define_insn "mma_<pv>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
|
||||
MMA_PV))]
|
||||
@@ -538,9 +584,11 @@
|
||||
"<pv> %A0,%x1,%x2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Instructions: xvf64gerpp xvf64gerpn xvf64gernp xvf64gernn
|
||||
|
||||
(define_insn "mma_<apv>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
|
||||
MMA_APV))]
|
||||
@@ -548,8 +596,10 @@
|
||||
"<apv> %A0,%x2,%x3"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
;; Instruction: pmxvi4ger8
|
||||
|
||||
(define_insn "mma_<vvi4i4i8>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -561,9 +611,11 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instruction: pmxvi4ger8pp
|
||||
|
||||
(define_insn "mma_<avvi4i4i8>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -575,8 +627,11 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions:
|
||||
;; pmxvi16ger2 pmxvi16ger2s pmxvf16ger2 pmxvbf16ger2
|
||||
|
||||
(define_insn "mma_<vvi4i4i2>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -588,9 +643,14 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions:
|
||||
;; pmxvi16ger2pp pmxvi16ger2spp pmxvf16ger2pp pmxvf16ger2pn
|
||||
;; pmxvf16ger2np pmxvf16ger2nn pmxvbf16ger2pp pmxvbf16ger2pn
|
||||
;; pmxvbf16ger2np pmxvbf16ger2nn
|
||||
|
||||
(define_insn "mma_<avvi4i4i2>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -602,8 +662,10 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instruction: pmxvf32ger
|
||||
|
||||
(define_insn "mma_<vvi4i4>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -614,9 +676,11 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions: pmxvf32gerpp pmxvf32gerpn pmxvf32gernp pmxvf32gernn
|
||||
|
||||
(define_insn "mma_<avvi4i4>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -627,8 +691,10 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instruction: pmxvf64ger
|
||||
|
||||
(define_insn "mma_<pvi4i2>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -639,9 +705,11 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions: pmxvf64gerpp pmxvf64gerpn pmxvf64gernp pmxvf64gernn
|
||||
|
||||
(define_insn "mma_<apvi4i2>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
@@ -652,8 +720,10 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instruction: pmxvi8ger4
|
||||
|
||||
(define_insn "mma_<vvi4i4i4>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
|
||||
@@ -665,9 +735,11 @@
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "prefixed" "yes")])
|
||||
|
||||
;; Instructions: pmxvi8ger4pp pmxvi8ger4spp
|
||||
|
||||
(define_insn "mma_<avvi4i4i4>"
|
||||
[(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
|
||||
(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
|
||||
[(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
|
||||
(unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
|
||||
|
||||
@@ -1125,8 +1125,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
|
||||
}
|
||||
|
||||
/* If we're disassembling an accumulator into a different type, we need
|
||||
to emit a xxmfacc instruction now, since we cannot do it later. */
|
||||
if (fncode == RS6000_BIF_DISASSEMBLE_ACC)
|
||||
to emit a xxmfacc instruction now, since we cannot do it later. If we
|
||||
have dense math registers, we don't need to do this. */
|
||||
if (fncode == RS6000_BIF_DISASSEMBLE_ACC && !TARGET_DENSE_MATH)
|
||||
{
|
||||
new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
|
||||
new_call = gimple_build_call (new_decl, 1, src);
|
||||
|
||||
@@ -91,6 +91,7 @@
|
||||
will be fixed in potential future machines. */
|
||||
#define FUTURE_MASKS_SERVER (POWER11_MASKS_SERVER \
|
||||
| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \
|
||||
| OPTION_MASK_DENSE_MATH \
|
||||
| OPTION_MASK_FUTURE)
|
||||
|
||||
/* Flags that need to be turned off if -mno-vsx. */
|
||||
@@ -124,6 +125,7 @@
|
||||
| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \
|
||||
| OPTION_MASK_CMPB \
|
||||
| OPTION_MASK_CRYPTO \
|
||||
| OPTION_MASK_DENSE_MATH \
|
||||
| OPTION_MASK_DFP \
|
||||
| OPTION_MASK_DLMZB \
|
||||
| OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
|
||||
|
||||
67
gcc/testsuite/gcc.target/powerpc/mma-dm-1.c
Normal file
67
gcc/testsuite/gcc.target/powerpc/mma-dm-1.c
Normal file
@@ -0,0 +1,67 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_dense_math_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=future -O2" } */
|
||||
|
||||
/* Test basic dense math support for MMA. */
|
||||
|
||||
void
|
||||
move_simple (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
move_constraint_d (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
__asm__ (" # %x0 (d constraint)" : "+d" (c));
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
move_constraint_wD (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, dmxxinstdmr512, dmxxextfdmr512, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
__asm__ (" # %A0 (wD constraint)" : "+wD" (c));
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
clear_simple (__vector_quad *a)
|
||||
{
|
||||
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (a);
|
||||
}
|
||||
|
||||
void
|
||||
clear_constraint_d (__vector_quad *a)
|
||||
{
|
||||
__vector_quad z;
|
||||
|
||||
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (&z);
|
||||
__asm__ (" # %x0 (d constraint)" : "+d" (z));
|
||||
*a = z;
|
||||
}
|
||||
|
||||
void
|
||||
clear_constraint_wD (__vector_quad *a)
|
||||
{
|
||||
__vector_quad z;
|
||||
|
||||
/* dmsetdmrz, dmxxextfdmr512, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (&z);
|
||||
__asm__ (" # %A0 (d constraint)" : "+wD" (z));
|
||||
*a = z;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mdmsetdmrz\M} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mdmxxinstdmr512\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-not {\mxxmfacc\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mxxmtacc\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mxxsetaccz\M} } } */
|
||||
67
gcc/testsuite/gcc.target/powerpc/mma-dm-2.c
Normal file
67
gcc/testsuite/gcc.target/powerpc/mma-dm-2.c
Normal file
@@ -0,0 +1,67 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_dense_math_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -mno-dense-math -O2" } */
|
||||
|
||||
/* Test basic dense math support for MMA. */
|
||||
|
||||
void
|
||||
move_simple (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
move_constraint_d (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
__asm__ (" # %x0 (d constraint)" : "+d" (c));
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
move_constraint_wD (__vector_quad *a, __vector_quad *b)
|
||||
{
|
||||
/* 2 lxvp, xxmtacc, xxmfacc, 2 stxvp. */
|
||||
__vector_quad c = *a;
|
||||
__asm__ (" # %A0 (wD constraint)" : "+wD" (c));
|
||||
*b = c;
|
||||
}
|
||||
|
||||
void
|
||||
clear_simple (__vector_quad *a)
|
||||
{
|
||||
/* xxsetaccz, xxmfacc, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (a);
|
||||
}
|
||||
|
||||
void
|
||||
clear_constraint_d (__vector_quad *a)
|
||||
{
|
||||
__vector_quad z;
|
||||
|
||||
/* xxsetaccz, xxmfacc, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (&z);
|
||||
__asm__ (" # %x0 (d constraint)" : "+d" (z));
|
||||
*a = z;
|
||||
}
|
||||
|
||||
void
|
||||
clear_constraint_wD (__vector_quad *a)
|
||||
{
|
||||
__vector_quad z;
|
||||
|
||||
/* xxsetaccz, xxmfacc, 2 stxvp. */
|
||||
__builtin_mma_xxsetaccz (&z);
|
||||
__asm__ (" # %A0 (d constraint)" : "+wD" (z));
|
||||
*a = z;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not {\mdmsetdmrz\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mdmxxextfdmr512\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mdmxxinstdmr512\M} } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 6 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 3 } } */
|
||||
@@ -7989,6 +7989,25 @@ proc check_effective_target_power10_ok { } {
|
||||
}
|
||||
}
|
||||
|
||||
# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
|
||||
# the dense math operations.
|
||||
proc check_effective_target_powerpc_dense_math_ok { } {
|
||||
if { ([istarget powerpc*-*-*]) } {
|
||||
return [check_no_compiler_messages powerpc_dense_math_ok object {
|
||||
__vector_quad vq;
|
||||
int main (void) {
|
||||
/* Make sure we have dense math support. */
|
||||
__vector_quad dmr;
|
||||
__asm__ ("dmsetaccz %A0" : "=wD" (dmr));
|
||||
vq = dmr;
|
||||
return 0;
|
||||
}
|
||||
} "-mcpu=future"]
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
# Return 1 if this is a PowerPC target supporting -mfloat128 via either
|
||||
# software emulation on power7/power8 systems or hardware support on power9.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user