Mirror of https://gcc.gnu.org/git/gcc.git, synced 2026-02-22 12:00:03 -05:00

Compare commits: master...devel/exis (5 commits)

| SHA1 |
|---|
| d54a66c1d8 |
| 8e45a01d0f |
| ee10846d02 |
| 3103441079 |
| 1b6b028e27 |
@@ -696,6 +696,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
  VREINTERPRET_BUILTINS \
  VREINTERPRETQ_BUILTINS

/* Add fp8 here and in high */
#define AARCH64_SIMD_VGET_LOW_BUILTINS \
  VGET_LOW_BUILTIN(f16) \
  VGET_LOW_BUILTIN(f32) \
@@ -780,7 +781,7 @@ typedef struct
  AARCH64_SIMD_BUILTIN_##T##_##N##A,

#undef ENTRY
#define ENTRY(N, S, M, U) \
#define ENTRY(N, S, M0, M1, M2, M3, M4, USES_FPMR, U) \
  AARCH64_##N,

enum aarch64_builtins
@@ -1590,13 +1591,118 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma)

enum class aarch64_builtin_signatures
{
  unary,
  binary,
  ternary,
  quaternary,
};

namespace {

struct simd_type {
  machine_mode mode;
  aarch64_type_qualifiers qualifiers;
};

namespace simd_types {

constexpr simd_type f8 { V8QImode, qualifier_modal_float };
constexpr simd_type f8q { V16QImode, qualifier_modal_float };

constexpr simd_type s8_scalar_const_ptr
  { QImode, qualifier_const_pointer_map_mode };
constexpr simd_type s8_scalar { QImode, qualifier_none };
constexpr simd_type s8 { V8QImode, qualifier_none };
constexpr simd_type s8q { V16QImode, qualifier_none };
constexpr simd_type u8_scalar_const_ptr
  { QImode, qualifier_const_pointer_map_mode };
constexpr simd_type u8_scalar { QImode, qualifier_unsigned };
constexpr simd_type u8 { V8QImode, qualifier_unsigned };
constexpr simd_type u8q { V16QImode, qualifier_unsigned };

constexpr simd_type s16_scalar_const_ptr
  { HImode, qualifier_const_pointer_map_mode };
constexpr simd_type s16_scalar { HImode, qualifier_none };
constexpr simd_type s16 { V4HImode, qualifier_none };
constexpr simd_type u16_scalar_const_ptr
  { HImode, qualifier_const_pointer_map_mode };
constexpr simd_type u16_scalar { HImode, qualifier_unsigned };
constexpr simd_type u16 { V4HImode, qualifier_unsigned };
constexpr simd_type s16q { V8HImode, qualifier_none };
constexpr simd_type u16q { V8HImode, qualifier_unsigned };

constexpr simd_type s32_scalar_const_ptr
  { SImode, qualifier_const_pointer_map_mode };
constexpr simd_type s32_index { SImode, qualifier_lane_index };
constexpr simd_type s32_scalar { SImode, qualifier_none };
constexpr simd_type s32 { V2SImode, qualifier_none };
constexpr simd_type u32_scalar_const_ptr
  { SImode, qualifier_const_pointer_map_mode };
constexpr simd_type u32_scalar { SImode, qualifier_unsigned };
constexpr simd_type u32 { V2SImode, qualifier_unsigned };
constexpr simd_type s32q { V4SImode, qualifier_none };
constexpr simd_type u32q { V4SImode, qualifier_unsigned };

constexpr simd_type s64_scalar_const_ptr
  { DImode, qualifier_const_pointer_map_mode };
constexpr simd_type s64_scalar { DImode, qualifier_none };
constexpr simd_type s64 { V1DImode, qualifier_none };
constexpr simd_type u64_scalar_const_ptr
  { DImode, qualifier_const_pointer_map_mode };
constexpr simd_type u64_scalar { DImode, qualifier_unsigned };
constexpr simd_type u64 { V1DImode, qualifier_unsigned };
constexpr simd_type s64q { V2DImode, qualifier_none };
constexpr simd_type u64q { V2DImode, qualifier_unsigned };

constexpr simd_type p8_scalar_const_ptr
  { QImode, qualifier_const_pointer_map_mode };
constexpr simd_type p8_scalar { QImode, qualifier_poly };
constexpr simd_type p8 { V8QImode, qualifier_poly };
constexpr simd_type p8q { V16QImode, qualifier_poly };

constexpr simd_type p16_scalar_const_ptr
  { HImode, qualifier_const_pointer_map_mode };
constexpr simd_type p16_scalar { HImode, qualifier_poly };
constexpr simd_type p16 { V4HImode, qualifier_poly };
constexpr simd_type p16q { V8HImode, qualifier_poly };

constexpr simd_type p64_scalar_const_ptr
  { DImode, qualifier_const_pointer_map_mode };
constexpr simd_type p64_scalar { DImode, qualifier_poly };
constexpr simd_type p64 { V1DImode, qualifier_poly };
constexpr simd_type p64q { V2DImode, qualifier_poly };

constexpr simd_type f16_scalar_const_ptr
  { HFmode, qualifier_const_pointer_map_mode };
constexpr simd_type f16_scalar { HFmode, qualifier_none };
constexpr simd_type f16 { V4HFmode, qualifier_none };
constexpr simd_type f16q { V8HFmode, qualifier_none };

constexpr simd_type f32_scalar_const_ptr
  { SFmode, qualifier_const_pointer_map_mode };
constexpr simd_type f32_scalar { SFmode, qualifier_none };
constexpr simd_type f32 { V2SFmode, qualifier_none };
constexpr simd_type f32q { V4SFmode, qualifier_none };

constexpr simd_type f64_scalar_const_ptr
  { DFmode, qualifier_const_pointer_map_mode };
constexpr simd_type f64_scalar { DFmode, qualifier_none };
constexpr simd_type f64 { V1DFmode, qualifier_none };
constexpr simd_type f64q { V2DFmode, qualifier_none };

constexpr simd_type bf16 { V4BFmode, qualifier_none };
constexpr simd_type bf16q { V8BFmode, qualifier_none };

constexpr simd_type none { VOIDmode, qualifier_none };
}

}

#undef ENTRY
#define ENTRY(N, S, M, U) \
  {#N, aarch64_builtin_signatures::S, E_##M##mode, U, \
   aarch64_required_extensions::REQUIRED_EXTENSIONS},
#define ENTRY(N, S, T0, T1, T2, T3, T4, USES_FPMR, U) \
  {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \
   simd_types::T2, simd_types::T3, simd_types::T4, U, \
   USES_FPMR, aarch64_required_extensions::REQUIRED_EXTENSIONS},

/* Initialize pragma builtins. */

@@ -1604,8 +1710,9 @@ struct aarch64_pragma_builtins_data
{
  const char *name;
  aarch64_builtin_signatures signature;
  machine_mode mode;
  simd_type types[5];
  int unspec;
  bool uses_fpmr;
  aarch64_required_extensions required_extensions;
};

@@ -1613,17 +1720,42 @@ static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
#include "aarch64-simd-pragma-builtins.def"
};

static unsigned int
aarch64_get_number_of_args (const aarch64_pragma_builtins_data &builtin_data)
{
  if (builtin_data.signature == aarch64_builtin_signatures::unary)
    return 1;
  else if (builtin_data.signature == aarch64_builtin_signatures::binary)
    return 2;
  else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
    return 3;
  else if (builtin_data.signature == aarch64_builtin_signatures::quaternary)
    return 4;
  else
    // No other signature supported.
    gcc_unreachable ();
}

static tree
aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
{
  auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none);
  switch (builtin_data.signature)
  tree return_type
    = aarch64_simd_builtin_type (builtin_data.types[0].mode,
                                 builtin_data.types[0].qualifiers);

  vec<tree, va_gc> *arg_types = NULL;
  auto nargs = aarch64_get_number_of_args (builtin_data);
  for (unsigned int i = 1; i <= nargs; ++i)
    {
    case aarch64_builtin_signatures::binary:
      return build_function_type_list (type, type, type, NULL_TREE);
    default:
      gcc_unreachable ();
      auto type = aarch64_simd_builtin_type (builtin_data.types[i].mode,
                                             builtin_data.types[i].qualifiers);
      vec_safe_push (arg_types, type);
    }

  if (builtin_data.uses_fpmr == true)
    vec_safe_push (arg_types, uint64_type_node);

  return build_function_type_vec (return_type, arg_types);
}

static void
@@ -2431,6 +2563,88 @@ aarch64_general_required_extensions (unsigned int code)
  return ext::streaming_compatible (0);
}

namespace function_checker {

void
require_integer_constant (location_t location, tree arg)
{
  if (TREE_CODE (arg) != INTEGER_CST)
    {
      error_at (location, "Constant-type integer argument expected");
      return;
    }
}

void
require_immediate_range (location_t location, tree arg, HOST_WIDE_INT min,
                         HOST_WIDE_INT max)
{
  if (wi::to_widest (arg) < min || wi::to_widest (arg) > max)
    {
      error_at (location, "lane out of range %wd - %wd", min, max);
      return;
    }
}

/* Validates indexing into a vector using the index's size and the instruction,
   where instruction is represented by the unspec.
   This only works for intrinsics declared using pragmas in
   aarch64-simd-pragma-builtins.def. */

void
check_simd_lane_bounds (location_t location, const aarch64_pragma_builtins_data
                        *builtin_data, tree *args)
{
  if (builtin_data == NULL)
    // Don't check for functions that are not declared in
    // aarch64-simd-pragma-builtins.def.
    return;

  auto nargs = aarch64_get_number_of_args (*builtin_data);
  switch (builtin_data->unspec)
    {
    case UNSPEC_VDOT2:
    case UNSPEC_VDOT4:
      {
        if (builtin_data->types[nargs].qualifiers != qualifier_lane_index)
          break;

        auto index_arg = args[nargs - 1];
        require_integer_constant (location, index_arg);

        auto vector_to_index_mode = builtin_data->types[nargs - 1].mode;
        int vector_to_index_mode_size
          = GET_MODE_NUNITS (vector_to_index_mode).to_constant ();

        auto low = 0;
        int high;
        switch (builtin_data->unspec)
          {
          case UNSPEC_VDOT2:
            high = vector_to_index_mode_size / 2 - 1;
            break;
          case UNSPEC_VDOT4:
            high = vector_to_index_mode_size / 4 - 1;
            break;
          case UNSPEC_FMLALB:
          case UNSPEC_FMLALT:
          case UNSPEC_FMLALLBB:
          case UNSPEC_FMLALLBT:
          case UNSPEC_FMLALLTB:
          case UNSPEC_FMLALLTT:
            high = vector_to_index_mode_size - 1;
            break;
          default:
            gcc_unreachable ();
          }
        require_immediate_range (location, index_arg, low, high);
        break;
      }
    }
}

};

bool
aarch64_general_check_builtin_call (location_t location, vec<location_t>,
                                    unsigned int code, tree fndecl,
@@ -2442,6 +2656,9 @@ aarch64_general_check_builtin_call (location_t location, vec<location_t>,
  if (!aarch64_check_required_extensions (location, decl, required_extensions))
    return false;

  auto builtin_data = aarch64_get_pragma_builtin (code);
  function_checker::check_simd_lane_bounds (location, builtin_data, args);

  switch (code)
    {
    case AARCH64_RSR:
@@ -3336,17 +3553,184 @@ static rtx
|
||||
aarch64_expand_pragma_builtin (tree exp, rtx target,
|
||||
const aarch64_pragma_builtins_data *builtin_data)
|
||||
{
|
||||
expand_operand ops[3];
|
||||
auto mode = builtin_data->mode;
|
||||
auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
|
||||
auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
|
||||
create_output_operand (&ops[0], target, mode);
|
||||
create_input_operand (&ops[1], op1, mode);
|
||||
create_input_operand (&ops[2], op2, mode);
|
||||
auto nargs = aarch64_get_number_of_args (*builtin_data);
|
||||
|
||||
auto unspec = builtin_data->unspec;
|
||||
auto icode = code_for_aarch64 (unspec, mode);
|
||||
expand_insn (icode, 3, ops);
|
||||
expand_operand ops[5];
|
||||
create_output_operand (&ops[0], target, builtin_data->types[0].mode);
|
||||
for (unsigned int i = 1; i <= nargs; ++i)
|
||||
create_input_operand (&ops[i],
|
||||
expand_normal (CALL_EXPR_ARG (exp, i - 1)),
|
||||
builtin_data->types[i].mode);
|
||||
|
||||
if (builtin_data->uses_fpmr == true)
|
||||
{
|
||||
auto fpm_input = expand_normal (CALL_EXPR_ARG (exp, nargs));
|
||||
auto fpmr = gen_rtx_REG (DImode, FPM_REGNUM);
|
||||
emit_move_insn (fpmr, fpm_input);
|
||||
}
|
||||
|
||||
enum insn_code icode;
|
||||
switch (builtin_data->unspec)
|
||||
{
|
||||
case UNSPEC_FAMAX:
|
||||
case UNSPEC_FAMIN:
|
||||
icode = code_for_aarch64 (builtin_data->unspec,
|
||||
builtin_data->types[0].mode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
break;
|
||||
|
||||
case UNSPEC_VCVT1:
|
||||
case UNSPEC_VCVT1_HIGH:
|
||||
case UNSPEC_VCVT2:
|
||||
case UNSPEC_VCVT2_HIGH:
|
||||
icode = code_for_aarch64 (builtin_data->unspec,
|
||||
builtin_data->types[0].mode,
|
||||
builtin_data->types[1].mode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
break;
|
||||
|
||||
case UNSPEC_VCVT1_LOW:
|
||||
case UNSPEC_VCVT2_LOW:
|
||||
icode = code_for_aarch64_lower (builtin_data->unspec,
|
||||
builtin_data->types[0].mode,
|
||||
builtin_data->types[1].mode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
break;
|
||||
|
||||
case UNSPEC_FSCALE:
|
||||
icode = code_for_aarch64 (builtin_data->unspec,
|
||||
builtin_data->types[1].mode,
|
||||
builtin_data->types[2].mode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
break;
|
||||
|
||||
case UNSPEC_VCVT:
|
||||
icode = code_for_aarch64 (builtin_data->unspec,
|
||||
builtin_data->types[0].mode,
|
||||
builtin_data->types[1].mode,
|
||||
builtin_data->types[2].mode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
break;
|
||||
|
||||
case UNSPEC_VCVT_HIGH:
|
||||
icode = code_for_aarch64 (builtin_data->unspec,
|
||||
builtin_data->types[0].mode,
|
||||
builtin_data->types[1].mode,
|
||||
builtin_data->types[2].mode,
|
||||
builtin_data->types[3].mode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
break;
|
||||
|
||||
case UNSPEC_VDOT2:
|
||||
case UNSPEC_VDOT4:
|
||||
case UNSPEC_FMLALB:
|
||||
case UNSPEC_FMLALT:
|
||||
case UNSPEC_FMLALLBB:
|
||||
case UNSPEC_FMLALLBT:
|
||||
case UNSPEC_FMLALLTB:
|
||||
case UNSPEC_FMLALLTT:
|
||||
if (builtin_data->signature == aarch64_builtin_signatures::ternary)
|
||||
icode = code_for_aarch64 (builtin_data->unspec,
|
||||
builtin_data->types[0].mode,
|
||||
builtin_data->types[1].mode,
|
||||
builtin_data->types[2].mode,
|
||||
builtin_data->types[3].mode);
|
||||
else if
|
||||
(builtin_data->signature == aarch64_builtin_signatures::quaternary)
|
||||
icode = code_for_aarch64 (builtin_data->unspec,
|
||||
builtin_data->types[0].mode,
|
||||
builtin_data->types[1].mode,
|
||||
builtin_data->types[2].mode,
|
||||
builtin_data->types[3].mode,
|
||||
builtin_data->types[4].mode);
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
target = ops[0].value;
|
||||
break;
|
||||
|
||||
case UNSPEC_VCREATE:
|
||||
target = force_lowpart_subreg (builtin_data->types[0].mode,
|
||||
expand_normal (CALL_EXPR_ARG (exp, 0)),
|
||||
DImode);
|
||||
break;
|
||||
|
||||
case UNSPEC_VEC_COPY:
|
||||
{
|
||||
/* Need to do lane checks here. */
|
||||
/* Also need to set indexes correctly here. */
|
||||
expand_operand vget_ops[3];
|
||||
rtx vget_target;
|
||||
auto vget_output_mode = GET_MODE_INNER (builtin_data->types[0].mode);
|
||||
create_output_operand (&vget_ops[0], vget_target, vget_output_mode);
|
||||
vget_ops[1] = ops[3];
|
||||
vget_ops[2] = ops[4];
|
||||
auto vget_icode = code_for_aarch64_get_lane (builtin_data->types[0].mode);
|
||||
expand_insn (vget_icode, 3, vget_ops);
|
||||
vget_target = vget_ops[0].value;
|
||||
|
||||
expand_operand vset_ops[4];
|
||||
create_output_operand (&vset_ops[0],
|
||||
target,
|
||||
builtin_data->types[0].mode);
|
||||
vset_ops[1] = vget_ops[0];
|
||||
vset_ops[2] = ops[2];
|
||||
vset_ops[3] = ops[1];
|
||||
auto vset_icode = code_for_aarch64_simd_vec_set (builtin_data->types[0].mode);
|
||||
expand_insn (vset_icode, 4, vset_ops);
|
||||
|
||||
target = vset_ops[0].value;
|
||||
break;
|
||||
}
|
||||
|
||||
case UNSPEC_DUP:
|
||||
target = expand_vector_broadcast (builtin_data->types[0].mode,
|
||||
expand_normal (CALL_EXPR_ARG (exp, 0)));
|
||||
break;
|
||||
|
||||
case UNSPEC_DUPB:
|
||||
icode = code_for_aarch64_get_lane (builtin_data->types[1].mode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
target = ops[0].value;
|
||||
break;
|
||||
|
||||
case UNSPEC_LD1:
|
||||
{
|
||||
if (builtin_data->types[0].mode == V1DFmode)
|
||||
target = expand_vector_broadcast (builtin_data->types[0].mode,
|
||||
expand_normal (CALL_EXPR_ARG (exp, 0)));
|
||||
else
|
||||
{
|
||||
icode = code_for_aarch64_ld1 (builtin_data->types[0].mode);
|
||||
auto input
|
||||
= convert_memory_address (Pmode,
|
||||
expand_normal (CALL_EXPR_ARG (exp, 0)));
|
||||
create_input_operand (&ops[1], input, Pmode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
}
|
||||
target = ops[0].value;
|
||||
break;
|
||||
}
|
||||
|
||||
case UNSPEC_DUP_LANE:
|
||||
{
|
||||
/* We need to do lane checks here. */
|
||||
auto lane = INTVAL (expand_normal (CALL_EXPR_ARG (exp, 1)));
|
||||
auto vector_mode = builtin_data->types[1].mode;
|
||||
auto nunits = GET_MODE_NUNITS (vector_mode).to_constant ();
|
||||
create_input_operand(&ops[2],
|
||||
gen_int_mode ((ENDIAN_LANE_N (nunits, lane)),
|
||||
SImode),
|
||||
SImode);
|
||||
icode = code_for_aarch64_dup_lane (builtin_data->types[0].mode);
|
||||
expand_insn (icode, nargs + 1, ops);
|
||||
target = ops[0].value;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
return target;
|
||||
}
|
||||
@@ -4186,7 +4570,6 @@ aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
#undef CF3
#undef CF4
#undef CF10
#undef ENTRY_VHSDF
#undef VAR1
#undef VAR2
#undef VAR3

@@ -258,6 +258,14 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
  aarch64_def_or_undef (TARGET_SVE_BF16,
                        "__ARM_FEATURE_SVE_BF16", pfile);

  aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile);

  aarch64_def_or_undef (TARGET_FP8DOT2, "__ARM_FEATURE_FP8DOT2", pfile);

  aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile);

  aarch64_def_or_undef (TARGET_FP8FMA, "__ARM_FEATURE_FP8FMA", pfile);

  aarch64_def_or_undef (TARGET_LS64,
                        "__ARM_FEATURE_LS64", pfile);
  aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);

@@ -236,6 +236,12 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")

AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8")

AARCH64_OPT_EXTENSION("fp8dot2", FP8DOT2, (SIMD), (), (), "fp8dot2")

AARCH64_OPT_EXTENSION("fp8dot4", FP8DOT4, (SIMD), (), (), "fp8dot4")

AARCH64_OPT_EXTENSION("fp8fma", FP8FMA, (SIMD), (), (), "fp8fma")

AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")

#undef AARCH64_OPT_FMV_EXTENSION

@@ -18,16 +18,263 @@
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
|
||||
#undef ENTRY_BINARY
|
||||
#define ENTRY_BINARY(N, T0, T1, T2, U) \
|
||||
ENTRY (N, binary, T0, T1, T2, none, none, false, U)
|
||||
|
||||
#undef ENTRY_BINARY_FPM
|
||||
#define ENTRY_BINARY_FPM(N, T0, T1, T2, U) \
|
||||
ENTRY (N, binary, T0, T1, T2, none, none, true, U)
|
||||
|
||||
#undef ENTRY_BINARY_TWO_LANES
|
||||
#define ENTRY_BINARY_TWO_LANES(N, T0, T1, T2, U) \
|
||||
ENTRY (N, quaternary, T0, T1, s32_index, T2, s32_index, false, U)
|
||||
|
||||
#undef ENTRY_TERNARY_FPM
|
||||
#define ENTRY_TERNARY_FPM(N, T0, T1, T2, T3, U) \
|
||||
ENTRY (N, ternary, T0, T1, T2, T3, none, true, U)
|
||||
|
||||
#undef ENTRY_TERNARY_FPM_LANE
|
||||
#define ENTRY_TERNARY_FPM_LANE(N, T0, T1, T2, T3, U) \
|
||||
ENTRY (N, quaternary, T0, T1, T2, T3, s32_index, true, U)
|
||||
|
||||
#undef ENTRY_UNARY
|
||||
#define ENTRY_UNARY(N, T0, T1, U) \
|
||||
ENTRY (N, unary, T0, T1, none, none, none, false, U)
|
||||
|
||||
#undef ENTRY_UNARY_LANE
|
||||
#define ENTRY_UNARY_LANE(N, T0, T1, U) \
|
||||
ENTRY_BINARY (N, T0, T1, s32_index, U) \
|
||||
|
||||
#undef ENTRY_UNARY_FPM
|
||||
#define ENTRY_UNARY_FPM(N, T0, T1, U) \
|
||||
ENTRY (N, unary, T0, T1, none, none, none, true, U)
|
||||
|
||||
#undef ENTRY_VDOT_FPM
|
||||
#define ENTRY_VDOT_FPM(T, U) \
|
||||
ENTRY_TERNARY_FPM (vdot_##T##_mf8_fpm, T, T, f8, f8, U) \
|
||||
ENTRY_TERNARY_FPM (vdotq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U) \
|
||||
ENTRY_TERNARY_FPM_LANE (vdot_lane_##T##_mf8_fpm, T, T, f8, f8, U) \
|
||||
ENTRY_TERNARY_FPM_LANE (vdot_laneq_##T##_mf8_fpm, T, T, f8, f8q, U) \
|
||||
ENTRY_TERNARY_FPM_LANE (vdotq_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U) \
|
||||
ENTRY_TERNARY_FPM_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U)
|
||||
|
||||
#undef ENTRY_FMA_FPM
|
||||
#define ENTRY_FMA_FPM(N, T, U) \
|
||||
ENTRY_TERNARY_FPM (N##_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U) \
|
||||
ENTRY_TERNARY_FPM_LANE (N##_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U) \
|
||||
ENTRY_TERNARY_FPM_LANE (N##_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U)
|
||||
|
||||
#undef ENTRY_VHSDF
|
||||
#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \
|
||||
ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC) \
|
||||
ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC) \
|
||||
ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC) \
|
||||
ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC) \
|
||||
ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC)
|
||||
#define ENTRY_VHSDF(NAME, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##_f16, f16, f16, f16, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##q_f16, f16q, f16q, f16q, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##_f32, f32, f32, f32, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##q_f32, f32q, f32q, f32q, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##q_f64, f64q, f64q, f64q, UNSPEC)
|
||||
|
||||
#undef ENTRY_VHSDF_VHSDI
|
||||
#define ENTRY_VHSDF_VHSDI(NAME, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##_f16, f16, f16, s16, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##q_f16, f16q, f16q, s16q, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##_f32, f32, f32, s32, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##q_f32, f32q, f32q, s32q, UNSPEC) \
|
||||
ENTRY_BINARY (NAME##q_f64, f64q, f64q, s64q, UNSPEC)
|
||||
|
||||
#undef ENTRY_UNARY_N_VALL_F16_SCALAR
|
||||
#define ENTRY_UNARY_N_VALL_F16_SCALAR(NAME, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_p8, p8, p8_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_p8, p8q, p8_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_p16, p16, p16_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_p16, p16q, p16_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_p64, p64, p64_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_p64, p64q, p64_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_s8, s8, s8_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_s8, s8q, s8_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_s16, s16, s16_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_s16, s16q, s16_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_s32, s32, s32_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_s32, s32q, s32_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_s64, s64, s64_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_s64, s64q, s64_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_u8, u8, u8_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_u8, u8q, u8_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_u16, u16, u16_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_u16, u16q, u16_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_u32, u32, u32_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_u32, u32q, u32_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_u64, u64, u64_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_u64, u64q, u64_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_f16, f16, f16_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_f16, f16q, f16_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_f32, f32, f32_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_f32, f32q, f32_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_n_f64, f64, f64_scalar, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_n_f64, f64q, f64_scalar, UNSPEC) \
|
||||
|
||||
#undef ENTRY_UNARY_VALL_F16_CONST_PTR
|
||||
#define ENTRY_UNARY_VALL_F16_CONST_PTR(NAME, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_p8, p8, p8_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_p8, p8q, p8_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_p16, p16, p16_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_p16, p16q, p16_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_p64, p64, p64_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_p64, p64q, p64_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_s8, s8, s8_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_s8, s8q, s8_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_s16, s16, s16_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_s16, s16q, s16_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_s32, s32, s32_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_s32, s32q, s32_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_s64, s64, s64_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_s64, s64q, s64_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_u8, u8, u8_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_u8, u8q, u8_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_u16, u16, u16_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_u16, u16q, u16_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_u32, u32, u32_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_u32, u32q, u32_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_u64, u64, u64_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_u64, u64q, u64_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_f16, f16, f16_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_f16, f16q, f16_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_f32, f32, f32_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_f32, f32q, f32_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##_f64, f64, f64_scalar_const_ptr, UNSPEC) \
|
||||
ENTRY_UNARY (NAME##q_f64, f64q, f64_scalar_const_ptr, UNSPEC) \
|
||||
|
||||
#undef ENTRY_UNARY_LANE_VALL_F16
|
||||
#define ENTRY_UNARY_LANE_VALL_F16(NAME, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_p8, p8, p8, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_p8, p8, p8q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_p16, p16, p16, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_p16, p16, p16q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_p64, p64, p64, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_p64, p64, p64q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_s8, s8, s8, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_s8, s8, s8q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_s16, s16, s16, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_s16, s16, s16q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_s32, s32, s32, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_s32, s32, s32q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_s64, s64, s64, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_s64, s64, s64q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_u8, u8, u8, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_u8, u8, u8q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_u16, u16, u16, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_u16, u16, u16q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_u32, u32, u32, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_u32, u32, u32q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_u64, u64, u64, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_u64, u64, u64q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_f16, f16, f16, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_f16, f16, f16q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_f32, f32, f32, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_f32, f32, f32q, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_lane_f64, f64, f64, UNSPEC) \
|
||||
ENTRY_UNARY_LANE (NAME##_laneq_f64, f64, f64q, UNSPEC) \
|
||||
|
||||
// faminmax
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
|
||||
ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX)
|
||||
ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN)
|
||||
ENTRY_VHSDF (vamax, UNSPEC_FAMAX)
|
||||
ENTRY_VHSDF (vamin, UNSPEC_FAMIN)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// fpm conversion
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
|
||||
ENTRY_UNARY_FPM (vcvt1_bf16_mf8_fpm, bf16q, f8, UNSPEC_VCVT1)
|
||||
ENTRY_UNARY_FPM (vcvt1_high_bf16_mf8_fpm, bf16q, f8q, UNSPEC_VCVT1_HIGH)
|
||||
ENTRY_UNARY_FPM (vcvt1_low_bf16_mf8_fpm, bf16q, f8q, UNSPEC_VCVT1_LOW)
|
||||
ENTRY_UNARY_FPM (vcvt1_f16_mf8_fpm, f16q, f8, UNSPEC_VCVT1)
|
||||
ENTRY_UNARY_FPM (vcvt1_high_f16_mf8_fpm, f16q, f8q, UNSPEC_VCVT1_HIGH)
|
||||
ENTRY_UNARY_FPM (vcvt1_low_f16_mf8_fpm, f16q, f8q, UNSPEC_VCVT1_LOW)
|
||||
ENTRY_UNARY_FPM (vcvt2_bf16_mf8_fpm, bf16q, f8, UNSPEC_VCVT2)
|
||||
ENTRY_UNARY_FPM (vcvt2_high_bf16_mf8_fpm, bf16q, f8q, UNSPEC_VCVT2_HIGH)
|
||||
ENTRY_UNARY_FPM (vcvt2_low_bf16_mf8_fpm, bf16q, f8q, UNSPEC_VCVT2_LOW)
|
||||
ENTRY_UNARY_FPM (vcvt2_f16_mf8_fpm, f16q, f8, UNSPEC_VCVT2)
|
||||
ENTRY_UNARY_FPM (vcvt2_high_f16_mf8_fpm, f16q, f8q, UNSPEC_VCVT2_HIGH)
|
||||
ENTRY_UNARY_FPM (vcvt2_low_f16_mf8_fpm, f16q, f8q, UNSPEC_VCVT2_LOW)
|
||||
|
||||
ENTRY_BINARY_FPM (vcvt_mf8_f16_fpm, f8, f16, f16, UNSPEC_VCVT)
|
||||
ENTRY_BINARY_FPM (vcvtq_mf8_f16_fpm, f8q, f16q, f16q, UNSPEC_VCVT)
|
||||
ENTRY_BINARY_FPM (vcvt_mf8_f32_fpm, f8, f32q, f32q, UNSPEC_VCVT)
|
||||
|
||||
ENTRY_TERNARY_FPM (vcvt_high_mf8_f32_fpm, f8q, f8, f32q, f32q, UNSPEC_VCVT_HIGH)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// fpm scaling
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
|
||||
ENTRY_VHSDF_VHSDI (vscale, UNSPEC_FSCALE)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// fpm dot2 product
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT2)
|
||||
ENTRY_VDOT_FPM (f16, UNSPEC_VDOT2)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// fpm dot4 product
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4)
|
||||
ENTRY_VDOT_FPM (f32, UNSPEC_VDOT4)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// fp8 multiply-add
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8FMA)
|
||||
ENTRY_FMA_FPM (vmlalbq, f16, UNSPEC_FMLALB)
|
||||
ENTRY_FMA_FPM (vmlaltq, f16, UNSPEC_FMLALT)
|
||||
ENTRY_FMA_FPM (vmlallbbq, f32, UNSPEC_FMLALLBB)
|
||||
ENTRY_FMA_FPM (vmlallbtq, f32, UNSPEC_FMLALLBT)
|
||||
ENTRY_FMA_FPM (vmlalltbq, f32, UNSPEC_FMLALLTB)
|
||||
ENTRY_FMA_FPM (vmlallttq, f32, UNSPEC_FMLALLTT)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// dup
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
|
||||
ENTRY_UNARY_N_VALL_F16_SCALAR (vdup, UNSPEC_DUP)
|
||||
ENTRY_UNARY_LANE_VALL_F16 (vdup, UNSPEC_DUP_LANE)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// mov
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
|
||||
ENTRY_UNARY_N_VALL_F16_SCALAR (vmov, UNSPEC_DUP)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// vcreate
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
|
||||
ENTRY_UNARY (vcreate_p8, p8, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_p16, p16, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_p64, p64, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_s8, s8, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_s16, s16, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_s32, s32, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_s64, s64, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_u8, u8, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_u16, u16, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_u32, u32, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_u64, u64, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_f16, f16, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_f32, f32, u64_scalar, UNSPEC_VCREATE)
|
||||
ENTRY_UNARY (vcreate_f64, f64, u64_scalar, UNSPEC_VCREATE)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// vcopy_lane
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
|
||||
ENTRY_BINARY_TWO_LANES (vcopy_lane_p8, p8, p8, p8, UNSPEC_VEC_COPY)
|
||||
ENTRY_BINARY_TWO_LANES (vcopy_lane_s8, s8, s8, s8, UNSPEC_VEC_COPY)
|
||||
ENTRY_BINARY_TWO_LANES (vcopy_lane_u8, u8, u8, u8, UNSPEC_VEC_COPY)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// vdupb_lane
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
|
||||
ENTRY_UNARY_LANE (vdupb_lane_s8, s8_scalar, s8, UNSPEC_DUPB)
|
||||
ENTRY_UNARY_LANE (vdupb_lane_u8, u8_scalar, u8, UNSPEC_DUPB)
|
||||
ENTRY_UNARY_LANE (vdupb_lane_p8, p8_scalar, p8, UNSPEC_DUPB)
|
||||
ENTRY_UNARY_LANE (vdupb_laneq_s8, s8_scalar, s8q, UNSPEC_DUPB)
|
||||
ENTRY_UNARY_LANE (vdupb_laneq_u8, u8_scalar, u8q, UNSPEC_DUPB)
|
||||
ENTRY_UNARY_LANE (vdupb_laneq_p8, p8_scalar, p8q, UNSPEC_DUPB)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
// ld1
|
||||
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
|
||||
ENTRY_UNARY_VALL_F16_CONST_PTR (vld1, UNSPEC_LD1)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
@@ -112,7 +112,7 @@
|
||||
}
|
||||
)
|
||||
|
||||
(define_insn "aarch64_dup_lane<mode>"
|
||||
(define_insn "@aarch64_dup_lane<mode>"
|
||||
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
||||
(vec_duplicate:VALL_F16
|
||||
(vec_select:<VEL>
|
||||
@@ -121,6 +121,7 @@
|
||||
)))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
/* TODO: Need to use ENDIAN_LANE_N this in existing intrinsics too. We still need the next line. */
|
||||
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
||||
return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
|
||||
}
|
||||
@@ -1164,7 +1165,7 @@
|
||||
[(set_attr "type" "neon_logic<q>")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_simd_vec_set<mode>"
|
||||
(define_insn "@aarch64_simd_vec_set<mode>"
|
||||
[(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
|
||||
(vec_merge:VALL_F16
|
||||
(vec_duplicate:VALL_F16
|
||||
@@ -1178,9 +1179,9 @@
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0:
|
||||
return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
|
||||
return "ins1\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
|
||||
case 1:
|
||||
return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
|
||||
return "ins2\\t%0.<Vetype>[%p2], %<vwcore>1";
|
||||
case 2:
|
||||
return "ld1\\t{%0.<Vetype>}[%p2], %1";
|
||||
default:
|
||||
@@ -1190,7 +1191,7 @@
|
||||
[(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_simd_vec_set_zero<mode>"
|
||||
(define_insn "@aarch64_simd_vec_set_zero<mode>"
|
||||
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
||||
(vec_merge:VALL_F16
|
||||
(match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
|
||||
@@ -1200,7 +1201,7 @@
|
||||
{
|
||||
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
|
||||
operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
|
||||
return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
|
||||
return "ins3\\t%0.<Vetype>[%p2], <vwcore>zr";
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1220,7 +1221,7 @@
|
||||
operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
|
||||
operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
|
||||
|
||||
return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
|
||||
return "ins4\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
|
||||
}
|
||||
[(set_attr "type" "neon_ins<q>")]
|
||||
)
|
||||
@@ -1242,7 +1243,7 @@
|
||||
operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
|
||||
INTVAL (operands[4]));
|
||||
|
||||
return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
|
||||
return "ins5\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
|
||||
}
|
||||
[(set_attr "type" "neon_ins<q>")]
|
||||
)
|
||||
@@ -4357,7 +4358,7 @@
|
||||
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
|
||||
;; Extracting lane zero is split into a simple move when it is between SIMD
|
||||
;; registers or a store.
|
||||
(define_insn_and_split "aarch64_get_lane<mode>"
|
||||
(define_insn_and_split "@aarch64_get_lane<mode>"
|
||||
[(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
|
||||
(vec_select:<VEL>
|
||||
(match_operand:VALL_F16 1 "register_operand" "w, w, w")
|
||||
@@ -8401,7 +8402,7 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "aarch64_ld1<VALL_F16:mode>"
|
||||
(define_expand "@aarch64_ld1<VALL_F16:mode>"
|
||||
[(match_operand:VALL_F16 0 "register_operand")
|
||||
(match_operand:DI 1 "register_operand")]
|
||||
"TARGET_SIMD"
|
||||
@@ -9999,3 +10000,188 @@
|
||||
"TARGET_FAMINMAX"
|
||||
"<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||
)
|
||||
|
||||
;; fpm unary instructions for brain float modes.
|
||||
(define_insn "@aarch64_<fpm_unary_bf_uns_op><V8BF_ONLY:mode><VB:mode>"
|
||||
[(set (match_operand:V8BF_ONLY 0 "register_operand" "=w")
|
||||
(unspec:V8BF_ONLY
|
||||
[(match_operand:VB 1 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_UNARY_UNS))]
|
||||
"TARGET_FP8"
|
||||
"<fpm_unary_bf_uns_op>\t%0.<V8BF_ONLY:Vtype>, %1.<VB:Vtype>"
|
||||
)
|
||||
|
||||
;; fpm unary instructions for half float modes.
|
||||
(define_insn "@aarch64_<fpm_unary_hf_uns_op><V8HF_ONLY:mode><VB:mode>"
|
||||
[(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
|
||||
(unspec:V8HF_ONLY
|
||||
[(match_operand:VB 1 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_UNARY_UNS))]
|
||||
"TARGET_FP8"
|
||||
"<fpm_unary_hf_uns_op>\t%0.<V8HF_ONLY:Vtype>, %1.<VB:Vtype>"
|
||||
)
|
||||
|
||||
;; fpm unary instructions for brain float modes, where the input is
|
||||
;; lowered from V16QI to V8QI.
|
||||
(define_insn
|
||||
"@aarch64_lower_<fpm_unary_bf_uns_op><V8BF_ONLY:mode><V16QI_ONLY:mode>"
|
||||
[(set (match_operand:V8BF_ONLY 0 "register_operand" "=w")
|
||||
(unspec:V8BF_ONLY
|
||||
[(match_operand:V16QI_ONLY 1 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_UNARY_LOW_UNS))]
|
||||
"TARGET_FP8"
|
||||
{
|
||||
operands[1] = force_lowpart_subreg (V8QImode,
|
||||
operands[1],
|
||||
recog_data.operand[1]->mode);
|
||||
return "<fpm_unary_bf_uns_op>\t%0.<V8BF_ONLY:Vtype>, %1.8b";
|
||||
}
|
||||
)
|
||||
|
||||
;; fpm unary instructions for half float modes, where the input is
|
||||
;; lowered from V16QI to V8QI.
|
||||
(define_insn
|
||||
"@aarch64_lower_<fpm_unary_hf_uns_op><V8HF_ONLY:mode><V16QI_ONLY:mode>"
|
||||
[(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
|
||||
(unspec:V8HF_ONLY
|
||||
[(match_operand:V16QI_ONLY 1 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_UNARY_LOW_UNS))]
|
||||
"TARGET_FP8"
|
||||
{
|
||||
operands[1] = force_lowpart_subreg (V8QImode,
|
||||
operands[1],
|
||||
recog_data.operand[1]->mode);
|
||||
return "<fpm_unary_hf_uns_op>\t%0.<V8HF_ONLY:Vtype>, %1.8b";
|
||||
}
|
||||
)
|
||||
|
||||
;; fpm binary instructions.
|
||||
(define_insn
|
||||
"@aarch64_<fpm_uns_op><VB:mode><VCVTFPM:mode><VH_SF:mode>"
|
||||
[(set (match_operand:VB 0 "register_operand" "=w")
|
||||
(unspec:VB
|
||||
[(match_operand:VCVTFPM 1 "register_operand" "w")
|
||||
(match_operand:VH_SF 2 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_BINARY_UNS))]
|
||||
"TARGET_FP8"
|
||||
"<fpm_uns_op>\t%0.<VB:Vtype>, %1.<VCVTFPM:Vtype>, %2.<VH_SF:Vtype>"
|
||||
)
|
||||
|
||||
;; fpm ternary instructions.
|
||||
(define_insn
|
||||
"@aarch64_<fpm_uns_op><V16QI_ONLY:mode><V8QI_ONLY:mode><V4SF_ONLY:mode><V4SF_ONLY:mode>"
|
||||
[(set (match_operand:V16QI_ONLY 0 "register_operand" "=w")
|
||||
(unspec:V16QI_ONLY
|
||||
[(match_operand:V8QI_ONLY 1 "register_operand" "w")
|
||||
(match_operand:V4SF_ONLY 2 "register_operand" "w")
|
||||
(match_operand:V4SF_ONLY 3 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_TERNARY_VCVT_UNS))]
|
||||
"TARGET_FP8"
|
||||
{
|
||||
operands[1] = force_reg (V16QImode, operands[1]);
|
||||
return "<fpm_uns_op>\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
|
||||
}
|
||||
)
|
||||
|
||||
;; fpm scale instructions
|
||||
(define_insn "@aarch64_<fpm_uns_op><VHSDF:mode><VHSDI:mode>"
|
||||
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDI 2 "register_operand" "w")]
|
||||
FPM_SCALE_UNS))]
|
||||
"TARGET_FP8"
|
||||
"<fpm_uns_op>\t%0.<VHSDF:Vtype>, %1.<VHSDF:Vtype>, %2.<VHSDI:Vtype>"
|
||||
)
|
||||
|
||||
;; fpm vdot2 instructions.
|
||||
(define_insn
|
||||
"@aarch64_<fpm_uns_op><VHF:mode><VHF:mode><VB:mode><VB:mode>"
|
||||
[(set (match_operand:VHF 0 "register_operand" "=w")
|
||||
(unspec:VHF
|
||||
[(match_operand:VHF 1 "register_operand" "w")
|
||||
(match_operand:VB 2 "register_operand" "w")
|
||||
(match_operand:VB 3 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_VDOT2_UNS))]
|
||||
"TARGET_FP8DOT2"
|
||||
"<fpm_uns_op>\t%1.<VHF:Vtype>, %2.<VB:Vtype>, %3.<VB:Vtype>"
|
||||
)
|
||||
|
||||
;; fpm vdot2 instructions with lane.
|
||||
(define_insn
|
||||
"@aarch64_<fpm_uns_op><VHF:mode><VHF:mode><VB:mode><VB2:mode><SI_ONLY:mode>"
|
||||
[(set (match_operand:VHF 0 "register_operand" "=w")
|
||||
(unspec:VHF
|
||||
[(match_operand:VHF 1 "register_operand" "w")
|
||||
(match_operand:VB 2 "register_operand" "w")
|
||||
(match_operand:VB2 3 "register_operand" "w")
|
||||
(match_operand:SI_ONLY 4 "const_int_operand" "n")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_VDOT2_UNS))]
|
||||
"TARGET_FP8DOT2"
|
||||
"<fpm_uns_op>\t%1.<VHF:Vtype>, %2.<VB:Vtype>, %3.<VHF:Vdotlanetype>[%4]"
|
||||
)
|
||||
|
||||
;; fpm vdot4 instructions.
|
||||
(define_insn
|
||||
"@aarch64_<fpm_uns_op><VDQSF:mode><VDQSF:mode><VB:mode><VB:mode>"
|
||||
[(set (match_operand:VDQSF 0 "register_operand" "=w")
|
||||
(unspec:VDQSF
|
||||
[(match_operand:VDQSF 1 "register_operand" "w")
|
||||
(match_operand:VB 2 "register_operand" "w")
|
||||
(match_operand:VB 3 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_VDOT4_UNS))]
|
||||
"TARGET_FP8DOT4"
|
||||
"<fpm_uns_op>\t%1.<VDQSF:Vtype>, %2.<VB:Vtype>, %3.<VB:Vtype>"
|
||||
)
|
||||
|
||||
;; fpm vdot4 instructions with lane.
|
||||
(define_insn
|
||||
"@aarch64_<fpm_uns_op><VDQSF:mode><VDQSF:mode><VB:mode><VB2:mode><SI_ONLY:mode>"
|
||||
[(set (match_operand:VDQSF 0 "register_operand" "=w")
|
||||
(unspec:VDQSF
|
||||
[(match_operand:VDQSF 1 "register_operand" "w")
|
||||
(match_operand:VB 2 "register_operand" "w")
|
||||
(match_operand:VB2 3 "register_operand" "w")
|
||||
(match_operand:SI_ONLY 4 "const_int_operand" "n")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_VDOT4_UNS))]
|
||||
"TARGET_FP8DOT4"
|
||||
"<fpm_uns_op>\t%1.<VDQSF:Vtype>, %2.<VB:Vtype>, %3.<VDQSF:Vdotlanetype>[%4]"
|
||||
)
|
||||
|
||||
;; fpm fma instructions.
|
||||
(define_insn
|
||||
"@aarch64_<fpm_uns_op><VQ_HSF:mode><VQ_HSF:mode><V16QI_ONLY:mode><V16QI_ONLY:mode>"
|
||||
[(set (match_operand:VQ_HSF 0 "register_operand" "=w")
|
||||
(unspec:VQ_HSF
|
||||
[(match_operand:VQ_HSF 1 "register_operand" "w")
|
||||
(match_operand:V16QI_ONLY 2 "register_operand" "w")
|
||||
(match_operand:V16QI_ONLY 3 "register_operand" "w")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_FMA_UNS))]
|
||||
"TARGET_FP8FMA"
|
||||
"<fpm_uns_op>\t%1.<VQ_HSF:Vtype>, %2.<V16QI_ONLY:Vtype>, %3.<V16QI_ONLY:Vtype>"
|
||||
)
|
||||
|
||||
;; fpm fma instructions with lane.
|
||||
(define_insn
|
||||
"@aarch64_<fpm_uns_op><VQ_HSF:mode><VQ_HSF:mode><V16QI_ONLY:mode><VB:mode><SI_ONLY:mode>"
|
||||
[(set (match_operand:VQ_HSF 0 "register_operand" "=w")
|
||||
(unspec:VQ_HSF
|
||||
[(match_operand:VQ_HSF 1 "register_operand" "w")
|
||||
(match_operand:V16QI_ONLY 2 "register_operand" "w")
|
||||
(match_operand:VB 3 "register_operand" "w")
|
||||
(match_operand:SI_ONLY 4 "const_int_operand" "n")
|
||||
(reg:DI FPM_REGNUM)]
|
||||
FPM_FMA_UNS))]
|
||||
"TARGET_FP8FMA"
|
||||
"<fpm_uns_op>\t%1.<VQ_HSF:Vtype>, %2.<V16QI_ONLY:Vtype>, %3.b[%4]"
|
||||
)
|
||||
|
||||
@@ -494,6 +494,15 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
  ((TARGET_SVE2p1 || TARGET_STREAMING) \
   && (TARGET_SME2 || TARGET_NON_STREAMING))

/* fp8 dot product instructions are enabled through +fp8dot2. */
#define TARGET_FP8DOT2 AARCH64_HAVE_ISA (FP8DOT2)

/* fp8 dot product instructions are enabled through +fp8dot4. */
#define TARGET_FP8DOT4 AARCH64_HAVE_ISA (FP8DOT4)

/* fp8 multiply-add instructions are enabled through +fp8fma. */
#define TARGET_FP8FMA AARCH64_HAVE_ISA (FP8FMA)

/* Standard register usage. */

/* 31 64-bit general purpose registers R0-R30:

@@ -2490,104 +2490,6 @@ vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
|
||||
return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ extern __inline int8x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_s8 (uint64_t __a)
|
||||
{
|
||||
return (int8x8_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_s16 (uint64_t __a)
|
||||
{
|
||||
return (int16x4_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_s32 (uint64_t __a)
|
||||
{
|
||||
return (int32x2_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_s64 (uint64_t __a)
|
||||
{
|
||||
return (int64x1_t) {__a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline float16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_f16 (uint64_t __a)
|
||||
{
|
||||
return (float16x4_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_f32 (uint64_t __a)
|
||||
{
|
||||
return (float32x2_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint8x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_u8 (uint64_t __a)
|
||||
{
|
||||
return (uint8x8_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_u16 (uint64_t __a)
|
||||
{
|
||||
return (uint16x4_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint32x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_u32 (uint64_t __a)
|
||||
{
|
||||
return (uint32x2_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_u64 (uint64_t __a)
|
||||
{
|
||||
return (uint64x1_t) {__a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline float64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_f64 (uint64_t __a)
|
||||
{
|
||||
return (float64x1_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly8x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_p8 (uint64_t __a)
|
||||
{
|
||||
return (poly8x8_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_p16 (uint64_t __a)
|
||||
{
|
||||
return (poly16x4_t) __a;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcreate_p64 (uint64_t __a)
|
||||
{
|
||||
return (poly64x1_t) __a;
|
||||
}
|
||||
|
||||
/* vget_lane */
|
||||
|
||||
__extension__ extern __inline float16_t
|
||||
@@ -9245,14 +9147,14 @@ vcopy_lane_f64 (float64x1_t __a, const int __lane1,
|
||||
__a, __lane1);
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly8x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcopy_lane_p8 (poly8x8_t __a, const int __lane1,
|
||||
poly8x8_t __b, const int __lane2)
|
||||
{
|
||||
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
|
||||
__a, __lane1);
|
||||
}
|
||||
/* __extension__ extern __inline poly8x8_t */
|
||||
/* __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) */
|
||||
/* vcopy_lane_p8 (poly8x8_t __a, const int __lane1, */
|
||||
/* poly8x8_t __b, const int __lane2) */
|
||||
/* { */
|
||||
/* return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), */
|
||||
/* __a, __lane1); */
|
||||
/* } */
|
||||
|
||||
__extension__ extern __inline poly16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
@@ -9308,14 +9210,14 @@ vcopy_lane_s64 (int64x1_t __a, const int __lane1,
|
||||
__a, __lane1);
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint8x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcopy_lane_u8 (uint8x8_t __a, const int __lane1,
|
||||
uint8x8_t __b, const int __lane2)
|
||||
{
|
||||
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
|
||||
__a, __lane1);
|
||||
}
|
||||
/* __extension__ extern __inline uint8x8_t */
|
||||
/* __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) */
|
||||
/* vcopy_lane_u8 (uint8x8_t __a, const int __lane1, */
|
||||
/* uint8x8_t __b, const int __lane2) */
|
||||
/* { */
|
||||
/* return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), */
|
||||
/* __a, __lane1); */
|
||||
/* } */
|
||||
|
||||
__extension__ extern __inline uint16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
@@ -10456,18 +10358,11 @@ vcvtpq_u64_f64 (float64x2_t __a)
|
||||
|
||||
/* vdup_n */
|
||||
|
||||
__extension__ extern __inline float16x4_t
|
||||
__extension__ extern __inline poly64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_f16 (float16_t __a)
|
||||
vdup_n_p64 (poly64_t __a)
|
||||
{
|
||||
return (float16x4_t) {__a, __a, __a, __a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_f32 (float32_t __a)
|
||||
{
|
||||
return (float32x2_t) {__a, __a};
|
||||
return (poly64x1_t) {__a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline float64x1_t
|
||||
@@ -10477,48 +10372,6 @@ vdup_n_f64 (float64_t __a)
|
||||
return (float64x1_t) {__a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly8x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_p8 (poly8_t __a)
|
||||
{
|
||||
return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_p16 (poly16_t __a)
|
||||
{
|
||||
return (poly16x4_t) {__a, __a, __a, __a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_p64 (poly64_t __a)
|
||||
{
|
||||
return (poly64x1_t) {__a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline int8x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_s8 (int8_t __a)
|
||||
{
|
||||
return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline int16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_s16 (int16_t __a)
|
||||
{
|
||||
return (int16x4_t) {__a, __a, __a, __a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_s32 (int32_t __a)
|
||||
{
|
||||
return (int32x2_t) {__a, __a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline int64x1_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_s64 (int64_t __a)
|
||||
@@ -10526,27 +10379,6 @@ vdup_n_s64 (int64_t __a)
|
||||
return (int64x1_t) {__a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint8x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_u8 (uint8_t __a)
|
||||
{
|
||||
return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vdup_n_u16 (uint16_t __a)
|
||||
{
|
||||
  return (uint16x4_t) {__a, __a, __a, __a};
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_n_u32 (uint32_t __a)
{
  return (uint32x2_t) {__a, __a};
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_n_u64 (uint64_t __a)
@@ -10554,125 +10386,8 @@ vdup_n_u64 (uint64_t __a)
  return (uint64x1_t) {__a};
}

/* vdupq_n */

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_f16 (float16_t __a)
{
  return (float16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_f32 (float32_t __a)
{
  return (float32x4_t) {__a, __a, __a, __a};
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_f64 (float64_t __a)
{
  return (float64x2_t) {__a, __a};
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_p8 (poly8_t __a)
{
  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_p16 (poly16_t __a)
{
  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_p64 (poly64_t __a)
{
  return (poly64x2_t) {__a, __a};
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_s8 (int8_t __a)
{
  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		      __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_s16 (int16_t __a)
{
  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_s32 (int32_t __a)
{
  return (int32x4_t) {__a, __a, __a, __a};
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_s64 (int64_t __a)
{
  return (int64x2_t) {__a, __a};
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_u8 (uint8_t __a)
{
  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_u16 (uint16_t __a)
{
  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_u32 (uint32_t __a)
{
  return (uint32x4_t) {__a, __a, __a, __a};
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupq_n_u64 (uint64_t __a)
{
  return (uint64x2_t) {__a, __a};
}

/* vdup_lane */

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_f16 (float16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_f16 (__a, __b);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_f32 (__a, __b);
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_f64 (float64x1_t __a, const int __b)
@@ -10680,20 +10395,6 @@ vdup_lane_f64 (float64x1_t __a, const int __b)
  return __aarch64_vdup_lane_f64 (__a, __b);
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_p8 (__a, __b);
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_p16 (__a, __b);
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_p64 (poly64x1_t __a, const int __b)
@@ -10701,27 +10402,6 @@ vdup_lane_p64 (poly64x1_t __a, const int __b)
  return __aarch64_vdup_lane_p64 (__a, __b);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_s8 (__a, __b);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_s16 (__a, __b);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_s32 (__a, __b);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_s64 (int64x1_t __a, const int __b)
@@ -10729,27 +10409,6 @@ vdup_lane_s64 (int64x1_t __a, const int __b)
  return __aarch64_vdup_lane_s64 (__a, __b);
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vdup_lane_u8 (__a, __b);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vdup_lane_u16 (__a, __b);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_u32 (__a, __b);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_u64 (uint64x1_t __a, const int __b)
@@ -11057,28 +10716,6 @@ vdupq_laneq_u64 (uint64x2_t __a, const int __b)
  return __aarch64_vdupq_laneq_u64 (__a, __b);
}

/* vdupb_lane */
__extension__ extern __inline poly8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupb_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupb_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupb_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vduph_lane */

__extension__ extern __inline float16_t
@@ -11157,28 +10794,6 @@ vdupd_lane_u64 (uint64x1_t __a, const int __b)
  return __a[0];
}

/* vdupb_laneq */
__extension__ extern __inline poly8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupb_laneq_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupb_laneq_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdupb_laneq_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vget_lane_any (__a, __b);
}

/* vduph_laneq */

__extension__ extern __inline float16_t
@@ -11962,111 +11577,6 @@ vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
		      __a);
}

/* vld1 */

__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_f16 (const float16_t *__a)
{
  return __builtin_aarch64_ld1v4hf (__a);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_f32 (const float32_t *__a)
{
  return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) __a);
}

__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_f64 (const float64_t *__a)
{
  return (float64x1_t) {*__a};
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_p8 (const poly8_t *__a)
{
  return __builtin_aarch64_ld1v8qi_ps (
    (const __builtin_aarch64_simd_qi *) __a);
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_p16 (const poly16_t *__a)
{
  return __builtin_aarch64_ld1v4hi_ps (
    (const __builtin_aarch64_simd_hi *) __a);
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_p64 (const poly64_t *__a)
{
  return (poly64x1_t) {*__a};
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_s8 (const int8_t *__a)
{
  return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_s16 (const int16_t *__a)
{
  return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_s32 (const int32_t *__a)
{
  return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_s64 (const int64_t *__a)
{
  return (int64x1_t) {*__a};
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_u8 (const uint8_t *__a)
{
  return __builtin_aarch64_ld1v8qi_us (
    (const __builtin_aarch64_simd_qi *) __a);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_u16 (const uint16_t *__a)
{
  return __builtin_aarch64_ld1v4hi_us (
    (const __builtin_aarch64_simd_hi *) __a);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_u32 (const uint32_t *__a)
{
  return __builtin_aarch64_ld1v2si_us (
    (const __builtin_aarch64_simd_si *) __a);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_u64 (const uint64_t *__a)
{
  return (uint64x1_t) {*__a};
}

/* vld1x3 */

__extension__ extern __inline uint8x8x3_t
@@ -12282,87 +11792,6 @@ vld1q_p64_x3 (const poly64_t *__a)

/* vld1q */

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_f16 (const float16_t *__a)
{
  return __builtin_aarch64_ld1v8hf (__a);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_f32 (const float32_t *__a)
{
  return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) __a);
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_f64 (const float64_t *__a)
{
  return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) __a);
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_p8 (const poly8_t *__a)
{
  return __builtin_aarch64_ld1v16qi_ps (
    (const __builtin_aarch64_simd_qi *) __a);
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_p16 (const poly16_t *__a)
{
  return __builtin_aarch64_ld1v8hi_ps (
    (const __builtin_aarch64_simd_hi *) __a);
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_p64 (const poly64_t *__a)
{
  return __builtin_aarch64_ld1v2di_ps (
    (const __builtin_aarch64_simd_di *) __a);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_s8 (const int8_t *__a)
{
  return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_s16 (const int16_t *__a)
{
  return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_s32 (const int32_t *__a)
{
  return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_s64 (const int64_t *__a)
{
  return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_u8 (const uint8_t *__a)
{
  return __builtin_aarch64_ld1v16qi_us (
    (const __builtin_aarch64_simd_qi *) __a);
}

__extension__ extern __inline uint8x8x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1_u8_x2 (const uint8_t *__a)
@@ -12574,30 +12003,6 @@ vld1q_p64_x2 (const poly64_t *__a)
    (const __builtin_aarch64_simd_di *) __a);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_u16 (const uint16_t *__a)
{
  return __builtin_aarch64_ld1v8hi_us (
    (const __builtin_aarch64_simd_hi *) __a);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_u32 (const uint32_t *__a)
{
  return __builtin_aarch64_ld1v4si_us (
    (const __builtin_aarch64_simd_si *) __a);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_u64 (const uint64_t *__a)
{
  return __builtin_aarch64_ld1v2di_us (
    (const __builtin_aarch64_simd_di *) __a);
}

/* vld1(q)_x4. */

__extension__ extern __inline int8x8x4_t
@@ -16709,18 +16114,11 @@ vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,

/* vmov_n_ */

__extension__ extern __inline float16x4_t
__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_f16 (float16_t __a)
vmov_n_p64 (poly64_t __a)
{
  return vdup_n_f16 (__a);
}

__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_f32 (float32_t __a)
{
  return vdup_n_f32 (__a);
  return (poly64x1_t) {__a};
}

__extension__ extern __inline float64x1_t
@@ -16730,48 +16128,6 @@ vmov_n_f64 (float64_t __a)
  return (float64x1_t) {__a};
}

__extension__ extern __inline poly8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_p8 (poly8_t __a)
{
  return vdup_n_p8 (__a);
}

__extension__ extern __inline poly16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_p16 (poly16_t __a)
{
  return vdup_n_p16 (__a);
}

__extension__ extern __inline poly64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_p64 (poly64_t __a)
{
  return vdup_n_p64 (__a);
}

__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_s8 (int8_t __a)
{
  return vdup_n_s8 (__a);
}

__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_s16 (int16_t __a)
{
  return vdup_n_s16 (__a);
}

__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_s32 (int32_t __a)
{
  return vdup_n_s32 (__a);
}

__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_s64 (int64_t __a)
@@ -16779,27 +16135,6 @@ vmov_n_s64 (int64_t __a)
  return (int64x1_t) {__a};
}

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_u8 (uint8_t __a)
{
  return vdup_n_u8 (__a);
}

__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_u16 (uint16_t __a)
{
  return vdup_n_u16 (__a);
}

__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_u32 (uint32_t __a)
{
  return vdup_n_u32 (__a);
}

__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmov_n_u64 (uint64_t __a)
@@ -16807,104 +16142,6 @@ vmov_n_u64 (uint64_t __a)
  return (uint64x1_t) {__a};
}

__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_f16 (float16_t __a)
{
  return vdupq_n_f16 (__a);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_f32 (float32_t __a)
{
  return vdupq_n_f32 (__a);
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_f64 (float64_t __a)
{
  return vdupq_n_f64 (__a);
}

__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_p8 (poly8_t __a)
{
  return vdupq_n_p8 (__a);
}

__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_p16 (poly16_t __a)
{
  return vdupq_n_p16 (__a);
}

__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_p64 (poly64_t __a)
{
  return vdupq_n_p64 (__a);
}

__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_s8 (int8_t __a)
{
  return vdupq_n_s8 (__a);
}

__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_s16 (int16_t __a)
{
  return vdupq_n_s16 (__a);
}

__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_s32 (int32_t __a)
{
  return vdupq_n_s32 (__a);
}

__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_s64 (int64_t __a)
{
  return vdupq_n_s64 (__a);
}

__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_u8 (uint8_t __a)
{
  return vdupq_n_u8 (__a);
}

__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_u16 (uint16_t __a)
{
  return vdupq_n_u16 (__a);
}

__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_u32 (uint32_t __a)
{
  return vdupq_n_u32 (__a);
}

__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovq_n_u64 (uint64_t __a)
{
  return vdupq_n_u64 (__a);
}

/* vmul_lane */

__extension__ extern __inline float32x2_t

@@ -41,6 +41,9 @@
;; Iterators for single modes, for "@" patterns.
(define_mode_iterator SI_ONLY [SI])
(define_mode_iterator DI_ONLY [DI])
(define_mode_iterator V8QI_ONLY [V8QI])
(define_mode_iterator V16QI_ONLY [V16QI])
(define_mode_iterator V4SF_ONLY [V4SF])

;; Iterator for all integer modes (up to 64-bit)
(define_mode_iterator ALLI [QI HI SI DI])
@@ -160,9 +163,19 @@

;; Advanced SIMD Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF])

(define_mode_iterator VHF [(V4HF "TARGET_SIMD_F16INST")
			   (V8HF "TARGET_SIMD_F16INST")])

(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
			     (V8HF "TARGET_SIMD_F16INST")
			     V2SF V4SF V2DF])
(define_mode_iterator VH_SF [(V4HF "TARGET_SIMD_F16INST")
			     (V8HF "TARGET_SIMD_F16INST")
			     V4SF])

;; Advanced SIMD Integer modes.
(define_mode_iterator VHSDI [V4HI V8HI V2SI V4SI V2DI])

;; Advanced SIMD Float modes, and DF.
(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
@@ -312,6 +325,7 @@

;; All byte modes.
(define_mode_iterator VB [V8QI V16QI])
(define_mode_iterator VB2 [VB])

;; 1 and 2 lane DI and DF modes.
(define_mode_iterator V12DIF [V1DI V1DF V2DI V2DF])
@@ -426,6 +440,12 @@
			    (V8HF "TARGET_SIMD_F16INST")
			    V2SF V4SF])

;; Modes available for Advanced SIMD FP8 conversion operations.
(define_mode_iterator VCVTFPM [V8QI
			       (V4HF "TARGET_SIMD_F16INST")
			       (V8HF "TARGET_SIMD_F16INST")
			       V4SF])

;; Iterators for single modes, for "@" patterns.
(define_mode_iterator VNx16QI_ONLY [VNx16QI])
(define_mode_iterator VNx16SI_ONLY [VNx16SI])
@@ -635,6 +655,10 @@
;; Bfloat16 modes to which V4SF can be converted
(define_mode_iterator V4SF_TO_BF [V4BF V8BF])

;; Float16 and Bfloat16 modes separately
(define_mode_iterator V8HF_ONLY [V8HF])
(define_mode_iterator V8BF_ONLY [V8BF])

(define_mode_iterator SVE_BHSx24 [VNx32QI VNx16HI VNx8SI
				  VNx16BF VNx16HF VNx8SF
				  VNx64QI VNx32HI VNx16SI
@@ -691,6 +715,9 @@
    UNSPEC_ASHIFT_SIGNED	; Used in aarch-simd.md.
    UNSPEC_ASHIFT_UNSIGNED	; Used in aarch64-simd.md.
    UNSPEC_ABS		; Used in aarch64-simd.md.
    UNSPEC_DUP		; Used in aarch64-simd.md.
    UNSPEC_DUPB		; Used in aarch64-simd.md.
    UNSPEC_DUP_LANE	; Used in aarch64-simd.md.
    UNSPEC_FMAX		; Used in aarch64-simd.md.
    UNSPEC_FMAXNMV	; Used in aarch64-simd.md.
    UNSPEC_FMAXV	; Used in aarch64-simd.md.
@@ -698,7 +725,12 @@
    UNSPEC_FMINNMV	; Used in aarch64-simd.md.
    UNSPEC_FMINV	; Used in aarch64-simd.md.
    UNSPEC_FADDV	; Used in aarch64-simd.md.
    UNSPEC_FMLALLBB	; Used in aarch64-simd.md.
    UNSPEC_FMLALLBT	; Used in aarch64-simd.md.
    UNSPEC_FMLALLTB	; Used in aarch64-simd.md.
    UNSPEC_FMLALLTT	; Used in aarch64-simd.md.
    UNSPEC_FNEG		; Used in aarch64-simd.md.
    UNSPEC_FSCALE	; Used in aarch64-simd.md.
    UNSPEC_ADDV		; Used in aarch64-simd.md.
    UNSPEC_SMAXV	; Used in aarch64-simd.md.
    UNSPEC_SMINV	; Used in aarch64-simd.md.
@@ -736,6 +768,17 @@
    UNSPEC_SSHLL	; Used in aarch64-simd.md.
    UNSPEC_USHLL	; Used in aarch64-simd.md.
    UNSPEC_ADDP		; Used in aarch64-simd.md.
    UNSPEC_VCREATE	; Used in aarch64-simd.md.
    UNSPEC_VCVT		; Used in aarch64-simd.md.
    UNSPEC_VCVT_HIGH	; Used in aarch64-simd.md.
    UNSPEC_VCVT1	; Used in aarch64-simd.md.
    UNSPEC_VCVT1_HIGH	; Used in aarch64-simd.md.
    UNSPEC_VCVT1_LOW	; Used in aarch64-simd.md.
    UNSPEC_VCVT2	; Used in aarch64-simd.md.
    UNSPEC_VCVT2_HIGH	; Used in aarch64-simd.md.
    UNSPEC_VCVT2_LOW	; Used in aarch64-simd.md.
    UNSPEC_VDOT2	; Used in aarch64-simd.md.
    UNSPEC_VDOT4	; Used in aarch64-simd.md.
    UNSPEC_TBL		; Used in vector permute patterns.
    UNSPEC_TBLQ		; Used in vector permute patterns.
    UNSPEC_TBX		; Used in vector permute patterns.
@@ -773,6 +816,7 @@
    UNSPEC_PMULL	; Used in aarch64-simd.md.
    UNSPEC_PMULL2	; Used in aarch64-simd.md.
    UNSPEC_REV_REGLIST	; Used in aarch64-simd.md.
    UNSPEC_VEC_COPY	; Used in aarch64-simd.md.
    UNSPEC_VEC_SHR	; Used in aarch64-simd.md.
    UNSPEC_SQRDMLAH	; Used in aarch64-simd.md.
    UNSPEC_SQRDMLSH	; Used in aarch64-simd.md.
@@ -2463,6 +2507,11 @@
			 (VNx8HF ".h") (VNx16HF "") (VNx32HF "")
			 (VNx8HI ".h") (VNx16HI "") (VNx32HI "")])


;; Lane index suffix for fp8 vdot operations depends on the output mode
(define_mode_attr Vdotlanetype [(V4HF "2b") (V8HF "2b")
				(V2SF "4b") (V4SF "4b")])

;; The number of bytes controlled by a predicate
(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2")
			      (VNx4BI "4") (VNx2BI "8")])
@@ -4659,3 +4708,59 @@

(define_code_attr faminmax_op
  [(smax "famax") (smin "famin")])

;; Iterators and attributes for fpm instructions

(define_int_iterator FPM_UNARY_UNS
  [UNSPEC_VCVT1
   UNSPEC_VCVT1_HIGH
   UNSPEC_VCVT2
   UNSPEC_VCVT2_HIGH])

(define_int_iterator FPM_UNARY_LOW_UNS [UNSPEC_VCVT1_LOW UNSPEC_VCVT2_LOW])

(define_int_iterator FPM_BINARY_UNS [UNSPEC_VCVT])

(define_int_iterator FPM_SCALE_UNS [UNSPEC_FSCALE])

(define_int_iterator FPM_TERNARY_VCVT_UNS [UNSPEC_VCVT_HIGH])

(define_int_attr fpm_unary_bf_uns_op
  [(UNSPEC_VCVT1 "bf1cvtl")
   (UNSPEC_VCVT1_HIGH "bf1cvtl2")
   (UNSPEC_VCVT1_LOW "bf1cvtl")
   (UNSPEC_VCVT2 "bf2cvtl")
   (UNSPEC_VCVT2_HIGH "bf2cvtl2")
   (UNSPEC_VCVT2_LOW "bf2cvtl")])

(define_int_attr fpm_unary_hf_uns_op
  [(UNSPEC_VCVT1 "f1cvtl")
   (UNSPEC_VCVT1_HIGH "f1cvtl2")
   (UNSPEC_VCVT1_LOW "f1cvtl")
   (UNSPEC_VCVT2 "f2cvtl")
   (UNSPEC_VCVT2_HIGH "f2cvtl2")
   (UNSPEC_VCVT2_LOW "f2cvtl")])

(define_int_iterator FPM_VDOT2_UNS [UNSPEC_VDOT2])
(define_int_iterator FPM_VDOT4_UNS [UNSPEC_VDOT4])

(define_int_iterator FPM_FMA_UNS
  [UNSPEC_FMLALB
   UNSPEC_FMLALT
   UNSPEC_FMLALLBB
   UNSPEC_FMLALLBT
   UNSPEC_FMLALLTB
   UNSPEC_FMLALLTT])

(define_int_attr fpm_uns_op
  [(UNSPEC_FSCALE "fscale")
   (UNSPEC_VCVT "fcvtn")
   (UNSPEC_VCVT_HIGH "fcvtn2")
   (UNSPEC_FMLALB "fmlalb")
   (UNSPEC_FMLALT "fmlalt")
   (UNSPEC_FMLALLBB "fmlallbb")
   (UNSPEC_FMLALLBT "fmlallbt")
   (UNSPEC_FMLALLTB "fmlalltb")
   (UNSPEC_FMLALLTT "fmlalltt")
   (UNSPEC_VDOT2 "fdot")
   (UNSPEC_VDOT4 "fdot")])

@@ -21807,6 +21807,12 @@ Enable support for Armv8.9-a/9.4-a translation hardening extension.
Enable the RCpc3 (Release Consistency) extension.
@item fp8
Enable the fp8 (8-bit floating point) extension.
@item fp8dot2
Enable the fp8dot2 (8-bit floating point dot product to half precision) extension.
@item fp8dot4
Enable the fp8dot4 (8-bit floating point dot product to single precision) extension.
@item fp8fma
Enable the fp8fma (8-bit floating point multiply-add) extension.
@item faminmax
Enable the Floating Point Absolute Maximum/Minimum extension.

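As a usage illustration (not part of the patch, and using only type and intrinsic names that appear in the new tests below), code built with the fp8 extension enabled can hand an FPMR value to the new conversion intrinsics:

/* Sketch: compile with -O2 -march=armv9-a+fp8 (or select the extension
   with a GCC target pragma).  The types and the intrinsic name follow
   the testsuite additions further down.  */
#include <arm_neon.h>

float16x8_t
widen_fp8_row (mfloat8x8_t row, fpm_t mode)
{
  /* Converts eight 8-bit floating-point values to half precision,
     interpreting them according to the FPMR value passed in MODE.  */
  return vcvt1_f16_mf8_fpm (row, mode);
}
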
@@ -5,19 +5,9 @@

#include <arm_acle.h>

#ifdef __ARM_FEATURE_FP8
#error "__ARM_FEATURE_FP8 feature macro defined."
#endif

#pragma GCC push_options
#pragma GCC target("arch=armv9.4-a+fp8")

/* We do not define __ARM_FEATURE_FP8 until all
   relevant features have been added. */
#ifdef __ARM_FEATURE_FP8
#error "__ARM_FEATURE_FP8 feature macro defined."
#endif

/*
**test_write_fpmr_sysreg_asm_64:
** msr fpmr, x0

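The comment in the test above documents that the series deliberately leaves __ARM_FEATURE_FP8 undefined until the whole FP8 surface is in place. A hypothetical consumer would therefore keep its feature guard conservative for now (sketch, not from the patch; HAVE_FP8_INTRINSICS is an invented name):

#if defined (__ARM_FEATURE_FP8)
/* Taken only once GCC starts defining the macro, i.e. once all FP8
   features have landed.  */
#  define HAVE_FP8_INTRINSICS 1
#else
#  define HAVE_FP8_INTRINSICS 0
#endif
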
gcc/testsuite/gcc.target/aarch64/simd/fma_fpm.c (new file, 221 lines)
@@ -0,0 +1,221 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3 -march=armv9-a+fp8fma" } */
/* { dg-final { check-function-bodies "**" "" } } */

#include "arm_neon.h"

/*
** test_vmlalbq_f16_fpm:
** msr fpmr, x0
** fmlalb v0.8h, v1.16b, v2.16b
** ret
*/
float16x8_t
test_vmlalbq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlalbq_f16_mf8_fpm (a, b, c, d);
}

/*
** test_vmlaltq_f16_fpm:
** msr fpmr, x0
** fmlalt v0.8h, v1.16b, v2.16b
** ret
*/
float16x8_t
test_vmlaltq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlaltq_f16_mf8_fpm (a, b, c, d);
}

/*
** test_vmlallbbq_f32_fpm:
** msr fpmr, x0
** fmlallbb v0.4s, v1.16b, v2.16b
** ret
*/
float32x4_t
test_vmlallbbq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlallbbq_f32_mf8_fpm (a, b, c, d);
}

/*
** test_vmlallbtq_f32_fpm:
** msr fpmr, x0
** fmlallbt v0.4s, v1.16b, v2.16b
** ret
*/
float32x4_t
test_vmlallbtq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlallbtq_f32_mf8_fpm (a, b, c, d);
}

/*
** test_vmlalltbq_f32_fpm:
** msr fpmr, x0
** fmlalltb v0.4s, v1.16b, v2.16b
** ret
*/
float32x4_t
test_vmlalltbq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlalltbq_f32_mf8_fpm (a, b, c, d);
}

/*
** test_vmlallttq_f32_fpm:
** msr fpmr, x0
** fmlalltt v0.4s, v1.16b, v2.16b
** ret
*/
float32x4_t
test_vmlallttq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlallttq_f32_mf8_fpm (a, b, c, d);
}

/*
** test_vmlalbq_lane_f16_fpm:
** msr fpmr, x0
** fmlalb v0.8h, v1.16b, v2.b\[1\]
** ret
*/
float16x8_t
test_vmlalbq_lane_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
{
  return vmlalbq_lane_f16_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlalbq_laneq_f16_fpm:
** msr fpmr, x0
** fmlalb v0.8h, v1.16b, v2.b\[1\]
** ret
*/
float16x8_t
test_vmlalbq_laneq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlalbq_laneq_f16_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlaltq_lane_f16_fpm:
** msr fpmr, x0
** fmlalt v0.8h, v1.16b, v2.b\[1\]
** ret
*/
float16x8_t
test_vmlaltq_lane_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
{
  return vmlaltq_lane_f16_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlaltq_laneq_f16_fpm:
** msr fpmr, x0
** fmlalt v0.8h, v1.16b, v2.b\[1\]
** ret
*/
float16x8_t
test_vmlaltq_laneq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlaltq_laneq_f16_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlallbbq_lane_f32_fpm:
** msr fpmr, x0
** fmlallbb v0.4s, v1.16b, v2.b\[1\]
** ret
*/
float32x4_t
test_vmlallbbq_lane_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
{
  return vmlallbbq_lane_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlallbbq_laneq_f32_fpm:
** msr fpmr, x0
** fmlallbb v0.4s, v1.16b, v2.b\[1\]
** ret
*/
float32x4_t
test_vmlallbbq_laneq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlallbbq_laneq_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlallbtq_lane_f32_fpm:
** msr fpmr, x0
** fmlallbt v0.4s, v1.16b, v2.b\[1\]
** ret
*/
float32x4_t
test_vmlallbtq_lane_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
{
  return vmlallbtq_lane_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlallbtq_laneq_f32_fpm:
** msr fpmr, x0
** fmlallbt v0.4s, v1.16b, v2.b\[1\]
** ret
*/
float32x4_t
test_vmlallbtq_laneq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlallbtq_laneq_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlalltbq_lane_f32_fpm:
** msr fpmr, x0
** fmlalltb v0.4s, v1.16b, v2.b\[1\]
** ret
*/
float32x4_t
test_vmlalltbq_lane_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
{
  return vmlalltbq_lane_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlalltbq_laneq_f32_fpm:
** msr fpmr, x0
** fmlalltb v0.4s, v1.16b, v2.b\[1\]
** ret
*/
float32x4_t
test_vmlalltbq_laneq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlalltbq_laneq_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlallttq_lane_f32_fpm:
** msr fpmr, x0
** fmlalltt v0.4s, v1.16b, v2.b\[1\]
** ret
*/
float32x4_t
test_vmlallttq_lane_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
{
  return vmlallttq_lane_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vmlallttq_laneq_f32_fpm:
** msr fpmr, x0
** fmlalltt v0.4s, v1.16b, v2.b\[1\]
** ret
*/
float32x4_t
test_vmlallttq_laneq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vmlallttq_laneq_f32_mf8_fpm (a, b, c, 1, d);
}

gcc/testsuite/gcc.target/aarch64/simd/scale_fpm.c (new file, 60 lines)
@@ -0,0 +1,60 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3 -march=armv9-a+fp8" } */
/* { dg-final { check-function-bodies "**" "" } } */

#include "arm_neon.h"

/*
** test_vscale_f16:
** fscale v0.4h, v0.4h, v1.4h
** ret
*/
float16x4_t
test_vscale_f16 (float16x4_t a, int16x4_t b)
{
  return vscale_f16 (a, b);
}

/*
** test_vscaleq_f16:
** fscale v0.8h, v0.8h, v1.8h
** ret
*/
float16x8_t
test_vscaleq_f16 (float16x8_t a, int16x8_t b)
{
  return vscaleq_f16 (a, b);
}

/*
** test_vscale_f32:
** fscale v0.2s, v0.2s, v1.2s
** ret
*/
float32x2_t
test_vscale_f32 (float32x2_t a, int32x2_t b)
{
  return vscale_f32 (a, b);
}

/*
** test_vscaleq_f32:
** fscale v0.4s, v0.4s, v1.4s
** ret
*/
float32x4_t
test_vscaleq_f32 (float32x4_t a, int32x4_t b)
{
  return vscaleq_f32 (a, b);
}

/*
** test_vscaleq_f64:
** fscale v0.2d, v0.2d, v1.2d
** ret
*/
float64x2_t
test_vscaleq_f64 (float64x2_t a, int64x2_t b)
{
  return vscaleq_f64 (a, b);
}

gcc/testsuite/gcc.target/aarch64/simd/vcvt_fpm.c (new file, 197 lines)
@@ -0,0 +1,197 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3 -march=armv9-a+fp8" } */
/* { dg-final { check-function-bodies "**" "" } } */

#include "arm_neon.h"

/*
** test_vcvt1_bf16:
** msr fpmr, x0
** bf1cvtl v0.8h, v0.8b
** ret
*/
bfloat16x8_t
test_vcvt1_bf16 (mfloat8x8_t a, fpm_t b)
{
  return vcvt1_bf16_mf8_fpm(a, b);
}

/*
** test_high_vcvt1_bf16:
** msr fpmr, x0
** bf1cvtl2 v0.8h, v0.16b
** ret
*/
bfloat16x8_t
test_high_vcvt1_bf16 (mfloat8x16_t a, fpm_t b)
{
  return vcvt1_high_bf16_mf8_fpm(a, b);
}

/*
** test_low_vcvt1_bf16:
** msr fpmr, x0
** bf1cvtl v0.8h, v0.8b
** ret
*/
bfloat16x8_t
test_low_vcvt1_bf16 (mfloat8x16_t a, fpm_t b)
{
  return vcvt1_low_bf16_mf8_fpm(a, b);
}

/*
** test_vcvt1_f16:
** msr fpmr, x0
** f1cvtl v0.8h, v0.8b
** ret
*/
float16x8_t
test_vcvt1_f16 (mfloat8x8_t a, fpm_t b)
{
  return vcvt1_f16_mf8_fpm(a, b);
}

/*
** test_high_vcvt1_f16:
** msr fpmr, x0
** f1cvtl2 v0.8h, v0.16b
** ret
*/
float16x8_t
test_high_vcvt1_f16 (mfloat8x16_t a, fpm_t b)
{
  return vcvt1_high_f16_mf8_fpm(a, b);
}

/*
** test_low_vcvt1_f16:
** msr fpmr, x0
** f1cvtl v0.8h, v0.8b
** ret
*/
float16x8_t
test_low_vcvt1_f16 (mfloat8x16_t a, fpm_t b)
{
  return vcvt1_low_f16_mf8_fpm(a, b);
}

/*
** test_vcvt2_bf16:
** msr fpmr, x0
** bf2cvtl v0.8h, v0.8b
** ret
*/
bfloat16x8_t
test_vcvt2_bf16 (mfloat8x8_t a, fpm_t b)
{
  return vcvt2_bf16_mf8_fpm(a, b);
}

/*
** test_high_vcvt2_bf16:
** msr fpmr, x0
** bf2cvtl2 v0.8h, v0.16b
** ret
*/
bfloat16x8_t
test_high_vcvt2_bf16 (mfloat8x16_t a, fpm_t b)
{
  return vcvt2_high_bf16_mf8_fpm(a, b);
}

/*
** test_low_vcvt2_bf16:
** msr fpmr, x0
** bf1cvtl v0.8h, v0.8b
** ret
*/
bfloat16x8_t
test_low_vcvt2_bf16 (mfloat8x16_t a, fpm_t b)
{
  return vcvt1_low_bf16_mf8_fpm(a, b);
}

/*
** test_vcvt2_f16:
** msr fpmr, x0
** f2cvtl v0.8h, v0.8b
** ret
*/
float16x8_t
test_vcvt2_f16 (mfloat8x8_t a, fpm_t b)
{
  return vcvt2_f16_mf8_fpm(a, b);
}

/*
** test_high_vcvt2_f16:
** msr fpmr, x0
** f2cvtl2 v0.8h, v0.16b
** ret
*/
float16x8_t
test_high_vcvt2_f16 (mfloat8x16_t a, fpm_t b)
{
  return vcvt2_high_f16_mf8_fpm(a, b);
}

/*
** test_low_vcvt2_f16:
** msr fpmr, x0
** f1cvtl v0.8h, v0.8b
** ret
*/
float16x8_t
test_low_vcvt2_f16 (mfloat8x16_t a, fpm_t b)
{
  return vcvt1_low_f16_mf8_fpm(a, b);
}

/*
** test_vcvt_f16:
** msr fpmr, x0
** fcvtn v0.8b, v0.4h, v1.4h
** ret
*/
mfloat8x8_t
test_vcvt_f16 (float16x4_t a, float16x4_t b, fpm_t c)
{
  return vcvt_mf8_f16_fpm(a, b, c);
}

/*
** test_vcvtq_f16:
** msr fpmr, x0
** fcvtn v0.16b, v0.8h, v1.8h
** ret
*/
mfloat8x16_t
test_vcvtq_f16 (float16x8_t a, float16x8_t b, fpm_t c)
{
  return vcvtq_mf8_f16_fpm(a, b, c);
}

/*
** test_vcvt_f32:
** msr fpmr, x0
** fcvtn v0.8b, v0.4s, v1.4s
** ret
*/
mfloat8x8_t
test_vcvt_f32 (float32x4_t a, float32x4_t b, fpm_t c)
{
  return vcvt_mf8_f32_fpm(a, b, c);
}

/*
** test_vcvt_high_f32:
** msr fpmr, x0
** fcvtn2 v0.16b, v1.4s, v2.4s
** ret
*/
mfloat8x16_t
test_vcvt_high_f32 (mfloat8x8_t a, float32x4_t b, float32x4_t c, fpm_t d)
{
  return vcvt_high_mf8_f32_fpm(a, b, c, d);
}

gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c (new file, 77 lines)
@@ -0,0 +1,77 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3 -march=armv9-a+fp8dot2" } */
/* { dg-final { check-function-bodies "**" "" } } */

#include "arm_neon.h"

/*
** test_vdot_f16_fpm:
** msr fpmr, x0
** fdot v0.4h, v1.8b, v2.8b
** ret
*/
float16x4_t
test_vdot_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
{
  return vdot_f16_mf8_fpm (a, b, c, d);
}

/*
** test_vdotq_f16_fpm:
** msr fpmr, x0
** fdot v0.8h, v1.16b, v2.16b
** ret
*/
float16x8_t
test_vdotq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vdotq_f16_mf8_fpm (a, b, c, d);
}

/*
** test_vdot_lane_f16_fpm:
** msr fpmr, x0
** fdot v0.4h, v1.8b, v2.2b\[1\]
** ret
*/
float16x4_t
test_vdot_lane_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
{
  return vdot_lane_f16_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vdot_laneq_f16_fpm:
** msr fpmr, x0
** fdot v0.4h, v1.8b, v2.2b\[1\]
** ret
*/
float16x4_t
test_vdot_laneq_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d)
{
  return vdot_laneq_f16_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vdotq_lane_f16_fpm:
** msr fpmr, x0
** fdot v0.8h, v1.16b, v2.2b\[1\]
** ret
*/
float16x8_t
test_vdotq_lane_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
{
  return vdotq_lane_f16_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vdotq_laneq_f16_fpm:
** msr fpmr, x0
** fdot v0.8h, v1.16b, v2.2b\[1\]
** ret
*/
float16x8_t
test_vdotq_laneq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vdotq_laneq_f16_mf8_fpm (a, b, c, 1, d);
}

gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c (new file, 77 lines)
@@ -0,0 +1,77 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3 -march=armv9-a+fp8dot4" } */
/* { dg-final { check-function-bodies "**" "" } } */

#include "arm_neon.h"

/*
** test_vdot_f32_fpm:
** msr fpmr, x0
** fdot v0.2s, v1.8b, v2.8b
** ret
*/
float32x2_t
test_vdot_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
{
  return vdot_f32_mf8_fpm (a, b, c, d);
}

/*
** test_vdotq_f32_fpm:
** msr fpmr, x0
** fdot v0.4s, v1.16b, v2.16b
** ret
*/
float32x4_t
test_vdotq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vdotq_f32_mf8_fpm (a, b, c, d);
}

/*
** test_vdot_lane_f32_fpm:
** msr fpmr, x0
** fdot v0.2s, v1.8b, v2.4b\[1\]
** ret
*/
float32x2_t
test_vdot_lane_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
{
  return vdot_lane_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vdot_laneq_f32_fpm:
** msr fpmr, x0
** fdot v0.2s, v1.8b, v2.4b\[1\]
** ret
*/
float32x2_t
test_vdot_laneq_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d)
{
  return vdot_laneq_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vdotq_lane_f32_fpm:
** msr fpmr, x0
** fdot v0.4s, v1.16b, v2.4b\[1\]
** ret
*/
float32x4_t
test_vdotq_lane_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
{
  return vdotq_lane_f32_mf8_fpm (a, b, c, 1, d);
}

/*
** test_vdotq_laneq_f32_fpm:
** msr fpmr, x0
** fdot v0.4s, v1.16b, v2.4b\[1\]
** ret
*/
float32x4_t
test_vdotq_laneq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
{
  return vdotq_laneq_f32_mf8_fpm (a, b, c, 1, d);
}