arm: Add missing vec_cmp and vcond patterns

This patch does several things at once:

(1) Add vector compare patterns (vec_cmp and vec_cmpu).

(2) Add vector selects between floating-point modes when the
    values being compared are integers (affects vcond and vcondu).

(3) Add vector selects between integer modes when the values being
    compared are floating-point (affects vcond).

(4) Add standalone vector select patterns (vcond_mask).

(5) Tweak the handling of compound comparisons with zeros.
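
As an illustration (my own example, not taken from the patch): case (2)
covers selects like the one below, where an integer comparison chooses
between floating-point values; case (3) is the converse.

/* Illustrative only: with the new patterns, a loop like this can be
   if-converted and vectorized using a NEON integer compare feeding
   a vbsl select between float vectors.  */
void
select_floats (const int *c, const float *x, const float *y,
	       float *out, int n)
{
  for (int i = 0; i < n; ++i)
    out[i] = c[i] > 0 ? x[i] : y[i];
}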

Unfortunately it proved too difficult (for me) to separate this
out into a series of smaller patches, since everything is so
inter-related.  Defining only some of the new patterns does
not leave things in a happy state.

The handling of comparisons is mostly taken from the vcond patterns.
This means that it remains non-compliant with IEEE: “quiet” comparisons
use signalling instructions.  But that shouldn't matter for floats,
since we require -funsafe-math-optimizations to vectorize them
anyway.
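
For instance (illustrative code of my own, using the same built-ins
and options as the new tests): both of the loops below end up using
signalling NEON compare instructions when compiled with
-O1 -ftree-vectorize -funsafe-math-optimizations, even though the
first is written as a quiet comparison.

/* !__builtin_isless is a quiet UNGE, but it still expands to a
   signalling vcge/vcgt; the quiet semantics are not preserved.  */
void
quiet_uge (int *r, const float *a, const float *b, int n)
{
  for (int i = 0; i < n; ++i)
    r[i] = !__builtin_isless (a[i], b[i]) ? 2 : 0;
}

void
signalling_ge (int *r, const float *a, const float *b, int n)
{
  for (int i = 0; i < n; ++i)
    r[i] = a[i] >= b[i] ? 2 : 0;
}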

It remains the case that comparisons and selects aren't implemented
at all for HF vectors.  Implementing those feels like separate work.

gcc/
	PR target/96528
	PR target/97288
	* config/arm/arm-protos.h (arm_expand_vector_compare): Declare.
	(arm_expand_vcond): Likewise.
	* config/arm/arm.c (arm_expand_vector_compare): New function.
	(arm_expand_vcond): Likewise.
	* config/arm/neon.md (vec_cmp<VDQW:mode><v_cmp_result>): New pattern.
	(vec_cmpu<VDQIW:mode><VDQIW:mode>): Likewise.
	(vcond<VDQW:mode><VDQW:mode>): Require operand 5 to be a register
	or zero.  Use arm_expand_vcond.
	(vcond<V_cvtto><V32:mode>): New pattern.
	(vcondu<VDQIW:mode><VDQIW:mode>): Generalize to...
	(vcondu<VDQW:mode><v_cmp_result>): ...this.  Require operand 5
	to be a register or zero.  Use arm_expand_vcond.
	(vcond_mask_<VDQW:mode><v_cmp_result>): New pattern.
	(neon_vc<cmp_op><mode>, neon_vc<cmp_op><mode>_insn): Add "@" marker.
	(neon_vbsl<mode>): Likewise.
	(neon_vc<cmp_op>u<mode>): Reexpress as...
	(@neon_vc<code><mode>): ...this.

gcc/testsuite/
	* lib/target-supports.exp (check_effective_target_vect_cond_mixed): Add
	arm neon targets.
	* gcc.target/arm/neon-compare-1.c: New test.
	* gcc.target/arm/neon-compare-2.c: Likewise.
	* gcc.target/arm/neon-compare-3.c: Likewise.
	* gcc.target/arm/neon-compare-4.c: Likewise.
	* gcc.target/arm/neon-compare-5.c: Likewise.
	* gcc.target/arm/neon-vcond-gt.c: Expect comparisons with zero.
	* gcc.target/arm/neon-vcond-ltgt.c: Likewise.
	* gcc.target/arm/neon-vcond-unordered.c: Likewise.
Author: Richard Sandiford
Date:   2020-10-01 17:41:15 +01:00
Parent: 92f2c04d38
Commit: c2978b3405

12 changed files with 443 additions and 222 deletions

gcc/config/arm/arm-protos.h

@@ -372,9 +372,11 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
#endif /* RTX_CODE */
extern bool arm_gen_setmem (rtx *);
extern void arm_expand_vcond (rtx *, machine_mode);
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes);

gcc/config/arm/arm.c

@@ -30634,6 +30634,127 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
arm_post_atomic_barrier (model);
}
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
If CAN_INVERT, store either the result or its inverse in TARGET
and return true if TARGET contains the inverse. If !CAN_INVERT,
always store the result in TARGET, never its inverse.
Note that the handling of floating-point comparisons is not
IEEE compliant. */
bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
bool can_invert)
{
machine_mode cmp_result_mode = GET_MODE (target);
machine_mode cmp_mode = GET_MODE (op0);
bool inverted;
switch (code)
{
/* For these we need to compute the inverse of the requested
comparison. */
case UNORDERED:
case UNLT:
case UNLE:
case UNGT:
case UNGE:
case UNEQ:
case NE:
code = reverse_condition_maybe_unordered (code);
if (!can_invert)
{
/* Recursively emit the inverted comparison into a temporary
and then store its inverse in TARGET. This avoids reusing
TARGET (which for integer NE could be one of the inputs). */
rtx tmp = gen_reg_rtx (cmp_result_mode);
if (arm_expand_vector_compare (tmp, code, op0, op1, true))
gcc_unreachable ();
emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
return false;
}
inverted = true;
break;
default:
inverted = false;
break;
}
switch (code)
{
/* These are natively supported for zero comparisons, but otherwise
require the operands to be swapped. */
case LE:
case LT:
if (op1 != CONST0_RTX (cmp_mode))
{
code = swap_condition (code);
std::swap (op0, op1);
}
/* Fall through. */
/* These are natively supported for both register and zero operands. */
case EQ:
case GE:
case GT:
emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
return inverted;
/* These are natively supported for register operands only.
Comparisons with zero aren't useful and should be folded
or canonicalized by target-independent code. */
case GEU:
case GTU:
emit_insn (gen_neon_vc (code, cmp_mode, target,
op0, force_reg (cmp_mode, op1)));
return inverted;
/* These require the operands to be swapped and likewise do not
support comparisons with zero. */
case LEU:
case LTU:
emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
target, force_reg (cmp_mode, op1), op0));
return inverted;
/* These need a combination of two comparisons. */
case LTGT:
case ORDERED:
{
/* Operands are LTGT iff (a > b || a < b).
Operands are ORDERED iff (a > b || a <= b). */
rtx gt_res = gen_reg_rtx (cmp_result_mode);
rtx alt_res = gen_reg_rtx (cmp_result_mode);
rtx_code alt_code = (code == LTGT ? LT : LE);
if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
|| arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
gcc_unreachable ();
emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
gt_res, alt_res)));
return inverted;
}
default:
gcc_unreachable ();
}
}
/* Expand a vcond or vcondu pattern with operands OPERANDS.
CMP_RESULT_MODE is the mode of the comparison result. */
void
arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
{
rtx mask = gen_reg_rtx (cmp_result_mode);
bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
operands[4], operands[5], true);
if (inverted)
std::swap (operands[1], operands[2]);
emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
mask, operands[1], operands[2]));
}
#define MAX_VECT_LEN 16
struct expand_vec_perm_d
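
A minimal standalone sketch of the inversion step above (plain C of my
own, not GCC code; the enum and helper are stand-ins for the RTL codes
and reverse_condition_maybe_unordered):

#include <stdio.h>

/* Hypothetical stand-ins for the RTL comparison codes used above.  */
enum cmp { EQ, NE, LT, LE, GT, GE,
	   UNEQ, UNLT, UNLE, UNGT, UNGE, UNORDERED, LTGT, ORDERED };

/* Sketch of reverse_condition_maybe_unordered: return the IEEE-style
   inverse of CODE, e.g. !(a UNLT b) <=> a GE b, !(a NE b) <=> a EQ b.  */
static enum cmp
reverse_maybe_unordered (enum cmp code)
{
  switch (code)
    {
    case NE: return EQ;
    case UNEQ: return LTGT;
    case UNLT: return GE;
    case UNLE: return GT;
    case UNGT: return LE;
    case UNGE: return LT;
    case UNORDERED: return ORDERED;
    default: return code;	/* Already natively handled.  */
    }
}

int
main (void)
{
  /* UNGE expands as the inverse of LT: emit the LT mask, then either
     let the caller swap the select arms (CAN_INVERT) or NOT the mask.  */
  printf ("inverse of UNGE is code %d (LT)\n",
	  (int) reverse_maybe_unordered (UNGE));
  return 0;
}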

gcc/config/arm/neon.md

@@ -1530,6 +1530,30 @@
[(set_attr "type" "neon_qsub<q>")]
)
(define_expand "vec_cmp<mode><v_cmp_result>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand")
(match_operator:<V_cmp_result> 1 "comparison_operator"
[(match_operand:VDQW 2 "s_register_operand")
(match_operand:VDQW 3 "reg_or_zero_operand")]))]
"TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
operands[2], operands[3], false);
DONE;
})
(define_expand "vec_cmpu<mode><mode>"
[(set (match_operand:VDQIW 0 "s_register_operand")
(match_operator:VDQIW 1 "comparison_operator"
[(match_operand:VDQIW 2 "s_register_operand")
(match_operand:VDQIW 3 "reg_or_zero_operand")]))]
"TARGET_NEON"
{
arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
operands[2], operands[3], false);
DONE;
})
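
Illustrative only (GCC generic vector extensions, not part of the
patch): the kind of source these vec_cmp/vec_cmpu expanders service,
where the comparison mask is itself the result rather than feeding a
select:

typedef int v4si __attribute__ ((vector_size (16)));

/* Expands through vec_cmp to a single vcgt.s32: all-ones lanes where
   the comparison is true, all-zeros where it is false.  */
v4si
cmp_gt (v4si a, v4si b)
{
  return a > b;
}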
;; Conditional instructions. These are comparisons with conditional moves for
;; vectors. They perform the assignment:
;;
@@ -1543,230 +1567,53 @@
(if_then_else:VDQW
(match_operator 3 "comparison_operator"
[(match_operand:VDQW 4 "s_register_operand")
(match_operand:VDQW 5 "nonmemory_operand")])
(match_operand:VDQW 5 "reg_or_zero_operand")])
(match_operand:VDQW 1 "s_register_operand")
(match_operand:VDQW 2 "s_register_operand")))]
"TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
int inverse = 0;
int use_zero_form = 0;
int swap_bsl_operands = 0;
rtx mask = gen_reg_rtx (<V_cmp_result>mode);
rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
rtx (*base_comparison) (rtx, rtx, rtx);
rtx (*complimentary_comparison) (rtx, rtx, rtx);
switch (GET_CODE (operands[3]))
{
case GE:
case GT:
case LE:
case LT:
case EQ:
if (operands[5] == CONST0_RTX (<MODE>mode))
{
use_zero_form = 1;
break;
}
/* Fall through. */
default:
if (!REG_P (operands[5]))
operands[5] = force_reg (<MODE>mode, operands[5]);
}
switch (GET_CODE (operands[3]))
{
case LT:
case UNLT:
inverse = 1;
/* Fall through. */
case GE:
case UNGE:
case ORDERED:
case UNORDERED:
base_comparison = gen_neon_vcge<mode>;
complimentary_comparison = gen_neon_vcgt<mode>;
break;
case LE:
case UNLE:
inverse = 1;
/* Fall through. */
case GT:
case UNGT:
base_comparison = gen_neon_vcgt<mode>;
complimentary_comparison = gen_neon_vcge<mode>;
break;
case EQ:
case NE:
case UNEQ:
base_comparison = gen_neon_vceq<mode>;
complimentary_comparison = gen_neon_vceq<mode>;
break;
default:
gcc_unreachable ();
}
switch (GET_CODE (operands[3]))
{
case LT:
case LE:
case GT:
case GE:
case EQ:
/* The easy case. Here we emit one of vcge, vcgt or vceq.
As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
a GE b -> a GE b
a GT b -> a GT b
a LE b -> b GE a
a LT b -> b GT a
a EQ b -> a EQ b
Note that there also exist direct comparison against 0 forms,
so catch those as a special case. */
if (use_zero_form)
{
inverse = 0;
switch (GET_CODE (operands[3]))
{
case LT:
base_comparison = gen_neon_vclt<mode>;
break;
case LE:
base_comparison = gen_neon_vcle<mode>;
break;
default:
/* Do nothing, other zero form cases already have the correct
base_comparison. */
break;
}
}
if (!inverse)
emit_insn (base_comparison (mask, operands[4], operands[5]));
else
emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
break;
case UNLT:
case UNLE:
case UNGT:
case UNGE:
case NE:
/* Vector compare returns false for lanes which are unordered, so if we use
the inverse of the comparison we actually want to emit, then
swap the operands to BSL, we will end up with the correct result.
Note that a NE NaN and NaN NE b are true for all a, b.
Our transformations are:
a GE b -> !(b GT a)
a GT b -> !(b GE a)
a LE b -> !(a GT b)
a LT b -> !(a GE b)
a NE b -> !(a EQ b) */
if (inverse)
emit_insn (base_comparison (mask, operands[4], operands[5]));
else
emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
swap_bsl_operands = 1;
break;
case UNEQ:
/* We check (a > b || b > a). combining these comparisons give us
true iff !(a != b && a ORDERED b), swapping the operands to BSL
will then give us (a == b || a UNORDERED b) as intended. */
emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
swap_bsl_operands = 1;
break;
case UNORDERED:
/* Operands are ORDERED iff (a > b || b >= a).
Swapping the operands to BSL will give the UNORDERED case. */
swap_bsl_operands = 1;
/* Fall through. */
case ORDERED:
emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
break;
default:
gcc_unreachable ();
}
if (swap_bsl_operands)
emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
operands[1]));
else
emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
operands[2]));
arm_expand_vcond (operands, <V_cmp_result>mode);
DONE;
})
(define_expand "vcondu<mode><mode>"
[(set (match_operand:VDQIW 0 "s_register_operand")
(if_then_else:VDQIW
(define_expand "vcond<V_cvtto><mode>"
[(set (match_operand:<V_CVTTO> 0 "s_register_operand")
(if_then_else:<V_CVTTO>
(match_operator 3 "comparison_operator"
[(match_operand:V32 4 "s_register_operand")
(match_operand:V32 5 "reg_or_zero_operand")])
(match_operand:<V_CVTTO> 1 "s_register_operand")
(match_operand:<V_CVTTO> 2 "s_register_operand")))]
"TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
arm_expand_vcond (operands, <V_cmp_result>mode);
DONE;
})
(define_expand "vcondu<mode><v_cmp_result>"
[(set (match_operand:VDQW 0 "s_register_operand")
(if_then_else:VDQW
(match_operator 3 "arm_comparison_operator"
[(match_operand:VDQIW 4 "s_register_operand")
(match_operand:VDQIW 5 "s_register_operand")])
(match_operand:VDQIW 1 "s_register_operand")
(match_operand:VDQIW 2 "s_register_operand")))]
[(match_operand:<V_cmp_result> 4 "s_register_operand")
(match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
(match_operand:VDQW 1 "s_register_operand")
(match_operand:VDQW 2 "s_register_operand")))]
"TARGET_NEON"
{
rtx mask;
int inverse = 0, immediate_zero = 0;
mask = gen_reg_rtx (<V_cmp_result>mode);
if (operands[5] == CONST0_RTX (<MODE>mode))
immediate_zero = 1;
else if (!REG_P (operands[5]))
operands[5] = force_reg (<MODE>mode, operands[5]);
switch (GET_CODE (operands[3]))
{
case GEU:
emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
break;
case GTU:
emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
break;
case EQ:
emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
break;
case LEU:
if (immediate_zero)
emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
else
emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
break;
case LTU:
if (immediate_zero)
emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
else
emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
break;
case NE:
emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
inverse = 1;
break;
default:
gcc_unreachable ();
}
if (inverse)
emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
operands[1]));
else
emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
operands[2]));
arm_expand_vcond (operands, <V_cmp_result>mode);
DONE;
})
(define_expand "vcond_mask_<mode><v_cmp_result>"
[(set (match_operand:VDQW 0 "s_register_operand")
(if_then_else:VDQW
(match_operand:<V_cmp_result> 3 "s_register_operand")
(match_operand:VDQW 1 "s_register_operand")
(match_operand:VDQW 2 "s_register_operand")))]
"TARGET_NEON"
{
emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
operands[2]));
DONE;
})
@@ -2601,7 +2448,7 @@
;; These may expand to an UNSPEC pattern when a floating point mode is used
;; without unsafe math optimizations.
(define_expand "neon_vc<cmp_op><mode>"
(define_expand "@neon_vc<cmp_op><mode>"
[(match_operand:<V_cmp_result> 0 "s_register_operand")
(neg:<V_cmp_result>
(COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
@@ -2641,7 +2488,7 @@
}
)
(define_insn "neon_vc<cmp_op><mode>_insn"
(define_insn "@neon_vc<cmp_op><mode>_insn"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
(neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result>
@@ -2685,7 +2532,7 @@
[(set_attr "type" "neon_fp_compare_s<q>")]
)
(define_expand "neon_vc<cmp_op><mode>"
(define_expand "@neon_vc<cmp_op><mode>"
[(match_operand:<V_cmp_result> 0 "s_register_operand")
(neg:<V_cmp_result>
(COMPARISONS:VH
@@ -2751,7 +2598,7 @@
}
[(set_attr "type" "neon_fp_compare_s<q>")])
(define_insn "neon_vc<cmp_op>u<mode>"
(define_insn "@neon_vc<code><mode>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
(neg:<V_cmp_result>
(GTUGEU:<V_cmp_result>
@@ -4708,7 +4555,7 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_bsl<q>")]
)
(define_expand "neon_vbsl<mode>"
(define_expand "@neon_vbsl<mode>"
[(set (match_operand:VDQX 0 "s_register_operand")
(unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
(match_operand:VDQX 2 "s_register_operand")
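
For reference, a sketch (my own, in GCC generic vector extensions) of
the bitwise-select semantics that vbsl provides and that vcond_mask
relies on, valid because the comparison patterns above produce masks
whose lanes are all-ones or all-zeros:

typedef int v4si __attribute__ ((vector_size (16)));
typedef float v4sf __attribute__ ((vector_size (16)));

/* vbsl computes (mask & a) | (~mask & b) bitwise, which is a lanewise
   select when each mask lane is all-ones or all-zeros.  */
v4sf
bitwise_select (v4si mask, v4sf a, v4sf b)
{
  v4si ai = (v4si) a, bi = (v4si) b;
  return (v4sf) ((mask & ai) | (~mask & bi));
}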

gcc/testsuite/gcc.target/arm/neon-compare-1.c

@@ -0,0 +1,84 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1" } */
/* { dg-add-options arm_neon } */
#define COMPARE_REG(NAME, OP, TYPE) \
TYPE \
cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b) \
{ \
return a OP b; \
}
#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE) \
COMPARE_REG (NAME, OP, TYPE) \
\
TYPE \
cmp_##NAME##_##TYPE##_zero (TYPE a) \
{ \
return a OP (TYPE) {}; \
}
#define COMPARE_TYPE(TYPE, COMPARE_ORDERED) \
COMPARE_REG_AND_ZERO (eq, ==, TYPE) \
COMPARE_REG_AND_ZERO (ne, !=, TYPE) \
COMPARE_ORDERED (lt, <, TYPE) \
COMPARE_ORDERED (le, <=, TYPE) \
COMPARE_ORDERED (gt, >, TYPE) \
COMPARE_ORDERED (ge, >=, TYPE)
#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED) \
typedef ELEM NAME __attribute__((vector_size(16))); \
COMPARE_TYPE (NAME, COMPARE_ORDERED)
TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO)
TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG)
TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO)
TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG)
TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO)
TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG)
/* { s8, u8 } x { eq, ne }.  */
/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcgt.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { s16, u16 } x { eq, ne }.  */
/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcgt.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { s32, u32 } x { eq, ne }.  */
/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcgt.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */

gcc/testsuite/gcc.target/arm/neon-compare-2.c

@@ -0,0 +1,45 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1 -funsafe-math-optimizations" } */
/* { dg-add-options arm_neon } */
#ifndef ELEM_TYPE
#define ELEM_TYPE float
#endif
#ifndef INT_ELEM_TYPE
#define INT_ELEM_TYPE __INT32_TYPE__
#endif
#define COMPARE(NAME, OP) \
int_vec \
cmp_##NAME##_reg (vec a, vec b) \
{ \
return a OP b; \
} \
\
int_vec \
cmp_##NAME##_zero (vec a) \
{ \
return a OP (vec) {}; \
}
typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16)));
typedef ELEM_TYPE vec __attribute__((vector_size(16)));
COMPARE (eq, ==)
COMPARE (ne, !=)
COMPARE (lt, <)
COMPARE (le, <=)
COMPARE (gt, >)
COMPARE (ge, >=)
/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

gcc/testsuite/gcc.target/arm/neon-compare-3.c

@@ -0,0 +1,44 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
/* { dg-add-options arm_neon } */
#define ult(a, b) (!__builtin_isgreaterequal (a, b))
#define ule(a, b) (!__builtin_isgreater (a, b))
#define ugt(a, b) (!__builtin_islessequal (a, b))
#define uge(a, b) (!__builtin_isless (a, b))
int x[16];
float a[16];
float b[16];
#define COMPARE(NAME) \
void \
cmp_##NAME##_reg (void) \
{ \
for (int i = 0; i < 16; ++i) \
x[i] = NAME (a[i], b[i]) ? 2 : 0; \
} \
\
void \
cmp_##NAME##_zero (void) \
{ \
for (int i = 0; i < 16; ++i) \
x[i] = NAME (a[i], 0) ? 2 : 0; \
}
typedef int int_vec __attribute__((vector_size(16)));
typedef float vec __attribute__((vector_size(16)));
COMPARE (ult)
COMPARE (ule)
COMPARE (ugt)
COMPARE (uge)
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

gcc/testsuite/gcc.target/arm/neon-compare-4.c

@@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
/* { dg-add-options arm_neon } */
#define ordered(a, b) (!__builtin_isunordered (a, b))
#define unordered(a, b) (__builtin_isunordered (a, b))
int x[16];
float a[16];
float b[16];
#define COMPARE(NAME) \
void \
cmp_##NAME##_reg (void) \
{ \
for (int i = 0; i < 16; ++i) \
x[i] = NAME (a[i], b[i]) ? 2 : 0; \
} \
\
void \
cmp_##NAME##_zero (void) \
{ \
for (int i = 0; i < 16; ++i) \
x[i] = NAME (a[i], 0) ? 2 : 0; \
}
typedef int int_vec __attribute__((vector_size(16)));
typedef float vec __attribute__((vector_size(16)));
COMPARE (ordered)
COMPARE (unordered)
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */

gcc/testsuite/gcc.target/arm/neon-compare-5.c

@@ -0,0 +1,37 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
/* { dg-add-options arm_neon } */
#define uneq(a, b) (!__builtin_islessgreater (a, b))
/* RTL's LTGT is a signaling comparison. */
#define ltgt(a, b) (a < b || b < a)
int x[16];
float a[16];
float b[16];
#define COMPARE(NAME) \
void \
cmp_##NAME##_reg (void) \
{ \
for (int i = 0; i < 16; ++i) \
x[i] = NAME (a[i], b[i]) ? 2 : 0; \
} \
\
void \
cmp_##NAME##_zero (void) \
{ \
for (int i = 0; i < 16; ++i) \
x[i] = NAME (a[i], 0) ? 2 : 0; \
}
typedef int int_vec __attribute__((vector_size(16)));
typedef float vec __attribute__((vector_size(16)));
COMPARE (uneq)
COMPARE (ltgt)
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */

gcc/testsuite/gcc.target/arm/neon-vcond-gt.c

@@ -13,5 +13,5 @@ void foo (int ilast,float* w, float* w2)
}
}
/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */

gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c

@@ -13,6 +13,7 @@ void foo (int ilast,float* w, float* w2)
}
}
/* { dg-final { scan-assembler-times "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" 2 } } */
/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */

gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c

@@ -13,7 +13,7 @@ void foo (int ilast,float* w, float* w2)
}
}
/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vcge\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vcle\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */

gcc/testsuite/lib/target-supports.exp

@@ -7233,6 +7233,8 @@ proc check_effective_target_vect_cond_mixed { } {
expr { [istarget i?86-*-*] || [istarget x86_64-*-*]
|| [istarget aarch64*-*-*]
|| [istarget powerpc*-*-*]
|| ([istarget arm*-*-*]
&& [check_effective_target_arm_neon_ok])
|| ([istarget mips*-*-*]
&& [et-is-effective-target mips_msa])
|| ([istarget s390*-*-*]