arm: Add missing vec_cmp and vcond patterns
This patch does several things at once:
(1) Add vector compare patterns (vec_cmp and vec_cmpu).
(2) Add vector selects between floating-point modes when the
values being compared are integers (affects vcond and vcondu).
(3) Add vector selects between integer modes when the values being
compared are floating-point (affects vcond).
(4) Add standalone vector select patterns (vcond_mask).
(5) Tweak the handling of compound comparisons with zeros.
Unfortunately it proved too difficult (for me) to separate this
out into a series of smaller patches, since everything is so
inter-related. Defining only some of the new patterns does
not leave things in a happy state.
The handling of comparisons is mostly taken from the vcond patterns.
This means that it remains non-compliant with IEEE: “quiet” comparisons
use signalling instructions. But that shouldn't matter for floats,
since we require -funsafe-math-optimizations to vectorize for them
anyway.
It remains the case that comparisons and selects aren't implemented
at all for HF vectors. Implementing those feels like separate work.
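As a concrete illustration of points (2) and (3) above, here is the kind of conditional loop the new patterns let the vectorizer handle.  This is an example written for this description, not code taken from the patch; the function names are invented, and the comments state the assumptions.

/* Illustrative only: conditional loops in the style of the new
   gcc.target/arm/neon-compare tests.  Build with vectorization enabled
   (and -funsafe-math-optimizations for the float comparison).  */
int x[16];
float f[16], g[16], h[16];

void
int_select_on_float_compare (void)
{
  for (int i = 0; i < 16; ++i)
    x[i] = f[i] < g[i] ? 2 : 0;		/* float compare, integer select */
}

void
float_select_on_int_compare (void)
{
  for (int i = 0; i < 16; ++i)
    h[i] = x[i] > 0 ? f[i] : g[i];	/* integer compare, float select */
}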
gcc/
PR target/96528
PR target/97288
* config/arm/arm-protos.h (arm_expand_vector_compare): Declare.
(arm_expand_vcond): Likewise.
* config/arm/arm.c (arm_expand_vector_compare): New function.
(arm_expand_vcond): Likewise.
* config/arm/neon.md (vec_cmp<VDQW:mode><v_cmp_result>): New pattern.
(vec_cmpu<VDQW:mode><VDQW:mode>): Likewise.
(vcond<VDQW:mode><VDQW:mode>): Require operand 5 to be a register
or zero. Use arm_expand_vcond.
(vcond<V_cvtto><V32:mode>): New pattern.
(vcondu<VDQIW:mode><VDQIW:mode>): Generalize to...
(vcondu<VDQW:mode><v_cmp_result>): ...this. Require operand 5
to be a register or zero. Use arm_expand_vcond.
(vcond_mask_<VDQW:mode><v_cmp_result>): New pattern.
(neon_vc<cmp_op><mode>, neon_vc<cmp_op><mode>_insn): Add "@" marker.
(neon_vbsl<mode>): Likewise.
(neon_vc<cmp_op>u<mode>): Reexpress as...
(@neon_vc<code><mode>): ...this.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_cond_mixed): Add
arm neon targets.
* gcc.target/arm/neon-compare-1.c: New test.
* gcc.target/arm/neon-compare-2.c: Likewise.
* gcc.target/arm/neon-compare-3.c: Likewise.
* gcc.target/arm/neon-compare-4.c: Likewise.
* gcc.target/arm/neon-compare-5.c: Likewise.
* gcc.target/arm/neon-vcond-gt.c: Expect comparisons with zero.
* gcc.target/arm/neon-vcond-ltgt.c: Likewise.
* gcc.target/arm/neon-vcond-unordered.c: Likewise.
gcc/config/arm/arm-protos.h
@@ -372,9 +372,11 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
#endif /* RTX_CODE */

extern bool arm_gen_setmem (rtx *);
extern void arm_expand_vcond (rtx *, machine_mode);
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);

extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes);
gcc/config/arm/arm.c
@@ -30634,6 +30634,127 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
  arm_post_atomic_barrier (model);
}

/* Expand code to compare vectors OP0 and OP1 using condition CODE.
   If CAN_INVERT, store either the result or its inverse in TARGET
   and return true if TARGET contains the inverse.  If !CAN_INVERT,
   always store the result in TARGET, never its inverse.

   Note that the handling of floating-point comparisons is not
   IEEE compliant.  */

bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
                           bool can_invert)
{
  machine_mode cmp_result_mode = GET_MODE (target);
  machine_mode cmp_mode = GET_MODE (op0);

  bool inverted;
  switch (code)
    {
      /* For these we need to compute the inverse of the requested
         comparison.  */
    case UNORDERED:
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case UNEQ:
    case NE:
      code = reverse_condition_maybe_unordered (code);
      if (!can_invert)
        {
          /* Recursively emit the inverted comparison into a temporary
             and then store its inverse in TARGET.  This avoids reusing
             TARGET (which for integer NE could be one of the inputs).  */
          rtx tmp = gen_reg_rtx (cmp_result_mode);
          if (arm_expand_vector_compare (tmp, code, op0, op1, true))
            gcc_unreachable ();
          emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
          return false;
        }
      inverted = true;
      break;

    default:
      inverted = false;
      break;
    }

  switch (code)
    {
      /* These are natively supported for zero comparisons, but otherwise
         require the operands to be swapped.  */
    case LE:
    case LT:
      if (op1 != CONST0_RTX (cmp_mode))
        {
          code = swap_condition (code);
          std::swap (op0, op1);
        }
      /* Fall through.  */

      /* These are natively supported for both register and zero operands.  */
    case EQ:
    case GE:
    case GT:
      emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
      return inverted;

      /* These are natively supported for register operands only.
         Comparisons with zero aren't useful and should be folded
         or canonicalized by target-independent code.  */
    case GEU:
    case GTU:
      emit_insn (gen_neon_vc (code, cmp_mode, target,
                              op0, force_reg (cmp_mode, op1)));
      return inverted;

      /* These require the operands to be swapped and likewise do not
         support comparisons with zero.  */
    case LEU:
    case LTU:
      emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
                              target, force_reg (cmp_mode, op1), op0));
      return inverted;

      /* These need a combination of two comparisons.  */
    case LTGT:
    case ORDERED:
      {
        /* Operands are LTGT iff (a > b || a < b).
           Operands are ORDERED iff (a > b || a <= b).  */
        rtx gt_res = gen_reg_rtx (cmp_result_mode);
        rtx alt_res = gen_reg_rtx (cmp_result_mode);
        rtx_code alt_code = (code == LTGT ? LT : LE);
        if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
            || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
          gcc_unreachable ();
        emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
                                                     gt_res, alt_res)));
        return inverted;
      }

    default:
      gcc_unreachable ();
    }
}

/* Expand a vcond or vcondu pattern with operands OPERANDS.
   CMP_RESULT_MODE is the mode of the comparison result.  */

void
arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
{
  rtx mask = gen_reg_rtx (cmp_result_mode);
  bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
                                             operands[4], operands[5], true);
  if (inverted)
    std::swap (operands[1], operands[2]);
  emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
                            mask, operands[1], operands[2]));
}

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
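To make the compound cases above easier to follow, here is the same LTGT/ORDERED decomposition written out for scalars.  This is an illustrative sketch added for this write-up, not part of the patch; the function names are made up.

/* Illustrative only: scalar versions of the decompositions used for
   LTGT and ORDERED above.  With a NaN input, a > b, a < b and a <= b
   are all false, so both functions correctly return 0.  */
int
scalar_ltgt (float a, float b)
{
  return a > b || a < b;	/* GT result IORed with LT result.  */
}

int
scalar_ordered (float a, float b)
{
  return a > b || a <= b;	/* GT result IORed with LE result.  */
}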
gcc/config/arm/neon.md
@@ -1530,6 +1530,30 @@
  [(set_attr "type" "neon_qsub<q>")]
)

(define_expand "vec_cmp<mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (match_operator:<V_cmp_result> 1 "comparison_operator"
          [(match_operand:VDQW 2 "s_register_operand")
           (match_operand:VDQW 3 "reg_or_zero_operand")]))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
                             operands[2], operands[3], false);
  DONE;
})

(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand")
        (match_operator:VDQIW 1 "comparison_operator"
          [(match_operand:VDQIW 2 "s_register_operand")
           (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
  "TARGET_NEON"
{
  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
                             operands[2], operands[3], false);
  DONE;
})
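For orientation, the source that reaches the two expanders above is a vector comparison whose result is used as a value, as in the new neon-compare-1.c test.  A minimal sketch (illustrative only; types and function names are invented for this description):

/* Illustrative only: a direct vector comparison like this should now be
   expanded through vec_cmp/vec_cmpu and end up as vcgt.s32 / vcgt.u32.  */
typedef int vs32 __attribute__((vector_size(16)));
typedef unsigned int vu32 __attribute__((vector_size(16)));

vs32
cmp_signed (vs32 a, vs32 b)
{
  return a > b;		/* signed compare: vec_cmp<mode><v_cmp_result> */
}

vu32
cmp_unsigned (vu32 a, vu32 b)
{
  return a > b;		/* unsigned compare: vec_cmpu<mode><mode> */
}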
;; Conditional instructions. These are comparisons with conditional moves for
;; vectors. They perform the assignment:
;;
@@ -1543,230 +1567,53 @@
        (if_then_else:VDQW
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQW 4 "s_register_operand")
             (match_operand:VDQW 5 "nonmemory_operand")])
             (match_operand:VDQW 5 "reg_or_zero_operand")])
          (match_operand:VDQW 1 "s_register_operand")
          (match_operand:VDQW 2 "s_register_operand")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  int inverse = 0;
  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  rtx (*base_comparison) (rtx, rtx, rtx);
  rtx (*complimentary_comparison) (rtx, rtx, rtx);

  switch (GET_CODE (operands[3]))
    {
    case GE:
    case GT:
    case LE:
    case LT:
    case EQ:
      if (operands[5] == CONST0_RTX (<MODE>mode))
        {
          use_zero_form = 1;
          break;
        }
      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
        operands[5] = force_reg (<MODE>mode, operands[5]);
    }

  switch (GET_CODE (operands[3]))
    {
    case LT:
    case UNLT:
      inverse = 1;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_neon_vcge<mode>;
      complimentary_comparison = gen_neon_vcgt<mode>;
      break;
    case LE:
    case UNLE:
      inverse = 1;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_neon_vcgt<mode>;
      complimentary_comparison = gen_neon_vcge<mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_neon_vceq<mode>;
      complimentary_comparison = gen_neon_vceq<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (operands[3]))
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case. Here we emit one of vcge, vcgt or vceq.
         As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
         a GE b -> a GE b
         a GT b -> a GT b
         a LE b -> b GE a
         a LT b -> b GT a
         a EQ b -> a EQ b
         Note that there also exist direct comparison against 0 forms,
         so catch those as a special case.  */
      if (use_zero_form)
        {
          inverse = 0;
          switch (GET_CODE (operands[3]))
            {
            case LT:
              base_comparison = gen_neon_vclt<mode>;
              break;
            case LE:
              base_comparison = gen_neon_vcle<mode>;
              break;
            default:
              /* Do nothing, other zero form cases already have the correct
                 base_comparison.  */
              break;
            }
        }

      if (!inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
      break;
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* Vector compare returns false for lanes which are unordered, so if we use
         the inverse of the comparison we actually want to emit, then
         swap the operands to BSL, we will end up with the correct result.
         Note that a NE NaN and NaN NE b are true for all a, b.

         Our transformations are:
         a GE b -> !(b GT a)
         a GT b -> !(b GE a)
         a LE b -> !(a GT b)
         a LT b -> !(a GE b)
         a NE b -> !(a EQ b)  */

      if (inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complimentary_comparison (mask, operands[5], operands[4]));

      swap_bsl_operands = 1;
      break;
    case UNEQ:
      /* We check (a > b || b > a). combining these comparisons give us
         true iff !(a != b && a ORDERED b), swapping the operands to BSL
         will then give us (a == b || a UNORDERED b) as intended.  */

      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = 1;
      break;
    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).
         Swapping the operands to BSL will give the UNORDERED case.  */
      swap_bsl_operands = 1;
      /* Fall through.  */
    case ORDERED:
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      break;
    default:
      gcc_unreachable ();
    }

  if (swap_bsl_operands)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));
  arm_expand_vcond (operands, <V_cmp_result>mode);
  DONE;
})

(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand")
        (if_then_else:VDQIW
(define_expand "vcond<V_cvtto><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
        (if_then_else:<V_CVTTO>
          (match_operator 3 "comparison_operator"
            [(match_operand:V32 4 "s_register_operand")
             (match_operand:V32 5 "reg_or_zero_operand")])
          (match_operand:<V_CVTTO> 1 "s_register_operand")
          (match_operand:<V_CVTTO> 2 "s_register_operand")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  arm_expand_vcond (operands, <V_cmp_result>mode);
  DONE;
})

(define_expand "vcondu<mode><v_cmp_result>"
  [(set (match_operand:VDQW 0 "s_register_operand")
        (if_then_else:VDQW
          (match_operator 3 "arm_comparison_operator"
            [(match_operand:VDQIW 4 "s_register_operand")
             (match_operand:VDQIW 5 "s_register_operand")])
          (match_operand:VDQIW 1 "s_register_operand")
          (match_operand:VDQIW 2 "s_register_operand")))]
            [(match_operand:<V_cmp_result> 4 "s_register_operand")
             (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
          (match_operand:VDQW 1 "s_register_operand")
          (match_operand:VDQW 2 "s_register_operand")))]
  "TARGET_NEON"
{
  rtx mask;
  int inverse = 0, immediate_zero = 0;

  mask = gen_reg_rtx (<V_cmp_result>mode);

  if (operands[5] == CONST0_RTX (<MODE>mode))
    immediate_zero = 1;
  else if (!REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (GET_CODE (operands[3]))
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      if (immediate_zero)
        emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
      else
        emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      if (immediate_zero)
        emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
      else
        emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));
  arm_expand_vcond (operands, <V_cmp_result>mode);
  DONE;
})

(define_expand "vcond_mask_<mode><v_cmp_result>"
  [(set (match_operand:VDQW 0 "s_register_operand")
        (if_then_else:VDQW
          (match_operand:<V_cmp_result> 3 "s_register_operand")
          (match_operand:VDQW 1 "s_register_operand")
          (match_operand:VDQW 2 "s_register_operand")))]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
                                  operands[2]));
  DONE;
})

@@ -2601,7 +2448,7 @@

;; These may expand to an UNSPEC pattern when a floating point mode is used
;; without unsafe math optimizations.
(define_expand "neon_vc<cmp_op><mode>"
(define_expand "@neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
   (neg:<V_cmp_result>
     (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
@@ -2641,7 +2488,7 @@
}
)

(define_insn "neon_vc<cmp_op><mode>_insn"
(define_insn "@neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
@@ -2685,7 +2532,7 @@
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

(define_expand "neon_vc<cmp_op><mode>"
(define_expand "@neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
   (neg:<V_cmp_result>
     (COMPARISONS:VH
@@ -2751,7 +2598,7 @@
}
  [(set_attr "type" "neon_fp_compare_s<q>")])

(define_insn "neon_vc<cmp_op>u<mode>"
(define_insn "@neon_vc<code><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
@@ -4708,7 +4555,7 @@ if (BYTES_BIG_ENDIAN)
  [(set_attr "type" "neon_bsl<q>")]
)

(define_expand "neon_vbsl<mode>"
(define_expand "@neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
                      (match_operand:VDQX 2 "s_register_operand")
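As a rough guide to what the new vcond_mask pattern does at the instruction level: it hands the precomputed mask straight to a Neon bitwise select.  The sketch below uses ACLE intrinsics purely for illustration; it is not code from the patch, and the function name is invented.

/* Illustrative only: roughly what vcond_mask_<mode><v_cmp_result> emits.
   Each result bit is taken from IF_SET where the corresponding mask bit
   is 1 and from IF_CLEAR where it is 0 (VBSL).  */
#include <arm_neon.h>

int32x4_t
select_by_mask (uint32x4_t mask, int32x4_t if_set, int32x4_t if_clear)
{
  return vbslq_s32 (mask, if_set, if_clear);
}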
gcc/testsuite/gcc.target/arm/neon-compare-1.c (new file, 84 lines)
@@ -0,0 +1,84 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1" } */
/* { dg-add-options arm_neon } */

#define COMPARE_REG(NAME, OP, TYPE)			\
  TYPE							\
  cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b)		\
  {							\
    return a OP b;					\
  }

#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE)		\
  COMPARE_REG (NAME, OP, TYPE)				\
							\
  TYPE							\
  cmp_##NAME##_##TYPE##_zero (TYPE a)			\
  {							\
    return a OP (TYPE) {};				\
  }

#define COMPARE_TYPE(TYPE, COMPARE_ORDERED)		\
  COMPARE_REG_AND_ZERO (eq, ==, TYPE)			\
  COMPARE_REG_AND_ZERO (ne, !=, TYPE)			\
  COMPARE_ORDERED (lt, <, TYPE)				\
  COMPARE_ORDERED (le, <=, TYPE)			\
  COMPARE_ORDERED (gt, >, TYPE)				\
  COMPARE_ORDERED (ge, >=, TYPE)

#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED)		\
  typedef ELEM NAME __attribute__((vector_size(16)));	\
  COMPARE_TYPE (NAME, COMPARE_ORDERED)

TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO)
TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG)
TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO)
TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG)
TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO)
TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG)

/* { s8, u8 } x { eq, ne }.  */
/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, #0\n} 4 } } */

/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

/* { dg-final { scan-assembler-times {\tvcgt.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */

/* { s16, u16 } x { eq, ne }.  */
/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, #0\n} 4 } } */

/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

/* { dg-final { scan-assembler-times {\tvcgt.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */

/* { s32, u32 } x { eq, ne }.  */
/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, #0\n} 4 } } */

/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

/* { dg-final { scan-assembler-times {\tvcgt.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
gcc/testsuite/gcc.target/arm/neon-compare-2.c (new file, 45 lines)
@@ -0,0 +1,45 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1 -funsafe-math-optimizations" } */
/* { dg-add-options arm_neon } */

#ifndef ELEM_TYPE
#define ELEM_TYPE float
#endif
#ifndef INT_ELEM_TYPE
#define INT_ELEM_TYPE __INT32_TYPE__
#endif

#define COMPARE(NAME, OP)			\
  int_vec					\
  cmp_##NAME##_reg (vec a, vec b)		\
  {						\
    return a OP b;				\
  }						\
						\
  int_vec					\
  cmp_##NAME##_zero (vec a)			\
  {						\
    return a OP (vec) {};			\
  }

typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16)));
typedef ELEM_TYPE vec __attribute__((vector_size(16)));

COMPARE (eq, ==)
COMPARE (ne, !=)
COMPARE (lt, <)
COMPARE (le, <=)
COMPARE (gt, >)
COMPARE (ge, >=)

/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */

/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
gcc/testsuite/gcc.target/arm/neon-compare-3.c (new file, 44 lines)
@@ -0,0 +1,44 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
/* { dg-add-options arm_neon } */

#define ult(a, b) (!__builtin_isgreaterequal (a, b))
#define ule(a, b) (!__builtin_isgreater (a, b))
#define ugt(a, b) (!__builtin_islessequal (a, b))
#define uge(a, b) (!__builtin_isless (a, b))

int x[16];
float a[16];
float b[16];

#define COMPARE(NAME)				\
  void						\
  cmp_##NAME##_reg (void)			\
  {						\
    for (int i = 0; i < 16; ++i)		\
      x[i] = NAME (a[i], b[i]) ? 2 : 0;		\
  }						\
						\
  void						\
  cmp_##NAME##_zero (void)			\
  {						\
    for (int i = 0; i < 16; ++i)		\
      x[i] = NAME (a[i], 0) ? 2 : 0;		\
  }

typedef int int_vec __attribute__((vector_size(16)));
typedef float vec __attribute__((vector_size(16)));

COMPARE (ult)
COMPARE (ule)
COMPARE (ugt)
COMPARE (uge)

/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */

/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
gcc/testsuite/gcc.target/arm/neon-compare-4.c (new file, 38 lines)
@@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
/* { dg-add-options arm_neon } */

#define ordered(a, b) (!__builtin_isunordered (a, b))
#define unordered(a, b) (__builtin_isunordered (a, b))

int x[16];
float a[16];
float b[16];

#define COMPARE(NAME)				\
  void						\
  cmp_##NAME##_reg (void)			\
  {						\
    for (int i = 0; i < 16; ++i)		\
      x[i] = NAME (a[i], b[i]) ? 2 : 0;		\
  }						\
						\
  void						\
  cmp_##NAME##_zero (void)			\
  {						\
    for (int i = 0; i < 16; ++i)		\
      x[i] = NAME (a[i], 0) ? 2 : 0;		\
  }

typedef int int_vec __attribute__((vector_size(16)));
typedef float vec __attribute__((vector_size(16)));

COMPARE (ordered)
COMPARE (unordered)

/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */

/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
gcc/testsuite/gcc.target/arm/neon-compare-5.c (new file, 37 lines)
@@ -0,0 +1,37 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
/* { dg-add-options arm_neon } */

#define uneq(a, b) (!__builtin_islessgreater (a, b))
/* RTL's LTGT is a signaling comparison.  */
#define ltgt(a, b) (a < b || b < a)

int x[16];
float a[16];
float b[16];

#define COMPARE(NAME)				\
  void						\
  cmp_##NAME##_reg (void)			\
  {						\
    for (int i = 0; i < 16; ++i)		\
      x[i] = NAME (a[i], b[i]) ? 2 : 0;		\
  }						\
						\
  void						\
  cmp_##NAME##_zero (void)			\
  {						\
    for (int i = 0; i < 16; ++i)		\
      x[i] = NAME (a[i], 0) ? 2 : 0;		\
  }

typedef int int_vec __attribute__((vector_size(16)));
typedef float vec __attribute__((vector_size(16)));

COMPARE (uneq)
COMPARE (ltgt)

/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
gcc/testsuite/gcc.target/arm/neon-vcond-gt.c
@@ -13,5 +13,5 @@ void foo (int ilast,float* w, float* w2)
  }
}

/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */

gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c
@@ -13,6 +13,7 @@ void foo (int ilast,float* w, float* w2)
  }
}

/* { dg-final { scan-assembler-times "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" 2 } } */
/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */

gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c
@@ -13,7 +13,7 @@ void foo (int ilast,float* w, float* w2)
  }
}

/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vcge\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vcle\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */

gcc/testsuite/lib/target-supports.exp
@@ -7233,6 +7233,8 @@ proc check_effective_target_vect_cond_mixed { } {
    expr { [istarget i?86-*-*] || [istarget x86_64-*-*]
	   || [istarget aarch64*-*-*]
	   || [istarget powerpc*-*-*]
	   || ([istarget arm*-*-*]
	       && [check_effective_target_arm_neon_ok])
	   || ([istarget mips*-*-*]
	       && [et-is-effective-target mips_msa])
	   || ([istarget s390*-*-*]
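For context on the target-supports.exp change: vect_cond_mixed covers conditionals where the compared type differs from the selected type, which is what the new vcond<V_cvtto><mode> and generalized vcondu patterns provide on Neon.  The example below is illustrative only and is not taken from the patch.

/* Illustrative only: a "mixed" conditional in the vect_cond_mixed
   sense -- an unsigned integer comparison selecting float values.
   With the new patterns this can be vectorized for Neon when
   vectorization is enabled.  */
unsigned int k[16];
float r[16], s[16], t[16];

void
mixed_cond (void)
{
  for (int i = 0; i < 16; ++i)
    r[i] = k[i] > 4u ? s[i] : t[i];
}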