Files
gcc/libgcc/config/gcn/amdgcn_veclib.h
Jakub Jelinek 45ab93d9af non-gcc: Remove trailing whitespace
I've tried to build stage3 with
-Wleading-whitespace=blanks -Wtrailing-whitespace=blank -Wno-error=leading-whitespace=blanks -Wno-error=trailing-whitespace=blank
added to STRICT_WARN and that, as expected, resulted in about
2744 unique trailing whitespace warnings and 124837 leading whitespace
warnings when excluding *.md files (which obviously is in big part a
generator issue).  Some of the other warnings are generator related too; I
think those need to be solved later.

The following patch just fixes up the easy case (trailing whitespace),
which could be easily automated:
for i in `find . -name \*.h -o -name \*.cc -o -name \*.c | xargs grep -l '[ 	]$' | grep -v testsuite/`; do sed -i -e 's/[ 	]*$//' $i; done
I've excluded files which I knew are obviously generated or belong to the Go front end.

Is there anything else we'd want to exclude from the changes?

Due to patch size, I've split it between gcc/ part
and rest (include/, libiberty/, libgcc/, libcpp/, libstdc++-v3/;
this part).

2024-10-24  Jakub Jelinek  <jakub@redhat.com>

include/
	* dyn-string.h: Remove trailing whitespace.
	* libiberty.h: Likewise.
	* xregex.h: Likewise.
	* splay-tree.h: Likewise.
	* partition.h: Likewise.
	* plugin-api.h: Likewise.
	* demangle.h: Likewise.
	* vtv-change-permission.h: Likewise.
	* fibheap.h: Likewise.
	* hsa_ext_image.h: Likewise.
	* hashtab.h: Likewise.
	* libcollector.h: Likewise.
	* sort.h: Likewise.
	* symcat.h: Likewise.
	* hsa_ext_amd.h: Likewise.
libcpp/
	* directives.cc: Remove trailing whitespace.
	* mkdeps.cc: Likewise.
	* line-map.cc: Likewise.
	* internal.h: Likewise.
	* files.cc: Likewise.
	* init.cc: Likewise.
	* makeucnid.cc: Likewise.
	* system.h: Likewise.
	* include/line-map.h: Likewise.
	* include/symtab.h: Likewise.
	* include/cpplib.h: Likewise.
	* expr.cc: Likewise.
	* charset.cc: Likewise.
	* macro.cc: Likewise.
	* errors.cc: Likewise.
	* lex.cc: Likewise.
	* traditional.cc: Likewise.
libgcc/
	* crtstuff.c: Remove trailing whitespace.
	* libgcov.h: Likewise.
	* config/alpha/crtfastmath.c: Likewise.
	* config/alpha/vms-gcc_shell_handler.c: Likewise.
	* config/alpha/vms-unwind.h: Likewise.
	* config/pa/linux-atomic.c: Likewise.
	* config/pa/linux-unwind.h: Likewise.
	* config/pa/quadlib.c: Likewise.
	* config/pa/fptr.c: Likewise.
	* config/s390/32/_fixsfdi.c: Likewise.
	* config/s390/32/_fixunssfdi.c: Likewise.
	* config/s390/32/_fixunsdfdi.c: Likewise.
	* config/c6x/pr-support.c: Likewise.
	* config/lm32/_udivsi3.c: Likewise.
	* config/lm32/libgcc_lm32.h: Likewise.
	* config/lm32/_udivmodsi4.c: Likewise.
	* config/lm32/_mulsi3.c: Likewise.
	* config/lm32/_modsi3.c: Likewise.
	* config/lm32/_umodsi3.c: Likewise.
	* config/lm32/_divsi3.c: Likewise.
	* config/darwin-crt3.c: Likewise.
	* config/msp430/mpy.c: Likewise.
	* config/ia64/tf-signs.c: Likewise.
	* config/ia64/fde-vms.c: Likewise.
	* config/ia64/unwind-ia64.c: Likewise.
	* config/ia64/vms-unwind.h: Likewise.
	* config/ia64/sfp-exceptions.c: Likewise.
	* config/ia64/quadlib.c: Likewise.
	* config/ia64/unwind-ia64.h: Likewise.
	* config/rl78/vregs.h: Likewise.
	* config/arm/bpabi.c: Likewise.
	* config/arm/unwind-arm.c: Likewise.
	* config/arm/pr-support.c: Likewise.
	* config/arm/linux-atomic.c: Likewise.
	* config/arm/bpabi-lib.h: Likewise.
	* config/frv/frvend.c: Likewise.
	* config/frv/cmovw.c: Likewise.
	* config/frv/frvbegin.c: Likewise.
	* config/frv/cmovd.c: Likewise.
	* config/frv/cmovh.c: Likewise.
	* config/aarch64/cpuinfo.c: Likewise.
	* config/i386/crtfastmath.c: Likewise.
	* config/i386/cygming-crtend.c: Likewise.
	* config/i386/32/tf-signs.c: Likewise.
	* config/i386/crtprec.c: Likewise.
	* config/i386/sfp-exceptions.c: Likewise.
	* config/i386/w32-unwind.h: Likewise.
	* config/m32r/initfini.c: Likewise.
	* config/sparc/crtfastmath.c: Likewise.
	* config/gcn/amdgcn_veclib.h: Likewise.
	* config/nios2/linux-atomic.c: Likewise.
	* config/nios2/linux-unwind.h: Likewise.
	* config/nios2/lib2-mul.c: Likewise.
	* config/nios2/lib2-nios2.h: Likewise.
	* config/xtensa/unwind-dw2-xtensa.c: Likewise.
	* config/rs6000/darwin-fallback.c: Likewise.
	* config/rs6000/ibm-ldouble.c: Likewise.
	* config/rs6000/sfp-machine.h: Likewise.
	* config/rs6000/darwin-asm.h: Likewise.
	* config/rs6000/darwin-crt2.c: Likewise.
	* config/rs6000/aix-unwind.h: Likewise.
	* config/rs6000/sfp-exceptions.c: Likewise.
	* config/gthr-vxworks.c: Likewise.
	* config/riscv/atomic.c: Likewise.
	* config/visium/memcpy.c: Likewise.
	* config/darwin-crt-tm.c: Likewise.
	* config/stormy16/lib2funcs.c: Likewise.
	* config/arc/ieee-754/divtab-arc-sf.c: Likewise.
	* config/arc/ieee-754/divtab-arc-df.c: Likewise.
	* config/arc/initfini.c: Likewise.
	* config/sol2/gmon.c: Likewise.
	* config/microblaze/divsi3_table.c: Likewise.
	* config/m68k/fpgnulib.c: Likewise.
	* libgcov-driver.c: Likewise.
	* unwind-dw2.c: Likewise.
	* fp-bit.c: Likewise.
	* dfp-bit.h: Likewise.
	* dfp-bit.c: Likewise.
	* libgcov-driver-system.c: Likewise.
libgcc/config/libbid/
	* _le_td.c: Remove trailing whitespace.
	* bid128_compare.c: Likewise.
	* bid_div_macros.h: Likewise.
	* bid64_to_bid128.c: Likewise.
	* bid64_to_uint32.c: Likewise.
	* bid128_to_uint64.c: Likewise.
	* bid64_div.c: Likewise.
	* bid128_round_integral.c: Likewise.
	* bid_binarydecimal.c: Likewise.
	* bid128_string.c: Likewise.
	* bid_flag_operations.c: Likewise.
	* bid128_to_int64.c: Likewise.
	* _mul_sd.c: Likewise.
	* bid64_mul.c: Likewise.
	* bid128_noncomp.c: Likewise.
	* _gt_dd.c: Likewise.
	* bid64_add.c: Likewise.
	* bid64_string.c: Likewise.
	* bid_from_int.c: Likewise.
	* bid128.c: Likewise.
	* _ge_dd.c: Likewise.
	* _ne_sd.c: Likewise.
	* _dd_to_td.c: Likewise.
	* _unord_sd.c: Likewise.
	* bid64_to_uint64.c: Likewise.
	* _gt_sd.c: Likewise.
	* _sd_to_td.c: Likewise.
	* _addsub_td.c: Likewise.
	* _ne_td.c: Likewise.
	* bid_dpd.c: Likewise.
	* bid128_add.c: Likewise.
	* bid128_next.c: Likewise.
	* _lt_sd.c: Likewise.
	* bid64_next.c: Likewise.
	* bid128_mul.c: Likewise.
	* _lt_dd.c: Likewise.
	* _ge_td.c: Likewise.
	* _unord_dd.c: Likewise.
	* bid64_sqrt.c: Likewise.
	* bid_sqrt_macros.h: Likewise.
	* bid64_fma.c: Likewise.
	* _sd_to_dd.c: Likewise.
	* bid_conf.h: Likewise.
	* bid64_noncomp.c: Likewise.
	* bid_gcc_intrinsics.h: Likewise.
	* _gt_td.c: Likewise.
	* _ge_sd.c: Likewise.
	* bid128_minmax.c: Likewise.
	* bid128_quantize.c: Likewise.
	* bid32_to_bid64.c: Likewise.
	* bid_round.c: Likewise.
	* _td_to_sd.c: Likewise.
	* bid_inline_add.h: Likewise.
	* bid128_fma.c: Likewise.
	* _eq_td.c: Likewise.
	* bid32_to_bid128.c: Likewise.
	* bid64_rem.c: Likewise.
	* bid128_2_str_tables.c: Likewise.
	* _mul_dd.c: Likewise.
	* _dd_to_sd.c: Likewise.
	* bid128_div.c: Likewise.
	* _lt_td.c: Likewise.
	* bid64_compare.c: Likewise.
	* bid64_to_int32.c: Likewise.
	* _unord_td.c: Likewise.
	* bid128_rem.c: Likewise.
	* bid_internal.h: Likewise.
	* bid64_to_int64.c: Likewise.
	* _eq_dd.c: Likewise.
	* _td_to_dd.c: Likewise.
	* bid128_to_int32.c: Likewise.
	* bid128_to_uint32.c: Likewise.
	* _ne_dd.c: Likewise.
	* bid64_quantize.c: Likewise.
	* _le_dd.c: Likewise.
	* bid64_round_integral.c: Likewise.
	* _le_sd.c: Likewise.
	* bid64_minmax.c: Likewise.
libgcc/config/avr/libf7/
	* f7-renames.h: Remove trailing whitespace.
libstdc++-v3/
	* include/debug/debug.h: Remove trailing whitespace.
	* include/parallel/base.h: Likewise.
	* include/parallel/types.h: Likewise.
	* include/parallel/settings.h: Likewise.
	* include/parallel/multiseq_selection.h: Likewise.
	* include/parallel/partition.h: Likewise.
	* include/parallel/random_number.h: Likewise.
	* include/parallel/find_selectors.h: Likewise.
	* include/parallel/partial_sum.h: Likewise.
	* include/parallel/list_partition.h: Likewise.
	* include/parallel/search.h: Likewise.
	* include/parallel/algorithmfwd.h: Likewise.
	* include/parallel/random_shuffle.h: Likewise.
	* include/parallel/multiway_mergesort.h: Likewise.
	* include/parallel/sort.h: Likewise.
	* include/parallel/algobase.h: Likewise.
	* include/parallel/numericfwd.h: Likewise.
	* include/parallel/multiway_merge.h: Likewise.
	* include/parallel/losertree.h: Likewise.
	* include/bits/basic_ios.h: Likewise.
	* include/bits/stringfwd.h: Likewise.
	* include/bits/ostream_insert.h: Likewise.
	* include/bits/stl_heap.h: Likewise.
	* include/bits/unordered_map.h: Likewise.
	* include/bits/hashtable_policy.h: Likewise.
	* include/bits/stl_iterator_base_funcs.h: Likewise.
	* include/bits/valarray_before.h: Likewise.
	* include/bits/regex.h: Likewise.
	* include/bits/postypes.h: Likewise.
	* include/bits/stl_iterator.h: Likewise.
	* include/bits/localefwd.h: Likewise.
	* include/bits/stl_algo.h: Likewise.
	* include/bits/ios_base.h: Likewise.
	* include/bits/stl_function.h: Likewise.
	* include/bits/basic_string.h: Likewise.
	* include/bits/hashtable.h: Likewise.
	* include/bits/valarray_after.h: Likewise.
	* include/bits/char_traits.h: Likewise.
	* include/bits/gslice.h: Likewise.
	* include/bits/locale_facets_nonio.h: Likewise.
	* include/bits/mask_array.h: Likewise.
	* include/bits/specfun.h: Likewise.
	* include/bits/random.h: Likewise.
	* include/bits/slice_array.h: Likewise.
	* include/bits/valarray_array.h: Likewise.
	* include/tr1/float.h: Likewise.
	* include/tr1/functional_hash.h: Likewise.
	* include/tr1/math.h: Likewise.
	* include/tr1/hashtable_policy.h: Likewise.
	* include/tr1/stdio.h: Likewise.
	* include/tr1/complex.h: Likewise.
	* include/tr1/stdbool.h: Likewise.
	* include/tr1/stdarg.h: Likewise.
	* include/tr1/inttypes.h: Likewise.
	* include/tr1/fenv.h: Likewise.
	* include/tr1/stdlib.h: Likewise.
	* include/tr1/wchar.h: Likewise.
	* include/tr1/tgmath.h: Likewise.
	* include/tr1/limits.h: Likewise.
	* include/tr1/wctype.h: Likewise.
	* include/tr1/stdint.h: Likewise.
	* include/tr1/ctype.h: Likewise.
	* include/tr1/random.h: Likewise.
	* include/tr1/shared_ptr.h: Likewise.
	* include/ext/mt_allocator.h: Likewise.
	* include/ext/sso_string_base.h: Likewise.
	* include/ext/debug_allocator.h: Likewise.
	* include/ext/vstring_fwd.h: Likewise.
	* include/ext/pointer.h: Likewise.
	* include/ext/pod_char_traits.h: Likewise.
	* include/ext/malloc_allocator.h: Likewise.
	* include/ext/vstring.h: Likewise.
	* include/ext/bitmap_allocator.h: Likewise.
	* include/ext/pool_allocator.h: Likewise.
	* include/ext/type_traits.h: Likewise.
	* include/ext/ropeimpl.h: Likewise.
	* include/ext/codecvt_specializations.h: Likewise.
	* include/ext/throw_allocator.h: Likewise.
	* include/ext/extptr_allocator.h: Likewise.
	* include/ext/atomicity.h: Likewise.
	* include/ext/concurrence.h: Likewise.
	* include/c_compatibility/wchar.h: Likewise.
	* include/c_compatibility/stdint.h: Likewise.
	* include/backward/hash_fun.h: Likewise.
	* include/backward/binders.h: Likewise.
	* include/backward/hashtable.h: Likewise.
	* include/backward/auto_ptr.h: Likewise.
	* libsupc++/eh_arm.cc: Likewise.
	* libsupc++/unwind-cxx.h: Likewise.
	* libsupc++/si_class_type_info.cc: Likewise.
	* libsupc++/vec.cc: Likewise.
	* libsupc++/class_type_info.cc: Likewise.
	* libsupc++/vmi_class_type_info.cc: Likewise.
	* libsupc++/guard_error.cc: Likewise.
	* libsupc++/bad_typeid.cc: Likewise.
	* libsupc++/eh_personality.cc: Likewise.
	* libsupc++/atexit_arm.cc: Likewise.
	* libsupc++/pmem_type_info.cc: Likewise.
	* libsupc++/vterminate.cc: Likewise.
	* libsupc++/eh_terminate.cc: Likewise.
	* libsupc++/bad_cast.cc: Likewise.
	* libsupc++/exception_ptr.h: Likewise.
	* libsupc++/eh_throw.cc: Likewise.
	* libsupc++/bad_alloc.cc: Likewise.
	* libsupc++/nested_exception.cc: Likewise.
	* libsupc++/pointer_type_info.cc: Likewise.
	* libsupc++/pbase_type_info.cc: Likewise.
	* libsupc++/bad_array_new.cc: Likewise.
	* libsupc++/pure.cc: Likewise.
	* libsupc++/eh_exception.cc: Likewise.
	* libsupc++/bad_array_length.cc: Likewise.
	* libsupc++/cxxabi.h: Likewise.
	* libsupc++/guard.cc: Likewise.
	* libsupc++/eh_catch.cc: Likewise.
	* libsupc++/cxxabi_forced.h: Likewise.
	* libsupc++/tinfo.h: Likewise.
2024-10-25 10:03:17 +02:00

324 lines
9.3 KiB
C

/* Macro library used to help during conversion of scalar math functions to
vectorized SIMD equivalents on AMD GCN.
Copyright (C) 2023-2024 Free Software Foundation, Inc.
Contributed by Siemens.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Overlay of the vector types used by this library.  Every member aliases
   the same storage, so a value stored through one member can be read back
   through another.  The element-type notes below follow the mapping used
   by VECTOR_INIT (float -> v64sf, double -> v64df, char -> v64qi,
   short -> v64hi, int -> v64si, unsigned -> v64usi, long -> v64di).  */
typedef union
{
  /* "sf" vectors (float elements).  */
  v2sf t_v2sf;
  v4sf t_v4sf;
  v8sf t_v8sf;
  v16sf t_v16sf;
  v32sf t_v32sf;
  v64sf t_v64sf;

  /* "df" vectors (double elements).  */
  v2df t_v2df;
  v4df t_v4df;
  v8df t_v8df;
  v16df t_v16df;
  v32df t_v32df;
  v64df t_v64df;

  /* "qi" and "hi" vectors (char and short elements); 64 lanes only.  */
  v64qi t_v64qi;
  v64hi t_v64hi;

  /* "si" vectors (int elements), plus the unsigned 64-lane form.  */
  v2si t_v2si;
  v4si t_v4si;
  v8si t_v8si;
  v16si t_v16si;
  v32si t_v32si;
  v64si t_v64si;
  v64usi t_v64usi;

  /* "di" vectors (long elements).  */
  v2di t_v2di;
  v4di t_v4di;
  v8di t_v8di;
  v16di t_v16di;
  v32di t_v32di;
  v64di t_v64di;
} vector_union;
/* Cast between vectors with a different number of elements, or type.

   TO_T is the destination type and FROM the value to reinterpret.  The asm
   template is empty: the "0" matching constraint ties FROM to the same
   operand as the "=v" output, so the conversion is expressed purely through
   operand constraints rather than through instruction text.  FROM is
   evaluated once, and the macro yields the TO_T value as the result of the
   statement expression.  */
#define VGPR_CAST(to_t, from) \
({ \
to_t __res; \
__asm__ ("" : "=v"(__res) : "0"(from)); \
__res; \
})
/* Combine LOW and HIGH into a single v64udi value.  The asm copies LOW into
   the %L0 part of the result and HIGH into the %H0 part (presumably the low
   and high 32-bit halves of each 64-bit lane -- confirm against the GCN
   operand modifiers).  The output is early-clobber ("=&v") because it is
   written before HIGH is read; the "v0" constraint on LOW appears to allow
   LOW to share the output register.  The "e"(-1L) operand passes -1 in the
   "e" register class, presumably enabling all lanes for the moves.  */
#define PACK_SI_PAIR(low, high) \
({ \
v64udi __res; \
asm ("v_mov_b32\t%L0, %1\n\t" \
"v_mov_b32\t%H0, %2" \
: "=&v"(__res) : "v0"(low), "v"(high), "e"(-1L)); \
__res; \
})
/* Recover the LOW part of a value built by PACK_SI_PAIR as type TO_T.  This
   is a plain VGPR_CAST of PAIR; no instruction text is involved.  */
#define UNPACK_SI_LOW(to_t, pair) VGPR_CAST(to_t, pair)
/* Recover the HIGH part of a value built by PACK_SI_PAIR as type TO_T by
   copying the %H1 part of PAIR into the result with a single v_mov_b32.  */
#define UNPACK_SI_HIGH(to_t, pair) \
({ \
to_t __res; \
asm ("v_mov_b32\t%0, %H1" : "=v"(__res) : "v"(pair), "e"(-1L)); \
__res; \
})
/* Combine LOW and HIGH into a single v64uti value using four v_mov_b32
   instructions: the %L and %H parts of LOW become the %L and %H parts of
   the result, and the %L and %H parts of HIGH become its %J and %K parts
   (presumably the four 32-bit quarters of each lane, lowest first --
   confirm against the GCN operand modifiers).  The output is early-clobber
   ("=&v") because it is written before all inputs have been read; the "v0"
   constraint on LOW appears to allow LOW to share the output register.  The
   "e"(-1L) operand passes -1 in the "e" register class, presumably enabling
   all lanes for the moves.  */
#define PACK_DI_PAIR(low, high) \
({ \
v64uti __res; \
asm ("v_mov_b32\t%L0, %L1\n\t" \
"v_mov_b32\t%H0, %H1\n\t" \
"v_mov_b32\t%J0, %L2\n\t" \
"v_mov_b32\t%K0, %H2" \
: "=&v"(__res) : "v0"(low), "v"(high), "e"(-1L)); \
__res; \
})
/* Recover the LOW part of a value built by PACK_DI_PAIR as type TO_T.  This
   is a plain VGPR_CAST of PAIR; no instruction text is involved.  */
#define UNPACK_DI_LOW(to_t, pair) VGPR_CAST(to_t, pair)
/* Recover the HIGH part of a value built by PACK_DI_PAIR as type TO_T: the
   %J1 and %K1 parts of PAIR are copied into the %L0 and %H0 parts of the
   result.

   The output is marked early-clobber ("=&v").  The first v_mov_b32 writes
   %L0 before the second one reads %K1, so the result must not be allocated
   on top of PAIR's registers; without the '&' the compiler is entitled to
   assume all inputs are consumed before any output is written.  This
   matches the constraint already used by the PACK_* macros.

   The "e"(-1L) operand passes -1 in the "e" register class, presumably
   enabling all lanes for the moves.  PAIR is evaluated once.  */
#define UNPACK_DI_HIGH(to_t, pair) \
({ \
  to_t __res; \
  asm ("v_mov_b32\t%L0, %J1\n\t" \
       "v_mov_b32\t%H0, %K1" \
       : "=&v"(__res) : "v"(pair), "e"(-1L)); \
  __res; \
})
/* Condition argument standing for "no extra condition": expands to __mask,
   the lane-mask variable that the predicated macros in this file expect to
   be in scope at the point of use.  */
#define NO_COND __mask
/* Bitwise blend of two vectors of compatible type: result bits are taken
   from VEC1 wherever the converted COND has a set bit and from VEC2
   everywhere else.  COND is converted with __builtin_convertvector to the
   64-lane integer vector whose total size equals that of VEC1 (v64qi,
   v64hi or v64si; every other size falls through to v64di), so a COND lane
   that converts to all-ones selects the whole corresponding VEC1 lane.

   Note - __mask is _not_ accounted for in VECTOR_MERGE!  */
#define VECTOR_MERGE(vec1, vec2, cond) \
({ \
  _Static_assert (__builtin_types_compatible_p (typeof (vec1), \
                                                typeof (vec2))); \
  /* View both inputs and the result as raw integer lanes.  */ \
  union { \
    typeof (vec1) val; \
    v64qi t_v64qi; \
    v64hi t_v64hi; \
    v64si t_v64si; \
    v64di t_v64di; \
  } __vec1, __vec2, __res; \
  __vec1.val = (vec1); \
  __vec2.val = (vec2); \
  /* Exactly one arm is chosen at compile time from sizeof (vec1).  */ \
  __builtin_choose_expr ( \
    sizeof (vec1) == sizeof (v64qi), \
    ({ \
      v64qi __bitmask = __builtin_convertvector ((cond), v64qi); \
      __res.t_v64qi = (__vec2.t_v64qi & ~__bitmask) \
                      | (__vec1.t_v64qi & __bitmask); \
    }), \
    __builtin_choose_expr ( \
      sizeof (vec1) == sizeof (v64hi), \
      ({ \
        v64hi __bitmask = __builtin_convertvector ((cond), v64hi); \
        __res.t_v64hi = (__vec2.t_v64hi & ~__bitmask) \
                        | (__vec1.t_v64hi & __bitmask); \
      }), \
      __builtin_choose_expr ( \
        sizeof (vec1) == sizeof (v64si), \
        ({ \
          v64si __bitmask = __builtin_convertvector ((cond), v64si); \
          __res.t_v64si = (__vec2.t_v64si & ~__bitmask) \
                          | (__vec1.t_v64si & __bitmask); \
        }), \
        ({ \
          v64di __bitmask = __builtin_convertvector ((cond), v64di); \
          __res.t_v64di = (__vec2.t_v64di & ~__bitmask) \
                          | (__vec1.t_v64di & __bitmask); \
        })))); \
  __res.val; \
})
/* Predicated assignment: VAR = VECTOR_MERGE (VAL, VAR, COND & __mask).
   Bits of VAR selected by both COND and the ambient __mask are replaced by
   the corresponding bits of VAL; all other bits of VAR are kept.  VAR and
   VAL must have compatible types, and a variable named __mask must be in
   scope at the point of use.  */
#define VECTOR_COND_MOVE(var, val, cond) \
  do \
    { \
      _Static_assert (__builtin_types_compatible_p (typeof (var), \
                                                    typeof (val))); \
      /* Bring COND to the mask's vector type before combining the two.  */ \
      __auto_type __cond = __builtin_convertvector ((cond), typeof (__mask)); \
      var = VECTOR_MERGE ((val), var, __mask & __cond); \
    } \
  while (0)
/* Begin a vector conditional.  Opens a new scope, stores COND in a freshly
   declared variable COND_VAR and keeps its bitwise complement in
   __inv_cond for a later VECTOR_ELSEIF/VECTOR_ELSE.  The statements that
   follow the macro form the body, which is skipped when
   ALL_ZEROES_P (COND_VAR) holds.  The two braces opened here are left
   unbalanced on purpose; they are closed by VECTOR_ELSEIF/VECTOR_ELSE and
   finally by VECTOR_ENDIF.  */
#define VECTOR_IF(cond, cond_var) \
{ \
__auto_type cond_var = (cond); \
__auto_type __inv_cond __attribute__((unused)) = ~cond_var; \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Continue a VECTOR_IF chain.  Closes the previous body, sets COND_VAR to
   the bits of COND not claimed by an earlier branch (__inv_cond & COND),
   removes COND from __inv_cond, and opens the next body, which is skipped
   when ALL_ZEROES_P (COND_VAR) holds.  COND is expanded twice, so it should
   be free of side effects.  */
#define VECTOR_ELSEIF(cond, cond_var) \
} \
cond_var = __inv_cond & (cond); \
__inv_cond &= ~(cond); \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Final branch of a VECTOR_IF chain.  Closes the previous body, sets
   COND_VAR to whatever is left in __inv_cond, and opens the body, which is
   skipped when ALL_ZEROES_P (COND_VAR) holds.  */
#define VECTOR_ELSE(cond_var) \
} \
cond_var = __inv_cond; \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Like VECTOR_IF, but additionally restricted by an existing condition:
   COND_VAR is COND ANDed with PREV_COND_VAR (converted to the type of
   COND).  __inv_cond is the complement of that combined value, so it also
   contains bits outside PREV_COND_VAR; VECTOR_ELSEIF2/VECTOR_ELSE2 AND
   with PREV_COND_VAR again to compensate.  The braces opened here are
   closed by VECTOR_ELSEIF2/VECTOR_ELSE2 and finally by VECTOR_ENDIF.  */
#define VECTOR_IF2(cond, cond_var, prev_cond_var) \
{ \
__auto_type cond_var = (cond) & __builtin_convertvector (prev_cond_var, typeof (cond)); \
__auto_type __inv_cond __attribute__((unused)) = ~cond_var; \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Continue a VECTOR_IF2 chain.  Closes the previous body, sets COND_VAR to
   COND & __inv_cond & PREV_COND_VAR (converted to the type of COND),
   removes COND from __inv_cond, and opens the next body, which is skipped
   when ALL_ZEROES_P (COND_VAR) holds.  COND is expanded several times, so
   it should be free of side effects.  */
#define VECTOR_ELSEIF2(cond, cond_var, prev_cond_var) \
} \
cond_var = (cond) & __inv_cond & __builtin_convertvector (prev_cond_var, typeof (cond)); \
__inv_cond &= ~(cond); \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Final branch of a VECTOR_IF2 chain.  Closes the previous body, sets
   COND_VAR to __inv_cond ANDed with PREV_COND_VAR (converted to the type of
   __inv_cond), and opens the body, which is skipped when
   ALL_ZEROES_P (COND_VAR) holds.  */
#define VECTOR_ELSE2(cond_var, prev_cond_var) \
} \
cond_var = __inv_cond & __builtin_convertvector (prev_cond_var, typeof (__inv_cond)); \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Terminate a VECTOR_IF or VECTOR_IF2 chain: closes the body of the last
   branch and then the enclosing scope in which the condition variable and
   __inv_cond were declared.  */
#define VECTOR_ENDIF \
} \
}
/* Broadcast the scalar X into every one of the 64 elements of a vector of
   type TYPE and yield that vector.  X is evaluated exactly once.  */
#define VECTOR_INIT_AUX(x, type) \
({ \
  typeof (x) __e = (x); \
  /* 4 rows of 16 copies = 64 lanes.  */ \
  type __tmp = { \
    __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e, __e \
  }; \
  __tmp; \
})
/* Broadcast the scalar X into a 64-lane vector whose element type is picked
   from the type of X.  Only the scalar types listed below are accepted;
   there is no default association, so any other type is rejected at compile
   time.  */
#define VECTOR_INIT(x) \
  (_Generic ((x), \
             /* Signed integers.  */ \
             char: VECTOR_INIT_AUX ((x), v64qi), \
             short: VECTOR_INIT_AUX ((x), v64hi), \
             int: VECTOR_INIT_AUX ((x), v64si), \
             long: VECTOR_INIT_AUX ((x), v64di), \
             /* Unsigned integers.  */ \
             unsigned char: VECTOR_INIT_AUX ((x), v64uqi), \
             unsigned short: VECTOR_INIT_AUX ((x), v64uhi), \
             unsigned: VECTOR_INIT_AUX ((x), v64usi), \
             unsigned long: VECTOR_INIT_AUX ((x), v64udi), \
             /* Floating point.  */ \
             float: VECTOR_INIT_AUX ((x), v64sf), \
             double: VECTOR_INIT_AUX ((x), v64df)))
/* CDNA3_PLUS is 0 when any of the __GCN3__, __GCN5__, __CDNA1__, __CDNA2__,
   __RDNA2__ or __RDNA3__ macros is defined, and 1 when none of them is.  */
#if !defined (__GCN3__) && !defined (__GCN5__) \
    && !defined (__CDNA1__) && !defined (__CDNA2__) \
    && !defined (__RDNA2__) && !defined (__RDNA3__)
#define CDNA3_PLUS 1
#else
#define CDNA3_PLUS 0
#endif
/* Build a MASKMODE lane mask for COUNT active lanes and yield it.

   COUNT must be in the range 0..64 and is evaluated once.  For COUNT == 64
   every lane is set to -1.  Otherwise every lane is first cleared, and -1
   is then written while the "e" operand holds a value with only the low
   COUNT bits set; assuming bit N of that operand enables lane N, lanes
   0..COUNT-1 end up all-ones and the rest stay zero.

   A single "v_mov%B0" is used when MASKMODE is smaller than 512 bytes or
   when CDNA3_PLUS is set; otherwise the %L and %H parts of each lane are
   written by separate v_mov_b32 instructions.

   The locals carry a "__" prefix so that a COUNT argument which happens to
   be spelled "count" or "bitmask" is not captured by the macro's own
   declarations.  */
#define VECTOR_INIT_MASK(COUNT) \
({ \
  MASKMODE __mask; \
  int __count = (COUNT); \
  if (__count == 64) \
    { \
      if (sizeof (MASKMODE) < 512 || CDNA3_PLUS) \
        asm ("v_mov%B0\t%0, -1" : "=v"(__mask) : "e"(-1L)); \
      else \
        asm ("v_mov_b32\t%L0, -1\n\t" \
             "v_mov_b32\t%H0, -1" : "=v"(__mask) : "e"(-1L)); \
    } \
  else \
    { \
      /* Shift in unsigned long: "1 << 32" on a 32-bit int is undefined, \
         and COUNT == 32 is one of the sizes DEF_VARIANTS instantiates. \
         COUNT == 64 never reaches this branch.  */ \
      long __bitmask = (long) ((1UL << __count) - 1); \
      if (sizeof (MASKMODE) < 512 || CDNA3_PLUS) \
        { \
          /* Clear all lanes, then set only the enabled ones.  */ \
          asm ("v_mov%B0\t%0, 0" : "=v"(__mask) : "e"(-1L)); \
          asm ("v_mov%B0\t%0, -1" : "+v"(__mask) : "e"(__bitmask)); \
        } \
      else \
        { \
          asm ("v_mov_b32\t%L0, 0\n\t" \
               "v_mov_b32\t%H0, 0" : "=v"(__mask) : "e"(-1L)); \
          asm ("v_mov_b32\t%L0, -1\n\t" \
               "v_mov_b32\t%H0, -1" : "+v"(__mask) : "e"(__bitmask)); \
        } \
    } \
  __mask; \
})
/* Nonzero when COND_TO_BITMASK (X) is zero, i.e. when X has nothing set
   within the ambient __mask.  */
#define ALL_ZEROES_P(x) (!COND_TO_BITMASK (x))
/* Reduce the vector condition X to a scalar of type long.  X is converted
   to the type of __mask and ANDed with __mask (which must be in scope);
   the result is compared against 0 with v_cmp_ne, and the comparison
   result, written to an "Sg" operand, is the value of the macro.  The
   32-bit compare is used unless sizeof (__mask) is 512, in which case the
   64-bit compare is used.
   NOTE(review): unlike the other asm statements in this file, no "e"
   operand is supplied here, so the compare runs under whatever execution
   mask is current -- confirm that this is intended.  */
#define COND_TO_BITMASK(x) \
({ \
long __tmp = 0; \
__auto_type __x = __builtin_convertvector((x), typeof (__mask)) & __mask; \
__builtin_choose_expr (sizeof (__mask) != 512, \
({ asm ("v_cmp_ne_u32_e64 %0, %1, 0" \
: "=Sg" (__tmp) \
: "v" (__x)); }), \
({ asm ("v_cmp_ne_u64_e64 %0, %1, 0" \
: "=Sg" (__tmp) \
: "v" (__x)); })); \
__tmp; \
})
/* Begin a vector loop.  Opens a scope, declares COND_VAR initialised from
   PREV_COND_VAR, and starts an endless loop.  At the top of each iteration
   COND_VAR is ANDed with COND (re-evaluated every time), and the loop is
   left once ALL_ZEROES_P (COND_VAR) holds.  The statements following the
   macro form the loop body; the braces opened here are closed by
   VECTOR_ENDWHILE.  */
#define VECTOR_WHILE(cond, cond_var, prev_cond_var) \
{ \
__auto_type cond_var = prev_cond_var; \
for (;;) { \
cond_var &= (cond); \
if (ALL_ZEROES_P (cond_var)) \
break;
/* Terminate a VECTOR_WHILE: closes the loop body and then the enclosing
   scope in which the condition variable was declared.  */
#define VECTOR_ENDWHILE \
} \
}
/* Define the two-argument function FUN##v##COUNT##SUFFIX, taking
   v##COUNT##TYPE operands and returning v##COUNT##OTYPE, as a wrapper
   around the 64-lane implementation FUN##v64##SUFFIX##_aux.  Both
   arguments are recast to v64##TYPE with VGPR_CAST, a mask for COUNT lanes
   is built with VECTOR_INIT_MASK, and the result of the _aux call is
   recast to the COUNT-lane return type.  */
#define DEF_VARIANT(FUN, SUFFIX, OTYPE, TYPE, COUNT) \
v##COUNT##OTYPE \
FUN##v##COUNT##SUFFIX (v##COUNT##TYPE __arg1, v##COUNT##TYPE __arg2) \
{ \
  v64##TYPE __upsized_arg1 = VGPR_CAST (v64##TYPE, __arg1); \
  v64##TYPE __upsized_arg2 = VGPR_CAST (v64##TYPE, __arg2); \
  MASKMODE __mask = VECTOR_INIT_MASK (COUNT); \
  __auto_type __result \
    = FUN##v64##SUFFIX##_aux (__upsized_arg1, __upsized_arg2, __mask); \
  return VGPR_CAST (v##COUNT##OTYPE, __result); \
}
/* Define the 2-, 4-, 8-, 16-, 32- and 64-lane variants of FUN for the case
   where the result has the same element type as the operands.  This is
   DEF_VARIANTS_B with TYPE used for both the output and the input type.  */
#define DEF_VARIANTS(FUN, SUFFIX, TYPE) \
  DEF_VARIANTS_B (FUN, SUFFIX, TYPE, TYPE)
/* Define the 2-, 4-, 8-, 16-, 32- and 64-lane variants of FUN, with OTYPE
   as the element type of the result and TYPE as the element type of the
   operands.  Each line expands to one function definition; see
   DEF_VARIANT.  */
#define DEF_VARIANTS_B(FUN, SUFFIX, OTYPE, TYPE) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 2) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 4) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 8) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 16) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 32) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 64)