Merge master r11-576.

2020-05-22  Iain Sandoe  <iain@sandoe.co.uk>

	* Merge master r11-576-gdc50686d78d4.
Iain Sandoe
2020-05-22 20:22:11 +01:00
44 changed files with 1138 additions and 252 deletions

View File

@@ -1,3 +1,14 @@
2020-05-22 Jakub Jelinek <jakub@redhat.com>
* gcc-changelog/git_commit.py: Add trailing / to
gcc/testsuite/go.test/test and replace gcc/go/frontend/
with gcc/go/gofrontend/ in ignored locations.
2020-05-22 Martin Liska <mliska@suse.cz>
* gcc-changelog/git_commit.py: Add gcc/testsuite/go.test/test
to ignored locations.
2020-05-21 Martin Liska <mliska@suse.cz>
* gcc-changelog/git_update_version.py: Prepare the script, the

View File

@@ -127,7 +127,8 @@ bug_components = set([
ignored_prefixes = [
'gcc/d/dmd/',
'gcc/go/frontend/',
'gcc/go/gofrontend/',
'gcc/testsuite/go.test/test/',
'libgo/',
'libphobos/libdruntime',
'libphobos/src/',

View File

@@ -1,3 +1,115 @@
2020-05-22 Jan Hubicka <hubicka@ucw.cz>
* lto-streamer-out.c (DFS::DFS): Silence warning.
2020-05-22 Uroš Bizjak <ubizjak@gmail.com>
PR target/95255
* config/i386/i386.md (<rounding_insn><mode>2): Do not try to
expand non-sse4 ROUND_ROUNDEVEN rounding via SSE support routines.
2020-05-22 Jan Hubicka <hubicka@ucw.cz>
* lto-streamer-out.c (lto_output_tree): Do not stream final ref if
it is not needed.
2020-05-22 Jan Hubicka <hubicka@ucw.cz>
* lto-section-out.c (lto_output_decl_index): Adjust dump indentation.
* lto-streamer-out.c (create_output_block): Fix whitespace.
(lto_write_tree_1): Add (debug) dump.
(DFS::DFS): Add dump.
(DFS::DFS_write_tree_body): Do not dump here.
(lto_output_tree): Improve dumping; do not stream ref when not needed.
(produce_asm_for_decls): Fix whitespace.
* tree-streamer-out.c (streamer_write_tree_header): Add dump.
* tree-streamer-out.c (streamer_write_integer_cst): Add debug dump.
2020-05-22 Hongtao.liu <hongtao.liu@intel.com>
PR target/92658
* config/i386/sse.md (trunc<pmov_src_lower><mode>2): New expander.
(truncv32hiv32qi2): Ditto.
(trunc<ssedoublemodelower><mode>2): Ditto.
(trunc<mode><pmov_dst_3>2): Ditto.
(trunc<mode><pmov_dst_mode_4>2): Ditto.
(truncv2div2si2): Ditto.
(truncv8div8qi2): Ditto.
(avx512f_<code>v8div16qi2): Renaming from *avx512f_<code>v8div16qi2.
(avx512vl_<code>v2div2si2): Renaming from *avx512vl_<code>v2div2si2.
(avx512vl_<code><mode>v<ssescalarnum>qi2): Renaming from
*avx512vl_<code><mode>v<ssescalarnum>qi2.
2020-05-22 H.J. Lu <hongjiu.lu@intel.com>
PR target/95258
* config/i386/driver-i386.c (host_detect_local_cpu): Detect
AVX512VPOPCNTDQ.
2020-05-22 Richard Biener <rguenther@suse.de>
PR tree-optimization/95268
* tree-ssa-sink.c (sink_common_stores_to_bb): Handle clobbers
properly.
2020-05-22 Jan Hubicka <hubicka@ucw.cz>
* tree-streamer.c (record_common_node): Fix hash value of pre-streamed
nodes.
2020-05-22 Jan Hubicka <hubicka@ucw.cz>
* lto-streamer-in.c (lto_read_tree): Do not stream end markers.
(lto_input_scc): Optimize streaming of entry lengths.
* lto-streamer-out.c (lto_write_tree): Do not stream end markers.
(DFS::DFS): Optimize streaming of entry lengths.
2020-05-22 Richard Biener <rguenther@suse.de>
PR lto/95190
* doc/invoke.texi (flto): Document behavior of diagnostic
options.
2020-05-22 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (vect_is_simple_use): New overload.
(vect_maybe_update_slp_op_vectype): New.
* tree-vect-stmts.c (vect_is_simple_use): New overload
accessing operands of SLP vs. non-SLP operation transparently.
(vect_maybe_update_slp_op_vectype): New function updating
the possibly shared SLP operands vector type.
(vectorizable_operation): Be a bit more SLP vs non-SLP agnostic
using the new vect_is_simple_use overload; update SLP invariant
operand nodes vector type.
(vectorizable_comparison): Likewise.
(vectorizable_call): Likewise.
(vectorizable_conversion): Likewise.
(vectorizable_shift): Likewise.
(vectorizable_store): Likewise.
(vectorizable_condition): Likewise.
(vectorizable_assignment): Likewise.
* tree-vect-loop.c (vectorizable_reduction): Likewise.
* tree-vect-slp.c (vect_get_constant_vectors): Enforce
present SLP_TREE_VECTYPE and check it matches previous
behavior.
2020-05-22 Richard Biener <rguenther@suse.de>
PR tree-optimization/95248
* tree-ssa-loop-im.c (sm_seq_valid_bb): Remove bogus early out.
2020-05-22 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (_slp_tree::_slp_tree): New.
(_slp_tree::~_slp_tree): Likewise.
* tree-vect-slp.c (_slp_tree::_slp_tree): Factor out code
from allocators.
(_slp_tree::~_slp_tree): Implement.
(vect_free_slp_tree): Simplify.
(vect_create_new_slp_node): Likewise. Add nops parameter.
(vect_build_slp_tree_2): Adjust.
(vect_analyze_slp_instance): Likewise.
2020-05-21 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* adjust-alignment.c: Include memmodel.h.

View File

@@ -1 +1 @@
20200521
20200522

View File

@@ -420,6 +420,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
unsigned int has_avx512bitalg = 0;
unsigned int has_avx512vpopcntdq = 0;
unsigned int has_shstk = 0;
unsigned int has_avx512vnni = 0, has_vaes = 0;
unsigned int has_vpclmulqdq = 0;
@@ -528,6 +529,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_vaes = ecx & bit_VAES;
has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
has_avx512bitalg = ecx & bit_AVX512BITALG;
has_avx512vpopcntdq = ecx & bit_AVX512VPOPCNTDQ;
has_movdiri = ecx & bit_MOVDIRI;
has_movdir64b = ecx & bit_MOVDIR64B;
has_enqcmd = ecx & bit_ENQCMD;
@@ -1189,6 +1191,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *avx512vp2intersect = has_avx512vp2intersect ? " -mavx512vp2intersect" : " -mno-avx512vp2intersect";
const char *tsxldtrk = has_tsxldtrk ? " -mtsxldtrk " : " -mno-tsxldtrk";
const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
const char *avx512vpopcntdq = has_avx512vpopcntdq ? " -mavx512vpopcntdq" : " -mno-avx512vpopcntdq";
const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
const char *enqcmd = has_enqcmd ? " -menqcmd" : " -mno-enqcmd";
@@ -1210,9 +1213,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
ptwrite, avx512bf16, enqcmd, avx512vp2intersect,
serialize, tsxldtrk, NULL);
avx512bitalg, avx512vpopcntdq, movdiri, movdir64b,
waitpkg, cldemote, ptwrite, avx512bf16, enqcmd,
avx512vp2intersect, serialize, tsxldtrk, NULL);
}
done:

View File

@@ -17115,16 +17115,18 @@
&& (flag_fp_int_builtin_inexact || !flag_trapping_math))
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& (TARGET_SSE4_1
|| (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
&& (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
|| (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
&& (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math))
&& (TARGET_SSE4_1
|| (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
&& (flag_fp_int_builtin_inexact || !flag_trapping_math))))
{
if (TARGET_SSE4_1)
emit_insn (gen_sse4_1_round<mode>2
(operands[0], operands[1], GEN_INT (ROUND_<ROUNDING>
| ROUND_NO_EXC)));
(operands[0], operands[1],
GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
else if (TARGET_64BIT || (<MODE>mode != DFmode))
{
if (ROUND_<ROUNDING> == ROUND_FLOOR)

View File

@@ -10513,6 +10513,12 @@
(define_mode_attr pmov_suff_1
[(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
(define_expand "trunc<pmov_src_lower><mode>2"
[(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
(truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand")))]
"TARGET_AVX512F")
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
[(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
(any_truncate:PMOV_DST_MODE_1
@@ -10547,6 +10553,12 @@
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512F")
(define_expand "truncv32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand")
(truncate:V32QI
(match_operand:V32HI 1 "register_operand")))]
"TARGET_AVX512BW")
(define_insn "avx512bw_<code>v32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
(any_truncate:V32QI
@@ -10586,6 +10598,12 @@
(define_mode_attr pmov_suff_2
[(V16QI "wb") (V8HI "dw") (V4SI "qd")])
(define_expand "trunc<ssedoublemodelower><mode>2"
[(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
(truncate:PMOV_DST_MODE_2
(match_operand:<ssedoublemode> 1 "register_operand")))]
"TARGET_AVX512VL")
(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
[(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
(any_truncate:PMOV_DST_MODE_2
@@ -10628,7 +10646,20 @@
(define_mode_attr pmov_suff_3
[(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
(define_expand "trunc<mode><pmov_dst_3>2"
[(set (match_operand:<pmov_dst_3> 0 "register_operand")
(truncate:<pmov_dst_3>
(match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
"TARGET_AVX512VL"
{
operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0);
emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0],
operands[1],
CONST0_RTX (<pmov_dst_zeroed_3>mode)));
DONE;
})
(define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_concat:V16QI
(any_truncate:<pmov_dst_3>
@@ -10920,7 +10951,21 @@
(define_mode_attr pmov_suff_4
[(V4DI "qw") (V2DI "qw") (V4SI "dw")])
(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
(define_expand "trunc<mode><pmov_dst_4>2"
[(set (match_operand:<pmov_dst_4> 0 "register_operand")
(truncate:<pmov_dst_4>
(match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
"TARGET_AVX512VL"
{
operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0);
emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0],
operands[1],
CONST0_RTX (<pmov_dst_zeroed_4>mode)));
DONE;
})
(define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_concat:V8HI
(any_truncate:<pmov_dst_4>
@@ -11085,7 +11130,20 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
(define_insn "*avx512vl_<code>v2div2si2"
(define_expand "truncv2div2si2"
[(set (match_operand:V2SI 0 "register_operand")
(truncate:V2SI
(match_operand:V2DI 1 "register_operand")))]
"TARGET_AVX512VL"
{
operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0);
emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0],
operands[1],
CONST0_RTX (V2SImode)));
DONE;
})
(define_insn "avx512vl_<code>v2div2si2"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
(any_truncate:V2SI
@@ -11164,7 +11222,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
(define_insn "*avx512f_<code>v8div16qi2"
(define_expand "truncv8div8qi2"
[(set (match_operand:V8QI 0 "register_operand")
(truncate:V8QI
(match_operand:V8DI 1 "register_operand")))]
"TARGET_AVX512F"
{
operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0);
emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1]));
DONE;
})
(define_insn "avx512f_<code>v8div16qi2"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_concat:V16QI
(any_truncate:V8QI

View File

@@ -8359,6 +8359,14 @@ is_constant_expression (tree t)
return potential_constant_expression_1 (t, false, true, true, tf_none);
}
/* As above, but expect an rvalue. */
bool
is_rvalue_constant_expression (tree t)
{
return potential_constant_expression_1 (t, true, true, true, tf_none);
}
/* Like above, but complain about non-constant expressions. */
bool

View File

@@ -7939,6 +7939,7 @@ extern tree constexpr_fn_retval (tree);
extern tree ensure_literal_type_for_constexpr_object (tree);
extern bool potential_constant_expression (tree);
extern bool is_constant_expression (tree);
extern bool is_rvalue_constant_expression (tree);
extern bool is_nondependent_constant_expression (tree);
extern bool is_nondependent_static_init_expression (tree);
extern bool is_static_init_expression (tree);

View File

@@ -7612,7 +7612,7 @@ cp_finish_decl (tree decl, tree init, bool init_const_expr_p,
init = boolean_true_node;
}
else if (init
&& init_const_expr_p
&& (init_const_expr_p || DECL_DECLARED_CONSTEXPR_P (decl))
&& !TYPE_REF_P (type)
&& decl_maybe_constant_var_p (decl)
&& !(dep_init = value_dependent_init_p (init)))
@@ -10328,13 +10328,14 @@ compute_array_index_type_loc (location_t name_loc, tree name, tree size,
dependent type or whose size is specified by a constant expression
that is value-dependent. */
/* We can only call value_dependent_expression_p on integral constant
expressions; treat non-constant expressions as dependent, too. */
expressions. */
if (processing_template_decl
&& (!TREE_CONSTANT (size) || value_dependent_expression_p (size)))
&& potential_constant_expression (size)
&& value_dependent_expression_p (size))
{
/* We cannot do any checking for a SIZE that isn't known to be
constant. Just build the index type and mark that it requires
/* Just build the index type and mark that it requires
structural equality checks. */
in_template:
itype = build_index_type (build_min (MINUS_EXPR, sizetype,
size, size_one_node));
TYPE_DEPENDENT_P (itype) = 1;
@@ -10447,8 +10448,7 @@ compute_array_index_type_loc (location_t name_loc, tree name, tree size,
}
if (processing_template_decl && !TREE_CONSTANT (size))
/* A variable sized array. */
itype = build_min (MINUS_EXPR, sizetype, size, integer_one_node);
goto in_template;
else
{
if (!TREE_CONSTANT (size))
@@ -13961,7 +13961,10 @@ grokparms (tree parmlist, tree *parms)
break;
if (! decl || TREE_TYPE (decl) == error_mark_node)
continue;
{
any_error = 1;
continue;
}
type = TREE_TYPE (decl);
if (VOID_TYPE_P (type))
@@ -14014,7 +14017,8 @@ grokparms (tree parmlist, tree *parms)
TREE_TYPE (decl) = type;
}
else if (abstract_virtuals_error (decl, type))
any_error = 1; /* Seems like a good idea. */
/* Ignore any default argument. */
init = NULL_TREE;
else if (cxx_dialect < cxx17 && INDIRECT_TYPE_P (type))
{
/* Before C++17 DR 393:
@@ -14043,9 +14047,7 @@ grokparms (tree parmlist, tree *parms)
decl, t);
}
if (any_error)
init = NULL_TREE;
else if (init && !processing_template_decl)
if (init && !processing_template_decl)
init = check_default_argument (decl, init, tf_warning_or_error);
}
@@ -14058,6 +14060,12 @@ grokparms (tree parmlist, tree *parms)
if (parm)
result = chainon (result, void_list_node);
*parms = decls;
if (any_error)
result = NULL_TREE;
if (any_error)
/* We had parm errors, recover by giving the function (...) type. */
result = NULL_TREE;
return result;
}
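
For orientation, a hedged illustration of the array-bound cases that the compute_array_index_type_loc change above now separates; the example is hypothetical and not part of the patch:

/* Hypothetical example, not taken from the commit.  */
template <int N>
void
f (int i)
{
  int a[N];   /* N is a value-dependent constant expression: the index
                 type is built with structural equality and only checked
                 at instantiation time.  */
  int b[i];   /* i is not a potential constant expression: as the diff
                 suggests, it is no longer routed through the dependent
                 path and is instead handled (diagnosed, or accepted as a
                 VLA extension) by compute_array_index_type itself.  */
}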

View File

@@ -10184,10 +10184,10 @@ cp_parser_constant_expression (cp_parser* parser,
if (TREE_TYPE (expression)
&& TREE_CODE (TREE_TYPE (expression)) == ARRAY_TYPE)
decay = build_address (expression);
bool is_const = potential_rvalue_constant_expression (decay);
bool is_const = is_rvalue_constant_expression (decay);
parser->non_integral_constant_expression_p = !is_const;
if (!is_const && !allow_non_constant_p)
require_potential_rvalue_constant_expression (decay);
require_rvalue_constant_expression (decay);
}
if (allow_non_constant_p)
*non_constant_p = parser->non_integral_constant_expression_p;
@@ -21366,6 +21366,8 @@ cp_parser_direct_declarator (cp_parser* parser,
/* OK */;
else if (error_operand_p (bounds))
/* Already gave an error. */;
else if (!cp_parser_uncommitted_to_tentative_parse_p (parser))
/* Let compute_array_index_type diagnose this. */;
else if (!parser->in_function_body
|| current_binding_level->kind == sk_function_parms)
{

View File

@@ -8768,6 +8768,9 @@ build_x_modify_expr (location_t loc, tree lhs, enum tree_code modifycode,
tree overload = NULL_TREE;
tree op = build_nt (modifycode, NULL_TREE, NULL_TREE);
if (lhs == error_mark_node || rhs == error_mark_node)
return cp_expr (error_mark_node, loc);
if (processing_template_decl)
{
if (modifycode == NOP_EXPR

View File

@@ -11205,6 +11205,14 @@ conflicting translation units. Specifically
precedence; and for example @option{-ffp-contract=off} takes precedence
over @option{-ffp-contract=fast}. You can override them at link time.
Diagnostic options such as @option{-Wstringop-overflow} are passed
through to the link stage and their setting matches that of the
compile-step at function granularity. Note that this matters only
for diagnostics emitted during optimization. Note that code
transforms such as inlining can lead to warnings being enabled
or disabled for regions of code not consistent with the setting
at compile time.
When you need to pass options to the assembler via @option{-Wa} or
@option{-Xassembler} make sure to either compile such translation
units with @option{-fno-lto} or consistently use the same assembler
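
As a hedged sketch of the documented behavior, consider two translation units; the file names and command lines in the comments are illustrative assumptions, not taken from the patch:

/* warn.cc, assumed to be compiled with
     g++ -O2 -flto -Wstringop-overflow -c warn.cc
   while a second unit is compiled with -Wno-stringop-overflow, and both
   are linked with g++ -O2 -flto.  */
#include <cstring>

char buf[4];

void
fill ()
{
  /* The overflow below is typically diagnosed only while the call is
     being optimized, i.e. at link time under -flto; whether the warning
     fires for a given function follows the -W setting that function was
     compiled with, as described above.  */
  std::memset (buf, 0, 8);
}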

View File

@@ -170,7 +170,7 @@ lto_output_decl_index (struct lto_output_stream *obs,
index = encoder->trees.length ();
if (streamer_dump_file)
{
print_node_brief (streamer_dump_file, " Encoding indexable ",
print_node_brief (streamer_dump_file, " Encoding indexable ",
name, 4);
fprintf (streamer_dump_file, " as %i \n", index);
}

View File

@@ -1417,8 +1417,6 @@ lto_read_tree (class lto_input_block *ib, class data_in *data_in,
lto_read_tree_1 (ib, data_in, result);
/* end_marker = */ streamer_read_uchar (ib);
return result;
}
@@ -1431,12 +1429,18 @@ hashval_t
lto_input_scc (class lto_input_block *ib, class data_in *data_in,
unsigned *len, unsigned *entry_len, bool shared_scc)
{
/* A blob of unnamed tree nodes, fill the cache from it and
recurse. */
unsigned size = streamer_read_uhwi (ib);
hashval_t scc_hash = shared_scc ? streamer_read_uhwi (ib) : 0;
hashval_t scc_hash = 0;
unsigned scc_entry_len = 1;
if (shared_scc)
{
if (size & 1)
scc_entry_len = streamer_read_uhwi (ib);
size /= 2;
scc_hash = streamer_read_uhwi (ib);
}
if (size == 1)
{
enum LTO_tags tag = streamer_read_record_start (ib);
@@ -1447,8 +1451,6 @@ lto_input_scc (class lto_input_block *ib, class data_in *data_in,
unsigned int first = data_in->reader_cache->nodes.length ();
tree result;
scc_entry_len = streamer_read_uhwi (ib);
/* Materialize size trees by reading their headers. */
for (unsigned i = 0; i < size; ++i)
{
@@ -1471,7 +1473,6 @@ lto_input_scc (class lto_input_block *ib, class data_in *data_in,
result = streamer_tree_cache_get_tree (data_in->reader_cache,
first + i);
lto_read_tree_1 (ib, data_in, result);
/* end_marker = */ streamer_read_uchar (ib);
}
}
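
The writer side (in the lto-streamer-out.c changes further down) packs scc_entry_len into the low bit of the streamed size, and lto_input_scc above unpacks it. Below is a minimal standalone sketch of just that encoding, using plain integers instead of the GCC streamer API; it is an illustration, not GCC code:

#include <cassert>
#include <cstdint>
#include <vector>

/* Pack (size, scc_entry_len, scc_hash): the common case
   scc_entry_len == 1 costs only one extra bit.  */
static void
encode (std::vector<uint64_t> &out, uint64_t size,
        uint64_t scc_entry_len, uint64_t scc_hash)
{
  out.push_back (size * 2 + (scc_entry_len != 1));
  if (scc_entry_len != 1)
    out.push_back (scc_entry_len);
  out.push_back (scc_hash);
}

/* Reverse of encode, mirroring the logic in lto_input_scc above.  */
static void
decode (const std::vector<uint64_t> &in, size_t &pos,
        uint64_t &size, uint64_t &scc_entry_len, uint64_t &scc_hash)
{
  size = in[pos++];
  scc_entry_len = 1;
  if (size & 1)
    scc_entry_len = in[pos++];
  size /= 2;
  scc_hash = in[pos++];
}

int
main ()
{
  std::vector<uint64_t> stream;
  encode (stream, 7, 1, 0xc0ffee);   /* common case */
  encode (stream, 4, 2, 0xbeef);     /* rare case: explicit entry length */
  size_t pos = 0;
  uint64_t size, len, hash;
  decode (stream, pos, size, len, hash);
  assert (size == 7 && len == 1 && hash == 0xc0ffee);
  decode (stream, pos, size, len, hash);
  assert (size == 4 && len == 2 && hash == 0xbeef);
  return 0;
}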

View File

@@ -72,7 +72,7 @@ create_output_block (enum lto_section_type section_type)
struct output_block *ob = XCNEW (struct output_block);
if (streamer_dump_file)
fprintf (streamer_dump_file, "Creating output block for %s\n",
lto_section_name [section_type]);
lto_section_name[section_type]);
ob->section_type = section_type;
ob->decl_state = lto_get_out_decl_state ();
@@ -417,6 +417,14 @@ get_symbol_initial_value (lto_symtab_encoder_t encoder, tree expr)
static void
lto_write_tree_1 (struct output_block *ob, tree expr, bool ref_p)
{
if (streamer_dump_file)
{
print_node_brief (streamer_dump_file, " Streaming body of ",
expr, 4);
fprintf (streamer_dump_file, " to %s\n",
lto_section_name[ob->section_type]);
}
/* Pack all the non-pointer fields in EXPR into a bitpack and write
the resulting bitpack. */
streamer_write_tree_bitfields (ob, expr);
@@ -473,9 +481,6 @@ lto_write_tree (struct output_block *ob, tree expr, bool ref_p)
streamer_write_tree_header (ob, expr);
lto_write_tree_1 (ob, expr, ref_p);
/* Mark the end of EXPR. */
streamer_write_zero (ob);
}
/* Emit the physical representation of tree node EXPR to output block OB,
@@ -740,6 +745,8 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p,
worklist_vec.pop ();
unsigned int prev_size = ob->main_stream->total_size;
/* Only global decl sections are considered by tree merging. */
if (ob->section_type != LTO_section_decls)
{
@@ -747,6 +754,11 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p,
by itself then we do not need to stream SCC at all. */
if (worklist_vec.is_empty () && first == 0 && size == 1)
return;
if (streamer_dump_file)
{
fprintf (streamer_dump_file,
" Start of LTO_trees of size %i\n", size);
}
streamer_write_record_start (ob, LTO_trees);
streamer_write_uhwi (ob, size);
}
@@ -763,16 +775,35 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p,
{
gcc_checking_assert (ob->section_type == LTO_section_decls);
if (streamer_dump_file)
{
fprintf (streamer_dump_file,
" Start of LTO_tree_scc of size %i\n", size);
}
streamer_write_record_start (ob, LTO_tree_scc);
streamer_write_uhwi (ob, size);
/* In the vast majority of cases scc_entry_len is 1 and size is a small
integer. Use the extra bit of size to stream info about
exceptions. */
streamer_write_uhwi (ob, size * 2 + (scc_entry_len != 1));
if (scc_entry_len != 1)
streamer_write_uhwi (ob, scc_entry_len);
streamer_write_uhwi (ob, scc_hash);
}
/* Non-trivial SCCs must be packed to trees blocks so forward
references work correctly. */
else if (size != 1)
{
streamer_write_record_start (ob, LTO_trees);
streamer_write_uhwi (ob, size);
if (streamer_dump_file)
{
fprintf (streamer_dump_file,
" Start of LTO_trees of size %i\n", size);
}
streamer_write_record_start (ob, LTO_trees);
streamer_write_uhwi (ob, size);
}
else if (streamer_dump_file)
{
fprintf (streamer_dump_file, " Streaming single tree\n");
}
/* Write size-1 SCCs without wrapping them inside SCC bundles.
@@ -783,8 +814,6 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p,
lto_output_tree_1 (ob, expr, scc_hash, ref_p, this_ref_p);
else
{
/* Write the size of the SCC entry candidates. */
streamer_write_uhwi (ob, scc_entry_len);
/* Write all headers and populate the streamer cache. */
for (unsigned i = 0; i < size; ++i)
@@ -807,13 +836,11 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p,
/* Write the bitpacks and tree references. */
for (unsigned i = 0; i < size; ++i)
{
lto_write_tree_1 (ob, sccstack[first+i].t, ref_p);
/* Mark the end of the tree. */
streamer_write_zero (ob);
}
lto_write_tree_1 (ob, sccstack[first+i].t, ref_p);
}
if (streamer_dump_file)
fprintf (streamer_dump_file, " %u bytes\n",
ob->main_stream->total_size - prev_size);
/* Finally truncate the vector. */
sccstack.truncate (first);
@@ -850,14 +877,6 @@ DFS::DFS_write_tree_body (struct output_block *ob,
enum tree_code code;
if (streamer_dump_file)
{
print_node_brief (streamer_dump_file, " Streaming ",
expr, 4);
fprintf (streamer_dump_file, " to %s\n",
lto_section_name [ob->section_type]);
}
code = TREE_CODE (expr);
if (CODE_CONTAINS_STRUCT (code, TS_TYPED))
@@ -1251,7 +1270,7 @@ hash_tree (struct streamer_tree_cache_d *cache, hash_map<tree, hashval_t> *map,
{
hstate.add_hwi (TYPE_MODE (t));
/* TYPE_NO_FORCE_BLK is private to stor-layout and need
no streaming. */
no streaming. */
hstate.add_flag (TYPE_PACKED (t));
hstate.add_flag (TYPE_RESTRICT (t));
hstate.add_flag (TYPE_USER_ALIGN (t));
@@ -1694,6 +1713,10 @@ lto_output_tree (struct output_block *ob, tree expr,
{
unsigned ix;
bool existed_p;
unsigned int size = ob->main_stream->total_size;
/* This is the first time we see EXPR, write all reachable
trees to OB. */
static bool in_dfs_walk;
if (expr == NULL_TREE)
{
@@ -1710,6 +1733,16 @@ lto_output_tree (struct output_block *ob, tree expr,
existed_p = streamer_tree_cache_lookup (ob->writer_cache, expr, &ix);
if (existed_p)
{
if (streamer_dump_file)
{
if (in_dfs_walk)
print_node_brief (streamer_dump_file, " Streaming ref to ",
expr, 4);
else
print_node_brief (streamer_dump_file, " Streaming ref to ",
expr, 4);
fprintf (streamer_dump_file, "\n");
}
/* If a node has already been streamed out, make sure that
we don't write it more than once. Otherwise, the reader
will instantiate two different nodes for the same object. */
@@ -1721,20 +1754,16 @@ lto_output_tree (struct output_block *ob, tree expr,
}
else
{
/* This is the first time we see EXPR, write all reachable
trees to OB. */
static bool in_dfs_walk;
/* Protect against recursion which means disconnect between
what tree edges we walk in the DFS walk and what edges
what tree edges we walk in the DFS walk and what edges
we stream out. */
gcc_assert (!in_dfs_walk);
if (streamer_dump_file)
{
print_node_brief (streamer_dump_file, " Streaming SCC of ",
print_node_brief (streamer_dump_file, " Streaming tree ",
expr, 4);
fprintf (streamer_dump_file, "\n");
fprintf (streamer_dump_file, "\n");
}
/* Start the DFS walk. */
@@ -1742,7 +1771,6 @@ lto_output_tree (struct output_block *ob, tree expr,
/* let's see ... */
in_dfs_walk = true;
DFS (ob, expr, ref_p, this_ref_p, false);
in_dfs_walk = false;
/* Finally append a reference to the tree we were writing. */
existed_p = streamer_tree_cache_lookup (ob->writer_cache, expr, &ix);
@@ -1751,21 +1779,26 @@ lto_output_tree (struct output_block *ob, tree expr,
it. */
if (!existed_p)
lto_output_tree_1 (ob, expr, 0, ref_p, this_ref_p);
else
else if (this_ref_p)
{
if (streamer_dump_file)
{
print_node_brief (streamer_dump_file,
" Streaming final ref to ",
expr, 4);
fprintf (streamer_dump_file, "\n");
}
streamer_write_record_start (ob, LTO_tree_pickle_reference);
streamer_write_uhwi (ob, ix);
streamer_write_enum (ob->main_stream, LTO_tags, LTO_NUM_TAGS,
lto_tree_code_to_tag (TREE_CODE (expr)));
}
if (streamer_dump_file)
{
print_node_brief (streamer_dump_file, " Finished SCC of ",
expr, 4);
fprintf (streamer_dump_file, "\n\n");
}
in_dfs_walk = false;
lto_stats.num_pickle_refs_output++;
}
if (streamer_dump_file && !in_dfs_walk)
fprintf (streamer_dump_file, " %u bytes\n",
ob->main_stream->total_size - size);
}
@@ -2705,7 +2738,7 @@ write_global_stream (struct output_block *ob,
static void
write_global_references (struct output_block *ob,
struct lto_tree_ref_encoder *encoder)
struct lto_tree_ref_encoder *encoder)
{
tree t;
uint32_t index;
@@ -3141,7 +3174,7 @@ produce_asm_for_decls (void)
fn_out_state =
lto_function_decl_states[idx];
if (streamer_dump_file)
fprintf (streamer_dump_file, "Outputting stream for %s\n",
fprintf (streamer_dump_file, "Outputting stream for %s\n",
IDENTIFIER_POINTER
(DECL_ASSEMBLER_NAME (fn_out_state->fn_decl)));
lto_output_decl_state_streams (ob, fn_out_state);

View File

@@ -1,3 +1,7 @@
2020-05-22 Jan Hubicka <hubicka@ucw.cz>
* lto-common.c (lto_read_decls): Do not skip stray refs.
2020-05-20 Jan Hubicka <hubicka@ucw.cz>
* lto-common.c (compare_tree_sccs_1): Sanity check that we never

View File

@@ -1955,25 +1955,19 @@ lto_read_decls (struct lto_file_decl_data *decl_data, const void *data,
else
{
t = lto_input_tree_1 (&ib_main, data_in, tag, 0);
/* We streamed in new tree. Add it to cache and process dref. */
if (data_in->reader_cache->nodes.length () == from + 1)
{
num_unshared_trees_read++;
data_in->location_cache.accept_location_cache ();
process_dref (data_in, t, from);
if (TREE_CODE (t) == IDENTIFIER_NODE
|| (TREE_CODE (t) == INTEGER_CST
&& !TREE_OVERFLOW (t)))
;
else
{
lto_maybe_register_decl (data_in, t, from);
process_new_tree (t, &hm, from, &total, data_in);
}
}
gcc_assert (data_in->reader_cache->nodes.length () == from + 1);
num_unshared_trees_read++;
data_in->location_cache.accept_location_cache ();
process_dref (data_in, t, from);
if (TREE_CODE (t) == IDENTIFIER_NODE
|| (TREE_CODE (t) == INTEGER_CST
&& !TREE_OVERFLOW (t)))
;
else
/* FIXME: It seems useless to pickle stray references. */
gcc_assert (data_in->reader_cache->nodes.length () == from);
{
lto_maybe_register_decl (data_in, t, from);
process_new_tree (t, &hm, from, &total, data_in);
}
}
}

View File

@@ -1,3 +1,24 @@
2020-05-22 Uroš Bizjak <ubizjak@gmail.com>
PR target/95255
* gcc.target/i386/pr95255.c: New test.
2020-05-22 Hongtao.liu <hongtao.liu@intel.com>
* gcc.target/i386/pr92658-avx512f.c: New test.
* gcc.target/i386/pr92658-avx512vl.c: Ditto.
* gcc.target/i386/pr92658-avx512bw-trunc.c: Ditto.
2020-05-22 Richard Biener <rguenther@suse.de>
PR tree-optimization/95268
* g++.dg/torture/pr95268.C: New testcase.
2020-05-22 Richard Biener <rguenther@suse.de>
PR tree-optimization/95248
* gcc.dg/torture/pr95248.c: New testcase.
2020-05-21 Patrick Palka <ppalka@redhat.com>
PR c++/94038

View File

@@ -41,7 +41,7 @@ f1 (void)
;
#pragma omp task depend (iterator (int i = 0:4, \
struct U { int (*p)[i + 2]; } *p = 0:2) , in : a) /* { dg-error "type of iterator 'p' refers to outer iterator 'i'" "" { target c } } */
; /* { dg-error "types may not be defined in iterator type|not an integer constant" "" { target c++ } .-1 } */
; /* { dg-error "types may not be defined in iterator type|not an integral constant" "" { target c++ } .-1 } */
#pragma omp task depend (iterator (i = 0:4, j = i:16) , in : a) /* { dg-error "begin expression refers to outer iterator 'i'" } */
;
#pragma omp task depend (iterator (i = 0:4, j = 2:i:1) , in : a) /* { dg-error "end expression refers to outer iterator 'i'" } */

View File

@@ -3,11 +3,11 @@
template<int> void foo()
{
int x[=]; // { dg-error "expected" }
int x[=]; // { dg-error "" }
[&x]{};
}
void bar()
{
foo<0>();
foo<0>(); // { dg-prune-output "not declared" }
}

View File

@@ -4,5 +4,5 @@
template<typename T> void foo()
{
T x[=]; // { dg-error "expected" }
[&x]{};
[&x]{}; // { dg-prune-output "not declared" }
}

View File

@@ -6,3 +6,5 @@ template<int> struct A {};
template<typename> struct B : A<sizeof(0=0r)> {}; // { dg-error "not supported" }
template<typename> struct C : A<sizeof(0=0r)> {}; // { dg-error "not supported" }
// { dg-prune-output "template argument" }

View File

@@ -19,8 +19,7 @@ class B { B (int); };
B::B (int i)
{
struct S {
int ar[1][i]; // { dg-error "15:size of array .ar. is not an integral" "" { target c++11 } }
// { dg-error "array bound" "" { target c++98_only } .-1 }
int ar[1][i]; // { dg-error "15:size of array .ar. is not an integral" }
} s;
s.ar[0][0] = 0; // { dg-prune-output "no member" }

View File

@@ -8,9 +8,9 @@ struct A
typedef void (A::T)(); /* { dg-error "15:typedef name may not be a nested" } */
void bar(T); /* { dg-message "note: declared here" } */
void bar(T);
void baz()
{
bar(&A::foo); /* { dg-error "too many arguments" } */
bar(&A::foo);
}

View File

@@ -7,8 +7,8 @@ struct Tree {
Tree* R[subtrees]; // { dg-error "" }
~Tree()
{
delete [] L[0]; // { dg-error "" }
delete [] R[0]; // { dg-error "" }
delete [] L[0];
delete [] R[0];
}
};

View File

@@ -10,9 +10,9 @@ template<typename FP_> struct Vec { // { dg-message "note" "" { target c++17_dow
X = y*rhs.z() - z*rhs.y(); // { dg-error "not declared|no member" }
}
Vec& operator^(Vec& rhs) {
return Vec(*this)^=rhs; // { dg-message "required" }
return Vec(*this)^=rhs;
}
};
Vec<double> v(3,4,12); // { dg-error "no matching|too many initializers" }
Vec<double> V(12,4,3); // { dg-error "no matching|too many initializers" }
Vec<double> c = v^V; // { dg-message "required" }
Vec<double> c = v^V;

View File

@@ -1,3 +1,3 @@
// PR c++/33494
template<int> void foo(int(*f=0)()); // { dg-error "declared void|scope|erroneous-expression" }
template<int> void foo(int(*f=0)()); // { dg-error "declared void|scope|cannot be used as a function" }

View File

@@ -8,5 +8,5 @@ struct A
template <int> struct B
{
int x[A::i]; // { dg-error "array bound is not an integer constant" }
int x[A::i]; // { dg-error "not an integral constant-expression" }
};

View File

@@ -3,9 +3,9 @@
template<int> struct A {};
template<typename> struct B : A <sizeof(=)> {}; /* { dg-error "expected primary-expression" } */
template<typename> struct B : A <sizeof(=)> {}; /* { dg-error "" } */
template<typename> struct C : A <sizeof(=)> {}; /* { dg-error "expected primary-expression" } */
template<typename> struct C : A <sizeof(=)> {}; /* { dg-error "" } */
int a;

View File

@@ -0,0 +1,46 @@
// { dg-do compile }
// { dg-require-effective-target lp64 }
// { dg-additional-options "-Wno-overflow" }
#include <algorithm>
extern short var_0, var_2, var_3, var_9, var_11, var_13, var_14, var_19, var_22,
var_32, var_37, var_44, var_57, var_59, var_63, var_70;
extern unsigned var_5;
extern char var_6, var_12, var_18, var_38, var_39, var_43, var_55, var_64,
arr_35;
extern long var_7, var_8, var_10, var_15, var_25, var_56;
extern int var_21, var_36, var_51, var_65, var_68, arr_7;
extern bool var_46, var_58, var_67;
void test() {
var_12 = 0 >= 0;
var_13 = arr_7;
var_14 = (unsigned long)var_7 >> -564810131 + 564810189;
var_15 = var_5;
var_18 = -602739307623583391;
var_19 = -~0;
var_21 = var_10 >> var_8 - 17101301574577641170ULL;
var_22 = var_5;
var_25 = var_9;
var_32 = std::max((unsigned)var_2, var_5);
var_36 = 9557;
var_37 = 394545925;
var_38 = 0 >= 0;
var_39 = var_5;
var_43 = 0;
var_44 = arr_35;
var_46 = var_7;
for (short a = 0; a < 9; a = 021)
for (short b = 0; b < 024; b += 4)
var_51 = std::min((long long)(var_2 ?: var_9), (long long)var_9);
var_55 = var_0;
var_56 = 3896150587;
var_57 = var_11;
var_58 = var_59 = var_11;
var_63 = 73;
var_64 = 10393232284806619711ULL;
var_65 = var_3;
var_67 = var_6;
var_68 = var_70 = 0;
}

View File

@@ -0,0 +1,28 @@
/* { dg-do run } */
/* { dg-require-effective-target int32plus } */
int var_2 = -2013646301;
int var_3 = -1126567434;
unsigned int var_12 = 1;
unsigned int var_19;
unsigned int arr_25 [24] [21] [15] [17] [15] ;
void __attribute__((noipa)) test()
{
for (int a = 0; a < 3; a = 42)
for (int b = 0; b < 20; b++)
for (int c = 0; c < 4; c = 4)
for (int d = 0; d < 6; d += 4)
for (int e = 0; e < 4; e += 2) {
arr_25[a][b][c][d][e] = var_2 || var_3;
var_19 = var_12;
}
}
int main()
{
test();
if (var_19 != 1)
__builtin_abort ();
return 0;
}

View File

@@ -0,0 +1,91 @@
/* PR target/92658 */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -mavx512bw -mavx512vl" } */
typedef unsigned char v8qi __attribute__((vector_size (8)));
typedef unsigned char v16qi __attribute__((vector_size (16)));
typedef unsigned char v32qi __attribute__((vector_size (32)));
typedef unsigned short v8hi __attribute__((vector_size (16)));
typedef unsigned short v16hi __attribute__((vector_size (32)));
typedef unsigned short v32hi __attribute__((vector_size (64)));
void
truncwb_512 (v32qi * dst, v32hi * __restrict src)
{
unsigned char tem[32];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
tem[8] = (*src)[8];
tem[9] = (*src)[9];
tem[10] = (*src)[10];
tem[11] = (*src)[11];
tem[12] = (*src)[12];
tem[13] = (*src)[13];
tem[14] = (*src)[14];
tem[15] = (*src)[15];
tem[16] = (*src)[16];
tem[17] = (*src)[17];
tem[18] = (*src)[18];
tem[19] = (*src)[19];
tem[20] = (*src)[20];
tem[21] = (*src)[21];
tem[22] = (*src)[22];
tem[23] = (*src)[23];
tem[24] = (*src)[24];
tem[25] = (*src)[25];
tem[26] = (*src)[26];
tem[27] = (*src)[27];
tem[28] = (*src)[28];
tem[29] = (*src)[29];
tem[30] = (*src)[30];
tem[31] = (*src)[31];
dst[0] = *(v32qi *) tem;
}
void
truncwb_256 (v16qi * dst, v16hi * __restrict src)
{
unsigned char tem[16];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
tem[8] = (*src)[8];
tem[9] = (*src)[9];
tem[10] = (*src)[10];
tem[11] = (*src)[11];
tem[12] = (*src)[12];
tem[13] = (*src)[13];
tem[14] = (*src)[14];
tem[15] = (*src)[15];
dst[0] = *(v16qi *) tem;
}
void
truncwb_128 (v16qi * dst, v8hi * __restrict src)
{
unsigned char tem[8];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
dst[0] = *(v16qi *) tem;
}
/* { dg-final { scan-assembler-times "vpmovwb" 2 } } */
/* { dg-final { scan-assembler-times "vpmovwb" 3 { xfail *-*-* } } } */

View File

@@ -0,0 +1,106 @@
/* PR target/92658 */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -mavx512f" } */
typedef unsigned char v8qi __attribute__((vector_size (8)));
typedef unsigned char v16qi __attribute__((vector_size (16)));
typedef unsigned short v8hi __attribute__((vector_size (16)));
typedef unsigned short v16hi __attribute__((vector_size (32)));
typedef unsigned int v8si __attribute__((vector_size (32)));
typedef unsigned int v16si __attribute__((vector_size (64)));
typedef unsigned long long v8di __attribute__((vector_size (64)));
void
truncqd (v8si * dst, v8di * __restrict src)
{
unsigned tem[8];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
dst[0] = *(v8si *) tem;
}
void
truncqw (v8hi * dst, v8di * __restrict src)
{
unsigned short tem[8];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
dst[0] = *(v8hi *) tem;
}
void
truncqb (v8qi * dst, v8di * __restrict src)
{
unsigned char tem[8];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
dst[0] = *(v8qi *) tem;
}
void
truncdw (v16hi * dst, v16si * __restrict src)
{
unsigned short tem[16];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
tem[8] = (*src)[8];
tem[9] = (*src)[9];
tem[10] = (*src)[10];
tem[11] = (*src)[11];
tem[12] = (*src)[12];
tem[13] = (*src)[13];
tem[14] = (*src)[14];
tem[15] = (*src)[15];
dst[0] = *(v16hi *) tem;
}
void
truncdb (v16qi * dst, v16si * __restrict src)
{
unsigned char tem[16];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
tem[8] = (*src)[8];
tem[9] = (*src)[9];
tem[10] = (*src)[10];
tem[11] = (*src)[11];
tem[12] = (*src)[12];
tem[13] = (*src)[13];
tem[14] = (*src)[14];
tem[15] = (*src)[15];
dst[0] = *(v16qi *) tem;
}
/* { dg-final { scan-assembler-times "vpmovqd" 1 } } */
/* { dg-final { scan-assembler-times "vpmovqw" 1 } } */
/* { dg-final { scan-assembler-times "vpmovqb" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */
/* { dg-final { scan-assembler-times "vpmovdb" 1 } } */

View File

@@ -0,0 +1,129 @@
/* PR target/92658 */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -mavx512f -mavx512vl" } */
typedef unsigned char v16qi __attribute__((vector_size (16)));
typedef unsigned short v8hi __attribute__((vector_size (16)));
typedef unsigned int v4si __attribute__((vector_size (16)));
typedef unsigned int v8si __attribute__((vector_size (32)));
typedef unsigned long long v2di __attribute__((vector_size (16)));
typedef unsigned long long v4di __attribute__((vector_size (32)));
void
truncqd_256 (v4si * dst, v4di * __restrict src)
{
unsigned tem[4];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
dst[0] = *(v4si *) tem;
}
void
truncqw_256 (v8hi * dst, v4di * __restrict src)
{
unsigned short tem[4];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
dst[0] = *(v8hi *) tem;
}
void
truncqb_256 (v16qi * dst, v4di * __restrict src)
{
unsigned char tem[4];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
dst[0] = *(v16qi *) tem;
}
void
truncqd_128 (v4si * dst, v2di * __restrict src)
{
unsigned tem[4];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
dst[0] = *(v4si *) tem;
}
void
truncqw_128 (v8hi * dst, v2di * __restrict src)
{
unsigned short tem[4];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
dst[0] = *(v8hi *) tem;
}
void
truncqb_128 (v16qi * dst, v2di * __restrict src)
{
unsigned char tem[4];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
dst[0] = *(v16qi *) tem;
}
void
truncdw_256 (v8hi * dst, v8si * __restrict src)
{
unsigned short tem[8];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
dst[0] = *(v8hi *) tem;
}
void
truncdb_256 (v16qi * dst, v8si * __restrict src)
{
unsigned char tem[8];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
tem[4] = (*src)[4];
tem[5] = (*src)[5];
tem[6] = (*src)[6];
tem[7] = (*src)[7];
dst[0] = *(v16qi *) tem;
}
void
truncdw_128 (v8hi * dst, v4si * __restrict src)
{
unsigned short tem[8];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
dst[0] = *(v8hi *) tem;
}
void
truncdb_128 (v16qi * dst, v4si * __restrict src)
{
unsigned char tem[8];
tem[0] = (*src)[0];
tem[1] = (*src)[1];
tem[2] = (*src)[2];
tem[3] = (*src)[3];
dst[0] = *(v16qi *) tem;
}
/* { dg-final { scan-assembler-times "vpmovqd" 2 } } } */
/* { dg-final { scan-assembler-times "vpmovqw" 2 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "vpmovqb" 2 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */
/* { dg-final { scan-assembler-times "vpmovdw" 2 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "vpmovdb" 2 { xfail *-*-* } } } */

View File

@@ -0,0 +1,8 @@
/* PR target/95255 */
/* { dg-do compile } */
/* { dg-options "-O2 -msse2 -mno-sse4.1 -mfpmath=both" } */
double foo (double x)
{
return __builtin_roundeven (x);
}

View File

@@ -2436,8 +2436,6 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
/* Use the sequence from the first edge and push SMs down. */
for (unsigned i = 0; i < first_edge_seq.length (); ++i)
{
if (first_edge_seq[i].second == sm_other)
break;
unsigned id = first_edge_seq[i].first;
seq.safe_push (first_edge_seq[i]);
unsigned new_idx;

View File

@@ -534,7 +534,9 @@ sink_common_stores_to_bb (basic_block bb)
/* ??? We could handle differing SSA uses in the LHS by inserting
PHIs for them. */
else if (! operand_equal_p (gimple_assign_lhs (first_store),
gimple_assign_lhs (def), 0))
gimple_assign_lhs (def), 0)
|| (gimple_clobber_p (first_store)
&& !gimple_clobber_p (def)))
{
first_store = NULL;
break;
@@ -546,16 +548,17 @@ sink_common_stores_to_bb (basic_block bb)
/* Check if we need a PHI node to merge the stored values. */
bool allsame = true;
for (unsigned i = 1; i < vdefs.length (); ++i)
{
gimple *def = SSA_NAME_DEF_STMT (vdefs[i]);
if (! operand_equal_p (gimple_assign_rhs1 (first_store),
gimple_assign_rhs1 (def), 0))
{
allsame = false;
break;
}
}
if (!gimple_clobber_p (first_store))
for (unsigned i = 1; i < vdefs.length (); ++i)
{
gimple *def = SSA_NAME_DEF_STMT (vdefs[i]);
if (! operand_equal_p (gimple_assign_rhs1 (first_store),
gimple_assign_rhs1 (def), 0))
{
allsame = false;
break;
}
}
/* We cannot handle aggregate values if we need to merge them. */
tree type = TREE_TYPE (gimple_assign_lhs (first_store));

View File

@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see
#include "alias.h"
#include "stor-layout.h"
#include "gomp-constants.h"
#include "print-tree.h"
/* Output the STRING constant to the string
@@ -967,6 +968,14 @@ streamer_write_tree_header (struct output_block *ob, tree expr)
enum LTO_tags tag;
enum tree_code code;
if (streamer_dump_file)
{
print_node_brief (streamer_dump_file, " Streaming header of ",
expr, 4);
fprintf (streamer_dump_file, " to %s\n",
lto_section_name[ob->section_type]);
}
/* We should not see any tree nodes not handled by the streamer. */
code = TREE_CODE (expr);
@@ -1016,6 +1025,12 @@ streamer_write_integer_cst (struct output_block *ob, tree cst, bool ref_p)
int i;
int len = TREE_INT_CST_NUNITS (cst);
gcc_assert (!TREE_OVERFLOW (cst));
if (streamer_dump_file)
{
print_node_brief (streamer_dump_file, " Streaming integer ",
cst, 4);
fprintf (streamer_dump_file, "\n");
}
streamer_write_record_start (ob, LTO_integer_cst);
stream_write_tree (ob, TREE_TYPE (cst), ref_p);
/* We're effectively streaming a non-sign-extended wide_int here,

View File

@@ -299,10 +299,11 @@ record_common_node (struct streamer_tree_cache_d *cache, tree node)
if (!node)
node = error_mark_node;
/* ??? FIXME, devise a better hash value. But the hash needs to be equal
for all frontend and lto1 invocations. So just use the position
in the cache as hash value. */
streamer_tree_cache_append (cache, node, cache->nodes.length ());
/* This hash needs to be equal for all frontend and lto1 invocations. So
just use the position in the cache as hash value.
Small integers are used by hash_tree to record positions within an SCC
hash, so keep these cache hash values out of that range. */
streamer_tree_cache_append (cache, node, cache->next_idx + 0xc001);
switch (TREE_CODE (node))
{

View File

@@ -6185,17 +6185,29 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
reduc_def = PHI_RESULT (reduc_def_phi);
/* ??? To get at invariant/constant uses on the SLP node we have to
get to it here, slp_node is still the reduction PHI. */
slp_tree slp_for_stmt_info = NULL;
if (slp_node)
{
slp_for_stmt_info = slp_node_instance->root;
/* And then there's reduction chain with a conversion ... */
if (SLP_TREE_SCALAR_STMTS (slp_for_stmt_info)[0] != stmt_info)
slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
gcc_assert (SLP_TREE_SCALAR_STMTS (slp_for_stmt_info)[0] == stmt_info);
}
slp_tree *slp_op = XALLOCAVEC (slp_tree, op_type);
for (i = 0; i < op_type; i++)
{
tree op = gimple_op (stmt, i + 1);
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
if (i == 0 && code == COND_EXPR)
continue;
stmt_vec_info def_stmt_info;
enum vect_def_type dt;
if (!vect_is_simple_use (op, loop_vinfo, &dt, &tem,
tree op;
if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info,
i, &op, &slp_op[i], &dt, &tem,
&def_stmt_info))
{
if (dump_enabled_p ())
@@ -6729,6 +6741,21 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
}
if (slp_node
&& !(!single_defuse_cycle
&& code != DOT_PROD_EXPR
&& code != WIDEN_SUM_EXPR
&& code != SAD_EXPR
&& reduction_type != FOLD_LEFT_REDUCTION))
for (i = 0; i < op_type; i++)
if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
if (slp_node)
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
else

View File

@@ -46,6 +46,34 @@ along with GCC; see the file COPYING3. If not see
#include "internal-fn.h"
/* Initialize a SLP node. */
_slp_tree::_slp_tree ()
{
SLP_TREE_SCALAR_STMTS (this) = vNULL;
SLP_TREE_SCALAR_OPS (this) = vNULL;
SLP_TREE_VEC_STMTS (this).create (0);
SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
SLP_TREE_CHILDREN (this) = vNULL;
SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
SLP_TREE_TWO_OPERATORS (this) = false;
SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
SLP_TREE_VECTYPE (this) = NULL_TREE;
this->refcnt = 1;
this->max_nunits = 1;
}
/* Tear down a SLP node. */
_slp_tree::~_slp_tree ()
{
SLP_TREE_CHILDREN (this).release ();
SLP_TREE_SCALAR_STMTS (this).release ();
SLP_TREE_SCALAR_OPS (this).release ();
SLP_TREE_VEC_STMTS (this).release ();
SLP_TREE_LOAD_PERMUTATION (this).release ();
}
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
FINAL_P is true if we have vectorized the instance or if we have
made a final decision not to vectorize the statements in any way. */
@@ -76,13 +104,7 @@ vect_free_slp_tree (slp_tree node, bool final_p)
}
}
SLP_TREE_CHILDREN (node).release ();
SLP_TREE_SCALAR_STMTS (node).release ();
SLP_TREE_SCALAR_OPS (node).release ();
SLP_TREE_VEC_STMTS (node).release ();
SLP_TREE_LOAD_PERMUTATION (node).release ();
free (node);
delete node;
}
/* Free the memory allocated for the SLP instance. FINAL_P is true if we
@@ -101,39 +123,15 @@ vect_free_slp_instance (slp_instance instance, bool final_p)
/* Create an SLP node for SCALAR_STMTS. */
static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
{
slp_tree node;
stmt_vec_info stmt_info = scalar_stmts[0];
unsigned int nops;
if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
nops = gimple_call_num_args (stmt);
else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
{
nops = gimple_num_ops (stmt) - 1;
if (gimple_assign_rhs_code (stmt) == COND_EXPR)
nops++;
}
else if (is_a <gphi *> (stmt_info->stmt))
nops = 0;
else
return NULL;
node = XNEW (struct _slp_tree);
slp_tree node = new _slp_tree;
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
SLP_TREE_SCALAR_OPS (node) = vNULL;
SLP_TREE_VEC_STMTS (node).create (0);
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
SLP_TREE_CHILDREN (node).create (nops);
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
SLP_TREE_VECTYPE (node) = NULL_TREE;
node->refcnt = 1;
node->max_nunits = 1;
unsigned i;
stmt_vec_info stmt_info;
FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
STMT_VINFO_NUM_SLP_USES (stmt_info)++;
@@ -145,21 +143,9 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
slp_tree node;
node = XNEW (struct _slp_tree);
SLP_TREE_SCALAR_STMTS (node) = vNULL;
slp_tree node = new _slp_tree;
SLP_TREE_SCALAR_OPS (node) = ops;
SLP_TREE_VEC_STMTS (node).create (0);
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
SLP_TREE_CHILDREN (node) = vNULL;
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_external_def;
SLP_TREE_VECTYPE (node) = NULL_TREE;
node->refcnt = 1;
node->max_nunits = 1;
return node;
}
@@ -1284,7 +1270,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
else
return NULL;
(*tree_size)++;
node = vect_create_new_slp_node (stmts);
node = vect_create_new_slp_node (stmts, 0);
return node;
}
@@ -1309,7 +1295,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
{
*max_nunits = this_max_nunits;
(*tree_size)++;
node = vect_create_new_slp_node (stmts);
node = vect_create_new_slp_node (stmts, 0);
/* And compute the load permutation. Whether it is actually
a permutation depends on the unrolling factor which is
decided later. */
@@ -1450,7 +1436,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"Building vector operands from scalars\n");
this_tree_size++;
child = vect_create_new_slp_node (oprnd_info->def_stmts);
child = vect_create_new_slp_node (oprnd_info->def_stmts, 0);
SLP_TREE_DEF_TYPE (child) = vect_external_def;
SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
children.safe_push (child);
@@ -1587,7 +1573,7 @@ fail:
*tree_size += this_tree_size + 1;
*max_nunits = this_max_nunits;
node = vect_create_new_slp_node (stmts);
node = vect_create_new_slp_node (stmts, nops);
SLP_TREE_TWO_OPERATORS (node) = two_operators;
SLP_TREE_CHILDREN (node).splice (children);
return node;
@@ -1726,9 +1712,12 @@ slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
if (existed_p)
return copy_ref;
copy_ref = XNEW (_slp_tree);
copy_ref = new _slp_tree;
slp_tree copy = copy_ref;
memcpy (copy, node, sizeof (_slp_tree));
SLP_TREE_DEF_TYPE (copy) = SLP_TREE_DEF_TYPE (node);
SLP_TREE_VECTYPE (copy) = SLP_TREE_VECTYPE (node);
copy->max_nunits = node->max_nunits;
copy->refcnt = 0;
if (SLP_TREE_SCALAR_STMTS (node).exists ())
{
SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
@@ -1743,7 +1732,6 @@ slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
if (SLP_TREE_CHILDREN (node).exists ())
SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
copy->refcnt = 0;
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
@@ -2230,7 +2218,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
scalar_stmts.create (group_size);
for (unsigned i = 0; i < group_size; ++i)
scalar_stmts.quick_push (next_info);
slp_tree conv = vect_create_new_slp_node (scalar_stmts);
slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
SLP_TREE_CHILDREN (conv).quick_push (node);
SLP_INSTANCE_TREE (new_instance) = conv;
/* We also have to fake this conversion stmt as SLP reduction
@@ -3633,16 +3621,22 @@ vect_get_constant_vectors (vec_info *vinfo,
gimple_seq ctor_seq = NULL;
auto_vec<tree, 16> permute_results;
/* ??? SLP analysis should compute the vector type for the
constant / invariant and store it in the SLP node. */
/* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
vector_type = SLP_TREE_VECTYPE (op_node);
{
tree op = op_node->ops[0];
/* Check if vector type is a boolean vector. */
tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
&& vect_mask_constant_operand_p (vinfo, stmt_vinfo, op_num))
vector_type = truth_type_for (stmt_vectype);
gcc_assert (vector_type
&& types_compatible_p (vector_type,
truth_type_for (stmt_vectype)));
else
vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
gcc_assert (vector_type
&& types_compatible_p (vector_type,
get_vectype_for_scalar_type
(vinfo, TREE_TYPE (op), op_node)));
}
poly_uint64 vf = 1;
if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))

View File

@@ -3170,6 +3170,7 @@ vectorizable_call (vec_info *vinfo,
= { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
vect_unknown_def_type };
tree vectypes[ARRAY_SIZE (dt)] = {};
slp_tree slp_op[ARRAY_SIZE (dt)] = {};
int ndts = ARRAY_SIZE (dt);
int ncopies, j;
auto_vec<tree, 8> vargs;
@@ -3209,7 +3210,7 @@ vectorizable_call (vec_info *vinfo,
vectype_in = NULL_TREE;
nargs = gimple_call_num_args (stmt);
/* Bail out if the function has more than three arguments, we do not have
/* Bail out if the function has more than four arguments, we do not have
interesting builtin functions to vectorize with more than two arguments
except for fma. No arguments is also not good. */
if (nargs == 0 || nargs > 4)
@@ -3229,17 +3230,17 @@ vectorizable_call (vec_info *vinfo,
for (i = 0; i < nargs; i++)
{
op = gimple_call_arg (stmt, i);
if ((int) i == mask_opno)
{
op = gimple_call_arg (stmt, i);
if (!vect_check_scalar_mask (vinfo,
stmt_info, op, &dt[i], &vectypes[i]))
return false;
continue;
}
if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
i, &op, &slp_op[i], &dt[i], &vectypes[i]))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3400,6 +3401,15 @@ vectorizable_call (vec_info *vinfo,
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
if (!vec_stmt) /* transformation not required. */
{
if (slp_node)
for (i = 0; i < nargs; ++i)
if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_call");
vect_model_simple_cost (vinfo, stmt_info,
@@ -4725,9 +4735,18 @@ vectorizable_conversion (vec_info *vinfo,
lhs_type = TREE_TYPE (scalar_dest);
vectype_out = STMT_VINFO_VECTYPE (stmt_info);
op0 = gimple_assign_rhs1 (stmt);
rhs_type = TREE_TYPE (op0);
/* Check the operands of the operation. */
slp_tree slp_op0, slp_op1 = NULL;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
0, &op0, &slp_op0, &dt[0], &vectype_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"use not simple.\n");
return false;
}
rhs_type = TREE_TYPE (op0);
if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
&& !((INTEGRAL_TYPE_P (lhs_type)
&& INTEGRAL_TYPE_P (rhs_type))
@@ -4748,34 +4767,24 @@ vectorizable_conversion (vec_info *vinfo,
return false;
}
/* Check the operands of the operation. */
if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"use not simple.\n");
return false;
}
if (op_type == binary_op)
{
bool ok;
gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
op1 = gimple_assign_rhs2 (stmt);
gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
OP1. */
if (CONSTANT_CLASS_P (op0))
ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
else
ok = vect_is_simple_use (op1, vinfo, &dt[1]);
if (!ok)
tree vectype1_in;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
&op1, &slp_op1, &dt[1], &vectype1_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"use not simple.\n");
return false;
}
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
OP1. */
if (!vectype_in)
vectype_in = vectype1_in;
}
/* If op0 is an external or constant def, infer the vector type
@@ -4949,6 +4958,15 @@ vectorizable_conversion (vec_info *vinfo,
if (!vec_stmt) /* transformation not required. */
{
if (slp_node
&& (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
|| !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
DUMP_VECT_SCOPE ("vectorizable_conversion");
if (modifier == NONE)
{
@@ -5306,16 +5324,14 @@ vectorizable_assignment (vec_info *vinfo,
if (TREE_CODE (scalar_dest) != SSA_NAME)
return false;
code = gimple_assign_rhs_code (stmt);
if (gimple_assign_single_p (stmt)
|| code == PAREN_EXPR
|| CONVERT_EXPR_CODE_P (code))
op = gimple_assign_rhs1 (stmt);
else
if (STMT_VINFO_DATA_REF (stmt_info))
return false;
if (code == VIEW_CONVERT_EXPR)
op = TREE_OPERAND (op, 0);
code = gimple_assign_rhs_code (stmt);
if (!(gimple_assign_single_p (stmt)
|| code == PAREN_EXPR
|| CONVERT_EXPR_CODE_P (code)))
return false;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@@ -5330,13 +5346,17 @@ vectorizable_assignment (vec_info *vinfo,
gcc_assert (ncopies >= 1);
if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
slp_tree slp_op;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
&dt[0], &vectype_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"use not simple.\n");
return false;
}
if (!vectype_in)
vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
/* We can handle NOP_EXPR conversions that do not change the number
of elements or the vector size. */
@@ -5373,6 +5393,14 @@ vectorizable_assignment (vec_info *vinfo,
if (!vec_stmt) /* transformation not required. */
{
if (slp_node
&& !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_assignment");
if (!vect_nop_conversion_p (stmt_info))
@@ -5540,8 +5568,9 @@ vectorizable_shift (vec_info *vinfo,
return false;
}
op0 = gimple_assign_rhs1 (stmt);
if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
slp_tree slp_op0;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
0, &op0, &slp_op0, &dt[0], &vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5567,10 +5596,10 @@ vectorizable_shift (vec_info *vinfo,
if (maybe_ne (nunits_out, nunits_in))
return false;
op1 = gimple_assign_rhs2 (stmt);
stmt_vec_info op1_def_stmt_info;
if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
&op1_def_stmt_info))
slp_tree slp_op1;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
&dt[1], &op1_vectype, &op1_def_stmt_info))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5743,6 +5772,15 @@ vectorizable_shift (vec_info *vinfo,
if (!vec_stmt) /* transformation not required. */
{
if (slp_node
&& (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
|| !vect_maybe_update_slp_op_vectype (slp_op1, op1_vectype)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_shift");
vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
@@ -5931,7 +5969,8 @@ vectorizable_operation (vec_info *vinfo,
if (!stmt)
return false;
if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
/* Loads and stores are handled in vectorizable_{load,store}. */
if (STMT_VINFO_DATA_REF (stmt_info))
return false;
orig_code = code = gimple_assign_rhs_code (stmt);
@@ -5988,8 +6027,9 @@ vectorizable_operation (vec_info *vinfo,
return false;
}
op0 = gimple_assign_rhs1 (stmt);
if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
slp_tree slp_op0;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
0, &op0, &slp_op0, &dt[0], &vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6038,10 +6078,11 @@ vectorizable_operation (vec_info *vinfo,
return false;
tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
slp_tree slp_op1 = NULL, slp_op2 = NULL;
if (op_type == binary_op || op_type == ternary_op)
{
op1 = gimple_assign_rhs2 (stmt);
if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2))
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
1, &op1, &slp_op1, &dt[1], &vectype2))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6051,8 +6092,8 @@ vectorizable_operation (vec_info *vinfo,
}
if (op_type == ternary_op)
{
op2 = gimple_assign_rhs3 (stmt);
if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3))
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
2, &op2, &slp_op2, &dt[2], &vectype3))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6164,6 +6205,18 @@ vectorizable_operation (vec_info *vinfo,
vectype, NULL);
}
/* Put types on constant and invariant SLP children. */
if (slp_node
&& (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
|| !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
|| !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_operation");
vect_model_simple_cost (vinfo, stmt_info,
@@ -7479,6 +7532,16 @@ vectorizable_store (vec_info *vinfo,
check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
memory_access_type, &gs_info, mask);
if (slp_node
&& !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
vect_model_store_cost (vinfo, stmt_info, ncopies,
memory_access_type, vls_type, slp_node, cost_vec);
@@ -10060,17 +10123,23 @@ vectorizable_condition (vec_info *vinfo,
return false; /* FORNOW */
cond_expr = gimple_assign_rhs1 (stmt);
then_clause = gimple_assign_rhs2 (stmt);
else_clause = gimple_assign_rhs3 (stmt);
if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
&comp_vectype, &dts[0], vectype)
|| !comp_vectype)
return false;
if (!vect_is_simple_use (then_clause, vinfo, &dts[2], &vectype1))
unsigned slp_adjust = 0;
if (slp_node && SLP_TREE_CHILDREN (slp_node).length () == 4)
/* ??? Hack. Hope for COND_EXPR GIMPLE sanitizing or refactor
things more... */
slp_adjust = 1;
slp_tree then_slp_node, else_slp_node;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + slp_adjust,
&then_clause, &then_slp_node, &dts[2], &vectype1))
return false;
if (!vect_is_simple_use (else_clause, vinfo, &dts[3], &vectype2))
if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + slp_adjust,
&else_clause, &else_slp_node, &dts[3], &vectype2))
return false;
if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
@@ -10188,12 +10257,6 @@ vectorizable_condition (vec_info *vinfo,
}
}
if (loop_vinfo
&& LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
&& reduction_type == EXTRACT_LAST_REDUCTION)
vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
ncopies * vec_num, vectype, NULL);
vect_cost_for_stmt kind = vector_stmt;
if (reduction_type == EXTRACT_LAST_REDUCTION)
/* Count one reduction-like operation per vector. */
@@ -10201,6 +10264,27 @@ vectorizable_condition (vec_info *vinfo,
else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
return false;
if (slp_node
&& (!vect_maybe_update_slp_op_vectype
(SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
|| (slp_adjust == 1
&& !vect_maybe_update_slp_op_vectype
(SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
|| !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
|| !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
if (loop_vinfo
&& LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
&& reduction_type == EXTRACT_LAST_REDUCTION)
vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
ncopies * vec_num, vectype, NULL);
STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
cost_vec, kind);
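The slp_adjust logic above accounts for the two SLP child layouts a COND_EXPR node can have: three children (condition, then, else), or four when the comparison has been split into its two operands, which shifts the then/else positions by one. A tiny sketch of just that index arithmetic (the function name is invented for illustration):

#include <cassert>
#include <cstddef>

// Returns the SLP child index of the then-clause; the else-clause follows
// at the next index.  Four children means the comparison occupies slots 0-1.
static unsigned
then_clause_index (std::size_t n_children)
{
  unsigned slp_adjust = (n_children == 4) ? 1 : 0;
  return 1 + slp_adjust;
}

int main ()
{
  assert (then_clause_index (3) == 1);  // { cond, then, else }
  assert (then_clause_index (4) == 2);  // { cmp-op0, cmp-op1, then, else }
  return 0;
}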
@@ -10550,13 +10634,13 @@ vectorizable_comparison (vec_info *vinfo,
if (TREE_CODE_CLASS (code) != tcc_comparison)
return false;
rhs1 = gimple_assign_rhs1 (stmt);
rhs2 = gimple_assign_rhs2 (stmt);
if (!vect_is_simple_use (rhs1, vinfo, &dts[0], &vectype1))
slp_tree slp_rhs1, slp_rhs2;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
return false;
if (!vect_is_simple_use (rhs2, vinfo, &dts[1], &vectype2))
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
return false;
if (vectype1 && vectype2
@@ -10649,6 +10733,17 @@ vectorizable_comparison (vec_info *vinfo,
}
}
/* Put types on constant and invariant SLP children. */
if (slp_node
&& (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
|| !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for invariants\n");
return false;
}
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
vect_model_simple_cost (vinfo, stmt_info,
ncopies * (1 + (bitop2 != NOP_EXPR)),
@@ -11705,6 +11800,61 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
return true;
}
/* Function vect_is_simple_use.
Same as vect_is_simple_use but determines the operand by operand
position OPERAND from either STMT or SLP_NODE, filling in *OP
and *SLP_DEF (when SLP_NODE is not NULL). */
bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
unsigned operand, tree *op, slp_tree *slp_def,
enum vect_def_type *dt,
tree *vectype, stmt_vec_info *def_stmt_info_out)
{
if (slp_node)
{
slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
*slp_def = child;
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
*op = gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child)[0]->stmt);
else
*op = SLP_TREE_SCALAR_OPS (child)[0];
}
else
{
if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
{
*op = gimple_op (ass, operand + 1);
/* ??? Ick. But it will vanish with SLP only. */
if (TREE_CODE (*op) == VIEW_CONVERT_EXPR)
*op = TREE_OPERAND (*op, 0);
}
else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
*op = gimple_call_arg (call, operand);
else
gcc_unreachable ();
}
/* ??? We might want to update *vectype from *slp_def here though
when sharing nodes this would prevent unsharing in the caller. */
return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
}
/* If OP is not NULL and is external or constant update its vector
type with VECTYPE. Returns true if successful or false if not,
for example when conflicting vector types are present. */
bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
return true;
if (SLP_TREE_VECTYPE (op))
return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
SLP_TREE_VECTYPE (op) = vectype;
return true;
}
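Taken together, the two new helpers split the work: the positional vect_is_simple_use overload resolves operand OPERAND either from the SLP child (the scalar stmt's lhs for internal defs, the recorded scalar operand otherwise) or straight from the statement, while vect_maybe_update_slp_op_vectype pins a vector type on constant/external children. A self-contained C++ sketch of the operand lookup, with mock data structures (slp_node_t, stmt_t and get_operand are invented for illustration, not GCC types):

#include <cassert>
#include <string>
#include <vector>

enum def_type { internal_def, external_def, constant_def };

// Mock SLP child: internal defs carry scalar stmts, invariants carry scalar ops.
struct slp_node_t
{
  def_type def;
  std::vector<std::string> scalar_stmt_lhs;  // lhs of the grouped scalar stmts
  std::vector<std::string> scalar_ops;       // constant/external scalar operands
};

// Mock scalar statement: just its rhs operands by position.
struct stmt_t
{
  std::vector<std::string> ops;
};

// Models the operand selection of the new overload: prefer the SLP child,
// fall back to reading the statement directly in the non-SLP case.
static std::string
get_operand (const stmt_t &stmt,
             const std::vector<const slp_node_t *> &children,
             bool slp, unsigned operand)
{
  if (slp)
    {
      const slp_node_t *child = children[operand];
      if (child->def == internal_def)
        return child->scalar_stmt_lhs[0];   // defined inside the SLP graph
      return child->scalar_ops[0];          // invariant operand
    }
  return stmt.ops[operand];
}

int main ()
{
  stmt_t stmt = { { "a_1", "3" } };
  slp_node_t def_child = { internal_def, { "a_1" }, {} };
  slp_node_t cst_child = { constant_def, {}, { "3" } };
  std::vector<const slp_node_t *> children = { &def_child, &cst_child };

  assert (get_operand (stmt, children, /*slp=*/false, 1) == "3");
  assert (get_operand (stmt, children, /*slp=*/true, 0) == "a_1");
  assert (get_operand (stmt, children, /*slp=*/true, 1) == "3");
  return 0;
}

In the actual patch the non-SLP fallback also strips a VIEW_CONVERT_EXPR wrapper, a detail omitted from this sketch.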
/* Function supportable_widening_operation

View File

@@ -118,6 +118,9 @@ typedef struct _slp_tree *slp_tree;
/* A computation tree of an SLP instance. Each node corresponds to a group of
stmts to be packed in a SIMD stmt. */
struct _slp_tree {
_slp_tree ();
~_slp_tree ();
/* Nodes that contain def-stmts of this node statements operands. */
vec<slp_tree> children;
@@ -1695,6 +1698,11 @@ extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
tree *, stmt_vec_info * = NULL,
gimple ** = NULL);
extern bool vect_is_simple_use (vec_info *, stmt_vec_info, slp_tree,
unsigned, tree *, slp_tree *,
enum vect_def_type *,
tree *, stmt_vec_info * = NULL);
extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree);
extern bool supportable_widening_operation (vec_info *,
enum tree_code, stmt_vec_info,
tree, tree, enum tree_code *,