diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 51541f141768..64a0db18e58f 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,14 @@ +2020-05-22 Jakub Jelinek + + * gcc-changelog/git_commit.py: Add trailing / to + gcc/testsuite/go.test/test and replace gcc/go/frontend/ + with gcc/go/gofrontend/ in ignored locations. + +2020-05-22 Martin Liska + + * gcc-changelog/git_commit.py: Add gcc/testsuite/go.test/test + to ignored locations. + 2020-05-21 Martin Liska * gcc-changelog/git_update_version.py: Prepare the script, the diff --git a/contrib/gcc-changelog/git_commit.py b/contrib/gcc-changelog/git_commit.py index e22d3a2d6f00..8c5fa2c0fc93 100755 --- a/contrib/gcc-changelog/git_commit.py +++ b/contrib/gcc-changelog/git_commit.py @@ -127,7 +127,8 @@ bug_components = set([ ignored_prefixes = [ 'gcc/d/dmd/', - 'gcc/go/frontend/', + 'gcc/go/gofrontend/', + 'gcc/testsuite/go.test/test/', 'libgo/', 'libphobos/libdruntime', 'libphobos/src/', diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1d01682dc94f..7a7b599ff939 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,115 @@ +2020-05-22 Jan Hubicka + + * lto-streamer-out.c (DFS::DFS): Silence warning. + +2020-05-22 Uroš Bizjak + + PR target/95255 + * config/i386/i386.md (2): Do not try to + expand non-sse4 ROUND_ROUNDEVEN rounding via SSE support routines. + +2020-05-22 Jan Hubicka + + * lto-streamer-out.c (lto_output_tree): Do not stream final ref if + it is not needed. + +2020-05-22 Jan Hubicka + + * lto-section-out.c (lto_output_decl_index): Adjust dump indentation. + * lto-streamer-out.c (create_output_block): Fix whitespace + (lto_write_tree_1): Add (debug) dump. + (DFS::DFS): Add dump. + (DFS::DFS_write_tree_body): Do not dump here. + (lto_output_tree): Improve dumping; do not stream ref when not needed. + (produce_asm_for_decls): Fix whitespace. + * tree-streamer-out.c (streamer_write_tree_header): Add dump. + * tree-streamer-out.c (streamer_write_integer_cst): Add debug dump. 
+ +2020-05-22 Hongtao.liu + + PR target/92658 + * config/i386/sse.md (trunc2): New expander + (truncv32hiv32qi2): Ditto. + (trunc2): Ditto. + (trunc2): Ditto. + (trunc2): Ditto. + (truncv2div2si2): Ditto. + (truncv8div8qi2): Ditto. + (avx512f_v8div16qi2): Renaming from *avx512f_v8div16qi2. + (avx512vl_v2div2si): Renaming from *avx512vl_v2div2si2. + (avx512vl_v2qi2): Renaming from + *avx512vl_vqi2. + +2020-05-22 H.J. Lu + + PR target/95258 + * config/i386/driver-i386.c (host_detect_local_cpu): Detect + AVX512VPOPCNTDQ. + +2020-05-22 Richard Biener + + PR tree-optimization/95268 + * tree-ssa-sink.c (sink_common_stores_to_bb): Handle clobbers + properly. + +2020-05-22 Jan Hubicka + + * tree-streamer.c (record_common_node): Fix hash value of pre-streamed + nodes. + +2020-05-22 Jan Hubicka + + * lto-streamer-in.c (lto_read_tree): Do not stream end markers. + (lto_input_scc): Optimize streaming of entry lengths. + * lto-streamer-out.c (lto_write_tree): Do not stream end markers. + (DFS::DFS): Optimize streaming of entry lengths. + +2020-05-22 Richard Biener + + PR lto/95190 + * doc/invoke.texi (flto): Document behavior of diagnostic + options. + +2020-05-22 Richard Biener + + * tree-vectorizer.h (vect_is_simple_use): New overload. + (vect_maybe_update_slp_op_vectype): New. + * tree-vect-stmts.c (vect_is_simple_use): New overload + accessing operands of SLP vs. non-SLP operation transparently. + (vect_maybe_update_slp_op_vectype): New function updating + the possibly shared SLP operands vector type. + (vectorizable_operation): Be a bit more SLP vs non-SLP agnostic + using the new vect_is_simple_use overload; update SLP invariant + operand nodes vector type. + (vectorizable_comparison): Likewise. + (vectorizable_call): Likewise. + (vectorizable_conversion): Likewise. + (vectorizable_shift): Likewise. + (vectorizable_store): Likewise. + (vectorizable_condition): Likewise. + (vectorizable_assignment): Likewise. + * tree-vect-loop.c (vectorizable_reduction): Likewise. 
+ * tree-vect-slp.c (vect_get_constant_vectors): Enforce + present SLP_TREE_VECTYPE and check it matches previous + behavior. + +2020-05-22 Richard Biener + + PR tree-optimization/95248 + * tree-ssa-loop-im.c (sm_seq_valid_bb): Remove bogus early out. + +2020-05-22 Richard Biener + + * tree-vectorizer.h (_slp_tree::_slp_tree): New. + (_slp_tree::~_slp_tree): Likewise. + * tree-vect-slp.c (_slp_tree::_slp_tree): Factor out code + from allocators. + (_slp_tree::~_slp_tree): Implement. + (vect_free_slp_tree): Simplify. + (vect_create_new_slp_node): Likewise. Add nops parameter. + (vect_build_slp_tree_2): Adjust. + (vect_analyze_slp_instance): Likewise. + 2020-05-21 Rainer Orth * adjust-alignment.c: Include memmodel.h. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index bf0113a0c6ed..c444971dd5d6 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200521 +20200522 diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 7612ddfb846e..3a8164007294 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -420,6 +420,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0; unsigned int has_gfni = 0, has_avx512vbmi2 = 0; unsigned int has_avx512bitalg = 0; + unsigned int has_avx512vpopcntdq = 0; unsigned int has_shstk = 0; unsigned int has_avx512vnni = 0, has_vaes = 0; unsigned int has_vpclmulqdq = 0; @@ -528,6 +529,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_vaes = ecx & bit_VAES; has_vpclmulqdq = ecx & bit_VPCLMULQDQ; has_avx512bitalg = ecx & bit_AVX512BITALG; + has_avx512vpopcntdq = ecx & bit_AVX512VPOPCNTDQ; has_movdiri = ecx & bit_MOVDIRI; has_movdir64b = ecx & bit_MOVDIR64B; has_enqcmd = ecx & bit_ENQCMD; @@ -1189,6 +1191,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *avx512vp2intersect = has_avx512vp2intersect ? 
" -mavx512vp2intersect" : " -mno-avx512vp2intersect"; const char *tsxldtrk = has_tsxldtrk ? " -mtsxldtrk " : " -mno-tsxldtrk"; const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg"; + const char *avx512vpopcntdq = has_avx512vpopcntdq ? " -mavx512vpopcntdq" : " -mno-avx512vpopcntdq"; const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri"; const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b"; const char *enqcmd = has_enqcmd ? " -menqcmd" : " -mno-enqcmd"; @@ -1210,9 +1213,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, clwb, mwaitx, clzero, pku, rdpid, gfni, shstk, avx512vbmi2, avx512vnni, vaes, vpclmulqdq, - avx512bitalg, movdiri, movdir64b, waitpkg, cldemote, - ptwrite, avx512bf16, enqcmd, avx512vp2intersect, - serialize, tsxldtrk, NULL); + avx512bitalg, avx512vpopcntdq, movdiri, movdir64b, + waitpkg, cldemote, ptwrite, avx512bf16, enqcmd, + avx512vp2intersect, serialize, tsxldtrk, NULL); } done: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e81c737b7bc6..459cf62b0deb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17115,16 +17115,18 @@ && (flag_fp_int_builtin_inexact || !flag_trapping_math)) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && (TARGET_SSE4_1 - || (ROUND_ != ROUND_ROUNDEVEN - && (flag_fp_int_builtin_inexact || !flag_trapping_math))))" + || (ROUND_ != ROUND_ROUNDEVEN + && (flag_fp_int_builtin_inexact || !flag_trapping_math))))" { if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH - && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math)) + && (TARGET_SSE4_1 + || (ROUND_ != ROUND_ROUNDEVEN + && (flag_fp_int_builtin_inexact || !flag_trapping_math)))) { if (TARGET_SSE4_1) emit_insn (gen_sse4_1_round2 - (operands[0], operands[1], GEN_INT (ROUND_ - | ROUND_NO_EXC))); + (operands[0], operands[1], + GEN_INT (ROUND_ | ROUND_NO_EXC))); else if (TARGET_64BIT || (mode 
!= DFmode)) { if (ROUND_ == ROUND_FLOOR) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5071fb2895ac..bb8ee19b64bc 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10513,6 +10513,12 @@ (define_mode_attr pmov_suff_1 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")]) +(define_expand "trunc2" + [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand") + (truncate:PMOV_DST_MODE_1 + (match_operand: 1 "register_operand")))] + "TARGET_AVX512F") + (define_insn "*avx512f_2" [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") (any_truncate:PMOV_DST_MODE_1 @@ -10547,6 +10553,12 @@ (match_operand: 2 "register_operand")))] "TARGET_AVX512F") +(define_expand "truncv32hiv32qi2" + [(set (match_operand:V32QI 0 "nonimmediate_operand") + (truncate:V32QI + (match_operand:V32HI 1 "register_operand")))] + "TARGET_AVX512BW") + (define_insn "avx512bw_v32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") (any_truncate:V32QI @@ -10586,6 +10598,12 @@ (define_mode_attr pmov_suff_2 [(V16QI "wb") (V8HI "dw") (V4SI "qd")]) +(define_expand "trunc2" + [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand") + (truncate:PMOV_DST_MODE_2 + (match_operand: 1 "register_operand")))] + "TARGET_AVX512VL") + (define_insn "*avx512vl_2" [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m") (any_truncate:PMOV_DST_MODE_2 @@ -10628,7 +10646,20 @@ (define_mode_attr pmov_suff_3 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")]) -(define_insn "*avx512vl_vqi2" +(define_expand "trunc2" + [(set (match_operand: 0 "register_operand") + (truncate: + (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V16QImode, operands[0], mode, 0); + emit_insn (gen_avx512vl_truncatevqi2 (operands[0], + operands[1], + CONST0_RTX (mode))); + DONE; +}) + +(define_insn "avx512vl_vqi2" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI 
(any_truncate: @@ -10920,7 +10951,21 @@ (define_mode_attr pmov_suff_4 [(V4DI "qw") (V2DI "qw") (V4SI "dw")]) -(define_insn "*avx512vl_vhi2" +(define_expand "trunc2" + [(set (match_operand: 0 "register_operand") + (truncate: + (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V8HImode, operands[0], mode, 0); + emit_insn (gen_avx512vl_truncatevhi2 (operands[0], + operands[1], + CONST0_RTX (mode))); + DONE; + +}) + +(define_insn "avx512vl_vhi2" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_concat:V8HI (any_truncate: @@ -11085,7 +11130,20 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v2div2si2" +(define_expand "truncv2div2si2" + [(set (match_operand:V2SI 0 "register_operand") + (truncate:V2SI + (match_operand:V2DI 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0); + emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0], + operands[1], + CONST0_RTX (V2SImode))); + DONE; +}) + +(define_insn "avx512vl_v2div2si2" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (any_truncate:V2SI @@ -11164,7 +11222,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512f_v8div16qi2" +(define_expand "truncv8div8qi2" + [(set (match_operand:V8QI 0 "register_operand") + (truncate:V8QI + (match_operand:V8DI 1 "register_operand")))] + "TARGET_AVX512F" +{ + operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0); + emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "avx512f_v8div16qi2" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI (any_truncate:V8QI diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index c1ab9288f72e..98c974e657f6 100644 --- a/gcc/cp/constexpr.c +++ b/gcc/cp/constexpr.c @@ -8359,6 +8359,14 @@ is_constant_expression (tree t) return potential_constant_expression_1 
(t, false, true, true, tf_none); } +/* As above, but expect an rvalue. */ + +bool +is_rvalue_constant_expression (tree t) +{ + return potential_constant_expression_1 (t, true, true, true, tf_none); +} + /* Like above, but complain about non-constant expressions. */ bool diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 07c16144c988..db125a3a1db9 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -7939,6 +7939,7 @@ extern tree constexpr_fn_retval (tree); extern tree ensure_literal_type_for_constexpr_object (tree); extern bool potential_constant_expression (tree); extern bool is_constant_expression (tree); +extern bool is_rvalue_constant_expression (tree); extern bool is_nondependent_constant_expression (tree); extern bool is_nondependent_static_init_expression (tree); extern bool is_static_init_expression (tree); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 024ddc88a4cf..2e1390837e86 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -7612,7 +7612,7 @@ cp_finish_decl (tree decl, tree init, bool init_const_expr_p, init = boolean_true_node; } else if (init - && init_const_expr_p + && (init_const_expr_p || DECL_DECLARED_CONSTEXPR_P (decl)) && !TYPE_REF_P (type) && decl_maybe_constant_var_p (decl) && !(dep_init = value_dependent_init_p (init))) @@ -10328,13 +10328,14 @@ compute_array_index_type_loc (location_t name_loc, tree name, tree size, dependent type or whose size is specified by a constant expression that is value-dependent. */ /* We can only call value_dependent_expression_p on integral constant - expressions; treat non-constant expressions as dependent, too. */ + expressions. */ if (processing_template_decl - && (!TREE_CONSTANT (size) || value_dependent_expression_p (size))) + && potential_constant_expression (size) + && value_dependent_expression_p (size)) { - /* We cannot do any checking for a SIZE that isn't known to be - constant. 
Just build the index type and mark that it requires + /* Just build the index type and mark that it requires structural equality checks. */ + in_template: itype = build_index_type (build_min (MINUS_EXPR, sizetype, size, size_one_node)); TYPE_DEPENDENT_P (itype) = 1; @@ -10447,8 +10448,7 @@ compute_array_index_type_loc (location_t name_loc, tree name, tree size, } if (processing_template_decl && !TREE_CONSTANT (size)) - /* A variable sized array. */ - itype = build_min (MINUS_EXPR, sizetype, size, integer_one_node); + goto in_template; else { if (!TREE_CONSTANT (size)) @@ -13961,7 +13961,10 @@ grokparms (tree parmlist, tree *parms) break; if (! decl || TREE_TYPE (decl) == error_mark_node) - continue; + { + any_error = 1; + continue; + } type = TREE_TYPE (decl); if (VOID_TYPE_P (type)) @@ -14014,7 +14017,8 @@ grokparms (tree parmlist, tree *parms) TREE_TYPE (decl) = type; } else if (abstract_virtuals_error (decl, type)) - any_error = 1; /* Seems like a good idea. */ + /* Ignore any default argument. */ + init = NULL_TREE; else if (cxx_dialect < cxx17 && INDIRECT_TYPE_P (type)) { /* Before C++17 DR 393: @@ -14043,9 +14047,7 @@ grokparms (tree parmlist, tree *parms) decl, t); } - if (any_error) - init = NULL_TREE; - else if (init && !processing_template_decl) + if (init && !processing_template_decl) init = check_default_argument (decl, init, tf_warning_or_error); } @@ -14058,6 +14060,12 @@ grokparms (tree parmlist, tree *parms) if (parm) result = chainon (result, void_list_node); *parms = decls; + if (any_error) + result = NULL_TREE; + + if (any_error) + /* We had parm errors, recover by giving the function (...) type. 
*/ + result = NULL_TREE; return result; } diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index a8082d39aca2..54ca875ce54c 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -10184,10 +10184,10 @@ cp_parser_constant_expression (cp_parser* parser, if (TREE_TYPE (expression) && TREE_CODE (TREE_TYPE (expression)) == ARRAY_TYPE) decay = build_address (expression); - bool is_const = potential_rvalue_constant_expression (decay); + bool is_const = is_rvalue_constant_expression (decay); parser->non_integral_constant_expression_p = !is_const; if (!is_const && !allow_non_constant_p) - require_potential_rvalue_constant_expression (decay); + require_rvalue_constant_expression (decay); } if (allow_non_constant_p) *non_constant_p = parser->non_integral_constant_expression_p; @@ -21366,6 +21366,8 @@ cp_parser_direct_declarator (cp_parser* parser, /* OK */; else if (error_operand_p (bounds)) /* Already gave an error. */; + else if (!cp_parser_uncommitted_to_tentative_parse_p (parser)) + /* Let compute_array_index_type diagnose this. */; else if (!parser->in_function_body || current_binding_level->kind == sk_function_parms) { diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c index 0181984bb995..f01ae6562546 100644 --- a/gcc/cp/typeck.c +++ b/gcc/cp/typeck.c @@ -8768,6 +8768,9 @@ build_x_modify_expr (location_t loc, tree lhs, enum tree_code modifycode, tree overload = NULL_TREE; tree op = build_nt (modifycode, NULL_TREE, NULL_TREE); + if (lhs == error_mark_node || rhs == error_mark_node) + return cp_expr (error_mark_node, loc); + if (processing_template_decl) { if (modifycode == NOP_EXPR diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7217e27151d6..8b9935dfe652 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -11205,6 +11205,14 @@ conflicting translation units. Specifically precedence; and for example @option{-ffp-contract=off} takes precedence over @option{-ffp-contract=fast}. You can override them at link time. 
+Diagnostic options such as @option{-Wstringop-overflow} are passed +through to the link stage and their setting matches that of the +compile-step at function granularity. Note that this matters only +for diagnostics emitted during optimization. Note that code +transforms such as inlining can lead to warnings being enabled +or disabled for regions of code not consistent with the setting +at compile time. + When you need to pass options to the assembler via @option{-Wa} or @option{-Xassembler} make sure to either compile such translation units with @option{-fno-lto} or consistently use the same assembler diff --git a/gcc/lto-section-out.c b/gcc/lto-section-out.c index 8eda3b5fde10..0182cd6059ef 100644 --- a/gcc/lto-section-out.c +++ b/gcc/lto-section-out.c @@ -170,7 +170,7 @@ lto_output_decl_index (struct lto_output_stream *obs, index = encoder->trees.length (); if (streamer_dump_file) { - print_node_brief (streamer_dump_file, " Encoding indexable ", + print_node_brief (streamer_dump_file, " Encoding indexable ", name, 4); fprintf (streamer_dump_file, " as %i \n", index); } diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c index 85d0edf49a70..d0532c5ac51a 100644 --- a/gcc/lto-streamer-in.c +++ b/gcc/lto-streamer-in.c @@ -1417,8 +1417,6 @@ lto_read_tree (class lto_input_block *ib, class data_in *data_in, lto_read_tree_1 (ib, data_in, result); - /* end_marker = */ streamer_read_uchar (ib); - return result; } @@ -1431,12 +1429,18 @@ hashval_t lto_input_scc (class lto_input_block *ib, class data_in *data_in, unsigned *len, unsigned *entry_len, bool shared_scc) { - /* A blob of unnamed tree nodes, fill the cache from it and - recurse. */ unsigned size = streamer_read_uhwi (ib); - hashval_t scc_hash = shared_scc ? 
streamer_read_uhwi (ib) : 0; + hashval_t scc_hash = 0; unsigned scc_entry_len = 1; + if (shared_scc) + { + if (size & 1) + scc_entry_len = streamer_read_uhwi (ib); + size /= 2; + scc_hash = streamer_read_uhwi (ib); + } + if (size == 1) { enum LTO_tags tag = streamer_read_record_start (ib); @@ -1447,8 +1451,6 @@ lto_input_scc (class lto_input_block *ib, class data_in *data_in, unsigned int first = data_in->reader_cache->nodes.length (); tree result; - scc_entry_len = streamer_read_uhwi (ib); - /* Materialize size trees by reading their headers. */ for (unsigned i = 0; i < size; ++i) { @@ -1471,7 +1473,6 @@ lto_input_scc (class lto_input_block *ib, class data_in *data_in, result = streamer_tree_cache_get_tree (data_in->reader_cache, first + i); lto_read_tree_1 (ib, data_in, result); - /* end_marker = */ streamer_read_uchar (ib); } } diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c index 0e1794680434..887a51d3eaeb 100644 --- a/gcc/lto-streamer-out.c +++ b/gcc/lto-streamer-out.c @@ -72,7 +72,7 @@ create_output_block (enum lto_section_type section_type) struct output_block *ob = XCNEW (struct output_block); if (streamer_dump_file) fprintf (streamer_dump_file, "Creating output block for %s\n", - lto_section_name [section_type]); + lto_section_name[section_type]); ob->section_type = section_type; ob->decl_state = lto_get_out_decl_state (); @@ -417,6 +417,14 @@ get_symbol_initial_value (lto_symtab_encoder_t encoder, tree expr) static void lto_write_tree_1 (struct output_block *ob, tree expr, bool ref_p) { + if (streamer_dump_file) + { + print_node_brief (streamer_dump_file, " Streaming body of ", + expr, 4); + fprintf (streamer_dump_file, " to %s\n", + lto_section_name[ob->section_type]); + } + /* Pack all the non-pointer fields in EXPR into a bitpack and write the resulting bitpack. 
*/ streamer_write_tree_bitfields (ob, expr); @@ -473,9 +481,6 @@ lto_write_tree (struct output_block *ob, tree expr, bool ref_p) streamer_write_tree_header (ob, expr); lto_write_tree_1 (ob, expr, ref_p); - - /* Mark the end of EXPR. */ - streamer_write_zero (ob); } /* Emit the physical representation of tree node EXPR to output block OB, @@ -740,6 +745,8 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, worklist_vec.pop (); + unsigned int prev_size = ob->main_stream->total_size; + /* Only global decl sections are considered by tree merging. */ if (ob->section_type != LTO_section_decls) { @@ -747,6 +754,11 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, by itself then we do not need to stream SCC at all. */ if (worklist_vec.is_empty () && first == 0 && size == 1) return; + if (streamer_dump_file) + { + fprintf (streamer_dump_file, + " Start of LTO_trees of size %i\n", size); + } streamer_write_record_start (ob, LTO_trees); streamer_write_uhwi (ob, size); } @@ -763,16 +775,35 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, { gcc_checking_assert (ob->section_type == LTO_section_decls); + if (streamer_dump_file) + { + fprintf (streamer_dump_file, + " Start of LTO_tree_scc of size %i\n", size); + } streamer_write_record_start (ob, LTO_tree_scc); - streamer_write_uhwi (ob, size); + /* In wast majority of cases scc_entry_len is 1 and size is small + integer. Use extra bit of size to stream info about + exceptions. */ + streamer_write_uhwi (ob, size * 2 + (scc_entry_len != 1)); + if (scc_entry_len != 1) + streamer_write_uhwi (ob, scc_entry_len); streamer_write_uhwi (ob, scc_hash); } /* Non-trivial SCCs must be packed to trees blocks so forward references work correctly. 
*/ else if (size != 1) { - streamer_write_record_start (ob, LTO_trees); - streamer_write_uhwi (ob, size); + if (streamer_dump_file) + { + fprintf (streamer_dump_file, + " Start of LTO_trees of size %i\n", size); + } + streamer_write_record_start (ob, LTO_trees); + streamer_write_uhwi (ob, size); + } + else if (streamer_dump_file) + { + fprintf (streamer_dump_file, " Streaming single tree\n"); } /* Write size-1 SCCs without wrapping them inside SCC bundles. @@ -783,8 +814,6 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, lto_output_tree_1 (ob, expr, scc_hash, ref_p, this_ref_p); else { - /* Write the size of the SCC entry candidates. */ - streamer_write_uhwi (ob, scc_entry_len); /* Write all headers and populate the streamer cache. */ for (unsigned i = 0; i < size; ++i) @@ -807,13 +836,11 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, /* Write the bitpacks and tree references. */ for (unsigned i = 0; i < size; ++i) - { - lto_write_tree_1 (ob, sccstack[first+i].t, ref_p); - - /* Mark the end of the tree. */ - streamer_write_zero (ob); - } + lto_write_tree_1 (ob, sccstack[first+i].t, ref_p); } + if (streamer_dump_file) + fprintf (streamer_dump_file, " %u bytes\n", + ob->main_stream->total_size - prev_size); /* Finally truncate the vector. */ sccstack.truncate (first); @@ -850,14 +877,6 @@ DFS::DFS_write_tree_body (struct output_block *ob, enum tree_code code; - if (streamer_dump_file) - { - print_node_brief (streamer_dump_file, " Streaming ", - expr, 4); - fprintf (streamer_dump_file, " to %s\n", - lto_section_name [ob->section_type]); - } - code = TREE_CODE (expr); if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) @@ -1251,7 +1270,7 @@ hash_tree (struct streamer_tree_cache_d *cache, hash_map *map, { hstate.add_hwi (TYPE_MODE (t)); /* TYPE_NO_FORCE_BLK is private to stor-layout and need - no streaming. */ + no streaming. 
*/ hstate.add_flag (TYPE_PACKED (t)); hstate.add_flag (TYPE_RESTRICT (t)); hstate.add_flag (TYPE_USER_ALIGN (t)); @@ -1694,6 +1713,10 @@ lto_output_tree (struct output_block *ob, tree expr, { unsigned ix; bool existed_p; + unsigned int size = ob->main_stream->total_size; + /* This is the first time we see EXPR, write all reachable + trees to OB. */ + static bool in_dfs_walk; if (expr == NULL_TREE) { @@ -1710,6 +1733,16 @@ lto_output_tree (struct output_block *ob, tree expr, existed_p = streamer_tree_cache_lookup (ob->writer_cache, expr, &ix); if (existed_p) { + if (streamer_dump_file) + { + if (in_dfs_walk) + print_node_brief (streamer_dump_file, " Streaming ref to ", + expr, 4); + else + print_node_brief (streamer_dump_file, " Streaming ref to ", + expr, 4); + fprintf (streamer_dump_file, "\n"); + } /* If a node has already been streamed out, make sure that we don't write it more than once. Otherwise, the reader will instantiate two different nodes for the same object. */ @@ -1721,20 +1754,16 @@ lto_output_tree (struct output_block *ob, tree expr, } else { - /* This is the first time we see EXPR, write all reachable - trees to OB. */ - static bool in_dfs_walk; - /* Protect against recursion which means disconnect between - what tree edges we walk in the DFS walk and what edges + what tree edges we walk in the DFS walk and what edges we stream out. */ gcc_assert (!in_dfs_walk); if (streamer_dump_file) { - print_node_brief (streamer_dump_file, " Streaming SCC of ", + print_node_brief (streamer_dump_file, " Streaming tree ", expr, 4); - fprintf (streamer_dump_file, "\n"); + fprintf (streamer_dump_file, "\n"); } /* Start the DFS walk. */ @@ -1742,7 +1771,6 @@ lto_output_tree (struct output_block *ob, tree expr, /* let's see ... */ in_dfs_walk = true; DFS (ob, expr, ref_p, this_ref_p, false); - in_dfs_walk = false; /* Finally append a reference to the tree we were writing. 
*/ existed_p = streamer_tree_cache_lookup (ob->writer_cache, expr, &ix); @@ -1751,21 +1779,26 @@ lto_output_tree (struct output_block *ob, tree expr, it. */ if (!existed_p) lto_output_tree_1 (ob, expr, 0, ref_p, this_ref_p); - else + else if (this_ref_p) { + if (streamer_dump_file) + { + print_node_brief (streamer_dump_file, + " Streaming final ref to ", + expr, 4); + fprintf (streamer_dump_file, "\n"); + } streamer_write_record_start (ob, LTO_tree_pickle_reference); streamer_write_uhwi (ob, ix); streamer_write_enum (ob->main_stream, LTO_tags, LTO_NUM_TAGS, lto_tree_code_to_tag (TREE_CODE (expr))); } - if (streamer_dump_file) - { - print_node_brief (streamer_dump_file, " Finished SCC of ", - expr, 4); - fprintf (streamer_dump_file, "\n\n"); - } + in_dfs_walk = false; lto_stats.num_pickle_refs_output++; } + if (streamer_dump_file && !in_dfs_walk) + fprintf (streamer_dump_file, " %u bytes\n", + ob->main_stream->total_size - size); } @@ -2705,7 +2738,7 @@ write_global_stream (struct output_block *ob, static void write_global_references (struct output_block *ob, - struct lto_tree_ref_encoder *encoder) + struct lto_tree_ref_encoder *encoder) { tree t; uint32_t index; @@ -3141,7 +3174,7 @@ produce_asm_for_decls (void) fn_out_state = lto_function_decl_states[idx]; if (streamer_dump_file) - fprintf (streamer_dump_file, "Outputting stream for %s\n", + fprintf (streamer_dump_file, "Outputting stream for %s\n", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (fn_out_state->fn_decl))); lto_output_decl_state_streams (ob, fn_out_state); diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index aedcba9d83a9..047afbafc80e 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,7 @@ +2020-05-22 Jan Hubicka + + * lto-common.c (lto_read_decls): Do not skip stray refs. 
+ 2020-05-20 Jan Hubicka * lto-common.c (compare_tree_sccs_1): Sanity check that we never diff --git a/gcc/lto/lto-common.c b/gcc/lto/lto-common.c index d04b1c9ca7be..3ea1894ce96e 100644 --- a/gcc/lto/lto-common.c +++ b/gcc/lto/lto-common.c @@ -1955,25 +1955,19 @@ lto_read_decls (struct lto_file_decl_data *decl_data, const void *data, else { t = lto_input_tree_1 (&ib_main, data_in, tag, 0); - /* We streamed in new tree. Add it to cache and process dref. */ - if (data_in->reader_cache->nodes.length () == from + 1) - { - num_unshared_trees_read++; - data_in->location_cache.accept_location_cache (); - process_dref (data_in, t, from); - if (TREE_CODE (t) == IDENTIFIER_NODE - || (TREE_CODE (t) == INTEGER_CST - && !TREE_OVERFLOW (t))) - ; - else - { - lto_maybe_register_decl (data_in, t, from); - process_new_tree (t, &hm, from, &total, data_in); - } - } + gcc_assert (data_in->reader_cache->nodes.length () == from + 1); + num_unshared_trees_read++; + data_in->location_cache.accept_location_cache (); + process_dref (data_in, t, from); + if (TREE_CODE (t) == IDENTIFIER_NODE + || (TREE_CODE (t) == INTEGER_CST + && !TREE_OVERFLOW (t))) + ; else - /* FIXME: It seems useless to pickle stray references. */ - gcc_assert (data_in->reader_cache->nodes.length () == from); + { + lto_maybe_register_decl (data_in, t, from); + process_new_tree (t, &hm, from, &total, data_in); + } } } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6de0c5e89235..1dbdc389ab9d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2020-05-22 Uroš Bizjak + + PR target/95255 + * gcc.target/i386/pr95255.c: New test. + +2020-05-22 Hongtao.liu + + * gcc.target/i386/pr92658-avx512f.c: New test. + * gcc.target/i386/pr92658-avx512vl.c: Ditto. + * gcc.target/i386/pr92658-avx512bw-trunc.c: Ditto. + +2020-05-22 Richard Biener + + PR tree-optimization/95268 + * g++.dg/torture/pr95268.C: New testcase. 
+ +2020-05-22 Richard Biener + + PR tree-optimization/95248 + * gcc.dg/torture/pr95248.c: New testcase. + 2020-05-21 Patrick Palka PR c++/94038 diff --git a/gcc/testsuite/c-c++-common/gomp/depend-iterator-2.c b/gcc/testsuite/c-c++-common/gomp/depend-iterator-2.c index 127528271ee9..fff32a4761fb 100644 --- a/gcc/testsuite/c-c++-common/gomp/depend-iterator-2.c +++ b/gcc/testsuite/c-c++-common/gomp/depend-iterator-2.c @@ -41,7 +41,7 @@ f1 (void) ; #pragma omp task depend (iterator (int i = 0:4, \ struct U { int (*p)[i + 2]; } *p = 0:2) , in : a) /* { dg-error "type of iterator 'p' refers to outer iterator 'i'" "" { target c } } */ - ; /* { dg-error "types may not be defined in iterator type|not an integer constant" "" { target c++ } .-1 } */ + ; /* { dg-error "types may not be defined in iterator type|not an integral constant" "" { target c++ } .-1 } */ #pragma omp task depend (iterator (i = 0:4, j = i:16) , in : a) /* { dg-error "begin expression refers to outer iterator 'i'" } */ ; #pragma omp task depend (iterator (i = 0:4, j = 2:i:1) , in : a) /* { dg-error "end expression refers to outer iterator 'i'" } */ diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice30.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice30.C index 46350bd1d522..7b0a155a31a7 100644 --- a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice30.C +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice30.C @@ -3,11 +3,11 @@ template void foo() { - int x[=]; // { dg-error "expected" } + int x[=]; // { dg-error "" } [&x]{}; } void bar() { - foo<0>(); + foo<0>(); // { dg-prune-output "not declared" } } diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice31.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice31.C index 54ded7bb2822..f0ecf8cc26f9 100644 --- a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice31.C +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice31.C @@ -4,5 +4,5 @@ template void foo() { T x[=]; // { dg-error "expected" } - [&x]{}; + [&x]{}; // { dg-prune-output "not declared" } } diff --git 
a/gcc/testsuite/g++.dg/ext/fixed1.C b/gcc/testsuite/g++.dg/ext/fixed1.C index 5a479d6891ab..92e3184ea591 100644 --- a/gcc/testsuite/g++.dg/ext/fixed1.C +++ b/gcc/testsuite/g++.dg/ext/fixed1.C @@ -6,3 +6,5 @@ template struct A {}; template struct B : A {}; // { dg-error "not supported" } template struct C : A {}; // { dg-error "not supported" } + +// { dg-prune-output "template argument" } diff --git a/gcc/testsuite/g++.dg/ext/vla1.C b/gcc/testsuite/g++.dg/ext/vla1.C index c017b6e90ed8..cae3f82135a5 100644 --- a/gcc/testsuite/g++.dg/ext/vla1.C +++ b/gcc/testsuite/g++.dg/ext/vla1.C @@ -19,8 +19,7 @@ class B { B (int); }; B::B (int i) { struct S { - int ar[1][i]; // { dg-error "15:size of array .ar. is not an integral" "" { target c++11 } } -// { dg-error "array bound" "" { target c++98_only } .-1 } + int ar[1][i]; // { dg-error "15:size of array .ar. is not an integral" } } s; s.ar[0][0] = 0; // { dg-prune-output "no member" } diff --git a/gcc/testsuite/g++.dg/parse/error33.C b/gcc/testsuite/g++.dg/parse/error33.C index 0d25386a879c..61b0cc3f2dcc 100644 --- a/gcc/testsuite/g++.dg/parse/error33.C +++ b/gcc/testsuite/g++.dg/parse/error33.C @@ -8,9 +8,9 @@ struct A typedef void (A::T)(); /* { dg-error "15:typedef name may not be a nested" } */ -void bar(T); /* { dg-message "note: declared here" } */ +void bar(T); void baz() { - bar(&A::foo); /* { dg-error "too many arguments" } */ + bar(&A::foo); } diff --git a/gcc/testsuite/g++.dg/template/array9.C b/gcc/testsuite/g++.dg/template/array9.C index f3e8335c943f..ce9fb649d3a2 100644 --- a/gcc/testsuite/g++.dg/template/array9.C +++ b/gcc/testsuite/g++.dg/template/array9.C @@ -7,8 +7,8 @@ struct Tree { Tree* R[subtrees]; // { dg-error "" } ~Tree() { - delete [] L[0]; // { dg-error "" } - delete [] R[0]; // { dg-error "" } + delete [] L[0]; + delete [] R[0]; } }; diff --git a/gcc/testsuite/g++.dg/template/crash107.C b/gcc/testsuite/g++.dg/template/crash107.C index 932a3a680592..aab882d8446e 100644 --- 
a/gcc/testsuite/g++.dg/template/crash107.C +++ b/gcc/testsuite/g++.dg/template/crash107.C @@ -10,9 +10,9 @@ template struct Vec { // { dg-message "note" "" { target c++17_dow X = y*rhs.z() - z*rhs.y(); // { dg-error "not declared|no member" } } Vec& operator^(Vec& rhs) { - return Vec(*this)^=rhs; // { dg-message "required" } + return Vec(*this)^=rhs; } }; Vec v(3,4,12); // { dg-error "no matching|too many initializers" } Vec V(12,4,3); // { dg-error "no matching|too many initializers" } -Vec c = v^V; // { dg-message "required" } +Vec c = v^V; diff --git a/gcc/testsuite/g++.dg/template/error35.C b/gcc/testsuite/g++.dg/template/error35.C index d52e599315e2..0ba8635507be 100644 --- a/gcc/testsuite/g++.dg/template/error35.C +++ b/gcc/testsuite/g++.dg/template/error35.C @@ -1,3 +1,3 @@ // PR c++/33494 -template void foo(int(*f=0)()); // { dg-error "declared void|scope|erroneous-expression" } +template void foo(int(*f=0)()); // { dg-error "declared void|scope|cannot be used as a function" } diff --git a/gcc/testsuite/g++.dg/template/error41.C b/gcc/testsuite/g++.dg/template/error41.C index c92b8497aff2..21e8ffbc20ec 100644 --- a/gcc/testsuite/g++.dg/template/error41.C +++ b/gcc/testsuite/g++.dg/template/error41.C @@ -8,5 +8,5 @@ struct A template struct B { - int x[A::i]; // { dg-error "array bound is not an integer constant" } + int x[A::i]; // { dg-error "not an integral constant-expression" } }; diff --git a/gcc/testsuite/g++.dg/template/sizeof-template-argument.C b/gcc/testsuite/g++.dg/template/sizeof-template-argument.C index 2bfff6d9a18a..b5e12d699ae1 100644 --- a/gcc/testsuite/g++.dg/template/sizeof-template-argument.C +++ b/gcc/testsuite/g++.dg/template/sizeof-template-argument.C @@ -3,9 +3,9 @@ template struct A {}; -template struct B : A {}; /* { dg-error "expected primary-expression" } */ +template struct B : A {}; /* { dg-error "" } */ -template struct C : A {}; /* { dg-error "expected primary-expression" } */ +template struct C : A {}; /* { dg-error "" } */ 
int a; diff --git a/gcc/testsuite/g++.dg/torture/pr95268.C b/gcc/testsuite/g++.dg/torture/pr95268.C new file mode 100644 index 000000000000..8385b86e179d --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr95268.C @@ -0,0 +1,46 @@ +// { dg-do compile } +// { dg-require-effective-target lp64 } +// { dg-additional-options "-Wno-overflow" } + +#include + +extern short var_0, var_2, var_3, var_9, var_11, var_13, var_14, var_19, var_22, + var_32, var_37, var_44, var_57, var_59, var_63, var_70; +extern unsigned var_5; +extern char var_6, var_12, var_18, var_38, var_39, var_43, var_55, var_64, + arr_35; +extern long var_7, var_8, var_10, var_15, var_25, var_56; +extern int var_21, var_36, var_51, var_65, var_68, arr_7; +extern bool var_46, var_58, var_67; + +void test() { + var_12 = 0 >= 0; + var_13 = arr_7; + var_14 = (unsigned long)var_7 >> -564810131 + 564810189; + var_15 = var_5; + var_18 = -602739307623583391; + var_19 = -~0; + var_21 = var_10 >> var_8 - 17101301574577641170ULL; + var_22 = var_5; + var_25 = var_9; + var_32 = std::max((unsigned)var_2, var_5); + var_36 = 9557; + var_37 = 394545925; + var_38 = 0 >= 0; + var_39 = var_5; + var_43 = 0; + var_44 = arr_35; + var_46 = var_7; + for (short a = 0; a < 9; a = 021) + for (short b = 0; b < 024; b += 4) + var_51 = std::min((long long)(var_2 ?: var_9), (long long)var_9); + var_55 = var_0; + var_56 = 3896150587; + var_57 = var_11; + var_58 = var_59 = var_11; + var_63 = 73; + var_64 = 10393232284806619711ULL; + var_65 = var_3; + var_67 = var_6; + var_68 = var_70 = 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr95248.c b/gcc/testsuite/gcc.dg/torture/pr95248.c new file mode 100644 index 000000000000..f0efcc12b512 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr95248.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-require-effective-target int32plus } */ + +int var_2 = -2013646301; +int var_3 = -1126567434; +unsigned int var_12 = 1; +unsigned int var_19; +unsigned int arr_25 [24] [21] [15] [17] [15] ; + +void 
__attribute__((noipa)) test() +{ + for (int a = 0; a < 3; a = 42) + for (int b = 0; b < 20; b++) + for (int c = 0; c < 4; c = 4) + for (int d = 0; d < 6; d += 4) + for (int e = 0; e < 4; e += 2) { + arr_25[a][b][c][d][e] = var_2 || var_3; + var_19 = var_12; + } +} + +int main() +{ + test(); + if (var_19 != 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c new file mode 100644 index 000000000000..bdfad7a4d18d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c @@ -0,0 +1,91 @@ +/* PR target/92658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512bw -mavx512vl" } */ + +typedef unsigned char v8qi __attribute__((vector_size (8))); +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned char v32qi __attribute__((vector_size (32))); +typedef unsigned short v8hi __attribute__((vector_size (16))); +typedef unsigned short v16hi __attribute__((vector_size (32))); +typedef unsigned short v32hi __attribute__((vector_size (64))); + + +void +truncwb_512 (v32qi * dst, v32hi * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + tem[16] = (*src)[16]; + tem[17] = (*src)[17]; + tem[18] = (*src)[18]; + tem[19] = (*src)[19]; + tem[20] = (*src)[20]; + tem[21] = (*src)[21]; + tem[22] = (*src)[22]; + tem[23] = (*src)[23]; + tem[24] = (*src)[24]; + tem[25] = (*src)[25]; + tem[26] = (*src)[26]; + tem[27] = (*src)[27]; + tem[28] = (*src)[28]; + tem[29] = (*src)[29]; + tem[30] = (*src)[30]; + tem[31] = (*src)[31]; + dst[0] = *(v32qi *) tem; +} + +void 
+truncwb_256 (v16qi * dst, v16hi * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16qi *) tem; +} + +void +truncwb_128 (v16qi * dst, v8hi * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovwb" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 3 { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c new file mode 100644 index 000000000000..2ba29074a816 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c @@ -0,0 +1,106 @@ +/* PR target/92658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512f" } */ + +typedef unsigned char v8qi __attribute__((vector_size (8))); +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned short v8hi __attribute__((vector_size (16))); +typedef unsigned short v16hi __attribute__((vector_size (32))); +typedef unsigned int v8si __attribute__((vector_size (32))); +typedef unsigned int v16si __attribute__((vector_size (64))); +typedef unsigned long long v8di __attribute__((vector_size (64))); + +void +truncqd (v8si * dst, v8di * __restrict src) +{ + unsigned tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8si *) tem; +} + +void +truncqw 
(v8hi * dst, v8di * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb (v8qi * dst, v8di * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v8qi *) tem; +} + +void +truncdw (v16hi * dst, v16si * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16hi *) tem; +} + + +void +truncdb (v16qi * dst, v16si * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovqd" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovqw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovqb" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c new file mode 100644 index 000000000000..50b32f968ac3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c @@ -0,0 +1,129 @@ +/* PR target/92658 
*/ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512f -mavx512vl" } */ + +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned short v8hi __attribute__((vector_size (16))); +typedef unsigned int v4si __attribute__((vector_size (16))); +typedef unsigned int v8si __attribute__((vector_size (32))); +typedef unsigned long long v2di __attribute__((vector_size (16))); +typedef unsigned long long v4di __attribute__((vector_size (32))); + +void +truncqd_256 (v4si * dst, v4di * __restrict src) +{ + unsigned tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v4si *) tem; +} + +void +truncqw_256 (v8hi * dst, v4di * __restrict src) +{ + unsigned short tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb_256 (v16qi * dst, v4di * __restrict src) +{ + unsigned char tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v16qi *) tem; +} + +void +truncqd_128 (v4si * dst, v2di * __restrict src) +{ + unsigned tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v4si *) tem; +} + +void +truncqw_128 (v8hi * dst, v2di * __restrict src) +{ + unsigned short tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb_128 (v16qi * dst, v2di * __restrict src) +{ + unsigned char tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v16qi *) tem; +} + +void +truncdw_256 (v8hi * dst, v8si * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8hi *) tem; +} + +void +truncdb_256 (v16qi * dst, v8si * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + 
tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v16qi *) tem; +} + +void +truncdw_128 (v8hi * dst, v4si * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v8hi *) tem; +} + +void +truncdb_128 (v16qi * dst, v4si * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovqd" 2 } } } */ +/* { dg-final { scan-assembler-times "vpmovqw" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovqb" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 2 { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr95255.c b/gcc/testsuite/gcc.target/i386/pr95255.c new file mode 100644 index 000000000000..5b731941f723 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95255.c @@ -0,0 +1,8 @@ +/* PR target/95255 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-sse4.1 -mfpmath=both" } */ + +double foo (double x) +{ + return __builtin_roundeven (x); +} diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 63f4ef8883c0..fcca099355a7 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2436,8 +2436,6 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, /* Use the sequence from the first edge and push SMs down. 
*/ for (unsigned i = 0; i < first_edge_seq.length (); ++i) { - if (first_edge_seq[i].second == sm_other) - break; unsigned id = first_edge_seq[i].first; seq.safe_push (first_edge_seq[i]); unsigned new_idx; diff --git a/gcc/tree-ssa-sink.c b/gcc/tree-ssa-sink.c index c5b535bed4d1..b61ecf12d1fe 100644 --- a/gcc/tree-ssa-sink.c +++ b/gcc/tree-ssa-sink.c @@ -534,7 +534,9 @@ sink_common_stores_to_bb (basic_block bb) /* ??? We could handle differing SSA uses in the LHS by inserting PHIs for them. */ else if (! operand_equal_p (gimple_assign_lhs (first_store), - gimple_assign_lhs (def), 0)) + gimple_assign_lhs (def), 0) + || (gimple_clobber_p (first_store) + && !gimple_clobber_p (def))) { first_store = NULL; break; @@ -546,16 +548,17 @@ sink_common_stores_to_bb (basic_block bb) /* Check if we need a PHI node to merge the stored values. */ bool allsame = true; - for (unsigned i = 1; i < vdefs.length (); ++i) - { - gimple *def = SSA_NAME_DEF_STMT (vdefs[i]); - if (! operand_equal_p (gimple_assign_rhs1 (first_store), - gimple_assign_rhs1 (def), 0)) - { - allsame = false; - break; - } - } + if (!gimple_clobber_p (first_store)) + for (unsigned i = 1; i < vdefs.length (); ++i) + { + gimple *def = SSA_NAME_DEF_STMT (vdefs[i]); + if (! operand_equal_p (gimple_assign_rhs1 (first_store), + gimple_assign_rhs1 (def), 0)) + { + allsame = false; + break; + } + } /* We cannot handle aggregate values if we need to merge them. */ tree type = TREE_TYPE (gimple_assign_lhs (first_store)); diff --git a/gcc/tree-streamer-out.c b/gcc/tree-streamer-out.c index 127a3d8c248a..4e8a12c71e63 100644 --- a/gcc/tree-streamer-out.c +++ b/gcc/tree-streamer-out.c @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. 
If not see #include "alias.h" #include "stor-layout.h" #include "gomp-constants.h" +#include "print-tree.h" /* Output the STRING constant to the string @@ -967,6 +968,14 @@ streamer_write_tree_header (struct output_block *ob, tree expr) enum LTO_tags tag; enum tree_code code; + if (streamer_dump_file) + { + print_node_brief (streamer_dump_file, " Streaming header of ", + expr, 4); + fprintf (streamer_dump_file, " to %s\n", + lto_section_name[ob->section_type]); + } + /* We should not see any tree nodes not handled by the streamer. */ code = TREE_CODE (expr); @@ -1016,6 +1025,12 @@ streamer_write_integer_cst (struct output_block *ob, tree cst, bool ref_p) int i; int len = TREE_INT_CST_NUNITS (cst); gcc_assert (!TREE_OVERFLOW (cst)); + if (streamer_dump_file) + { + print_node_brief (streamer_dump_file, " Streaming integer ", + cst, 4); + fprintf (streamer_dump_file, "\n"); + } streamer_write_record_start (ob, LTO_integer_cst); stream_write_tree (ob, TREE_TYPE (cst), ref_p); /* We're effectively streaming a non-sign-extended wide_int here, diff --git a/gcc/tree-streamer.c b/gcc/tree-streamer.c index f6181fafc4c8..b0afa1dc6c0c 100644 --- a/gcc/tree-streamer.c +++ b/gcc/tree-streamer.c @@ -299,10 +299,11 @@ record_common_node (struct streamer_tree_cache_d *cache, tree node) if (!node) node = error_mark_node; - /* ??? FIXME, devise a better hash value. But the hash needs to be equal - for all frontend and lto1 invocations. So just use the position - in the cache as hash value. */ - streamer_tree_cache_append (cache, node, cache->nodes.length ()); + /* This hash needs to be equal for all frontend and lto1 invocations. So + just use the position in the cache as hash value. + Small integers are used by hash_tree to record positions within scc + hash. Values are not in same range. 
*/ + streamer_tree_cache_append (cache, node, cache->next_idx + 0xc001); switch (TREE_CODE (node)) { diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index ecce348b39c8..4f94b4baad9f 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6185,17 +6185,29 @@ vectorizable_reduction (loop_vec_info loop_vinfo, The last use is the reduction variable. In case of nested cycle this assumption is not true: we use reduc_index to record the index of the reduction variable. */ - reduc_def = PHI_RESULT (reduc_def_phi); + /* ??? To get at invariant/constant uses on the SLP node we have to + get to it here, slp_node is still the reduction PHI. */ + slp_tree slp_for_stmt_info = NULL; + if (slp_node) + { + slp_for_stmt_info = slp_node_instance->root; + /* And then there's reduction chain with a conversion ... */ + if (SLP_TREE_SCALAR_STMTS (slp_for_stmt_info)[0] != stmt_info) + slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; + gcc_assert (SLP_TREE_SCALAR_STMTS (slp_for_stmt_info)[0] == stmt_info); + } + slp_tree *slp_op = XALLOCAVEC (slp_tree, op_type); for (i = 0; i < op_type; i++) { - tree op = gimple_op (stmt, i + 1); /* The condition of COND_EXPR is checked in vectorizable_condition(). 
*/ if (i == 0 && code == COND_EXPR) continue; stmt_vec_info def_stmt_info; enum vect_def_type dt; - if (!vect_is_simple_use (op, loop_vinfo, &dt, &tem, + tree op; + if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info, + i, &op, &slp_op[i], &dt, &tem, &def_stmt_info)) { if (dump_enabled_p ()) @@ -6729,6 +6741,21 @@ vectorizable_reduction (loop_vec_info loop_vinfo, return false; } + if (slp_node + && !(!single_defuse_cycle + && code != DOT_PROD_EXPR + && code != WIDEN_SUM_EXPR + && code != SAD_EXPR + && reduction_type != FOLD_LEFT_REDUCTION)) + for (i = 0; i < op_type; i++) + if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } + if (slp_node) vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); else diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 69a2002717f2..ec3675e7070b 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -46,6 +46,34 @@ along with GCC; see the file COPYING3. If not see #include "internal-fn.h" +/* Initialize a SLP node. */ + +_slp_tree::_slp_tree () +{ + SLP_TREE_SCALAR_STMTS (this) = vNULL; + SLP_TREE_SCALAR_OPS (this) = vNULL; + SLP_TREE_VEC_STMTS (this).create (0); + SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0; + SLP_TREE_CHILDREN (this) = vNULL; + SLP_TREE_LOAD_PERMUTATION (this) = vNULL; + SLP_TREE_TWO_OPERATORS (this) = false; + SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def; + SLP_TREE_VECTYPE (this) = NULL_TREE; + this->refcnt = 1; + this->max_nunits = 1; +} + +/* Tear down a SLP node. */ + +_slp_tree::~_slp_tree () +{ + SLP_TREE_CHILDREN (this).release (); + SLP_TREE_SCALAR_STMTS (this).release (); + SLP_TREE_SCALAR_OPS (this).release (); + SLP_TREE_VEC_STMTS (this).release (); + SLP_TREE_LOAD_PERMUTATION (this).release (); +} + /* Recursively free the memory allocated for the SLP tree rooted at NODE. 
FINAL_P is true if we have vectorized the instance or if we have made a final decision not to vectorize the statements in any way. */ @@ -76,13 +104,7 @@ vect_free_slp_tree (slp_tree node, bool final_p) } } - SLP_TREE_CHILDREN (node).release (); - SLP_TREE_SCALAR_STMTS (node).release (); - SLP_TREE_SCALAR_OPS (node).release (); - SLP_TREE_VEC_STMTS (node).release (); - SLP_TREE_LOAD_PERMUTATION (node).release (); - - free (node); + delete node; } /* Free the memory allocated for the SLP instance. FINAL_P is true if we @@ -101,39 +123,15 @@ vect_free_slp_instance (slp_instance instance, bool final_p) /* Create an SLP node for SCALAR_STMTS. */ static slp_tree -vect_create_new_slp_node (vec scalar_stmts) +vect_create_new_slp_node (vec scalar_stmts, unsigned nops) { - slp_tree node; - stmt_vec_info stmt_info = scalar_stmts[0]; - unsigned int nops; - - if (gcall *stmt = dyn_cast (stmt_info->stmt)) - nops = gimple_call_num_args (stmt); - else if (gassign *stmt = dyn_cast (stmt_info->stmt)) - { - nops = gimple_num_ops (stmt) - 1; - if (gimple_assign_rhs_code (stmt) == COND_EXPR) - nops++; - } - else if (is_a (stmt_info->stmt)) - nops = 0; - else - return NULL; - - node = XNEW (struct _slp_tree); + slp_tree node = new _slp_tree; SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; - SLP_TREE_SCALAR_OPS (node) = vNULL; - SLP_TREE_VEC_STMTS (node).create (0); - SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; SLP_TREE_CHILDREN (node).create (nops); - SLP_TREE_LOAD_PERMUTATION (node) = vNULL; - SLP_TREE_TWO_OPERATORS (node) = false; SLP_TREE_DEF_TYPE (node) = vect_internal_def; - SLP_TREE_VECTYPE (node) = NULL_TREE; - node->refcnt = 1; - node->max_nunits = 1; unsigned i; + stmt_vec_info stmt_info; FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info) STMT_VINFO_NUM_SLP_USES (stmt_info)++; @@ -145,21 +143,9 @@ vect_create_new_slp_node (vec scalar_stmts) static slp_tree vect_create_new_slp_node (vec ops) { - slp_tree node; - - node = XNEW (struct _slp_tree); - SLP_TREE_SCALAR_STMTS (node) = vNULL; + 
slp_tree node = new _slp_tree; SLP_TREE_SCALAR_OPS (node) = ops; - SLP_TREE_VEC_STMTS (node).create (0); - SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; - SLP_TREE_CHILDREN (node) = vNULL; - SLP_TREE_LOAD_PERMUTATION (node) = vNULL; - SLP_TREE_TWO_OPERATORS (node) = false; SLP_TREE_DEF_TYPE (node) = vect_external_def; - SLP_TREE_VECTYPE (node) = NULL_TREE; - node->refcnt = 1; - node->max_nunits = 1; - return node; } @@ -1284,7 +1270,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, else return NULL; (*tree_size)++; - node = vect_create_new_slp_node (stmts); + node = vect_create_new_slp_node (stmts, 0); return node; } @@ -1309,7 +1295,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, { *max_nunits = this_max_nunits; (*tree_size)++; - node = vect_create_new_slp_node (stmts); + node = vect_create_new_slp_node (stmts, 0); /* And compute the load permutation. Whether it is actually a permutation depends on the unrolling factor which is decided later. */ @@ -1450,7 +1436,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "Building vector operands from scalars\n"); this_tree_size++; - child = vect_create_new_slp_node (oprnd_info->def_stmts); + child = vect_create_new_slp_node (oprnd_info->def_stmts, 0); SLP_TREE_DEF_TYPE (child) = vect_external_def; SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops; children.safe_push (child); @@ -1587,7 +1573,7 @@ fail: *tree_size += this_tree_size + 1; *max_nunits = this_max_nunits; - node = vect_create_new_slp_node (stmts); + node = vect_create_new_slp_node (stmts, nops); SLP_TREE_TWO_OPERATORS (node) = two_operators; SLP_TREE_CHILDREN (node).splice (children); return node; @@ -1726,9 +1712,12 @@ slp_copy_subtree (slp_tree node, hash_map &map) if (existed_p) return copy_ref; - copy_ref = XNEW (_slp_tree); + copy_ref = new _slp_tree; slp_tree copy = copy_ref; - memcpy (copy, node, sizeof (_slp_tree)); + SLP_TREE_DEF_TYPE (copy) = SLP_TREE_DEF_TYPE (node); + SLP_TREE_VECTYPE (copy) = SLP_TREE_VECTYPE (node); + 
copy->max_nunits = node->max_nunits; + copy->refcnt = 0; if (SLP_TREE_SCALAR_STMTS (node).exists ()) { SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy (); @@ -1743,7 +1732,6 @@ slp_copy_subtree (slp_tree node, hash_map &map) if (SLP_TREE_CHILDREN (node).exists ()) SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy (); gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ()); - copy->refcnt = 0; slp_tree child; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child) @@ -2230,7 +2218,7 @@ vect_analyze_slp_instance (vec_info *vinfo, scalar_stmts.create (group_size); for (unsigned i = 0; i < group_size; ++i) scalar_stmts.quick_push (next_info); - slp_tree conv = vect_create_new_slp_node (scalar_stmts); + slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1); SLP_TREE_CHILDREN (conv).quick_push (node); SLP_INSTANCE_TREE (new_instance) = conv; /* We also have to fake this conversion stmt as SLP reduction @@ -3633,16 +3621,22 @@ vect_get_constant_vectors (vec_info *vinfo, gimple_seq ctor_seq = NULL; auto_vec permute_results; - /* ??? SLP analysis should compute the vector type for the - constant / invariant and store it in the SLP node. */ + /* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */ + vector_type = SLP_TREE_VECTYPE (op_node); + { tree op = op_node->ops[0]; - /* Check if vector type is a boolean vector. 
*/ tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo); if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) && vect_mask_constant_operand_p (vinfo, stmt_vinfo, op_num)) - vector_type = truth_type_for (stmt_vectype); + gcc_assert (vector_type + && types_compatible_p (vector_type, + truth_type_for (stmt_vectype))); else - vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node); + gcc_assert (vector_type + && types_compatible_p (vector_type, + get_vectype_for_scalar_type + (vinfo, TREE_TYPE (op), op_node))); + } poly_uint64 vf = 1; if (loop_vec_info loop_vinfo = dyn_cast (vinfo)) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 82750a975aa7..e7822c449515 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -3170,6 +3170,7 @@ vectorizable_call (vec_info *vinfo, = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type }; tree vectypes[ARRAY_SIZE (dt)] = {}; + slp_tree slp_op[ARRAY_SIZE (dt)] = {}; int ndts = ARRAY_SIZE (dt); int ncopies, j; auto_vec vargs; @@ -3209,7 +3210,7 @@ vectorizable_call (vec_info *vinfo, vectype_in = NULL_TREE; nargs = gimple_call_num_args (stmt); - /* Bail out if the function has more than three arguments, we do not have + /* Bail out if the function has more than four arguments, we do not have interesting builtin functions to vectorize with more than two arguments except for fma. No arguments is also not good. 
*/ if (nargs == 0 || nargs > 4) @@ -3229,17 +3230,17 @@ vectorizable_call (vec_info *vinfo, for (i = 0; i < nargs; i++) { - op = gimple_call_arg (stmt, i); - if ((int) i == mask_opno) { + op = gimple_call_arg (stmt, i); if (!vect_check_scalar_mask (vinfo, stmt_info, op, &dt[i], &vectypes[i])) return false; continue; } - if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i])) + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, + i, &op, &slp_op[i], &dt[i], &vectypes[i])) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3400,6 +3401,15 @@ vectorizable_call (vec_info *vinfo, vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); if (!vec_stmt) /* transformation not required. */ { + if (slp_node) + for (i = 0; i < nargs; ++i) + if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_call"); vect_model_simple_cost (vinfo, stmt_info, @@ -4725,9 +4735,18 @@ vectorizable_conversion (vec_info *vinfo, lhs_type = TREE_TYPE (scalar_dest); vectype_out = STMT_VINFO_VECTYPE (stmt_info); - op0 = gimple_assign_rhs1 (stmt); - rhs_type = TREE_TYPE (op0); + /* Check the operands of the operation. */ + slp_tree slp_op0, slp_op1 = NULL; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, + 0, &op0, &slp_op0, &dt[0], &vectype_in)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "use not simple.\n"); + return false; + } + rhs_type = TREE_TYPE (op0); if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) && !((INTEGRAL_TYPE_P (lhs_type) && INTEGRAL_TYPE_P (rhs_type)) @@ -4748,34 +4767,24 @@ vectorizable_conversion (vec_info *vinfo, return false; } - /* Check the operands of the operation. 
*/ - if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "use not simple.\n"); - return false; - } if (op_type == binary_op) { - bool ok; + gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR); op1 = gimple_assign_rhs2 (stmt); - gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR); - /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of - OP1. */ - if (CONSTANT_CLASS_P (op0)) - ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in); - else - ok = vect_is_simple_use (op1, vinfo, &dt[1]); - - if (!ok) + tree vectype1_in; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, + &op1, &slp_op1, &dt[1], &vectype1_in)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "use not simple.\n"); return false; } + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of + OP1. */ + if (!vectype_in) + vectype_in = vectype1_in; } /* If op0 is an external or constant def, infer the vector type @@ -4949,6 +4958,15 @@ vectorizable_conversion (vec_info *vinfo, if (!vec_stmt) /* transformation not required. 
*/ { + if (slp_node + && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in) + || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } DUMP_VECT_SCOPE ("vectorizable_conversion"); if (modifier == NONE) { @@ -5306,16 +5324,14 @@ vectorizable_assignment (vec_info *vinfo, if (TREE_CODE (scalar_dest) != SSA_NAME) return false; - code = gimple_assign_rhs_code (stmt); - if (gimple_assign_single_p (stmt) - || code == PAREN_EXPR - || CONVERT_EXPR_CODE_P (code)) - op = gimple_assign_rhs1 (stmt); - else + if (STMT_VINFO_DATA_REF (stmt_info)) return false; - if (code == VIEW_CONVERT_EXPR) - op = TREE_OPERAND (op, 0); + code = gimple_assign_rhs_code (stmt); + if (!(gimple_assign_single_p (stmt) + || code == PAREN_EXPR + || CONVERT_EXPR_CODE_P (code))) + return false; tree vectype = STMT_VINFO_VECTYPE (stmt_info); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); @@ -5330,13 +5346,17 @@ vectorizable_assignment (vec_info *vinfo, gcc_assert (ncopies >= 1); - if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in)) + slp_tree slp_op; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op, + &dt[0], &vectype_in)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "use not simple.\n"); return false; } + if (!vectype_in) + vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node); /* We can handle NOP_EXPR conversions that do not change the number of elements or the vector size. */ @@ -5373,6 +5393,14 @@ vectorizable_assignment (vec_info *vinfo, if (!vec_stmt) /* transformation not required. 
*/ { + if (slp_node + && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_assignment"); if (!vect_nop_conversion_p (stmt_info)) @@ -5540,8 +5568,9 @@ vectorizable_shift (vec_info *vinfo, return false; } - op0 = gimple_assign_rhs1 (stmt); - if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype)) + slp_tree slp_op0; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, + 0, &op0, &slp_op0, &dt[0], &vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5567,10 +5596,10 @@ vectorizable_shift (vec_info *vinfo, if (maybe_ne (nunits_out, nunits_in)) return false; - op1 = gimple_assign_rhs2 (stmt); stmt_vec_info op1_def_stmt_info; - if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype, - &op1_def_stmt_info)) + slp_tree slp_op1; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1, + &dt[1], &op1_vectype, &op1_def_stmt_info)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5743,6 +5772,15 @@ vectorizable_shift (vec_info *vinfo, if (!vec_stmt) /* transformation not required. */ { + if (slp_node + && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) + || !vect_maybe_update_slp_op_vectype (slp_op1, op1_vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_shift"); vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, @@ -5931,7 +5969,8 @@ vectorizable_operation (vec_info *vinfo, if (!stmt) return false; - if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) + /* Loads and stores are handled in vectorizable_{load,store}. 
*/ + if (STMT_VINFO_DATA_REF (stmt_info)) return false; orig_code = code = gimple_assign_rhs_code (stmt); @@ -5988,8 +6027,9 @@ vectorizable_operation (vec_info *vinfo, return false; } - op0 = gimple_assign_rhs1 (stmt); - if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype)) + slp_tree slp_op0; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, + 0, &op0, &slp_op0, &dt[0], &vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6038,10 +6078,11 @@ vectorizable_operation (vec_info *vinfo, return false; tree vectype2 = NULL_TREE, vectype3 = NULL_TREE; + slp_tree slp_op1 = NULL, slp_op2 = NULL; if (op_type == binary_op || op_type == ternary_op) { - op1 = gimple_assign_rhs2 (stmt); - if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2)) + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, + 1, &op1, &slp_op1, &dt[1], &vectype2)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6051,8 +6092,8 @@ vectorizable_operation (vec_info *vinfo, } if (op_type == ternary_op) { - op2 = gimple_assign_rhs3 (stmt); - if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3)) + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, + 2, &op2, &slp_op2, &dt[2], &vectype3)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6164,6 +6205,18 @@ vectorizable_operation (vec_info *vinfo, vectype, NULL); } + /* Put types on constant and invariant SLP children. 
*/ + if (slp_node + && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) + || !vect_maybe_update_slp_op_vectype (slp_op1, vectype) + || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } + STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_operation"); vect_model_simple_cost (vinfo, stmt_info, @@ -7479,6 +7532,16 @@ vectorizable_store (vec_info *vinfo, check_load_store_masking (loop_vinfo, vectype, vls_type, group_size, memory_access_type, &gs_info, mask); + if (slp_node + && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], + vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } + STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; vect_model_store_cost (vinfo, stmt_info, ncopies, memory_access_type, vls_type, slp_node, cost_vec); @@ -10060,17 +10123,23 @@ vectorizable_condition (vec_info *vinfo, return false; /* FORNOW */ cond_expr = gimple_assign_rhs1 (stmt); - then_clause = gimple_assign_rhs2 (stmt); - else_clause = gimple_assign_rhs3 (stmt); if (!vect_is_simple_cond (cond_expr, vinfo, slp_node, &comp_vectype, &dts[0], vectype) || !comp_vectype) return false; - if (!vect_is_simple_use (then_clause, vinfo, &dts[2], &vectype1)) + unsigned slp_adjust = 0; + if (slp_node && SLP_TREE_CHILDREN (slp_node).length () == 4) + /* ??? Hack. Hope for COND_EXPR GIMPLE sanitizing or refactor + things more... 
*/ + slp_adjust = 1; + slp_tree then_slp_node, else_slp_node; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + slp_adjust, + &then_clause, &then_slp_node, &dts[2], &vectype1)) return false; - if (!vect_is_simple_use (else_clause, vinfo, &dts[3], &vectype2)) + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + slp_adjust, + &else_clause, &else_slp_node, &dts[3], &vectype2)) return false; if (vectype1 && !useless_type_conversion_p (vectype, vectype1)) @@ -10188,12 +10257,6 @@ vectorizable_condition (vec_info *vinfo, } } - if (loop_vinfo - && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) - && reduction_type == EXTRACT_LAST_REDUCTION) - vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo), - ncopies * vec_num, vectype, NULL); - vect_cost_for_stmt kind = vector_stmt; if (reduction_type == EXTRACT_LAST_REDUCTION) /* Count one reduction-like operation per vector. */ @@ -10201,6 +10264,27 @@ vectorizable_condition (vec_info *vinfo, else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) return false; + if (slp_node + && (!vect_maybe_update_slp_op_vectype + (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype) + || (slp_adjust == 1 + && !vect_maybe_update_slp_op_vectype + (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype)) + || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype) + || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } + + if (loop_vinfo + && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) + && reduction_type == EXTRACT_LAST_REDUCTION) + vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo), + ncopies * vec_num, vectype, NULL); + STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node, cost_vec, kind); @@ -10550,13 +10634,13 @@ vectorizable_comparison (vec_info *vinfo, if 
(TREE_CODE_CLASS (code) != tcc_comparison) return false; - rhs1 = gimple_assign_rhs1 (stmt); - rhs2 = gimple_assign_rhs2 (stmt); - - if (!vect_is_simple_use (rhs1, vinfo, &dts[0], &vectype1)) + slp_tree slp_rhs1, slp_rhs2; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, + 0, &rhs1, &slp_rhs1, &dts[0], &vectype1)) return false; - if (!vect_is_simple_use (rhs2, vinfo, &dts[1], &vectype2)) + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, + 1, &rhs2, &slp_rhs2, &dts[1], &vectype2)) return false; if (vectype1 && vectype2 @@ -10649,6 +10733,17 @@ vectorizable_comparison (vec_info *vinfo, } } + /* Put types on constant and invariant SLP children. */ + if (slp_node + && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype) + || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } + STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; vect_model_simple_cost (vinfo, stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)), @@ -11705,6 +11800,61 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, return true; } +/* Function vect_is_simple_use. + + Same as vect_is_simple_use but determines the operand by operand + position OPERAND from either STMT or SLP_NODE, filling in *OP + and *SLP_DEF (when SLP_NODE is not NULL). 
*/ + +bool +vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node, + unsigned operand, tree *op, slp_tree *slp_def, + enum vect_def_type *dt, + tree *vectype, stmt_vec_info *def_stmt_info_out) +{ + if (slp_node) + { + slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand]; + *slp_def = child; + if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) + *op = gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child)[0]->stmt); + else + *op = SLP_TREE_SCALAR_OPS (child)[0]; + } + else + { + if (gassign *ass = dyn_cast <gassign *> (stmt->stmt)) + { + *op = gimple_op (ass, operand + 1); + /* ??? Ick. But it will vanish with SLP only. Until then look through a VIEW_CONVERT_EXPR so that its operand is what gets analyzed below. */ + if (TREE_CODE (*op) == VIEW_CONVERT_EXPR) + *op = TREE_OPERAND (*op, 0); + } + else if (gcall *call = dyn_cast <gcall *> (stmt->stmt)) + *op = gimple_call_arg (call, operand); + else + gcc_unreachable (); + } + + /* ??? We might want to update *vectype from *slp_def here though + when sharing nodes this would prevent unsharing in the caller. */ + return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out); +} + +/* If OP is not NULL and is external or constant update its vector + type with VECTYPE. Returns true if successful or false if not, + for example when conflicting vector types are present. A NULL OP or an internal def is left untouched and reported as success; an OP that already has a vector type is only checked for compatibility with VECTYPE and never overwritten. */ + +bool +vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype) +{ + if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def) + return true; + if (SLP_TREE_VECTYPE (op)) + return types_compatible_p (SLP_TREE_VECTYPE (op), vectype); + SLP_TREE_VECTYPE (op) = vectype; + return true; +} /* Function supportable_widening_operation diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 38a0a1d278be..2eb3ab5d280d 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -118,6 +118,9 @@ typedef struct _slp_tree *slp_tree; /* A computation tree of an SLP instance. Each node corresponds to a group of stmts to be packed in a SIMD stmt.
*/ struct _slp_tree { + _slp_tree (); + ~_slp_tree (); + /* Nodes that contain def-stmts of this node statements operands. */ vec children; @@ -1695,6 +1698,11 @@ extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, tree *, stmt_vec_info * = NULL, gimple ** = NULL); +extern bool vect_is_simple_use (vec_info *, stmt_vec_info, slp_tree, + unsigned, tree *, slp_tree *, + enum vect_def_type *, + tree *, stmt_vec_info * = NULL); +extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree); extern bool supportable_widening_operation (vec_info *, enum tree_code, stmt_vec_info, tree, tree, enum tree_code *,