diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc index a041c3b91eb..754cdab2ae8 100644 --- a/gcc/c-family/c-attribs.cc +++ b/gcc/c-family/c-attribs.cc @@ -522,6 +522,8 @@ const struct attribute_spec c_common_attribute_table[] = handle_omp_declare_target_attribute, NULL }, { "omp declare target implicit", 0, 0, true, false, false, false, handle_omp_declare_target_attribute, NULL }, + { "omp declare target indirect", 0, 0, true, false, false, false, + handle_omp_declare_target_attribute, NULL }, { "omp declare target host", 0, 0, true, false, false, false, handle_omp_declare_target_attribute, NULL }, { "omp declare target nohost", 0, 0, true, false, false, false, diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index 682157a4517..98177913053 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -125,6 +125,7 @@ enum pragma_omp_clause { PRAGMA_OMP_CLAUSE_IF, PRAGMA_OMP_CLAUSE_IN_REDUCTION, PRAGMA_OMP_CLAUSE_INBRANCH, + PRAGMA_OMP_CLAUSE_INDIRECT, PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR, PRAGMA_OMP_CLAUSE_LASTPRIVATE, PRAGMA_OMP_CLAUSE_LINEAR, diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index 4d38750a7b4..64d3a941cb9 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -5363,6 +5363,14 @@ c_decl_attributes (tree *node, tree attributes, int flags) attributes = tree_cons (get_identifier ("omp declare target nohost"), NULL_TREE, attributes); + + int indirect + = current_omp_declare_target_attribute->last ().indirect; + if (indirect && !lookup_attribute ("omp declare target indirect", + attributes)) + attributes + = tree_cons (get_identifier ("omp declare target indirect"), + NULL_TREE, attributes); } } diff --git a/gcc/c/c-lang.h b/gcc/c/c-lang.h index 09f4d40cda2..0b6db6c98e5 100644 --- a/gcc/c/c-lang.h +++ b/gcc/c/c-lang.h @@ -63,6 +63,7 @@ struct GTY(()) language_function { struct GTY(()) c_omp_declare_target_attr { bool attr_syntax; int device_type; + int indirect; }; struct GTY(()) c_omp_begin_assumes_data { diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index 134d3ed078c..703f9570dbc 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -14598,6 +14598,8 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_IN_REDUCTION; else if (!strcmp ("inbranch", p)) result = PRAGMA_OMP_CLAUSE_INBRANCH; + else if (!strcmp ("indirect", p)) + result = PRAGMA_OMP_CLAUSE_INDIRECT; else if (!strcmp ("independent", p)) result = PRAGMA_OACC_CLAUSE_INDEPENDENT; else if (!strcmp ("is_device_ptr", p)) @@ -15474,6 +15476,47 @@ c_parser_omp_clause_final (c_parser *parser, tree list) return list; } +/* OpenMP 5.1: + indirect [( expression )] +*/ + +static tree +c_parser_omp_clause_indirect (c_parser *parser, tree list) +{ + location_t location = c_parser_peek_token (parser)->location; + tree t; + + if (c_parser_peek_token (parser)->type == CPP_OPEN_PAREN) + { + matching_parens parens; + if (!parens.require_open (parser)) + return list; + + location_t loc = c_parser_peek_token (parser)->location; + c_expr expr = c_parser_expr_no_commas (parser, NULL); + expr = convert_lvalue_to_rvalue (loc, expr, true, true); + t = c_objc_common_truthvalue_conversion (loc, expr.value); + t = c_fully_fold (t, false, NULL); + if (!INTEGRAL_TYPE_P (TREE_TYPE (t)) + || TREE_CODE (t) != INTEGER_CST) + { + c_parser_error (parser, "expected constant logical expression"); + return list; + } + parens.skip_until_found_close (parser); + } + else + t = integer_one_node; + + check_no_duplicate_clause (list, OMP_CLAUSE_INDIRECT, "indirect"); + + tree c = build_omp_clause (location, OMP_CLAUSE_INDIRECT); + OMP_CLAUSE_INDIRECT_EXPR (c) = t; + OMP_CLAUSE_CHAIN (c) = list; + + return c; +} + /* OpenACC, OpenMP 2.5: if ( expression ) @@ -19035,6 +19078,10 @@ c_parser_omp_all_clauses (c_parser *parser, omp_clause_mask mask, true, clauses); c_name = "in_reduction"; break; + case PRAGMA_OMP_CLAUSE_INDIRECT: + clauses = c_parser_omp_clause_indirect (parser, clauses); + c_name = "indirect"; + break; case PRAGMA_OMP_CLAUSE_LASTPRIVATE: clauses = c_parser_omp_clause_lastprivate (parser, clauses); c_name = "lastprivate"; @@ -24608,14 +24655,16 @@ c_maybe_parse_omp_decl (tree decl, tree d) ( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_TO) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ENTER) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_LINK) \ - | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE)) + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INDIRECT)) static void c_parser_omp_declare_target (c_parser *parser) { tree clauses = NULL_TREE; int device_type = 0; - bool only_device_type = true; + bool indirect = false; + bool only_device_type_or_indirect = true; if (c_parser_next_token_is (parser, CPP_NAME) || (c_parser_next_token_is (parser, CPP_COMMA) && c_parser_peek_2nd_token (parser)->type == CPP_NAME)) @@ -24633,22 +24682,27 @@ c_parser_omp_declare_target (c_parser *parser) { bool attr_syntax = parser->in_omp_attribute_pragma != NULL; c_parser_skip_to_pragma_eol (parser); - c_omp_declare_target_attr attr = { attr_syntax, -1 }; + c_omp_declare_target_attr attr = { attr_syntax, -1, 0 }; vec_safe_push (current_omp_declare_target_attribute, attr); return; } - for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE) - device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c); for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) { if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE) + device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT) + indirect |= !integer_zerop (OMP_CLAUSE_INDIRECT_EXPR (c)); + } + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT) continue; tree t = OMP_CLAUSE_DECL (c), id; tree at1 = lookup_attribute ("omp declare target", DECL_ATTRIBUTES (t)); tree at2 = lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)); - only_device_type = false; + only_device_type_or_indirect = false; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINK) { id = get_identifier ("omp declare target link"); @@ -24710,10 +24764,25 @@ c_parser_omp_declare_target (c_parser *parser) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (t)); } } + if (indirect) + { + tree at4 = lookup_attribute ("omp declare target indirect", + DECL_ATTRIBUTES (t)); + if (at4 == NULL_TREE) + { + id = get_identifier ("omp declare target indirect"); + DECL_ATTRIBUTES (t) + = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (t)); + } + } } - if (device_type && only_device_type) + if ((device_type || indirect) && only_device_type_or_indirect) error_at (OMP_CLAUSE_LOCATION (clauses), - "directive with only % clause"); + "directive with only % or % clauses"); + if (indirect && device_type && device_type != OMP_CLAUSE_DEVICE_TYPE_ANY) + error_at (OMP_CLAUSE_LOCATION (clauses), + "% clause must specify 'any' when used with " + "an % clause"); } /* OpenMP 5.1 @@ -24722,7 +24791,8 @@ c_parser_omp_declare_target (c_parser *parser) #pragma omp begin declare target clauses[optseq] new-line */ #define OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK \ - (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INDIRECT)) static void c_parser_omp_begin (c_parser *parser) @@ -24746,10 +24816,16 @@ c_parser_omp_begin (c_parser *parser) OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK, "#pragma omp begin declare target"); int device_type = 0; + int indirect = 0; for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE) - device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c); - c_omp_declare_target_attr attr = { attr_syntax, device_type }; + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE) + device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT) + indirect |= !integer_zerop (OMP_CLAUSE_INDIRECT_EXPR (c)); + } + c_omp_declare_target_attr attr = { attr_syntax, device_type, + indirect }; vec_safe_push (current_omp_declare_target_attribute, attr); } else diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 9c879457828..4580ff0576b 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -15914,6 +15914,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_INDIRECT: pc = &OMP_CLAUSE_CHAIN (c); continue; diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index f6d56b798e1..0e224ca8f65 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -479,7 +479,8 @@ copy_early_debug_info (const char *infile, const char *outfile) static void process_asm (FILE *in, FILE *out, FILE *cfile) { - int fn_count = 0, var_count = 0, dims_count = 0, regcount_count = 0; + int fn_count = 0, var_count = 0, ind_fn_count = 0; + int dims_count = 0, regcount_count = 0; struct obstack fns_os, dims_os, regcounts_os; obstack_init (&fns_os); obstack_init (&dims_os); @@ -508,7 +509,8 @@ process_asm (FILE *in, FILE *out, FILE *cfile) { IN_CODE, IN_METADATA, IN_VARS, - IN_FUNCS + IN_FUNCS, + IN_IND_FUNCS, } state = IN_CODE; while (fgets (buf, sizeof (buf), in)) { @@ -570,6 +572,17 @@ process_asm (FILE *in, FILE *out, FILE *cfile) } break; } + case IN_IND_FUNCS: + { + char *funcname; + if (sscanf (buf, "\t.8byte\t%ms\n", &funcname)) + { + fputs (buf, out); + ind_fn_count++; + continue; + } + break; + } } char dummy; @@ -597,6 +610,15 @@ process_asm (FILE *in, FILE *out, FILE *cfile) ".offload_func_table:\n", out); } + else if (sscanf (buf, " .section .gnu.offload_ind_funcs%c", &dummy) > 0) + { + state = IN_IND_FUNCS; + fputs (buf, out); + fputs ("\t.global .offload_ind_func_table\n" + "\t.type .offload_ind_func_table, @object\n" + ".offload_ind_func_table:\n", + out); + } else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0) { state = IN_METADATA; @@ -634,6 +656,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile) fprintf (cfile, "#include \n\n"); fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count); + fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count); /* Dump out function idents. */ fprintf (cfile, "static const struct hsa_kernel_description {\n" @@ -728,12 +751,14 @@ process_obj (FILE *in, FILE *cfile, uint32_t omp_requires) " const struct gcn_image *gcn_image;\n" " unsigned kernel_count;\n" " const struct hsa_kernel_description *kernel_infos;\n" + " unsigned ind_func_count;\n" " unsigned global_variable_count;\n" "} gcn_data = {\n" " %d,\n" " &gcn_image,\n" " sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n" " gcn_kernels,\n" + " gcn_num_ind_funcs,\n" " gcn_num_vars\n" "};\n\n", omp_requires); diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc index aaea9fb320d..fb75ca090df 100644 --- a/gcc/config/nvptx/mkoffload.cc +++ b/gcc/config/nvptx/mkoffload.cc @@ -51,6 +51,7 @@ struct id_map }; static id_map *func_ids, **funcs_tail = &func_ids; +static id_map *ind_func_ids, **ind_funcs_tail = &ind_func_ids; static id_map *var_ids, **vars_tail = &var_ids; /* Files to unlink. */ @@ -302,6 +303,11 @@ process (FILE *in, FILE *out, uint32_t omp_requires) output_fn_ptr = true; record_id (input + i + 9, &funcs_tail); } + else if (startswith (input + i, "IND_FUNC_MAP ")) + { + output_fn_ptr = true; + record_id (input + i + 13, &ind_funcs_tail); + } else abort (); /* Skip to next line. */ @@ -422,6 +428,77 @@ process (FILE *in, FILE *out, uint32_t omp_requires) fprintf (out, "};\\n\";\n\n"); } + if (ind_func_ids) + { + const char needle[] = "// BEGIN GLOBAL FUNCTION DECL: "; + + fprintf (out, "static const char ptx_code_%u[] =\n", obj_count++); + fprintf (out, "\t\".version "); + for (size_t i = 0; version[i] != '\0' && version[i] != '\n'; i++) + fputc (version[i], out); + fprintf (out, "\"\n\t\".target sm_"); + for (size_t i = 0; sm_ver[i] != '\0' && sm_ver[i] != '\n'; i++) + fputc (sm_ver[i], out); + fprintf (out, "\"\n\t\".file 2 \\\"\\\"\"\n"); + + /* WORKAROUND - see PR 108098 + It seems as if older CUDA JIT compiler optimizes the function pointers + in offload_func_table to NULL, which can be prevented by adding a + dummy procedure. With CUDA 11.1, it seems to work fine without + workaround while CUDA 10.2 as some ancient version have need the + workaround. Assuming CUDA 11.0 fixes it, emitting it could be + restricted to 'if (sm_ver2[0] < 8 && version2[0] < 7)' as sm_80 and + PTX ISA 7.0 are new in CUDA 11.0; for 11.1 it would be sm_86 and + PTX ISA 7.1. */ + fprintf (out, "\n\t\".func __dummy$func2 ( );\"\n"); + fprintf (out, "\t\".func __dummy$func2 ( )\"\n"); + fprintf (out, "\t\"{\"\n"); + fprintf (out, "\t\"}\"\n"); + + size_t fidx = 0; + for (id = ind_func_ids; id; id = id->next) + { + fprintf (out, "\t\".extern "); + const char *p = input + file_idx[fidx]; + while (true) + { + p = strstr (p, needle); + if (!p) + { + fidx++; + if (fidx >= file_cnt) + break; + p = input + file_idx[fidx]; + continue; + } + p += strlen (needle); + if (!startswith (p, id->ptx_name)) + continue; + p += strlen (id->ptx_name); + if (*p != '\n') + continue; + p++; + /* Skip over any directives. */ + while (!startswith (p, ".func")) + while (*p++ != ' '); + for (; *p != '\0' && *p != '\n'; p++) + fputc (*p, out); + break; + } + fprintf (out, "\"\n"); + if (fidx == file_cnt) + fatal_error (input_location, + "Cannot find function declaration for %qs", + id->ptx_name); + } + + fprintf (out, "\t\".visible .global .align 8 .u64 " + "$offload_ind_func_table[] = {"); + for (comma = "", id = ind_func_ids; id; comma = ",", id = id->next) + fprintf (out, "%s\"\n\t\t\"%s", comma, id->ptx_name); + fprintf (out, "};\\n\";\n\n"); + } + /* Dump out array of pointers to ptx object strings. */ fprintf (out, "static const struct ptx_obj {\n" " const char *code;\n" @@ -447,6 +524,12 @@ process (FILE *in, FILE *out, uint32_t omp_requires) id->dim ? id->dim : ""); fprintf (out, "\n};\n\n"); + /* Dump out indirect function idents. */ + fprintf (out, "static const char *const ind_func_mappings[] = {"); + for (comma = "", id = ind_func_ids; id; comma = ",", id = id->next) + fprintf (out, "%s\n\t\"%s\"", comma, id->ptx_name); + fprintf (out, "\n};\n\n"); + fprintf (out, "static const struct nvptx_data {\n" " uintptr_t omp_requires_mask;\n" @@ -456,12 +539,14 @@ process (FILE *in, FILE *out, uint32_t omp_requires) " unsigned var_num;\n" " const struct nvptx_fn *fn_names;\n" " unsigned fn_num;\n" + " unsigned ind_fn_num;\n" "} nvptx_data = {\n" " %d, ptx_objs, sizeof (ptx_objs) / sizeof (ptx_objs[0]),\n" " var_mappings," " sizeof (var_mappings) / sizeof (var_mappings[0]),\n" " func_mappings," - " sizeof (func_mappings) / sizeof (func_mappings[0])\n" + " sizeof (func_mappings) / sizeof (func_mappings[0]),\n" + " sizeof (ind_func_mappings) / sizeof (ind_func_mappings[0])\n" "};\n\n", omp_requires); fprintf (out, "#ifdef __cplusplus\n" diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 634c31673be..0eeff95b3f5 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -5919,7 +5919,11 @@ nvptx_record_offload_symbol (tree decl) /* OpenMP offloading does not set this attribute. */ tree dims = attr ? TREE_VALUE (attr) : NULL_TREE; - fprintf (asm_out_file, "//:FUNC_MAP \"%s\"", + fprintf (asm_out_file, "//:"); + if (lookup_attribute ("omp declare target indirect", + DECL_ATTRIBUTES (decl))) + fprintf (asm_out_file, "IND_"); + fprintf (asm_out_file, "FUNC_MAP \"%s\"", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); for (; dims; dims = TREE_CHAIN (dims)) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 98b29e9cf81..b2603d4830e 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -1831,6 +1831,7 @@ union GTY((desc ("cp_tree_node_structure (&%h)"), struct GTY(()) cp_omp_declare_target_attr { bool attr_syntax; int device_type; + bool indirect; }; struct GTY(()) cp_omp_begin_assumes_data { diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc index 0aa1e355972..9e666e5eece 100644 --- a/gcc/cp/decl2.cc +++ b/gcc/cp/decl2.cc @@ -1762,6 +1762,12 @@ cplus_decl_attributes (tree *decl, tree attributes, int flags) attributes = tree_cons (get_identifier ("omp declare target nohost"), NULL_TREE, attributes); + if (last.indirect + && !lookup_attribute ("omp declare target indirect", + attributes)) + attributes + = tree_cons (get_identifier ("omp declare target indirect"), + NULL_TREE, attributes); } } } diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 0fff9819a3a..5116bcb78f6 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -37524,6 +37524,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_IN_REDUCTION; else if (!strcmp ("inbranch", p)) result = PRAGMA_OMP_CLAUSE_INBRANCH; + else if (!strcmp ("indirect", p)) + result = PRAGMA_OMP_CLAUSE_INDIRECT; else if (!strcmp ("independent", p)) result = PRAGMA_OACC_CLAUSE_INDEPENDENT; else if (!strcmp ("is_device_ptr", p)) @@ -38558,6 +38560,46 @@ cp_parser_omp_clause_final (cp_parser *parser, tree list, location_t location) return c; } +/* OpenMP 5.1: + indirect [( expression )] +*/ + +static tree +cp_parser_omp_clause_indirect (cp_parser *parser, tree list, + location_t location) +{ + tree t; + + if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)) + { + matching_parens parens; + if (!parens.require_open (parser)) + return list; + + bool non_constant_p; + t = cp_parser_constant_expression (parser, true, &non_constant_p); + + if (t != error_mark_node && non_constant_p) + error_at (location, "expected constant logical expression"); + + if (t == error_mark_node + || !parens.require_close (parser)) + cp_parser_skip_to_closing_parenthesis (parser, /*recovering=*/true, + /*or_comma=*/false, + /*consume_paren=*/true); + } + else + t = integer_one_node; + + check_no_duplicate_clause (list, OMP_CLAUSE_INDIRECT, "indirect", location); + + tree c = build_omp_clause (location, OMP_CLAUSE_INDIRECT); + OMP_CLAUSE_INDIRECT_EXPR (c) = t; + OMP_CLAUSE_CHAIN (c) = list; + + return c; +} + /* OpenMP 2.5: if ( expression ) @@ -41629,6 +41671,11 @@ cp_parser_omp_all_clauses (cp_parser *parser, omp_clause_mask mask, true, clauses); c_name = "in_reduction"; break; + case PRAGMA_OMP_CLAUSE_INDIRECT: + clauses = cp_parser_omp_clause_indirect (parser, clauses, + token->location); + c_name = "indirect"; + break; case PRAGMA_OMP_CLAUSE_LASTPRIVATE: clauses = cp_parser_omp_clause_lastprivate (parser, clauses); c_name = "lastprivate"; @@ -48171,7 +48218,8 @@ cp_maybe_parse_omp_decl (tree decl, tree d) on #pragma omp declare target. Return false if errors were reported. */ static bool -handle_omp_declare_target_clause (tree c, tree t, int device_type) +handle_omp_declare_target_clause (tree c, tree t, int device_type, + bool indirect) { tree at1 = lookup_attribute ("omp declare target", DECL_ATTRIBUTES (t)); tree at2 = lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)); @@ -48235,6 +48283,17 @@ handle_omp_declare_target_clause (tree c, tree t, int device_type) DECL_ATTRIBUTES (t) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (t)); } } + if (indirect) + { + tree at4 = lookup_attribute ("omp declare target indirect", + DECL_ATTRIBUTES (t)); + if (at4 == NULL_TREE) + { + id = get_identifier ("omp declare target indirect"); + DECL_ATTRIBUTES (t) + = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (t)); + } + } return true; } @@ -48252,14 +48311,16 @@ handle_omp_declare_target_clause (tree c, tree t, int device_type) ( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_TO) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ENTER) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_LINK) \ - | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE)) + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INDIRECT)) static void cp_parser_omp_declare_target (cp_parser *parser, cp_token *pragma_tok) { tree clauses = NULL_TREE; int device_type = 0; - bool only_device_type = true; + bool indirect = false; + bool only_device_type_or_indirect = true; if (cp_lexer_next_token_is (parser->lexer, CPP_NAME) || (cp_lexer_next_token_is (parser->lexer, CPP_COMMA) && cp_lexer_nth_token_is (parser->lexer, 2, CPP_NAME))) @@ -48277,21 +48338,26 @@ cp_parser_omp_declare_target (cp_parser *parser, cp_token *pragma_tok) else { cp_omp_declare_target_attr a - = { parser->lexer->in_omp_attribute_pragma, -1 }; + = { parser->lexer->in_omp_attribute_pragma, -1, false }; vec_safe_push (scope_chain->omp_declare_target_attribute, a); cp_parser_require_pragma_eol (parser, pragma_tok); return; } - for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE) - device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c); for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) { if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE) + device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT) + indirect |= !integer_zerop (OMP_CLAUSE_INDIRECT_EXPR (c)); + } + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT) continue; tree t = OMP_CLAUSE_DECL (c); - only_device_type = false; - if (!handle_omp_declare_target_clause (c, t, device_type)) + only_device_type_or_indirect = false; + if (!handle_omp_declare_target_clause (c, t, device_type, indirect)) continue; if (VAR_OR_FUNCTION_DECL_P (t) && DECL_LOCAL_DECL_P (t) @@ -48299,11 +48365,15 @@ cp_parser_omp_declare_target (cp_parser *parser, cp_token *pragma_tok) && DECL_LOCAL_DECL_ALIAS (t) && DECL_LOCAL_DECL_ALIAS (t) != error_mark_node) handle_omp_declare_target_clause (c, DECL_LOCAL_DECL_ALIAS (t), - device_type); + device_type, indirect); } - if (device_type && only_device_type) + if ((device_type || indirect) && only_device_type_or_indirect) error_at (OMP_CLAUSE_LOCATION (clauses), - "directive with only % clause"); + "directive with only % or % clauses"); + if (indirect && device_type && device_type != OMP_CLAUSE_DEVICE_TYPE_ANY) + error_at (OMP_CLAUSE_LOCATION (clauses), + "% clause must specify 'any' when used with " + "an % clause"); } /* OpenMP 5.1 @@ -48312,7 +48382,8 @@ cp_parser_omp_declare_target (cp_parser *parser, cp_token *pragma_tok) # pragma omp begin declare target clauses[optseq] new-line */ #define OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK \ - (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INDIRECT)) static void cp_parser_omp_begin (cp_parser *parser, cp_token *pragma_tok) @@ -48342,11 +48413,16 @@ cp_parser_omp_begin (cp_parser *parser, cp_token *pragma_tok) "#pragma omp begin declare target", pragma_tok); int device_type = 0; + bool indirect = 0; for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE) - device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c); + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE) + device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT) + indirect |= !integer_zerop (OMP_CLAUSE_INDIRECT_EXPR (c)); + } cp_omp_declare_target_attr a - = { in_omp_attribute_pragma, device_type }; + = { in_omp_attribute_pragma, device_type, indirect }; vec_safe_push (scope_chain->omp_declare_target_attribute, a); } else diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 37bffca8e55..4059e74bdb7 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -8888,6 +8888,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_INDIRECT: break; case OMP_CLAUSE_MERGEABLE: diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index 32c0f5ac6db..db6a22a444e 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -68,6 +68,7 @@ enum LTO_symtab_tags LTO_symtab_edge, LTO_symtab_indirect_edge, LTO_symtab_variable, + LTO_symtab_indirect_function, LTO_symtab_last_tag }; @@ -1111,6 +1112,18 @@ output_offload_tables (void) (*offload_vars)[i]); } + for (unsigned i = 0; i < vec_safe_length (offload_ind_funcs); i++) + { + symtab_node *node = symtab_node::get ((*offload_ind_funcs)[i]); + if (!node) + continue; + node->force_output = true; + streamer_write_enum (ob->main_stream, LTO_symtab_tags, + LTO_symtab_last_tag, LTO_symtab_indirect_function); + lto_output_fn_decl_ref (ob->decl_state, ob->main_stream, + (*offload_ind_funcs)[i]); + } + if (output_requires) { HOST_WIDE_INT val = ((HOST_WIDE_INT) omp_requires_mask @@ -1134,6 +1147,7 @@ output_offload_tables (void) { vec_free (offload_funcs); vec_free (offload_vars); + vec_free (offload_ind_funcs); } } @@ -1863,6 +1877,19 @@ input_offload_tables (bool do_force_output) varpool_node::get (var_decl)->force_output = 1; tmp_decl = var_decl; } + else if (tag == LTO_symtab_indirect_function) + { + tree fn_decl + = lto_input_fn_decl_ref (ib, file_data); + vec_safe_push (offload_ind_funcs, fn_decl); + + /* Prevent IPA from removing fn_decl as unreachable, since there + may be no refs from the parent function to child_fn in offload + LTO mode. */ + if (do_force_output) + cgraph_node::get (fn_decl)->mark_force_output (); + tmp_decl = fn_decl; + } else if (tag == LTO_symtab_edge) { static bool error_emitted = false; diff --git a/gcc/lto-section-names.h b/gcc/lto-section-names.h index aa1b2f2eeff..f7ed622772f 100644 --- a/gcc/lto-section-names.h +++ b/gcc/lto-section-names.h @@ -37,5 +37,6 @@ extern const char *section_name_prefix; #define OFFLOAD_VAR_TABLE_SECTION_NAME ".gnu.offload_vars" #define OFFLOAD_FUNC_TABLE_SECTION_NAME ".gnu.offload_funcs" +#define OFFLOAD_IND_FUNC_TABLE_SECTION_NAME ".gnu.offload_ind_funcs" #endif /* GCC_LTO_SECTION_NAMES_H */ diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index e0f03263db0..ed78d49d205 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -445,6 +445,9 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_UPDATE, "GOMP_target_update_ext", DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA, "GOMP_target_enter_exit_data", BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR, + "GOMP_target_map_indirect_ptr", + BT_FN_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS4, "GOMP_teams4", BT_FN_BOOL_UINT_UINT_UINT_BOOL, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS_REG, "GOMP_teams_reg", diff --git a/gcc/omp-offload.cc b/gcc/omp-offload.cc index 0d3c8794d54..1d6dfef74fc 100644 --- a/gcc/omp-offload.cc +++ b/gcc/omp-offload.cc @@ -86,7 +86,7 @@ struct oacc_loop }; /* Holds offload tables with decls. */ -vec *offload_funcs, *offload_vars; +vec *offload_funcs, *offload_vars, *offload_ind_funcs; /* Return level at which oacc routine may spawn a partitioned loop, or -1 if it is not a routine (i.e. is an offload fn). */ @@ -351,6 +351,9 @@ omp_discover_implicit_declare_target (void) if (DECL_SAVED_TREE (node->decl)) { struct cgraph_node *cgn; + if (lookup_attribute ("omp declare target indirect", + DECL_ATTRIBUTES (node->decl))) + vec_safe_push (offload_ind_funcs, node->decl); if (omp_declare_target_fn_p (node->decl)) worklist.safe_push (node->decl); else if (DECL_STRUCT_FUNCTION (node->decl) @@ -397,49 +400,66 @@ omp_finish_file (void) { unsigned num_funcs = vec_safe_length (offload_funcs); unsigned num_vars = vec_safe_length (offload_vars); + unsigned num_ind_funcs = vec_safe_length (offload_ind_funcs); - if (num_funcs == 0 && num_vars == 0) + if (num_funcs == 0 && num_vars == 0 && num_ind_funcs == 0) return; if (targetm_common.have_named_sections) { - vec *v_f, *v_v; + vec *v_f, *v_v, *v_if; vec_alloc (v_f, num_funcs); vec_alloc (v_v, num_vars * 2); + vec_alloc (v_if, num_ind_funcs); add_decls_addresses_to_decl_constructor (offload_funcs, v_f); add_decls_addresses_to_decl_constructor (offload_vars, v_v); + add_decls_addresses_to_decl_constructor (offload_ind_funcs, v_if); tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node, vec_safe_length (v_v)); tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node, num_funcs); + tree ind_funcs_decl_type = build_array_type_nelts (pointer_sized_int_node, + num_ind_funcs); + SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node)); SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node)); + SET_TYPE_ALIGN (ind_funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node)); tree ctor_v = build_constructor (vars_decl_type, v_v); tree ctor_f = build_constructor (funcs_decl_type, v_f); - TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1; - TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1; + tree ctor_if = build_constructor (ind_funcs_decl_type, v_if); + TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = TREE_CONSTANT (ctor_if) = 1; + TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = TREE_STATIC (ctor_if) = 1; tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (".offload_func_table"), funcs_decl_type); tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (".offload_var_table"), vars_decl_type); - TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1; + tree ind_funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (".offload_ind_func_table"), + ind_funcs_decl_type); + TREE_STATIC (funcs_decl) = TREE_STATIC (ind_funcs_decl) = 1; + TREE_STATIC (vars_decl) = 1; /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node), otherwise a joint table in a binary will contain padding between tables from multiple object files. */ - DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1; + DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (ind_funcs_decl) = 1; + DECL_USER_ALIGN (vars_decl) = 1; SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type)); SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type)); + SET_DECL_ALIGN (ind_funcs_decl, TYPE_ALIGN (ind_funcs_decl_type)); DECL_INITIAL (funcs_decl) = ctor_f; DECL_INITIAL (vars_decl) = ctor_v; + DECL_INITIAL (ind_funcs_decl) = ctor_if; set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME); set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME); - + set_decl_section_name (ind_funcs_decl, + OFFLOAD_IND_FUNC_TABLE_SECTION_NAME); varpool_node::finalize_decl (vars_decl); varpool_node::finalize_decl (funcs_decl); + varpool_node::finalize_decl (ind_funcs_decl); } else { @@ -471,6 +491,15 @@ omp_finish_file (void) #endif targetm.record_offload_symbol (it); } + for (unsigned i = 0; i < num_ind_funcs; i++) + { + tree it = (*offload_ind_funcs)[i]; + /* See also add_decls_addresses_to_decl_constructor + and output_offload_tables in lto-cgraph.cc. */ + if (!in_lto_p && !symtab_node::get (it)) + continue; + targetm.record_offload_symbol (it); + } } } @@ -2603,6 +2632,11 @@ execute_omp_device_lower () gimple_stmt_iterator gsi; bool calls_declare_variant_alt = cgraph_node::get (cfun->decl)->calls_declare_variant_alt; +#ifdef ACCEL_COMPILER + bool omp_redirect_indirect_calls = vec_safe_length (offload_ind_funcs) > 0; + tree map_ptr_fn + = builtin_decl_explicit (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR); +#endif FOR_EACH_BB_FN (bb, cfun) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { @@ -2621,6 +2655,33 @@ execute_omp_device_lower () update_stmt (stmt); } } +#ifdef ACCEL_COMPILER + if (omp_redirect_indirect_calls + && gimple_call_fndecl (stmt) == NULL_TREE) + { + gcall *orig_call = dyn_cast (stmt); + tree call_fn = gimple_call_fn (stmt); + tree fn_ty = TREE_TYPE (call_fn); + + if (TREE_CODE (call_fn) == OBJ_TYPE_REF) + { + tree obj_ref = create_tmp_reg (TREE_TYPE (call_fn), + ".ind_fn_objref"); + gimple *gassign = gimple_build_assign (obj_ref, call_fn); + gsi_insert_before (&gsi, gassign, GSI_SAME_STMT); + call_fn = obj_ref; + } + tree mapped_fn = create_tmp_reg (fn_ty, ".ind_fn"); + gimple *gcall = + gimple_build_call (map_ptr_fn, 1, call_fn); + gimple_set_location (gcall, gimple_location (stmt)); + gimple_call_set_lhs (gcall, mapped_fn); + gsi_insert_before (&gsi, gcall, GSI_SAME_STMT); + + gimple_call_set_fn (orig_call, mapped_fn); + update_stmt (orig_call); + } +#endif continue; } tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE; @@ -2757,9 +2818,15 @@ public: /* opt_pass methods: */ bool gate (function *fun) final override { +#ifdef ACCEL_COMPILER + bool offload_ind_funcs_p = vec_safe_length (offload_ind_funcs) > 0; +#else + bool offload_ind_funcs_p = false; +#endif return (!(fun->curr_properties & PROP_gimple_lomp_dev) || (flag_openmp - && cgraph_node::get (fun->decl)->calls_declare_variant_alt)); + && (cgraph_node::get (fun->decl)->calls_declare_variant_alt + || offload_ind_funcs_p))); } unsigned int execute (function *) final override { diff --git a/gcc/omp-offload.h b/gcc/omp-offload.h index 73711e74c7d..ae364422417 100644 --- a/gcc/omp-offload.h +++ b/gcc/omp-offload.h @@ -28,6 +28,7 @@ extern int oacc_fn_attrib_level (tree attr); extern GTY(()) vec *offload_funcs; extern GTY(()) vec *offload_vars; +extern GTY(()) vec *offload_ind_funcs; extern void omp_finish_file (void); extern void omp_discover_implicit_declare_target (void); diff --git a/gcc/testsuite/c-c++-common/gomp/declare-target-7.c b/gcc/testsuite/c-c++-common/gomp/declare-target-7.c index 747000a74b9..e37b4652050 100644 --- a/gcc/testsuite/c-c++-common/gomp/declare-target-7.c +++ b/gcc/testsuite/c-c++-common/gomp/declare-target-7.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-fopenmp" } */ -#pragma omp declare target device_type (any) /* { dg-error "directive with only 'device_type' clause" } */ +#pragma omp declare target device_type (any) /* { dg-error "directive with only 'device_type' or 'indirect' clauses" } */ void f1 (void) {} #pragma omp declare target device_type (host) to (f1) device_type (nohost) /* { dg-error "too many 'device_type' clauses" } */ diff --git a/gcc/testsuite/c-c++-common/gomp/declare-target-indirect-1.c b/gcc/testsuite/c-c++-common/gomp/declare-target-indirect-1.c new file mode 100644 index 00000000000..0fcbb2d04e4 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/declare-target-indirect-1.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp" } */ + +extern int a, b; +#define X 1 +#define Y 0 + +#pragma omp begin declare target indirect +void fn1 (void) { } +#pragma omp end declare target + +#pragma omp begin declare target indirect (1) +void fn2 (void) { } +#pragma omp end declare target + +#pragma omp begin declare target indirect (0) +void fn3 (void) { } +#pragma omp end declare target + +void fn4 (void) { } +#pragma omp declare target indirect to (fn4) + +void fn5 (void) { } +#pragma omp declare target indirect (1) to (fn5) + +void fn6 (void) { } +#pragma omp declare target indirect (0) to (fn6) + +void fn7 (void) { } +#pragma omp declare target indirect (-1) to (fn7) + +/* Compile-time non-constant expressions are not allowed. */ +void fn8 (void) { } +#pragma omp declare target indirect (a + b) to (fn8) /* { dg-error "expected constant logical expression" } */ + +/* Compile-time constant expressions are permissible. */ +void fn9 (void) { } +#pragma omp declare target indirect (X*Y) to (fn9) + +/* 'omp declare target'...'omp end declare target' form cannot take clauses. */ +#pragma omp declare target indirect /* { dg-error "directive with only 'device_type' or 'indirect' clauses" }*/ +void fn10 (void) { } +#pragma omp end declare target /* { dg-error "'#pragma omp end declare target' without corresponding '#pragma omp declare target' or '#pragma omp begin declare target'" } */ + +void fn11 (void) { } +#pragma omp declare target indirect (1) indirect (0) to (fn11) /* { dg-error "too many .indirect. clauses" } */ + +void fn12 (void) { } +#pragma omp declare target indirect ("abs") to (fn12) + +void fn13 (void) { } +#pragma omp declare target indirect (5.5) enter (fn13) + +void fn14 (void) { } +#pragma omp declare target indirect (1) device_type (host) enter (fn14) /* { dg-error "'device_type' clause must specify 'any' when used with an 'indirect' clause" } */ + +void fn15 (void) { } +#pragma omp declare target indirect (0) device_type (nohost) enter (fn15) + +/* Indirect on a variable should have no effect. */ +int x; +#pragma omp declare target indirect to(x) diff --git a/gcc/testsuite/c-c++-common/gomp/declare-target-indirect-2.c b/gcc/testsuite/c-c++-common/gomp/declare-target-indirect-2.c new file mode 100644 index 00000000000..6ba278b3ef0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/declare-target-indirect-2.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp -fdump-tree-gimple" } */ + +#pragma omp begin declare target indirect +void fn1 (void) { } +#pragma omp end declare target +/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target, omp declare target block, omp declare target indirect\\\)\\\)\\\nvoid fn1" "gimple" } } */ + +#pragma omp begin declare target indirect (0) +void fn2 (void) { } +#pragma omp end declare target +/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target, omp declare target block\\\)\\\)\\\nvoid fn2" "gimple" } } */ + +void fn3 (void) { } +#pragma omp declare target indirect to (fn3) +/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target indirect, omp declare target\\\)\\\)\\\nvoid fn3" "gimple" } } */ + +void fn4 (void) { } +#pragma omp declare target indirect (0) to (fn4) +/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target\\\)\\\)\\\nvoid fn4" "gimple" } } */ + +#pragma omp begin declare target indirect(1) + int foo(void) { return 5; } + #pragma omp begin declare target indirect(0) + int bar(void) { return 8; } + int baz(void) { return 11; } + #pragma omp declare target indirect enter(baz) + #pragma omp end declare target +#pragma omp end declare target +/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target, omp declare target block, omp declare target indirect\\\)\\\)\\\nint foo" "gimple" } } */ +/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target, omp declare target block\\\)\\\)\\\nint bar" "gimple" } } */ +/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target indirect, omp declare target, omp declare target block\\\)\\\)\\\nint baz" "gimple" } } */ diff --git a/gcc/testsuite/g++.dg/gomp/attrs-21.C b/gcc/testsuite/g++.dg/gomp/attrs-21.C index 46bdef268da..03c760f78ab 100644 --- a/gcc/testsuite/g++.dg/gomp/attrs-21.C +++ b/gcc/testsuite/g++.dg/gomp/attrs-21.C @@ -20,7 +20,7 @@ foo () [[omp::decl (declare target (v8))]] static int v9; // { dg-error "expected end of line before '\\\(' token" } [[omp::decl (declare target enter (v8))]] static int v10; // { dg-error "expected an OpenMP clause before '\\\(' token" } [[omp::decl (declare target, link (v9))]] static int v11; // { dg-error "expected an OpenMP clause before '\\\(' token" } - [[omp::decl (declare target device_type (any))]] static int v12; // { dg-error "directive with only 'device_type' clause" } + [[omp::decl (declare target device_type (any))]] static int v12; // { dg-error "directive with only 'device_type' or 'indirect' clauses" } } int i; diff --git a/gcc/testsuite/g++.dg/gomp/declare-target-indirect-1.C b/gcc/testsuite/g++.dg/gomp/declare-target-indirect-1.C new file mode 100644 index 00000000000..1d66ec9f741 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/declare-target-indirect-1.C @@ -0,0 +1,17 @@ +// { dg-skip-if "c++98 does not support attributes" { c++98_only } } + +[[omp::decl (declare target, indirect(1))]] // { dg-error "directive with only 'device_type' or 'indirect' clause" } +int f (void) { return 5; } + +[[omp::decl (declare target indirect)]] // { dg-error "directive with only 'device_type' or 'indirect' clause" } +int g (void) { return 8; } + +[[omp::directive (begin declare target, indirect)]]; +int h (void) { return 11; } +[[omp::directive (end declare target)]]; + +int i (void) { return 8; } +[[omp::directive (declare target to(i), indirect (1))]]; + +int j (void) { return 11; } +[[omp::directive (declare target indirect enter (j))]]; diff --git a/gcc/testsuite/gcc.dg/gomp/attrs-21.c b/gcc/testsuite/gcc.dg/gomp/attrs-21.c index bd8ff112f00..ca97b76e82b 100644 --- a/gcc/testsuite/gcc.dg/gomp/attrs-21.c +++ b/gcc/testsuite/gcc.dg/gomp/attrs-21.c @@ -21,7 +21,7 @@ foo () [[omp::decl (declare target (v8))]] static int v9; /* { dg-error "expected end of line before '\\\(' token" } */ [[omp::decl (declare target enter (v8))]] static int v10; /* { dg-error "expected an OpenMP clause before '\\\(' token" } */ [[omp::decl (declare target, link (v9))]] static int v11; /* { dg-error "expected an OpenMP clause before '\\\(' token" } */ - [[omp::decl (declare target device_type (any))]] static int v12; /* { dg-error "directive with only 'device_type' clause" } */ + [[omp::decl (declare target device_type (any))]] static int v12; /* { dg-error "directive with only 'device_type' or 'indirect' clauses" } */ } int i; diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 13435344401..65e51b939a2 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -350,6 +350,9 @@ enum omp_clause_code { /* OpenMP clause: doacross ({source,sink}:vec). */ OMP_CLAUSE_DOACROSS, + /* OpenMP clause: indirect [(constant-integer-expression)]. */ + OMP_CLAUSE_INDIRECT, + /* Internal structure to hold OpenACC cache directive's variable-list. #pragma acc cache (variable-list). */ OMP_CLAUSE__CACHE_, diff --git a/gcc/tree.cc b/gcc/tree.cc index 9c9b057cd88..33ea1d2e2d0 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -269,6 +269,7 @@ unsigned const char omp_clause_num_ops[] = 2, /* OMP_CLAUSE_MAP */ 1, /* OMP_CLAUSE_HAS_DEVICE_ADDR */ 1, /* OMP_CLAUSE_DOACROSS */ + 1, /* OMP_CLAUSE_INDIRECT */ 2, /* OMP_CLAUSE__CACHE_ */ 2, /* OMP_CLAUSE_GANG */ 1, /* OMP_CLAUSE_ASYNC */ @@ -361,6 +362,7 @@ const char * const omp_clause_code_name[] = "map", "has_device_addr", "doacross", + "indirect", "_cache_", "gang", "async", diff --git a/gcc/tree.h b/gcc/tree.h index 632a1af6324..086b55f0375 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1842,6 +1842,10 @@ class auto_suppress_location_wrappers #define OMP_CLAUSE_DEVICE_TYPE_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TYPE)->omp_clause.subcode.device_type_kind) +#define OMP_CLAUSE_INDIRECT_EXPR(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_INDIRECT), 0) + + /* True if there is a device clause with a device-modifier 'ancestor'. */ #define OMP_CLAUSE_DEVICE_ANCESTOR(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE)->base.public_flag) diff --git a/include/gomp-constants.h b/include/gomp-constants.h index 89b966e63c6..f1579bb2519 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -316,7 +316,7 @@ enum gomp_map_kind /* Versions of libgomp and device-specific plugins. GOMP_VERSION should be incremented whenever an ABI-incompatible change is introduced to the plugin interface defined in libgomp/libgomp.h. */ -#define GOMP_VERSION 2 +#define GOMP_VERSION 3 #define GOMP_VERSION_NVIDIA_PTX 1 #define GOMP_VERSION_GCN 3 @@ -324,6 +324,8 @@ enum gomp_map_kind #define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) #define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff) +#define GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS(VER) (GOMP_VERSION_LIB(VER) >= 3) + #define GOMP_DIM_GANG 0 #define GOMP_DIM_WORKER 1 #define GOMP_DIM_VECTOR 2 diff --git a/libgcc/offloadstuff.c b/libgcc/offloadstuff.c index 4e1c4d41dd5..18c5bf89b69 100644 --- a/libgcc/offloadstuff.c +++ b/libgcc/offloadstuff.c @@ -43,6 +43,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #if defined(HAVE_GAS_HIDDEN) && ENABLE_OFFLOADING == 1 #define OFFLOAD_FUNC_TABLE_SECTION_NAME ".gnu.offload_funcs" +#define OFFLOAD_IND_FUNC_TABLE_SECTION_NAME ".gnu.offload_ind_funcs" #define OFFLOAD_VAR_TABLE_SECTION_NAME ".gnu.offload_vars" #ifdef CRT_BEGIN @@ -53,6 +54,9 @@ const void *const __offload_func_table[0] const void *const __offload_var_table[0] __attribute__ ((__used__, visibility ("hidden"), section (OFFLOAD_VAR_TABLE_SECTION_NAME))) = { }; +const void *const __offload_ind_func_table[0] + __attribute__ ((__used__, visibility ("hidden"), + section (OFFLOAD_IND_FUNC_TABLE_SECTION_NAME))) = { }; #elif defined CRT_END @@ -62,19 +66,25 @@ const void *const __offload_funcs_end[0] const void *const __offload_vars_end[0] __attribute__ ((__used__, visibility ("hidden"), section (OFFLOAD_VAR_TABLE_SECTION_NAME))) = { }; +const void *const __offload_ind_funcs_end[0] + __attribute__ ((__used__, visibility ("hidden"), + section (OFFLOAD_IND_FUNC_TABLE_SECTION_NAME))) = { }; #elif defined CRT_TABLE extern const void *const __offload_func_table[]; extern const void *const __offload_var_table[]; +extern const void *const __offload_ind_func_table[]; extern const void *const __offload_funcs_end[]; extern const void *const __offload_vars_end[]; +extern const void *const __offload_ind_funcs_end[]; const void *const __OFFLOAD_TABLE__[] __attribute__ ((__visibility__ ("hidden"))) = { &__offload_func_table, &__offload_funcs_end, - &__offload_var_table, &__offload_vars_end + &__offload_var_table, &__offload_vars_end, + &__offload_ind_func_table, &__offload_ind_funcs_end, }; #else /* ! CRT_BEGIN && ! CRT_END && ! CRT_TABLE */ diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index ceb8c910abd..1871590596d 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -72,7 +72,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c + oacc-target.c target-indirect.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 186937da4e9..56a6beab867 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -219,7 +219,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \ oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ - oacc-target.lo $(am__objects_1) + oacc-target.lo target-indirect.lo $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -552,7 +552,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c $(am__append_3) + oacc-target.c target-indirect.c $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -780,6 +780,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ diff --git a/libgomp/config/accel/target-indirect.c b/libgomp/config/accel/target-indirect.c new file mode 100644 index 00000000000..6ee82a0ebd0 --- /dev/null +++ b/libgomp/config/accel/target-indirect.c @@ -0,0 +1,126 @@ +/* Copyright (C) 2023 Free Software Foundation, Inc. + + Contributed by Siemens. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include +#include "libgomp.h" + +#define splay_tree_prefix indirect +#define splay_tree_c +#include "splay-tree.h" + +volatile void **GOMP_INDIRECT_ADDR_MAP = NULL; + +/* Use a splay tree to lookup the target address instead of using a + linear search. */ +#define USE_SPLAY_TREE_LOOKUP + +#ifdef USE_SPLAY_TREE_LOOKUP + +static struct indirect_splay_tree_s indirect_map; +static indirect_splay_tree_node indirect_array = NULL; + +/* Build the splay tree used for host->target address lookups. */ + +void +build_indirect_map (void) +{ + size_t num_ind_funcs = 0; + volatile void **map_entry; + static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */ + + if (!GOMP_INDIRECT_ADDR_MAP) + return; + + gomp_mutex_lock (&lock); + + if (!indirect_array) + { + /* Count the number of entries in the NULL-terminated address map. */ + for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry; + map_entry += 2, num_ind_funcs++); + + /* Build splay tree for address lookup. */ + indirect_array = gomp_malloc (num_ind_funcs * sizeof (*indirect_array)); + indirect_splay_tree_node array = indirect_array; + map_entry = GOMP_INDIRECT_ADDR_MAP; + + for (int i = 0; i < num_ind_funcs; i++, array++) + { + indirect_splay_tree_key k = &array->key; + k->host_addr = (uint64_t) *map_entry++; + k->target_addr = (uint64_t) *map_entry++; + array->left = NULL; + array->right = NULL; + indirect_splay_tree_insert (&indirect_map, array); + } + } + + gomp_mutex_unlock (&lock); +} + +void * +GOMP_target_map_indirect_ptr (void *ptr) +{ + /* NULL pointers always resolve to NULL. */ + if (!ptr) + return ptr; + + assert (indirect_array); + + struct indirect_splay_tree_key_s k; + indirect_splay_tree_key node = NULL; + + k.host_addr = (uint64_t) ptr; + node = indirect_splay_tree_lookup (&indirect_map, &k); + + return node ? (void *) node->target_addr : ptr; +} + +#else + +void +build_indirect_map (void) +{ +} + +void * +GOMP_target_map_indirect_ptr (void *ptr) +{ + /* NULL pointers always resolve to NULL. */ + if (!ptr) + return ptr; + + assert (GOMP_INDIRECT_ADDR_MAP); + + for (volatile void **map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry; + map_entry += 2) + if (*map_entry == ptr) + return (void *) *(map_entry + 1); + + return ptr; +} + +#endif diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c index f03207c84e3..fb20cbbcf9f 100644 --- a/libgomp/config/gcn/team.c +++ b/libgomp/config/gcn/team.c @@ -30,6 +30,7 @@ #include static void gomp_thread_start (struct gomp_thread_pool *); +extern void build_indirect_map (void); /* This externally visible function handles target region entry. It sets up a per-team thread pool and transfers control by returning to @@ -45,6 +46,9 @@ gomp_gcn_enter_kernel (void) { int threadid = __builtin_gcn_dim_pos (1); + /* Initialize indirect function support. */ + build_indirect_map (); + if (threadid == 0) { int numthreads = __builtin_gcn_dim_size (1); diff --git a/libgomp/config/linux/target-indirect.c b/libgomp/config/linux/target-indirect.c new file mode 100644 index 00000000000..0ab9bc52d79 --- /dev/null +++ b/libgomp/config/linux/target-indirect.c @@ -0,0 +1,32 @@ +/* Copyright (C) 2023 Free Software Foundation, Inc. + + Contributed by Siemens. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +void * +GOMP_target_map_indirect_ptr (void *ptr) +{ + /* Calls to this function should not be generated for host code. */ + __builtin_unreachable (); +} diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c index af5f3171a47..59521fabd99 100644 --- a/libgomp/config/nvptx/team.c +++ b/libgomp/config/nvptx/team.c @@ -35,6 +35,7 @@ struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon)); int __gomp_team_num __attribute__((shared,nocommon)); static void gomp_thread_start (struct gomp_thread_pool *); +extern void build_indirect_map (void); /* This externally visible function handles target region entry. It @@ -52,6 +53,10 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data) int tid, ntids; asm ("mov.u32 %0, %%tid.y;" : "=r" (tid)); asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids)); + + /* Initialize indirect function support. */ + build_indirect_map (); + if (tid == 0) { gomp_global_icv.nthreads_var = ntids; diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index dc993882c3b..3ce032c5cc0 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -107,6 +107,8 @@ struct addr_pair must be stringified). */ #define GOMP_ADDITIONAL_ICVS __gomp_additional_icvs +#define GOMP_INDIRECT_ADDR_MAP __gomp_indirect_addr_map + /* Miscellaneous functions. */ extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc)); @@ -132,7 +134,8 @@ extern bool GOMP_OFFLOAD_init_device (int); extern bool GOMP_OFFLOAD_fini_device (int); extern unsigned GOMP_OFFLOAD_version (void); extern int GOMP_OFFLOAD_load_image (int, unsigned, const void *, - struct addr_pair **, uint64_t **); + struct addr_pair **, uint64_t **, + uint64_t *); extern bool GOMP_OFFLOAD_unload_image (int, unsigned, const void *); extern void *GOMP_OFFLOAD_alloc (int, size_t); extern bool GOMP_OFFLOAD_free (int, void *); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 68f20651fbf..15a767cf317 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1274,6 +1274,29 @@ reverse_splay_compare (reverse_splay_tree_key x, reverse_splay_tree_key y) #define splay_tree_prefix reverse #include "splay-tree.h" +/* Indirect target function splay-tree handling. */ + +struct indirect_splay_tree_key_s { + uint64_t host_addr, target_addr; +}; + +typedef struct indirect_splay_tree_node_s *indirect_splay_tree_node; +typedef struct indirect_splay_tree_s *indirect_splay_tree; +typedef struct indirect_splay_tree_key_s *indirect_splay_tree_key; + +static inline int +indirect_splay_compare (indirect_splay_tree_key x, indirect_splay_tree_key y) +{ + if (x->host_addr < y->host_addr) + return -1; + if (x->host_addr > y->host_addr) + return 1; + return 0; +} + +#define splay_tree_prefix indirect +#include "splay-tree.h" + struct target_mem_desc { /* Reference count. */ uintptr_t refcount; diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index ce6b719a57f..90c401453b2 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -419,6 +419,7 @@ GOMP_5.1 { GOMP_5.1.1 { global: GOMP_taskwait_depend_nowait; + GOMP_target_map_indirect_ptr; } GOMP_5.1; OACC_2.0 { diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index b635f81750b..9cb893e7719 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -311,7 +311,7 @@ The OpenMP 4.5 specification is fully supported. @item Iterators in @code{target update} motion clauses and @code{map} clauses @tab N @tab @item Indirect calls to the device version of a procedure or function in - @code{target} regions @tab N @tab + @code{target} regions @tab P @tab Only C and C++ @item @code{interop} directive @tab N @tab @item @code{omp_interop_t} object support in runtime routines @tab N @tab @item @code{nowait} clause in @code{taskwait} directive @tab Y @tab @@ -360,7 +360,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @item For Fortran, diagnose placing declarative before/between @code{USE}, @code{IMPORT}, and @code{IMPLICIT} as invalid @tab N @tab @item Optional comma between directive and clause in the @code{#pragma} form @tab Y @tab -@item @code{indirect} clause in @code{declare target} @tab N @tab +@item @code{indirect} clause in @code{declare target} @tab P @tab Only C and C++ @item @code{device_type(nohost)}/@code{device_type(host)} for variables @tab N @tab @item @code{present} modifier to the @code{map}, @code{to} and @code{from} clauses @tab Y @tab @@ -439,6 +439,8 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @item @code{all} as @emph{implicit-behavior} for @code{defaultmap} @tab Y @tab @item @emph{interop_types} in any position of the modifier list for the @code{init} clause of the @code{interop} construct @tab N @tab +@item Invoke virtual member functions of C++ objects created on the host device + on other devices @tab N @tab @end multitable diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 5c1675c7869..95046312ae9 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -357,6 +357,7 @@ extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *, void **); extern void GOMP_teams (unsigned int, unsigned int); extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool); +extern void *GOMP_target_map_indirect_ptr (void *); /* teams.c */ diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 5980d510838..fbab75d7d43 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -82,7 +82,8 @@ host_load_image (int n __attribute__ ((unused)), unsigned v __attribute__ ((unused)), const void *t __attribute__ ((unused)), struct addr_pair **r __attribute__ ((unused)), - uint64_t **f __attribute__ ((unused))) + uint64_t **f __attribute__ ((unused)), + uint64_t *i __attribute__ ((unused))) { return 0; } diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 4328d3de14e..7e7e2d6edfe 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -365,6 +365,7 @@ struct gcn_image_desc } *gcn_image; const unsigned kernel_count; struct hsa_kernel_description *kernel_infos; + const unsigned ind_func_count; const unsigned global_variable_count; }; @@ -3366,7 +3367,8 @@ GOMP_OFFLOAD_init_device (int n) int GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, struct addr_pair **target_table, - uint64_t **rev_fn_table) + uint64_t **rev_fn_table, + uint64_t *host_ind_fn_table) { if (GOMP_VERSION_DEV (version) != GOMP_VERSION_GCN) { @@ -3382,6 +3384,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, struct module_info *module; struct kernel_info *kernel; int kernel_count = image_desc->kernel_count; + unsigned ind_func_count = GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version) + ? image_desc->ind_func_count : 0; unsigned var_count = image_desc->global_variable_count; /* Currently, "others" is a struct of ICVS. */ int other_count = 1; @@ -3400,6 +3404,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, return -1; GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count); + GCN_DEBUG ("Encountered %d indirect functions in an image\n", ind_func_count); GCN_DEBUG ("Encountered %u global variables in an image\n", var_count); GCN_DEBUG ("Expect %d other variables in an image\n", other_count); pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2) @@ -3481,6 +3486,87 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, } } + if (ind_func_count > 0) + { + hsa_status_t status; + + /* Read indirect function table from image. */ + hsa_executable_symbol_t ind_funcs_symbol; + status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, + ".offload_ind_func_table", + agent->id, + 0, &ind_funcs_symbol); + + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not find .offload_ind_func_table symbol in the " + "code object", status); + + uint64_t ind_funcs_table_addr; + status = hsa_fns.hsa_executable_symbol_get_info_fn + (ind_funcs_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, + &ind_funcs_table_addr); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable from its symbol", status); + + uint64_t ind_funcs_table[ind_func_count]; + GOMP_OFFLOAD_dev2host (agent->device_id, ind_funcs_table, + (void*) ind_funcs_table_addr, + sizeof (ind_funcs_table)); + + /* Build host->target address map for indirect functions. */ + uint64_t ind_fn_map[ind_func_count * 2 + 1]; + for (unsigned i = 0; i < ind_func_count; i++) + { + ind_fn_map[i * 2] = host_ind_fn_table[i]; + ind_fn_map[i * 2 + 1] = ind_funcs_table[i]; + GCN_DEBUG ("Indirect function %d: %lx->%lx\n", + i, host_ind_fn_table[i], ind_funcs_table[i]); + } + ind_fn_map[ind_func_count * 2] = 0; + + /* Write the map onto the target. */ + void *map_target_addr + = GOMP_OFFLOAD_alloc (agent->device_id, sizeof (ind_fn_map)); + GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr); + + GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr, + (void*) ind_fn_map, + sizeof (ind_fn_map)); + + /* Write address of the map onto the target. */ + hsa_executable_symbol_t symbol; + + status + = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, + XSTRING (GOMP_INDIRECT_ADDR_MAP), + agent->id, 0, &symbol); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not find GOMP_INDIRECT_ADDR_MAP in code object", + status); + + uint64_t varptr; + uint32_t varsize; + + status = hsa_fns.hsa_executable_symbol_get_info_fn + (symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, + &varptr); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable from its symbol", status); + status = hsa_fns.hsa_executable_symbol_get_info_fn + (symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, + &varsize); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable size from its symbol", + status); + + GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_MAP at %lx with size %d\n", + varptr, varsize); + + GOMP_OFFLOAD_host2dev (agent->device_id, (void *) varptr, + &map_target_addr, + sizeof (map_target_addr)); + } + GCN_DEBUG ("Looking for variable %s\n", XSTRING (GOMP_ADDITIONAL_ICVS)); hsa_status_t status; diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 00d4241ae02..0548e7e09e5 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -266,6 +266,8 @@ typedef struct nvptx_tdata const struct targ_fn_launch *fn_descs; unsigned fn_num; + + unsigned ind_fn_num; } nvptx_tdata_t; /* Descriptor of a loaded function. */ @@ -1285,12 +1287,13 @@ nvptx_set_clocktick (CUmodule module, struct ptx_device *dev) int GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, struct addr_pair **target_table, - uint64_t **rev_fn_table) + uint64_t **rev_fn_table, + uint64_t *host_ind_fn_table) { CUmodule module; const char *const *var_names; const struct targ_fn_launch *fn_descs; - unsigned int fn_entries, var_entries, other_entries, i, j; + unsigned int fn_entries, var_entries, ind_fn_entries, other_entries, i, j; struct targ_fn_descriptor *targ_fns; struct addr_pair *targ_tbl; const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data; @@ -1319,6 +1322,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, var_names = img_header->var_names; fn_entries = img_header->fn_num; fn_descs = img_header->fn_descs; + ind_fn_entries = GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version) + ? img_header->ind_fn_num : 0; /* Currently, other_entries contains only the struct of ICVs. */ other_entries = 1; @@ -1373,6 +1378,60 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, targ_tbl->end = targ_tbl->start + bytes; } + if (ind_fn_entries > 0) + { + CUdeviceptr var; + size_t bytes; + + /* Read indirect function table from image. */ + CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &var, &bytes, module, + "$offload_ind_func_table"); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r)); + assert (bytes == sizeof (uint64_t) * ind_fn_entries); + + uint64_t ind_fn_table[ind_fn_entries]; + r = CUDA_CALL_NOCHECK (cuMemcpyDtoH, ind_fn_table, var, bytes); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r)); + + /* Build host->target address map for indirect functions. */ + uint64_t ind_fn_map[ind_fn_entries * 2 + 1]; + for (unsigned k = 0; k < ind_fn_entries; k++) + { + ind_fn_map[k * 2] = host_ind_fn_table[k]; + ind_fn_map[k * 2 + 1] = ind_fn_table[k]; + GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n", + k, host_ind_fn_table[k], ind_fn_table[k]); + } + ind_fn_map[ind_fn_entries * 2] = 0; + + /* Write the map onto the target. */ + void *map_target_addr + = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map)); + GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr); + + GOMP_OFFLOAD_host2dev (ord, map_target_addr, + (void*) ind_fn_map, + sizeof (ind_fn_map)); + + /* Write address of the map onto the target. */ + CUdeviceptr varptr; + size_t varsize; + r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize, + module, XSTRING (GOMP_INDIRECT_ADDR_MAP)); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("Indirect map variable not found in image: %s", + cuda_error (r)); + + GOMP_PLUGIN_debug (0, + "Indirect map variable found at %llx with size %ld\n", + varptr, varsize); + + GOMP_OFFLOAD_host2dev (ord, (void *) varptr, &map_target_addr, + sizeof (map_target_addr)); + } + CUdeviceptr varptr; size_t varsize; CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize, diff --git a/libgomp/target.c b/libgomp/target.c index 812674d19a9..f30c20255d3 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -2256,11 +2256,20 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, void **host_funcs_end = ((void ***) host_table)[1]; void **host_var_table = ((void ***) host_table)[2]; void **host_vars_end = ((void ***) host_table)[3]; + void **host_ind_func_table = NULL; + void **host_ind_funcs_end = NULL; - /* The func table contains only addresses, the var table contains addresses - and corresponding sizes. */ + if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version)) + { + host_ind_func_table = ((void ***) host_table)[4]; + host_ind_funcs_end = ((void ***) host_table)[5]; + } + + /* The func and ind_func tables contain only addresses, the var table + contains addresses and corresponding sizes. */ int num_funcs = host_funcs_end - host_func_table; int num_vars = (host_vars_end - host_var_table) / 2; + int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table); /* Load image to device and get target addresses for the image. */ struct addr_pair *target_table = NULL; @@ -2273,7 +2282,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, num_target_entries = devicep->load_image_func (devicep->target_id, version, target_data, &target_table, - rev_lookup ? &rev_target_fn_table : NULL); + rev_lookup ? &rev_target_fn_table : NULL, + num_ind_funcs + ? (uint64_t *) host_ind_func_table : NULL); if (num_target_entries != num_funcs + num_vars /* "+1" due to the additional ICV struct. */ diff --git a/libgomp/testsuite/libgomp.c++/declare-target-indirect-1.C b/libgomp/testsuite/libgomp.c++/declare-target-indirect-1.C new file mode 100644 index 00000000000..1eac6b3fa96 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-target-indirect-1.C @@ -0,0 +1,23 @@ +// { dg-run } + +#pragma omp begin declare target indirect +class C +{ +public: + int y; + int f (int x) { return x + y; } +}; +#pragma omp end declare target + +int main (void) +{ + C c; + c.y = 27; + int x; + int (C::*fn_ptr) (int) = &C::f; + +#pragma omp target map (to: c, fn_ptr) map (from: x) + x = (c.*fn_ptr) (42); + + return x != 27 + 42; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-1.c b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-1.c new file mode 100644 index 00000000000..b20bfa64dca --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-1.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ + +#pragma omp begin declare target indirect +int foo(void) { return 5; } +int bar(void) { return 8; } +int baz(void) { return 11; } +#pragma omp end declare target + +int main (void) +{ + int x; + int (*foo_ptr) (void) = &foo; + int (*bar_ptr) (void) = &bar; + int (*baz_ptr) (void) = &baz; + int expected = foo () + bar () + baz (); + +#pragma omp target map (to: foo_ptr, bar_ptr, baz_ptr) map (from: x) + x = (*foo_ptr) () + (*bar_ptr) () + (*baz_ptr) (); + + return x - expected; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c new file mode 100644 index 00000000000..9fe190efce8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ + +#define N 256 + +#pragma omp begin declare target indirect +int foo(void) { return 5; } +int bar(void) { return 8; } +int baz(void) { return 11; } +#pragma omp end declare target + +int main (void) +{ + int i, x = 0, expected = 0; + int (*fn_ptr[N])(void); + + for (i = 0; i < N; i++) + { + switch (i % 3) + { + case 0: fn_ptr[i] = &foo; + case 1: fn_ptr[i] = &bar; + case 2: fn_ptr[i] = &baz; + } + expected += (*fn_ptr[i]) (); + } + +#pragma omp target teams distribute parallel for reduction(+: x) \ + map (to: fn_ptr) map (tofrom: x) + for (int i = 0; i < N; i++) + x += (*fn_ptr[i]) (); + + return x - expected; +}