mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 03:47:02 -05:00
Simplify OpenACC 'no_create' clause implementation
For 'OFFSET_INLINED', 'gomp_map_val' does the right thing, and we may then simplify the device plugins accordingly. This is a follow-up to Subversion r279551 (Git commita6163563f2) "Add OpenACC 2.6's no_create", Subversion r279622 (Git commit5bcd470bf0) "Use gomp_map_val for OpenACC host-to-device address translation". libgomp/ * target.c (gomp_map_vars_internal): Use 'OFFSET_INLINED' for 'GOMP_MAP_IF_PRESENT'. * plugin/plugin-gcn.c (gcn_exec, GOMP_OFFLOAD_openacc_exec) (GOMP_OFFLOAD_openacc_async_exec): Adjust. * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec) (GOMP_OFFLOAD_openacc_async_exec): Likewise. * testsuite/libgomp.oacc-c-c++-common/no_create-1.c: Add 'async' testing. * testsuite/libgomp.oacc-c-c++-common/no_create-2.c: Likewise.
This commit is contained in:
@@ -3064,7 +3064,7 @@ wait_queue (struct goacc_asyncqueue *aq)
|
||||
/* Execute an OpenACC kernel, synchronously or asynchronously. */
|
||||
|
||||
static void
|
||||
gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
|
||||
gcn_exec (struct kernel_info *kernel, size_t mapnum,
|
||||
void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async,
|
||||
struct goacc_asyncqueue *aq)
|
||||
{
|
||||
@@ -3077,9 +3077,7 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
|
||||
/* devaddrs must be double-indirect on the target. */
|
||||
void **ind_da = alloc_by_agent (kernel->agent, sizeof (void*) * mapnum);
|
||||
for (size_t i = 0; i < mapnum; i++)
|
||||
hsa_fns.hsa_memory_copy_fn (&ind_da[i],
|
||||
devaddrs[i] ? &devaddrs[i] : &hostaddrs[i],
|
||||
sizeof (void *));
|
||||
hsa_fns.hsa_memory_copy_fn (&ind_da[i], &devaddrs[i], sizeof (void *));
|
||||
|
||||
struct hsa_kernel_description *hsa_kernel_desc = NULL;
|
||||
for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++)
|
||||
@@ -3887,27 +3885,27 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs, unsigned *dims,
|
||||
void **hostaddrs __attribute__((unused)),
|
||||
void **devaddrs, unsigned *dims,
|
||||
void *targ_mem_desc)
|
||||
{
|
||||
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
|
||||
|
||||
gcn_exec (kernel, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, false,
|
||||
NULL);
|
||||
gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, false, NULL);
|
||||
}
|
||||
|
||||
/* Run an asynchronous OpenACC kernel on the specified queue. */
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs,
|
||||
void **hostaddrs __attribute__((unused)),
|
||||
void **devaddrs,
|
||||
unsigned *dims, void *targ_mem_desc,
|
||||
struct goacc_asyncqueue *aq)
|
||||
{
|
||||
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
|
||||
|
||||
gcn_exec (kernel, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, true,
|
||||
aq);
|
||||
gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, true, aq);
|
||||
}
|
||||
|
||||
/* Create a new asynchronous thread and queue for running future kernels. */
|
||||
|
||||
@@ -742,8 +742,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
|
||||
}
|
||||
|
||||
static void
|
||||
nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
||||
unsigned *dims, void *targ_mem_desc,
|
||||
nvptx_exec (void (*fn), size_t mapnum, unsigned *dims, void *targ_mem_desc,
|
||||
CUdeviceptr dp, CUstream stream)
|
||||
{
|
||||
struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
|
||||
@@ -1530,7 +1529,8 @@ GOMP_OFFLOAD_free (int ord, void *ptr)
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs,
|
||||
void **hostaddrs __attribute__((unused)),
|
||||
void **devaddrs,
|
||||
unsigned *dims, void *targ_mem_desc)
|
||||
{
|
||||
GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
|
||||
@@ -1549,7 +1549,7 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
|
||||
size_t s = mapnum * sizeof (void *);
|
||||
hp = alloca (s);
|
||||
for (int i = 0; i < mapnum; i++)
|
||||
hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]);
|
||||
hp[i] = devaddrs[i];
|
||||
CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
|
||||
if (profiling_p)
|
||||
goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
|
||||
@@ -1591,8 +1591,7 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
|
||||
}
|
||||
}
|
||||
|
||||
nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc,
|
||||
dp, NULL);
|
||||
nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, NULL);
|
||||
|
||||
CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL);
|
||||
const char *maybe_abort_msg = "(perhaps abort was called)";
|
||||
@@ -1617,7 +1616,8 @@ cuda_free_argmem (void *ptr)
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs,
|
||||
void **hostaddrs __attribute__((unused)),
|
||||
void **devaddrs,
|
||||
unsigned *dims, void *targ_mem_desc,
|
||||
struct goacc_asyncqueue *aq)
|
||||
{
|
||||
@@ -1639,7 +1639,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
|
||||
block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s);
|
||||
hp = block + 2;
|
||||
for (int i = 0; i < mapnum; i++)
|
||||
hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]);
|
||||
hp[i] = devaddrs[i];
|
||||
CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
|
||||
if (profiling_p)
|
||||
goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
|
||||
@@ -1688,8 +1688,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
|
||||
}
|
||||
}
|
||||
|
||||
nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc,
|
||||
dp, aq->cuda_stream);
|
||||
nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, aq->cuda_stream);
|
||||
|
||||
if (mapnum > 0)
|
||||
GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block);
|
||||
|
||||
@@ -1207,7 +1207,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
|
||||
{
|
||||
/* Not present, hence, skip entry - including its MAP_POINTER,
|
||||
when existing. */
|
||||
tgt->list[i].offset = OFFSET_POINTER;
|
||||
tgt->list[i].offset = OFFSET_INLINED;
|
||||
if (i + 1 < mapnum
|
||||
&& ((typemask & get_kind (short_mapkind, kinds, i + 1))
|
||||
== GOMP_MAP_POINTER))
|
||||
|
||||
@@ -22,15 +22,10 @@ main (int argc, char *argv[])
|
||||
devptr[0] = &var;
|
||||
devptr[1] = &arr[2];
|
||||
}
|
||||
|
||||
if (acc_hostptr (devptr[0]) != (void *) &var)
|
||||
__builtin_abort ();
|
||||
if (acc_hostptr (devptr[1]) != (void *) &arr[2])
|
||||
__builtin_abort ();
|
||||
|
||||
acc_delete (&var, sizeof (var));
|
||||
acc_delete (arr, N * sizeof (*arr));
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
if (devptr[0] != &var)
|
||||
__builtin_abort ();
|
||||
@@ -43,6 +38,31 @@ main (int argc, char *argv[])
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
|
||||
#pragma acc parallel no_create(var, arr[0:N]) copyout(devptr) async
|
||||
{
|
||||
devptr[0] = &arr[N - 2];
|
||||
devptr[1] = &var;
|
||||
}
|
||||
#pragma acc wait
|
||||
if (acc_hostptr (devptr[0]) != (void *) &arr[N - 2])
|
||||
__builtin_abort ();
|
||||
if (acc_hostptr (devptr[1]) != (void *) &var)
|
||||
__builtin_abort ();
|
||||
#if ACC_MEM_SHARED
|
||||
if (devptr[0] != &arr[N - 2])
|
||||
__builtin_abort ();
|
||||
if (devptr[1] != &var)
|
||||
__builtin_abort ();
|
||||
#else
|
||||
if (devptr[0] == &arr[N - 2])
|
||||
__builtin_abort ();
|
||||
if (devptr[1] == &var)
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
|
||||
acc_delete (&var, sizeof (var));
|
||||
acc_delete (arr, N * sizeof (*arr));
|
||||
|
||||
free (arr);
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -18,12 +18,22 @@ main (int argc, char *argv[])
|
||||
devptr[0] = &var;
|
||||
devptr[1] = &arr[2];
|
||||
}
|
||||
|
||||
if (devptr[0] != &var)
|
||||
__builtin_abort ();
|
||||
if (devptr[1] != &arr[2])
|
||||
__builtin_abort ();
|
||||
|
||||
#pragma acc parallel no_create(var, arr[0:N]) copyout(devptr) async
|
||||
{
|
||||
devptr[0] = &arr[N - 2];
|
||||
devptr[1] = &var;
|
||||
}
|
||||
#pragma acc wait
|
||||
if (devptr[0] != &arr[N - 2])
|
||||
__builtin_abort ();
|
||||
if (devptr[1] != &var)
|
||||
__builtin_abort ();
|
||||
|
||||
free (arr);
|
||||
|
||||
return 0;
|
||||
|
||||
Reference in New Issue
Block a user