mirror of
https://gcc.gnu.org/git/gcc.git
synced 2026-02-21 19:35:28 -05:00
libgomp, amdgcn, nvptx: Improve omp_target_is_accessible [PR121813]
This patch extends omp_target_is_accessible to check the actual device status for the memory region, on amdgcn and nvptx devices (rather than just checking if shared memory is enabled). In both cases, we check the status of each 4k region within the given memory range (assuming 4k pages should be safe for all the currently supported hosts) and returns true if all of the pages report accessible. The testcases have been modified to check that allocations marked accessible actually are accessible (inaccessibility can't be checked without invoking memory faults), and to understand that some parts of an array can be accessible but other parts not (I have observed this intermittently for the stack memory on amdgcn using the Fortran testcase, which can have the allocation span pages). There's also new testcases for the various other memory modes, and for managed memory. include/ChangeLog: * cuda/cuda.h (CUpointer_attribute): New enum. (cuPointerGetAttribute): New prototype. libgomp/ChangeLog: PR libgomp/121813 PR libgomp/113213 * libgomp-plugin.h (GOMP_OFFLOAD_is_accessible_ptr): New prototype. * libgomp.h (struct gomp_device_descr): Add GOMP_OFFLOAD_is_accessible_ptr. * libgomp.texi: Update omp_target_is_accessible docs. * plugin/cuda-lib.def (cuPointerGetAttribute): New entry. * plugin/plugin-gcn.c (struct hsa_runtime_fn_info): Add hsa_amd_svm_attributes_get_fn and hsa_amd_pointer_info_fn. (init_hsa_runtime_functions): Add hsa_amd_svm_attributes_get and hsa_amd_pointer_info. (enum accessible): New enum type. (host_memory_is_accessible): New function. (device_memory_is_accessible): New function. (GOMP_OFFLOAD_is_accessible_ptr): New function. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_is_accessible_ptr): Likewise. * target.c (omp_target_is_accessible): Call is_accessible_ptr_func. (gomp_load_plugin_for_device): Add is_accessible_ptr. * testsuite/libgomp.c-c++-common/target-is-accessible-1.c: Rework to match more details of the GPU implementation. 
* testsuite/libgomp.fortran/target-is-accessible-1.f90: Likewise. * testsuite/libgomp.c-c++-common/target-is-accessible-2.c: New test. * testsuite/libgomp.c-c++-common/target-is-accessible-3.c: New test. * testsuite/libgomp.c-c++-common/target-is-accessible-4.c: New test. * testsuite/libgomp.c-c++-common/target-is-accessible-5.c: New test.
This commit is contained in:
@@ -143,6 +143,13 @@ typedef enum {
|
||||
CU_MEMORYTYPE_UNIFIED = 0x04
|
||||
} CUmemorytype;
|
||||
|
||||
typedef enum {
|
||||
CU_POINTER_ATTRIBUTE_CONTEXT = 0x01,
|
||||
CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 0x02,
|
||||
CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 0x03,
|
||||
CU_POINTER_ATTRIBUTE_HOST_POINTER = 0x04
|
||||
} CUpointer_attribute;
|
||||
|
||||
typedef struct {
|
||||
size_t srcXInBytes, srcY;
|
||||
CUmemorytype srcMemoryType;
|
||||
@@ -300,6 +307,8 @@ CUresult cuModuleGetGlobal (CUdeviceptr *, size_t *, CUmodule, const char *);
|
||||
CUresult cuModuleLoad (CUmodule *, const char *);
|
||||
CUresult cuModuleLoadData (CUmodule *, const void *);
|
||||
CUresult cuModuleUnload (CUmodule);
|
||||
CUresult cuPointerGetAttribute (CUmemorytype *, CUpointer_attribute,
|
||||
CUdeviceptr);
|
||||
CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
|
||||
CUoccupancyB2DSize, size_t, int);
|
||||
typedef void (*CUstreamCallback)(CUstream, CUresult, void *);
|
||||
|
||||
@@ -173,6 +173,7 @@ extern void *GOMP_OFFLOAD_alloc (int, size_t);
|
||||
extern bool GOMP_OFFLOAD_free (int, void *);
|
||||
extern void *GOMP_OFFLOAD_managed_alloc (int, size_t);
|
||||
extern bool GOMP_OFFLOAD_managed_free (int, void *);
|
||||
extern int GOMP_OFFLOAD_is_accessible_ptr (int, const void *, size_t);
|
||||
extern bool GOMP_OFFLOAD_page_locked_host_alloc (void **, size_t);
|
||||
extern bool GOMP_OFFLOAD_page_locked_host_free (void *);
|
||||
extern bool GOMP_OFFLOAD_dev2host (int, void *, const void *, size_t);
|
||||
|
||||
@@ -1425,6 +1425,7 @@ struct gomp_device_descr
|
||||
__typeof (GOMP_OFFLOAD_free) *free_func;
|
||||
__typeof (GOMP_OFFLOAD_managed_alloc) *managed_alloc_func;
|
||||
__typeof (GOMP_OFFLOAD_managed_free) *managed_free_func;
|
||||
__typeof (GOMP_OFFLOAD_is_accessible_ptr) *is_accessible_ptr_func;
|
||||
__typeof (GOMP_OFFLOAD_page_locked_host_alloc) *page_locked_host_alloc_func;
|
||||
__typeof (GOMP_OFFLOAD_page_locked_host_free) *page_locked_host_free_func;
|
||||
__typeof (GOMP_OFFLOAD_dev2host) *dev2host_func;
|
||||
|
||||
@@ -2172,13 +2172,17 @@ and extending @var{size} bytes, is accessibly on the device specified by
|
||||
@var{device_num}. If so, it returns a nonzero value and otherwise zero.
|
||||
|
||||
The address given by @var{ptr} is interpreted to be in the address space of
|
||||
the device and @var{size} must be positive.
|
||||
the device and @var{size} must be positive. NULL pointers and zero-length
|
||||
ranges always return zero.
|
||||
|
||||
Note that GCC's current implementation assumes that @var{ptr} is a valid host
|
||||
pointer. Therefore, all addresses given by @var{ptr} are assumed to be
|
||||
accessible on the initial device. And, to err on the safe side, this memory
|
||||
is only available on a non-host device that can access all host memory
|
||||
([uniform] shared memory access).
|
||||
pointer. Therefore, all non-NULL addresses given by @var{ptr} are assumed to be
|
||||
accessible on the initial device. The address is only reported as accessible
|
||||
on non-host devices if this is @emph{known} to be the case, or if the device
|
||||
reports that all memory is accessible (i.e. [unified] shared memory access).
|
||||
If the runtime is uncertain it may report accessible memory as inaccessible.
|
||||
For a memory range to be reported accessible, the whole range must be known to
|
||||
be accessible.
|
||||
|
||||
Running this routine in a @code{target} region except on the initial device
|
||||
is not supported.
|
||||
|
||||
@@ -55,6 +55,7 @@ CUDA_ONE_CALL (cuModuleLoad)
|
||||
CUDA_ONE_CALL (cuModuleLoadData)
|
||||
CUDA_ONE_CALL (cuModuleUnload)
|
||||
CUDA_ONE_CALL_MAYBE_NULL (cuOccupancyMaxPotentialBlockSize)
|
||||
CUDA_ONE_CALL (cuPointerGetAttribute)
|
||||
CUDA_ONE_CALL (cuStreamAddCallback)
|
||||
CUDA_ONE_CALL (cuStreamCreate)
|
||||
CUDA_ONE_CALL (cuStreamDestroy)
|
||||
|
||||
@@ -233,6 +233,12 @@ struct hsa_runtime_fn_info
|
||||
hsa_status_t (*hsa_amd_svm_attributes_set_fn)
|
||||
(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
|
||||
size_t attribute_count);
|
||||
hsa_status_t (*hsa_amd_svm_attributes_get_fn)
|
||||
(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
|
||||
size_t attribute_count);
|
||||
hsa_status_t (*hsa_amd_pointer_info_fn)
|
||||
(const void *, hsa_amd_pointer_info_t *, void *(*)(size_t),
|
||||
uint32_t *, hsa_agent_t **);
|
||||
};
|
||||
|
||||
/* As an HIP runtime is dlopened, following structure defines function
|
||||
@@ -1494,6 +1500,8 @@ init_hsa_runtime_functions (void)
|
||||
DLSYM_OPT_FN (hsa_amd_memory_unlock)
|
||||
DLSYM_OPT_FN (hsa_amd_memory_async_copy_rect)
|
||||
DLSYM_OPT_FN (hsa_amd_svm_attributes_set)
|
||||
DLSYM_OPT_FN (hsa_amd_svm_attributes_get)
|
||||
DLSYM_OPT_FN (hsa_amd_pointer_info)
|
||||
return true;
|
||||
#undef DLSYM_OPT_FN
|
||||
#undef DLSYM_FN
|
||||
@@ -5258,6 +5266,109 @@ GOMP_OFFLOAD_managed_free (int device, void *ptr)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Tri-state result for the accessibility helpers below.  UNKNOWN means the
   query could not decide (e.g. the pointer is not registered with the
   runtime), so the caller should try a different query method.  */
enum accessible {
  UNKNOWN,       /* Could not determine; fall back to another check.  */
  INACCESSIBLE,  /* Known not to be accessible on the agent.  */
  ACCESSIBLE     /* Known to be accessible on the agent.  */
};
|
||||
|
||||
/* Is a host memory address accessible on the given device?
   Returns UNKNOWN if the memory isn't registered, or if it isn't a valid host
   pointer; the caller may then try device_memory_is_accessible instead.
   Returns ACCESSIBLE only if every page of [PTR, PTR+SIZE) reports
   accessible.  */

static enum accessible
host_memory_is_accessible (hsa_agent_t agent, const void *ptr, size_t size)
{
  /* hsa_amd_svm_attributes_get is an optional ROCr entry point; without it
     we cannot decide either way.  */
  if (!hsa_fns.hsa_amd_svm_attributes_get_fn)
    return UNKNOWN;

  /* The HSA API doesn't seem to report for the whole range given, so we call
     once for each page the range straddles.  4K pages are assumed here
     (NOTE(review): presumably safe for all supported hosts — confirm).  */
  const void *p = ptr;
  size_t remaining = size;
  do
    {
      /* Note: the access query returns in the attribute field.  */
      struct hsa_amd_svm_attribute_pair_s attr = {
	HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent.handle
      };
      hsa_status_t status = hsa_fns.hsa_amd_svm_attributes_get_fn ((void*)p,
								   remaining,
								   &attr, 1);
      if (status != HSA_STATUS_SUCCESS)
	/* This happens when the memory isn't registered with ROCr at all.  */
	return UNKNOWN;

      switch (attr.attribute)
	{
	case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE:
	case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE:
	  /* This page is accessible; keep scanning.  */
	  break;
	case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS:
	default:
	  /* Any page without access makes the whole range inaccessible.  */
	  return INACCESSIBLE;
	}

      /* Advance P to the start of the next 4K page and recompute how much of
	 the requested range is left from there.  */
      p = (void*)(((uintptr_t)p + 4096) & ~0xfffUL);
      remaining = size - ((uintptr_t)p - (uintptr_t)ptr);
    } while (p < ptr + size);

  /* All pages were accessible.  */
  return ACCESSIBLE;
}
|
||||
|
||||
/* Is a device memory address accessible on the given device?
|
||||
Returns UNKNOWN if it isn't a valid device address. Returns INACCESSIBLE if
|
||||
the pointer is valid, but not the whole range, or if it refers to the wrong
|
||||
device. */
|
||||
|
||||
static enum accessible
|
||||
device_memory_is_accessible (hsa_agent_t agent, const void *ptr, size_t size)
|
||||
{
|
||||
if (!hsa_fns.hsa_amd_pointer_info_fn)
|
||||
return UNKNOWN;
|
||||
|
||||
hsa_amd_pointer_info_t info;
|
||||
uint32_t nagents;
|
||||
hsa_agent_t *agents;
|
||||
info.size = sizeof (hsa_amd_pointer_info_t);
|
||||
|
||||
hsa_status_t status = hsa_fns.hsa_amd_pointer_info_fn (ptr, &info, NULL,
|
||||
&nagents, &agents);
|
||||
if (status != HSA_STATUS_SUCCESS
|
||||
|| info.type == HSA_EXT_POINTER_TYPE_UNKNOWN)
|
||||
return UNKNOWN;
|
||||
|
||||
if (agent.handle == info.agentOwner.handle)
|
||||
return (info.sizeInBytes >= size ? ACCESSIBLE : INACCESSIBLE);
|
||||
|
||||
for (unsigned i = 0; i < nagents; i++)
|
||||
{
|
||||
if (agent.handle == agents[0].handle)
|
||||
return (info.sizeInBytes >= size ? ACCESSIBLE : INACCESSIBLE);
|
||||
}
|
||||
|
||||
return INACCESSIBLE;
|
||||
}
|
||||
|
||||
/* Backend implementation for omp_target_is_accessible. */
|
||||
|
||||
int
|
||||
GOMP_OFFLOAD_is_accessible_ptr (int device, const void *ptr, size_t size)
|
||||
{
|
||||
if (!init_hsa_context (false)
|
||||
|| device < 0 || device > hsa_context.agent_count)
|
||||
return 0;
|
||||
|
||||
struct agent_info *agent = get_agent_info (device);
|
||||
|
||||
enum accessible result;
|
||||
result = host_memory_is_accessible (agent->id, ptr, size);
|
||||
if (result == UNKNOWN)
|
||||
result = device_memory_is_accessible (agent->id, ptr, size);
|
||||
return result == ACCESSIBLE;
|
||||
}
|
||||
|
||||
/* }}} */
|
||||
/* {{{ OpenACC Plugin API */
|
||||
|
||||
|
||||
@@ -353,6 +353,8 @@ struct ptx_device
|
||||
|
||||
static struct ptx_device **ptx_devices;
|
||||
|
||||
static bool using_usm = false;
|
||||
|
||||
/* "Native" GPU thread stack size. */
|
||||
static unsigned native_gpu_thread_stack_size = 0;
|
||||
|
||||
@@ -1343,15 +1345,20 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
|
||||
if (num_devices > 0
|
||||
&& (omp_requires_mask
|
||||
& (GOMP_REQUIRES_UNIFIED_SHARED_MEMORY | GOMP_REQUIRES_SELF_MAPS)))
|
||||
for (int dev = 0; dev < num_devices; dev++)
|
||||
{
|
||||
int pi;
|
||||
CUresult r;
|
||||
r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
|
||||
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, dev);
|
||||
if (r != CUDA_SUCCESS || pi == 0)
|
||||
return -1;
|
||||
}
|
||||
{
|
||||
for (int dev = 0; dev < num_devices; dev++)
|
||||
{
|
||||
int pi;
|
||||
CUresult r;
|
||||
r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
|
||||
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS,
|
||||
dev);
|
||||
if (r != CUDA_SUCCESS || pi == 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
using_usm = true;
|
||||
}
|
||||
return num_devices;
|
||||
}
|
||||
|
||||
@@ -1906,6 +1913,50 @@ GOMP_OFFLOAD_managed_free (int ord, void *ptr)
|
||||
return GOMP_OFFLOAD_free (ord, ptr);
|
||||
}
|
||||
|
||||
int
|
||||
GOMP_OFFLOAD_is_accessible_ptr (int ord,
|
||||
const void *ptr, size_t size)
|
||||
{
|
||||
/* USM implies access. */
|
||||
if (using_usm)
|
||||
return 1;
|
||||
|
||||
struct ptx_device *ptx_dev = ptx_devices[ord];
|
||||
CUcontext old_ctx;
|
||||
CUDA_CALL_ERET (false, cuCtxPushCurrent, ptx_dev->ctx);
|
||||
|
||||
/* The Cuda API does not permit testing a whole range, so we test each
|
||||
4K page within the range. If any page is inaccessible return false. */
|
||||
const void *p = ptr;
|
||||
int result = 1; /* All pages accessible. */
|
||||
do
|
||||
{
|
||||
CUmemorytype mem_type;
|
||||
CUresult res = CUDA_CALL_NOCHECK (cuPointerGetAttribute, &mem_type,
|
||||
CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
|
||||
(CUdeviceptr)p);
|
||||
if (res != CUDA_SUCCESS)
|
||||
/* Memory is not registered, and therefore not accessible. */
|
||||
result = 0;
|
||||
|
||||
switch (mem_type)
|
||||
{
|
||||
case CU_MEMORYTYPE_HOST:
|
||||
case CU_MEMORYTYPE_UNIFIED:
|
||||
case CU_MEMORYTYPE_DEVICE:
|
||||
break;
|
||||
case CU_MEMORYTYPE_ARRAY:
|
||||
default:
|
||||
result = 0; /* This page isn't accessible. */
|
||||
}
|
||||
|
||||
p = (void*)(((uintptr_t)p + 4096) & ~0xfffUL);
|
||||
} while (result && p < ptr + size);
|
||||
|
||||
CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool
|
||||
GOMP_OFFLOAD_page_locked_host_alloc (void **ptr, size_t size)
|
||||
{
|
||||
|
||||
@@ -5590,6 +5590,9 @@ omp_get_mapped_ptr (const void *ptr, int device_num)
|
||||
int
|
||||
omp_target_is_accessible (const void *ptr, size_t size, int device_num)
|
||||
{
|
||||
if (ptr == NULL || size == 0)
|
||||
return false;
|
||||
|
||||
if (device_num == omp_default_device)
|
||||
device_num = gomp_get_default_device ();
|
||||
|
||||
@@ -5601,9 +5604,19 @@ omp_target_is_accessible (const void *ptr, size_t size, int device_num)
|
||||
if (devicep == NULL)
|
||||
return false;
|
||||
|
||||
/* TODO: Unified shared memory must be handled when available. */
|
||||
/* Managed memory (or other device feature).
|
||||
is_accessible_ptr may, in future, report more than simply true or false,
|
||||
but we can assume that positive responses are accessible, and
|
||||
zero/negative responses are inaccessible. */
|
||||
if (devicep->is_accessible_ptr_func)
|
||||
return (devicep->is_accessible_ptr_func (devicep->target_id, ptr, size)
|
||||
> 0);
|
||||
|
||||
return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
|
||||
/* Unified shared memory (or true shared memory). */
|
||||
if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int
|
||||
@@ -6009,6 +6022,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
|
||||
DLSYM (free);
|
||||
DLSYM_OPT (managed_alloc, managed_alloc);
|
||||
DLSYM_OPT (managed_free, managed_free);
|
||||
DLSYM_OPT (is_accessible_ptr, is_accessible_ptr);
|
||||
DLSYM_OPT (page_locked_host_alloc, page_locked_host_alloc);
|
||||
DLSYM_OPT (page_locked_host_free, page_locked_host_free);
|
||||
DLSYM (dev2host);
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
#include <omp.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
//#define __builtin_abort() __builtin_printf ("fail at line %d\n", __LINE__)
|
||||
|
||||
int
|
||||
main ()
|
||||
@@ -6,7 +10,7 @@ main ()
|
||||
int d = omp_get_default_device ();
|
||||
int id = omp_get_initial_device ();
|
||||
int n = omp_get_num_devices ();
|
||||
void *p;
|
||||
int *p = (int*)malloc (sizeof (int));
|
||||
|
||||
if (d < 0 || d >= n)
|
||||
d = id;
|
||||
@@ -26,24 +30,81 @@ main ()
|
||||
if (omp_target_is_accessible (p, sizeof (int), n + 1))
|
||||
__builtin_abort ();
|
||||
|
||||
/* Currently, a host pointer is accessible if the device supports shared
|
||||
memory or omp_target_is_accessible is executed on the host. This
|
||||
test case must be adapted when unified shared memory is avialable. */
|
||||
int a[128];
|
||||
for (int d = 0; d <= omp_get_num_devices (); d++)
|
||||
{
|
||||
if (omp_target_is_accessible (NULL, 1, d))
|
||||
__builtin_abort ();
|
||||
|
||||
if (omp_target_is_accessible (p, 0, d))
|
||||
__builtin_abort ();
|
||||
|
||||
/* Check if libgomp is treating the device as a shared memory device. */
|
||||
int shared_mem = 0;
|
||||
#pragma omp target map (alloc: shared_mem) device (d)
|
||||
shared_mem = 1;
|
||||
|
||||
int heap_accessible = shared_mem;
|
||||
if (omp_target_is_accessible (p, sizeof (int), d) != shared_mem)
|
||||
__builtin_abort ();
|
||||
{
|
||||
if (shared_mem)
|
||||
__builtin_abort ();
|
||||
|
||||
/* shared_mem is false, but the memory is reading as accessible,
|
||||
so let's check that by reading it. We should not do so
|
||||
unconditionally because if it's wrong then we'll probably get
|
||||
a memory fault. */
|
||||
*p = 123;
|
||||
uintptr_t addr = (uintptr_t)p;
|
||||
|
||||
#pragma omp target is_device_ptr(p) map(from:heap_accessible) \
|
||||
device(d)
|
||||
{
|
||||
if ((uintptr_t)p == addr && *p == 123)
|
||||
heap_accessible = 1;
|
||||
}
|
||||
|
||||
if (!heap_accessible)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
int stack_accessible = shared_mem;
|
||||
if (omp_target_is_accessible (a, 128 * sizeof (int), d) != shared_mem)
|
||||
__builtin_abort ();
|
||||
{
|
||||
if (shared_mem)
|
||||
__builtin_abort ();
|
||||
|
||||
/* shared_mem is false, but the memory is reading as accessible,
|
||||
so let's check that by reading it. We should not do so
|
||||
unconditionally because if it's wrong then we'll probably get
|
||||
a memory fault. */
|
||||
int test_accessible = 123;
|
||||
uintptr_t addr = (uintptr_t)&test_accessible;
|
||||
|
||||
#pragma omp target has_device_addr(test_accessible) \
|
||||
map(from:stack_accessible) device(d)
|
||||
{
|
||||
if ((uintptr_t)&test_accessible == addr
|
||||
&& test_accessible == 123)
|
||||
stack_accessible = 1;
|
||||
}
|
||||
|
||||
if (!stack_accessible)
|
||||
__builtin_abort ();
|
||||
}
|
||||
__builtin_printf ("device #%d: shared_mem=%d heap_accessible=%d "
|
||||
"stack_accessible=%d\n",
|
||||
d, shared_mem, heap_accessible, stack_accessible);
|
||||
|
||||
/* omp_target_is_accessible returns false if *any* of the array is
|
||||
inaccessible, so we only check the aggregate result.
|
||||
(Varying access observed on amdgcn without xnack.) */
|
||||
bool accessible = true;
|
||||
for (int i = 0; i < 128; i++)
|
||||
if (omp_target_is_accessible (&a[i], sizeof (int), d) != shared_mem)
|
||||
__builtin_abort ();
|
||||
if (!omp_target_is_accessible (&a[i], sizeof (int), d))
|
||||
accessible = false;
|
||||
if (accessible != (shared_mem || stack_accessible))
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
/* { dg-require-effective-target omp_usm } */
|
||||
|
||||
#pragma omp requires unified_shared_memory
|
||||
|
||||
#include "target-is-accessible-1.c"
|
||||
@@ -0,0 +1,4 @@
|
||||
/* { dg-require-effective-target offload_target_amdgcn_with_xnack } */
|
||||
/* { dg-additional-options "-foffload-options=amdgcn-amdhsa=-mxnack=on" } */
|
||||
|
||||
#include "target-is-accessible-1.c"
|
||||
@@ -0,0 +1,28 @@
|
||||
/* { dg-require-effective-target omp_managedmem } */
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
//#define __builtin_abort() __builtin_printf ("fail at line %d\n", __LINE__)
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int *p = (int*)omp_alloc (sizeof (int), ompx_gnu_managed_mem_alloc);
|
||||
|
||||
*p = 42;
|
||||
uintptr_t a_p = (uintptr_t)p;
|
||||
|
||||
#pragma omp target is_device_ptr(p)
|
||||
{
|
||||
if (*p != 42 || a_p != (uintptr_t)p)
|
||||
__builtin_abort ();
|
||||
}
|
||||
if (!p
|
||||
|| !omp_target_is_accessible (p, sizeof (int),
|
||||
omp_get_default_device ()))
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
#include <omp.h>
|
||||
|
||||
/* For device DEV: allocate device memory with omp_target_alloc, assert that
   omp_target_is_accessible reports it accessible, write to it from a target
   region, then copy it back with omp_target_memcpy and verify the contents
   actually landed.  */
void check (int dev)
{
  constexpr int N = 10;
  constexpr int size = N*sizeof(int);
  int A[N] = {};

  void *ptr = omp_target_alloc (size, dev);

  /* Memory allocated with omp_target_alloc for DEV must be reported
     accessible on DEV.  */
  if (ptr == nullptr || !omp_target_is_accessible (ptr, size, dev))
    __builtin_abort ();

  /* Fill the device buffer from a target region.  */
  #pragma omp target device(dev) firstprivate(ptr)
  for (int i = 0; i < N; i++)
    ((int *)ptr)[i] = i + 1;

  /* Copy device buffer back to the host.  */
  if (omp_target_memcpy (A, ptr, size, 0, 0, omp_initial_device, dev) != 0)
    __builtin_abort ();

  /* Verify the values written on the device.  */
  for (int i = 0; i < N; i++)
    if (A[i] != i + 1)
      __builtin_abort ();

  omp_target_free (ptr, dev);
}
|
||||
|
||||
/* Run the check on the default device and on every device ordinal,
   including omp_get_num_devices () itself (the host fallback device).  */
int main ()
{
  check (omp_default_device);
  for (int dev = 0; dev <= omp_get_num_devices(); dev++)
    check (dev);
}
|
||||
@@ -1,53 +1,112 @@
|
||||
! { dg-do run }
|
||||
|
||||
program main
|
||||
use omp_lib
|
||||
use iso_c_binding
|
||||
implicit none (external, type)
|
||||
integer :: d, id, n, shared_mem, i
|
||||
integer :: d, id, n, shared_mem, i, heap_accessible, stack_accessible
|
||||
integer, target :: test_accessible
|
||||
integer, allocatable, target :: p(:)
|
||||
integer, target :: a(1:128)
|
||||
type(c_ptr) :: p
|
||||
integer(c_intptr_t) :: addr
|
||||
logical :: condition
|
||||
|
||||
d = omp_get_default_device ()
|
||||
id = omp_get_initial_device ()
|
||||
n = omp_get_num_devices ()
|
||||
allocate (p(1))
|
||||
|
||||
if (d < 0 .or. d >= n) &
|
||||
d = id
|
||||
|
||||
if (omp_target_is_accessible (p, c_sizeof (d), n) /= 1) &
|
||||
if (omp_target_is_accessible (c_loc(p), c_sizeof(p(1)), n) == 0) &
|
||||
stop 1
|
||||
|
||||
if (omp_target_is_accessible (p, c_sizeof (d), id) /= 1) &
|
||||
if (omp_target_is_accessible (c_loc(p), c_sizeof(p(1)), id) == 0) &
|
||||
stop 2
|
||||
|
||||
if (omp_target_is_accessible (p, c_sizeof (d), omp_initial_device) /= 1) &
|
||||
if (omp_target_is_accessible (c_loc(p), c_sizeof(p(1)), omp_initial_device) == 0) &
|
||||
stop 3
|
||||
|
||||
if (omp_target_is_accessible (p, c_sizeof (d), -6) /= 0) & ! -6 = omp_default_device - 1
|
||||
if (omp_target_is_accessible (c_loc(p), c_sizeof(p(1)), -6) /= 0) & ! -6 = omp_default_device - 1
|
||||
stop 4
|
||||
|
||||
if (omp_target_is_accessible (p, c_sizeof (d), n + 1) /= 0) &
|
||||
if (omp_target_is_accessible (c_loc(p), c_sizeof(p(1)), n + 1) /= 0) &
|
||||
stop 5
|
||||
|
||||
! Currently, a host pointer is accessible if the device supports shared
|
||||
! memory or omp_target_is_accessible is executed on the host. This
|
||||
! test case must be adapted when unified shared memory is avialable.
|
||||
! test case must be adapted when unified shared memory is available.
|
||||
do d = 0, omp_get_num_devices ()
|
||||
shared_mem = 0;
|
||||
! Check if libgomp is treating the device as a shared memory device.
|
||||
shared_mem = 0
|
||||
!$omp target map (alloc: shared_mem) device (d)
|
||||
shared_mem = 1;
|
||||
shared_mem = 1
|
||||
!$omp end target
|
||||
|
||||
if (omp_target_is_accessible (p, c_sizeof (d), d) /= shared_mem) &
|
||||
stop 6;
|
||||
heap_accessible = shared_mem
|
||||
condition = omp_target_is_accessible (c_loc(p), c_sizeof(p(1)), d) /= shared_mem
|
||||
if (condition) then
|
||||
if (shared_mem /= 0) &
|
||||
stop 6
|
||||
|
||||
if (omp_target_is_accessible (c_loc (a), 128 * sizeof (a(1)), d) /= shared_mem) &
|
||||
stop 7;
|
||||
! shared_mem is false, but the memory is reading as accessible,
|
||||
! so let's check that by reading it. We should not do so
|
||||
! unconditionally because if it's wrong then we'll probably get
|
||||
! a memory fault.
|
||||
p(1) = 123
|
||||
addr = transfer(c_loc(p), addr)
|
||||
|
||||
!$omp target has_device_addr(p) map(from:heap_accessible) device(d)
|
||||
if (transfer(c_loc(p), addr) == addr .and. p(1) == 123) &
|
||||
heap_accessible = 1
|
||||
!$omp end target
|
||||
|
||||
if (heap_accessible == 0) &
|
||||
stop 7
|
||||
end if
|
||||
|
||||
stack_accessible = shared_mem
|
||||
condition = omp_target_is_accessible (c_loc(a), 128 * c_sizeof(a(1)), d) /= shared_mem
|
||||
if (condition) then
|
||||
if (shared_mem /= 0) &
|
||||
stop 8
|
||||
|
||||
! shared_mem is false, but the memory is reading as accessible,
|
||||
! so let's check that by reading it. We should not do so
|
||||
! unconditionally because if it's wrong then we'll probably get
|
||||
! a memory fault.
|
||||
test_accessible = 123
|
||||
addr = transfer(c_loc(test_accessible), addr)
|
||||
|
||||
!$omp target has_device_addr(test_accessible) map(from:stack_accessible) device(d)
|
||||
if (transfer(c_loc(test_accessible), addr) == addr &
|
||||
.and. test_accessible == 123) &
|
||||
stack_accessible = 1
|
||||
!$omp end target
|
||||
|
||||
if (stack_accessible == 0) &
|
||||
stop 9
|
||||
end if
|
||||
|
||||
print '(A,I0,A,I0,A,I0,A,I0)', &
|
||||
'device #', d, &
|
||||
': shared_mem=', shared_mem, &
|
||||
' heap_accessible=', heap_accessible, &
|
||||
' stack_accessible=', stack_accessible
|
||||
|
||||
! omp_target_is_accessible returns false if *any* of the array is
|
||||
! inaccessible, so we only check the aggregate result.
|
||||
! (Varying access observed on amdgcn without xnack.)
|
||||
condition = .true.
|
||||
do i = 1, 128
|
||||
if (omp_target_is_accessible (c_loc (a(i)), sizeof (a(i)), d) /= shared_mem) &
|
||||
stop 8;
|
||||
if (omp_target_is_accessible (c_loc(a(i)), c_sizeof(a(i)), d) == 0) &
|
||||
condition = .false.
|
||||
end do
|
||||
|
||||
if (condition .neqv. stack_accessible /= 0) &
|
||||
stop 10
|
||||
end do
|
||||
|
||||
deallocate (p)
|
||||
|
||||
end program main
|
||||
|
||||
Reference in New Issue
Block a user