mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 03:47:02 -05:00
This implements the OpenMP low-latency memory allocator for AMD GCN using the small per-team LDS memory (Local Data Store). Since addresses can now refer to LDS space, the "Global" address space is no longer compatible. This patch therefore switches the backend to use entirely "Flat" addressing (which supports both memories). A future patch will re-enable "global" instructions for cases where it is known to be safe to do so. gcc/ChangeLog: * config/gcn/gcn-builtins.def (DISPATCH_PTR): New built-in. * config/gcn/gcn.cc (gcn_init_machine_status): Disable global addressing. (gcn_expand_builtin_1): Implement GCN_BUILTIN_DISPATCH_PTR. libgomp/ChangeLog: * config/gcn/libgomp-gcn.h (TEAM_ARENA_START): Move to here. (TEAM_ARENA_FREE): Likewise. (TEAM_ARENA_END): Likewise. (GCN_LOWLAT_HEAP): New. * config/gcn/team.c (LITTLEENDIAN_CPU): New, and import hsa.h. (__gcn_lowlat_init): New prototype. (gomp_gcn_enter_kernel): Initialize the low-latency heap. * libgomp.h (TEAM_ARENA_START): Move to libgomp.h. (TEAM_ARENA_FREE): Likewise. (TEAM_ARENA_END): Likewise. * plugin/plugin-gcn.c (lowlat_size): New variable. (print_kernel_dispatch): Label the group_segment_size purpose. (init_environment_variables): Read GOMP_GCN_LOWLAT_POOL. (create_kernel_dispatch): Pass low-latency heap allocation to kernel. (run_kernel): Use shadow; don't assume values. * testsuite/libgomp.c/omp_alloc-traits.c: Enable for amdgcn. * config/gcn/allocator.c: New file. * libgomp.texi: Document low-latency implementation details.
67 lines
1.6 KiB
C
67 lines
1.6 KiB
C
/* { dg-do run } */
|
|
|
|
/* { dg-require-effective-target offload_device } */
|
|
/* { dg-xfail-if "not implemented" { ! { offload_target_nvptx || offload_target_amdgcn } } } */
|
|
|
|
/* Test that GPU low-latency allocation is limited to team access. */
|
|
|
|
#include <stddef.h>
|
|
#include <omp.h>
|
|
|
|
#pragma omp requires dynamic_allocators
|
|
|
|
int
|
|
main ()
|
|
{
|
|
#pragma omp target
|
|
{
|
|
/* Ensure that the memory we get *is* low-latency with a null-fallback. */
|
|
omp_alloctrait_t traits[2]
|
|
= { { omp_atk_fallback, omp_atv_null_fb },
|
|
{ omp_atk_access, omp_atv_cgroup } };
|
|
omp_allocator_handle_t lowlat = omp_init_allocator (omp_low_lat_mem_space,
|
|
2, traits); // good
|
|
|
|
omp_alloctrait_t traits_all[2]
|
|
= { { omp_atk_fallback, omp_atv_null_fb },
|
|
{ omp_atk_access, omp_atv_all } };
|
|
omp_allocator_handle_t lowlat_all
|
|
= omp_init_allocator (omp_low_lat_mem_space, 2, traits_all); // bad
|
|
|
|
omp_alloctrait_t traits_default[1]
|
|
= { { omp_atk_fallback, omp_atv_null_fb } };
|
|
omp_allocator_handle_t lowlat_default
|
|
= omp_init_allocator (omp_low_lat_mem_space, 1, traits_default); // bad
|
|
|
|
if (lowlat_all != omp_null_allocator
|
|
|| lowlat_default != omp_null_allocator)
|
|
__builtin_abort ();
|
|
|
|
void *a = omp_alloc (1, lowlat); // good
|
|
|
|
if (!a)
|
|
__builtin_abort ();
|
|
|
|
omp_free (a, lowlat);
|
|
|
|
|
|
a = omp_calloc (1, 1, lowlat); // good
|
|
|
|
if (!a)
|
|
__builtin_abort ();
|
|
|
|
omp_free (a, lowlat);
|
|
|
|
|
|
a = omp_realloc (NULL, 1, lowlat, lowlat); // good
|
|
|
|
if (!a)
|
|
__builtin_abort ();
|
|
|
|
omp_free (a, lowlat);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|