mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 03:47:02 -05:00
GCN: Honor OpenMP 5.1 'num_teams' lower bound
Corresponding to commit 9fa72756d9
"libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound", these are the
GCN offloading changes to fix:
PASS: libgomp.c/../libgomp.c-c++-common/teams-2.c (test for excess errors)
[-FAIL:-]{+PASS:+} libgomp.c/../libgomp.c-c++-common/teams-2.c execution test
PASS: libgomp.c++/../libgomp.c-c++-common/teams-2.c (test for excess errors)
[-FAIL:-]{+PASS:+} libgomp.c++/../libgomp.c-c++-common/teams-2.c execution test
..., and omptests' 't-critical' test case. I've cross-checked that those test
cases are the ones that regress for nvptx offloading if I locally revert the
"libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound" changes.
libgomp/
* config/gcn/libgomp-gcn.h (GOMP_TEAM_NUM): Inject.
* config/gcn/target.c (GOMP_teams4): Handle.
* config/gcn/team.c (gomp_gcn_enter_kernel): Initialize.
* config/gcn/teams.c (omp_get_team_num): Adjust.
This commit is contained in:
@@ -34,10 +34,11 @@
|
||||
#define DEFAULT_TEAM_ARENA_SIZE (64*1024)
|
||||
|
||||
/* These define the LDS location of data needed by OpenMP. */
|
||||
#define TEAM_ARENA_START 16 /* LDS offset of free pointer. */
|
||||
#define TEAM_ARENA_FREE 24 /* LDS offset of free pointer. */
|
||||
#define TEAM_ARENA_END 32 /* LDS offset of end pointer. */
|
||||
#define GCN_LOWLAT_HEAP 40 /* LDS offset of the OpenMP low-latency heap. */
|
||||
#define GOMP_TEAM_NUM 16 /* LDS offset of the team number returned by omp_get_team_num. */
|
||||
#define TEAM_ARENA_START 24 /* LDS offset of start pointer. */
|
||||
#define TEAM_ARENA_FREE 32 /* LDS offset of free pointer. */
|
||||
#define TEAM_ARENA_END 40 /* LDS offset of end pointer. */
|
||||
#define GCN_LOWLAT_HEAP 48 /* LDS offset of the OpenMP low-latency heap. */
|
||||
|
||||
struct heap
|
||||
{
|
||||
|
||||
@@ -33,26 +33,37 @@ bool
|
||||
GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
|
||||
unsigned int thread_limit, bool first)
|
||||
{
|
||||
int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
|
||||
unsigned int num_workgroups = __builtin_gcn_dim_size (0);
|
||||
if (!first)
|
||||
return false;
|
||||
{
|
||||
unsigned int team_num;
|
||||
if (num_workgroups > gomp_num_teams_var)
|
||||
return false;
|
||||
team_num = *gomp_team_num;
|
||||
if (team_num > gomp_num_teams_var - num_workgroups)
|
||||
return false;
|
||||
*gomp_team_num = team_num + num_workgroups;
|
||||
return true;
|
||||
}
|
||||
if (thread_limit)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (true);
|
||||
icv->thread_limit_var
|
||||
= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
|
||||
}
|
||||
unsigned int num_workgroups, workgroup_id;
|
||||
num_workgroups = __builtin_gcn_dim_size (0);
|
||||
workgroup_id = __builtin_gcn_dim_pos (0);
|
||||
/* FIXME: If num_teams_lower > num_workgroups, we want to loop
|
||||
multiple times at least for some workgroups. */
|
||||
(void) num_teams_lower;
|
||||
if (!num_teams_upper || num_teams_upper >= num_workgroups)
|
||||
if (!num_teams_upper)
|
||||
num_teams_upper = ((GOMP_ADDITIONAL_ICVS.nteams > 0
|
||||
&& num_workgroups > GOMP_ADDITIONAL_ICVS.nteams)
|
||||
? GOMP_ADDITIONAL_ICVS.nteams : num_workgroups);
|
||||
else if (workgroup_id >= num_teams_upper)
|
||||
else if (num_workgroups < num_teams_lower)
|
||||
num_teams_upper = num_teams_lower;
|
||||
else if (num_workgroups < num_teams_upper)
|
||||
num_teams_upper = num_workgroups;
|
||||
unsigned int workgroup_id = __builtin_gcn_dim_pos (0);
|
||||
if (workgroup_id >= num_teams_upper)
|
||||
return false;
|
||||
*gomp_team_num = workgroup_id;
|
||||
gomp_num_teams_var = num_teams_upper - 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -68,6 +68,9 @@ gomp_gcn_enter_kernel (void)
|
||||
/* Starting additional threads is not supported. */
|
||||
gomp_global_icv.dyn_var = true;
|
||||
|
||||
int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
|
||||
*gomp_team_num = 0;
|
||||
|
||||
/* Initialize the team arena for optimized memory allocation.
|
||||
The arena has been allocated on the host side, and the address
|
||||
passed in via the kernargs. Each team takes a small slice of it. */
|
||||
|
||||
@@ -44,10 +44,11 @@ omp_get_num_teams (void)
|
||||
return gomp_num_teams_var + 1;
|
||||
}
|
||||
|
||||
int __attribute__ ((__optimize__ ("O2")))
|
||||
int
|
||||
omp_get_team_num (void)
|
||||
{
|
||||
return __builtin_gcn_dim_pos (0);
|
||||
int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
|
||||
return *gomp_team_num;
|
||||
}
|
||||
|
||||
ialias (omp_get_num_teams)
|
||||
|
||||
Reference in New Issue
Block a user