mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 03:47:02 -05:00
gcn: Add missing GFX9_4_GENERIC, OpenMP context-selector update
The definition for gfx942 and gfx950 missed the GFX9_4_GENERIC
family flag.
For OpenMP context selectors: The t-omp-device file missed the
generic selectors.
Additionally, there is now a note in the OpenMP documentation that
there is a one-to-one match for ISA names, ignoring any compatibility.
For instance, for Nvidia GPUs 'isa("sm_70")' is only true when compiling
for 'sm_70', even though sm < 7.0 code also runs on sm_70 hardware.
And, for AMD GPUs, 'gfx9-4-generic' is not matched when compiling for
'gfx942' (even though such generic code runs on gfx942), nor is 'gfx942'
matched when compiling for gfx9-4-generic (although all gfx9-4-generic
code runs on gfx942).
gcc/ChangeLog:
* config/gcn/gcn-devices.def (gfx942, gfx950): Set generic name
to GFX9_4_GENERIC.
* config/gcn/t-omp-device: Include generic names for OpenMP's
ISA trait.
libgomp/ChangeLog:
* libgomp.texi (OpenMP Context Selectors): Add note that there is
currently an exact match between ISA and compilation, ignoring
compatibilities in both ways.
* testsuite/libgomp.c/declare-variant-4.h: Add missing variant
functions for specific and generic AMD GPUs.
* testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c: New test.
* testsuite/libgomp.c/declare-variant-4-gfx11-generic.c: New test.
* testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c: New test.
* testsuite/libgomp.c/declare-variant-4-gfx9-generic.c: New test.
This commit is contained in:
@@ -179,7 +179,7 @@ GCN_DEVICE(gfx942, GFX942, 0x4c, ISA_CDNA3,
|
||||
/* Max ISA VGPRs */ 512,
|
||||
/* Generic code obj version */ 0, /* non-generic */
|
||||
/* Architecture Family */ GFX9,
|
||||
/* Generic Name */ NONE
|
||||
/* Generic Name */ GFX9_4_GENERIC
|
||||
)
|
||||
|
||||
GCN_DEVICE(gfx950, GFX950, 0x4f, ISA_CDNA3,
|
||||
@@ -190,7 +190,7 @@ GCN_DEVICE(gfx950, GFX950, 0x4f, ISA_CDNA3,
|
||||
/* Max ISA VGPRs */ 512,
|
||||
/* Generic code obj version */ 0, /* non-generic */
|
||||
/* Architecture Family */ GFX9,
|
||||
/* Generic Name */ NONE
|
||||
/* Generic Name */ GFX9_4_GENERIC
|
||||
)
|
||||
|
||||
GCN_DEVICE(gfx9-generic, GFX9_GENERIC, 0x051, ISA_GCN5,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
omp-device-properties-gcn: $(srcdir)/config/gcn/gcn-devices.def
|
||||
echo kind: gpu > $@
|
||||
echo arch: amdgcn gcn >> $@
|
||||
echo isa: `grep -o -P '(?<=GCN_DEVICE\()gfx[0-9a-f]+(?=,)' $<` >> $@
|
||||
echo isa: `grep -o -P '(?<=GCN_DEVICE\()gfx[-0-9a-f]+(|-generic)(?=,)' $<` >> $@
|
||||
|
||||
@@ -6824,6 +6824,13 @@ the following traits are supported in addition; while OpenMP is supported
|
||||
on more architectures, GCC currently does not match any @code{arch} or
|
||||
@code{isa} traits for those.
|
||||
|
||||
Note that for AMD GCN and Nvidia PTX, the @code{isa} is currently an
|
||||
exact match between the compiled-for ISA and the matching
|
||||
@code{isa} trait value. For instance, when compiling for @code{gfx942},
|
||||
the @code{isa} trait value @code{gfx9-4-generic} is not matched and,
|
||||
likewise, @code{gfx942} is not matched when compiling for its generic
|
||||
architecture.
|
||||
|
||||
@multitable @columnfractions .65 .30
|
||||
@headitem @code{arch} @tab @code{isa}
|
||||
@item @code{x86}, @code{x86_64}, @code{i386}, @code{i486},
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
/* { dg-do link { target { offload_target_amdgcn } } } */
|
||||
/* { dg-additional-options -foffload=amdgcn-amdhsa } */
|
||||
/* { dg-additional-options -foffload=-march=gfx10-3-generic } */
|
||||
/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
|
||||
|
||||
#include "declare-variant-4.h"
|
||||
|
||||
/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx10_3_generic \\(\\);" "optimized" } } */
|
||||
@@ -0,0 +1,8 @@
|
||||
/* { dg-do link { target { offload_target_amdgcn } } } */
|
||||
/* { dg-additional-options -foffload=amdgcn-amdhsa } */
|
||||
/* { dg-additional-options -foffload=-march=gfx11-generic } */
|
||||
/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
|
||||
|
||||
#include "declare-variant-4.h"
|
||||
|
||||
/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx11_generic \\(\\);" "optimized" } } */
|
||||
@@ -0,0 +1,8 @@
|
||||
/* { dg-do link { target { offload_target_amdgcn } } } */
|
||||
/* { dg-additional-options -foffload=amdgcn-amdhsa } */
|
||||
/* { dg-additional-options -foffload=-march=gfx9-4-generic } */
|
||||
/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
|
||||
|
||||
#include "declare-variant-4.h"
|
||||
|
||||
/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx9_4_generic \\(\\);" "optimized" } } */
|
||||
@@ -0,0 +1,8 @@
|
||||
/* { dg-do link { target { offload_target_amdgcn } } } */
|
||||
/* { dg-additional-options -foffload=amdgcn-amdhsa } */
|
||||
/* { dg-additional-options -foffload=-march=gfx9-generic } */
|
||||
/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
|
||||
|
||||
#include "declare-variant-4.h"
|
||||
|
||||
/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx9_generic \\(\\);" "optimized" } } */
|
||||
@@ -7,6 +7,20 @@ gfx900 (void)
|
||||
return 0x900;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx902 (void)
|
||||
{
|
||||
return 0x902;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx904 (void)
|
||||
{
|
||||
return 0x904;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx906 (void)
|
||||
@@ -21,6 +35,13 @@ gfx908 (void)
|
||||
return 0x908;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx909 (void)
|
||||
{
|
||||
return 0x909;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx90a (void)
|
||||
@@ -42,6 +63,13 @@ gfx942 (void)
|
||||
return 0x942;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx950 (void)
|
||||
{
|
||||
return 0x950;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1030 (void)
|
||||
@@ -49,6 +77,41 @@ gfx1030 (void)
|
||||
return 0x1030;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1031 (void)
|
||||
{
|
||||
return 0x1031;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1032 (void)
|
||||
{
|
||||
return 0x1032;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1033 (void)
|
||||
{
|
||||
return 0x1033;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1034 (void)
|
||||
{
|
||||
return 0x1034;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1035 (void)
|
||||
{
|
||||
return 0x1035;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1036 (void)
|
||||
@@ -63,6 +126,20 @@ gfx1100 (void)
|
||||
return 0x1100;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1101 (void)
|
||||
{
|
||||
return 0x1101;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1102 (void)
|
||||
{
|
||||
return 0x1102;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1103 (void)
|
||||
@@ -70,16 +147,92 @@ gfx1103 (void)
|
||||
return 0x1103;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1150 (void)
|
||||
{
|
||||
return 0x1150;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1151 (void)
|
||||
{
|
||||
return 0x1151;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1152 (void)
|
||||
{
|
||||
return 0x1152;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx1153 (void)
|
||||
{
|
||||
return 0x1153;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx9_generic (void)
|
||||
{
|
||||
return 0x90ff;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx9_4_generic (void)
|
||||
{
|
||||
return 0x94ff;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx10_3_generic (void)
|
||||
{
|
||||
return 0x103ff;
|
||||
}
|
||||
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
gfx11_generic (void)
|
||||
{
|
||||
return 0x110ff;
|
||||
}
|
||||
|
||||
|
||||
#pragma omp declare variant(gfx900) match(device = {isa("gfx900")})
|
||||
#pragma omp declare variant(gfx902) match(device = {isa("gfx902")})
|
||||
#pragma omp declare variant(gfx904) match(device = {isa("gfx904")})
|
||||
#pragma omp declare variant(gfx906) match(device = {isa("gfx906")})
|
||||
#pragma omp declare variant(gfx908) match(device = {isa("gfx908")})
|
||||
#pragma omp declare variant(gfx909) match(device = {isa("gfx909")})
|
||||
#pragma omp declare variant(gfx90a) match(device = {isa("gfx90a")})
|
||||
#pragma omp declare variant(gfx90c) match(device = {isa("gfx90c")})
|
||||
#pragma omp declare variant(gfx942) match(device = {isa("gfx942")})
|
||||
#pragma omp declare variant(gfx950) match(device = {isa("gfx950")})
|
||||
#pragma omp declare variant(gfx1030) match(device = {isa("gfx1030")})
|
||||
#pragma omp declare variant(gfx1031) match(device = {isa("gfx1031")})
|
||||
#pragma omp declare variant(gfx1032) match(device = {isa("gfx1032")})
|
||||
#pragma omp declare variant(gfx1033) match(device = {isa("gfx1033")})
|
||||
#pragma omp declare variant(gfx1034) match(device = {isa("gfx1034")})
|
||||
#pragma omp declare variant(gfx1035) match(device = {isa("gfx1035")})
|
||||
#pragma omp declare variant(gfx1036) match(device = {isa("gfx1036")})
|
||||
#pragma omp declare variant(gfx1100) match(device = {isa("gfx1100")})
|
||||
#pragma omp declare variant(gfx1101) match(device = {isa("gfx1101")})
|
||||
#pragma omp declare variant(gfx1102) match(device = {isa("gfx1102")})
|
||||
#pragma omp declare variant(gfx1103) match(device = {isa("gfx1103")})
|
||||
#pragma omp declare variant(gfx1150) match(device = {isa("gfx1150")})
|
||||
#pragma omp declare variant(gfx1151) match(device = {isa("gfx1151")})
|
||||
#pragma omp declare variant(gfx1152) match(device = {isa("gfx1152")})
|
||||
#pragma omp declare variant(gfx1153) match(device = {isa("gfx1153")})
|
||||
#pragma omp declare variant(gfx9_generic) match(device = {isa("gfx9-generic")})
|
||||
#pragma omp declare variant(gfx9_4_generic) match(device = {isa("gfx9-4-generic")})
|
||||
#pragma omp declare variant(gfx10_3_generic) match(device = {isa("gfx10-3-generic")})
|
||||
#pragma omp declare variant(gfx11_generic) match(device = {isa("gfx11-generic")})
|
||||
__attribute__ ((noipa))
|
||||
int
|
||||
f (void)
|
||||
|
||||
Reference in New Issue
Block a user