Implement -fexternal-blas64 option.

Libraries like Intel MKL use 64-bit integers in their API, but gfortran
up to now only provides external BLAS for matmul with 32-bit
integers.  This straightforward patch provides a new option -fexternal-blas64
to remedy that situation.

gcc/fortran/ChangeLog:

	* frontend-passes.cc (optimize_namespace): Handle
	flag_external_blas64.
	(call_external_blas): If flag_external_blas is set, use
	gfc_integer_4_kind as the argument kind, gfc_integer_8_kind otherwise.
	* gfortran.h (gfc_integer_8_kind): Define.
	* invoke.texi: Document -fexternal-blas64.
	* lang.opt: Add -fexternal-blas64.
	* lang.opt.urls: Regenerated.
	* options.cc (gfc_post_options): -fexternal-blas is incompatible
	with -fexternal-blas64.

gcc/testsuite/ChangeLog:

	* gfortran.dg/matmul_blas_3.f90: New test.
This commit is contained in:
Thomas Koenig
2025-09-17 18:50:22 +02:00
parent cda451531c
commit 9a68895fee
7 changed files with 64 additions and 16 deletions

View File

@@ -1481,7 +1481,8 @@ optimize_namespace (gfc_namespace *ns)
gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL);
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL);
if (flag_inline_matmul_limit != 0 || flag_external_blas)
if (flag_inline_matmul_limit != 0 || flag_external_blas
|| flag_external_blas64)
{
bool found;
do
@@ -1496,7 +1497,7 @@ optimize_namespace (gfc_namespace *ns)
NULL);
}
if (flag_external_blas)
if (flag_external_blas || flag_external_blas64)
gfc_code_walker (&ns->code, call_external_blas, dummy_expr_callback,
NULL);
@@ -4644,6 +4645,7 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
enum matrix_case m_case;
bool realloc_c;
gfc_code **next_code_point;
int arg_kind;
/* Many of the tests for inline matmul also apply here. */
@@ -4929,13 +4931,20 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
transb, 1);
actual->next = next;
c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1,
gfc_integer_4_kind);
c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2,
gfc_integer_4_kind);
if (flag_external_blas)
arg_kind = gfc_integer_4_kind;
else
{
gcc_assert (flag_external_blas64);
arg_kind = gfc_integer_8_kind;
}
c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1,
arg_kind);
c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2,
arg_kind);
b1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 1,
gfc_integer_4_kind);
arg_kind);
/* Argument M. */
actual = next;
@@ -4975,7 +4984,7 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_a),
1, gfc_integer_4_kind);
1, arg_kind);
actual->next = next;
/* Argument B. */
@@ -4988,7 +4997,7 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_b),
1, gfc_integer_4_kind);
1, arg_kind);
actual->next = next;
/* Argument BETA - set to zero. */
@@ -5012,7 +5021,7 @@ call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (expr1),
1, gfc_integer_4_kind);
1, arg_kind);
actual->next = next;
return 0;

View File

@@ -3672,6 +3672,8 @@ extern int gfc_character_storage_size;
#define gfc_integer_4_kind 4
#define gfc_real_4_kind 4
#define gfc_integer_8_kind 8
/* symbol.cc */
void gfc_clear_new_implicit (void);
bool gfc_add_new_implicit_range (int, int);

View File

@@ -189,7 +189,7 @@ and warnings}.
-fbounds-check -ftail-call-workaround -ftail-call-workaround=@var{n}
-fcheck-array-temporaries
-fcheck=<all|array-temps|bits|bounds|do|mem|pointer|recursion>
-fcoarray=<none|single|lib> -fexternal-blas -ff2c
-fcoarray=<none|single|lib> -fexternal-blas -fexternal-blas64 -ff2c
-ffrontend-loop-interchange -ffrontend-optimize
-finit-character=@var{n} -finit-integer=@var{n} -finit-local-zero
-finit-derived -finit-logical=<true|false>
@@ -2014,13 +2014,26 @@ for some matrix operations like @code{MATMUL}, instead of using our own
algorithms, if the size of the matrices involved is larger than a given
limit (see @option{-fblas-matmul-limit}). This may be profitable if an
optimized vendor BLAS library is available. The BLAS library has
to be specified at link time.
to be specified at link time. This option specifies a BLAS library
with integer arguments of default kind (32 bits). It cannot be used
together with @option{-fexternal-blas64}.
@opindex fexternal-blas64
@item -fexternal-blas64
This option makes @command{gfortran} generate calls to BLAS functions
for some matrix operations like @code{MATMUL}, instead of using our own
algorithms, if the size of the matrices involved is larger than a given
limit (see @option{-fblas-matmul-limit}). This may be profitable if an
optimized vendor BLAS library is available. The BLAS library has
to be specified at link time. This option specifies a BLAS library
with integer arguments of @code{KIND=8} (64 bits). It cannot be used
together with @option{-fexternal-blas}.
@opindex fblas-matmul-limit
@item -fblas-matmul-limit=@var{n}
Only significant when @option{-fexternal-blas} is in effect.
Matrix multiplication of matrices with size larger than (or equal to) @var{n}
is performed by calls to BLAS functions, while others are
Only significant when @option{-fexternal-blas} or @option{-fexternal-blas64}
is in effect. Matrix multiplication of matrices with size larger than or equal
to @var{n} is performed by calls to BLAS functions, while others are
handled by @command{gfortran} internal algorithms. If the matrices
involved are not square, the size comparison is performed using the
geometric mean of the dimensions of the argument and result matrices.

View File

@@ -566,6 +566,10 @@ fexternal-blas
Fortran Var(flag_external_blas)
Specify that an external BLAS library should be used for matmul calls on large-size arrays.
fexternal-blas64
Fortran Var(flag_external_blas64)
Use an external BLAS library with 64-bit indexing for matmul on large-size arrays.
ff2c
Fortran Var(flag_f2c)
Use f2c calling convention.

View File

@@ -295,6 +295,9 @@ LangUrlSuffix_Fortran(gfortran/Developer-Options.html#index-fdump-parse-tree)
fexternal-blas
LangUrlSuffix_Fortran(gfortran/Code-Gen-Options.html#index-fexternal-blas)
fexternal-blas64
LangUrlSuffix_Fortran(gfortran/Code-Gen-Options.html#index-fexternal-blas64)
ff2c
LangUrlSuffix_Fortran(gfortran/Code-Gen-Options.html#index-ff2c)

View File

@@ -504,7 +504,12 @@ gfc_post_options (const char **pfilename)
flag_inline_matmul_limit = 30;
}
/* Optimization implies front end optimization, unless the user
/* We can only have a 32-bit or a 64-bit version of BLAS, not both. */
if (flag_external_blas && flag_external_blas64)
gfc_fatal_error ("32- and 64-bit version of BLAS cannot both be specified");
/* Optimization implies front end optimization, unless the user
specified it directly. */
if (flag_frontend_optimize == -1)

View File

@@ -0,0 +1,12 @@
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fexternal-blas64 -fdump-tree-original" }
! PR 121161 - option for 64-bit BLAS for MATMUL.
! Check this by making sure there is no KIND=4 integer.
! Assign the MATMUL product of the n-by-n matrices a and b to c.
! All three arrays are explicit-shape dummies of default real kind.
subroutine foo(a,b,c,n)
implicit none
! The extent is declared with KIND=8; no other integer entity is
! declared in this routine.
integer(kind=8) :: n
real, dimension(n,n) :: a, b, c
! The only MATMUL in the test; the dg-final directives below expect
! exactly one "sgemm" in the original tree dump.
c = matmul(a,b)
end subroutine foo
! { dg-final { scan-tree-dump-not "integer\\(kind=4\\)" "original" } }
! { dg-final { scan-tree-dump-times "sgemm" 1 "original" } }