Files
gcc-reflection/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
Andrew Stubbs 3b8d9d579c libgomp, nvptx: Cuda pinned memory
Use CUDA to pin memory, instead of Linux mlock, when available.

There are two advantages: firstly, this gives a significant speed boost for
NVPTX offloading, and secondly, it side-steps the usual OS ulimit/rlimit
setting.

The design adds a device independent plugin API for allocating pinned memory,
and then implements it for NVPTX.  At present, the other supported devices do
not have equivalent capabilities (or requirements).

libgomp/ChangeLog:

	* config/linux/allocator.c: Include assert.h.
	(using_device_for_page_locked): New variable.
	(linux_memspace_alloc): Add init0 parameter. Support device pinning.
	(linux_memspace_calloc): Set init0 to true.
	(linux_memspace_free): Support device pinning.
	(linux_memspace_realloc): Support device pinning.
	(MEMSPACE_ALLOC): Set init0 to false.
	* libgomp-plugin.h
	(GOMP_OFFLOAD_page_locked_host_alloc): New prototype.
	(GOMP_OFFLOAD_page_locked_host_free): Likewise.
	* libgomp.h (gomp_page_locked_host_alloc): Likewise.
	(gomp_page_locked_host_free): Likewise.
	(struct gomp_device_descr): Add page_locked_host_alloc_func and
	page_locked_host_free_func.
	* libgomp.texi: Adjust the docs for the pinned trait.
	* plugin/plugin-nvptx.c
	(GOMP_OFFLOAD_page_locked_host_alloc): New function.
	(GOMP_OFFLOAD_page_locked_host_free): Likewise.
	* target.c (device_for_page_locked): New variable.
	(get_device_for_page_locked): New function.
	(gomp_page_locked_host_alloc): Likewise.
	(gomp_page_locked_host_free): Likewise.
	(gomp_load_plugin_for_device): Add page_locked_host_alloc and
	page_locked_host_free.
	* testsuite/libgomp.c/alloc-pinned-1.c: Change expectations for NVPTX
	devices.
	* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-3.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-4.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-5.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-6.c: Likewise.

Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
2025-10-23 11:08:06 +00:00

129 lines
2.4 KiB
C

/* { dg-do run } */
/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
/* Test that ompx_gnu_pinned_mem_alloc fails correctly. */
#include <stdio.h>
#include <stdlib.h>
#ifdef __linux__
#include <sys/types.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
/* Return the integer that follows "VmLck:" in /proc/<pid>/status for the
   current process.  Aborts if the path cannot be formatted, the file cannot
   be opened, or no line yields a parsable "VmLck:" value.  */
int
get_pinned_mem ()
{
  int pid = getpid ();
  char buf[100];

  /* Bounded formatting; a truncated path would name the wrong file.  */
  int len = snprintf (buf, sizeof buf, "/proc/%d/status", pid);
  if (len < 0 || (size_t) len >= sizeof buf)
    abort ();

  FILE *proc = fopen (buf, "r");
  if (!proc)
    abort ();

  /* The path is no longer needed, so the buffer is reused for the lines.  */
  while (fgets (buf, sizeof buf, proc))
    {
      int val;
      /* sscanf returns EOF (non-zero) on input failure, so a plain truth
	 test could accept a line without storing VAL.  Require exactly one
	 successful conversion.  */
      if (sscanf (buf, "VmLck: %d", &val) == 1)
	{
	  fclose (proc);
	  return val;
	}
    }

  /* No "VmLck:" line was found.  */
  abort ();
}
/* Set the soft RLIMIT_MEMLOCK limit to SIZE, capped at the current hard
   limit.  Aborts if the limit cannot be read or written.  */
void
set_pin_limit (int size)
{
  struct rlimit rl;

  if (getrlimit (RLIMIT_MEMLOCK, &rl) != 0)
    abort ();

  /* The soft limit may not exceed the hard limit.  */
  if (rl.rlim_max < size)
    rl.rlim_cur = rl.rlim_max;
  else
    rl.rlim_cur = size;

  if (setrlimit (RLIMIT_MEMLOCK, &rl) != 0)
    abort ();
}
#else
/* The real page size is unknown here, so use a large stand-in value.
   Parenthesized so the expansion stays a single operand next to operators
   that bind tighter than '*' would suggest (e.g. 'x % PAGE_SIZE').  */
#define PAGE_SIZE (10000 * 1024) /* unknown */

/* Fallback for non-Linux hosts: nothing is queried, so always report 0.  */
int
get_pinned_mem ()
{
  return 0;
}

/* Fallback for non-Linux hosts: no limit is changed; SIZE is ignored.  */
void
set_pin_limit (int size)
{
  (void) size;
}
#endif
#include <omp.h>
/* Lower the locked-memory limit to two pages, then request pinned memory
   through omp_alloc, omp_calloc and omp_realloc.  With OFFLOAD_DEVICE_NVPTX
   defined every request must succeed; otherwise every request must fail.
   In both configurations get_pinned_mem must report 0 before and after.  */
int
main ()
{
#ifdef OFFLOAD_DEVICE_NVPTX
  /* The OS ulimit does not affect memory locked via CUDA for NVPTX devices,
     so ask for a large amount.  */
  const int alloc_bytes = 40 * 1024 * 1024;
  const int want_success = 1;
#else
  /* Four pages: deliberately more than the two-page limit set below.  */
  const int alloc_bytes = PAGE_SIZE * 4;
  const int want_success = 0;
#endif
  const int lock_limit = PAGE_SIZE * 2;

  /* Make the limit smaller than the allocation.  */
  set_pin_limit (lock_limit);

  /* Sanity check: nothing is locked yet.  */
  if (get_pinned_mem () != 0)
    abort ();

  /* Plain allocation from the pinned allocator.  */
  void *pinned = omp_alloc (alloc_bytes, ompx_gnu_pinned_mem_alloc);
  int got = (pinned != NULL);
  if (got != want_success)
    abort ();

  /* Zero-initialized allocation from the pinned allocator.  */
  pinned = omp_calloc (1, alloc_bytes, ompx_gnu_pinned_mem_alloc);
  got = (pinned != NULL);
  if (got != want_success)
    abort ();

  /* Reallocation from the default allocator into the pinned allocator.  */
  void *plain = omp_alloc (alloc_bytes, omp_default_mem_alloc);
  pinned = omp_realloc (plain, alloc_bytes, ompx_gnu_pinned_mem_alloc,
			omp_default_mem_alloc);
  if (!plain)
    abort ();
  if (want_success)
    {
      /* Must have produced a new, distinct block.  */
      if (!pinned || pinned == plain)
	abort ();
    }
  else if (pinned)
    /* Must have failed to reallocate.  */
    abort ();

  /* No memory should show up as locked.  */
  int locked = get_pinned_mem ();
  if (locked != 0)
    abort ();

  return 0;
}