mirror of
https://forge.sourceware.org/marek/gcc.git
synced 2026-02-22 03:47:02 -05:00
Use Cuda to pin memory, instead of Linux mlock, when available. There are two advantages: firstly, this gives a significant speed boost for NVPTX offloading, and secondly, it side-steps the usual OS ulimit/rlimit setting. The design adds a device independent plugin API for allocating pinned memory, and then implements it for NVPTX. At present, the other supported devices do not have equivalent capabilities (or requirements). libgomp/ChangeLog: * config/linux/allocator.c: Include assert.h. (using_device_for_page_locked): New variable. (linux_memspace_alloc): Add init0 parameter. Support device pinning. (linux_memspace_calloc): Set init0 to true. (linux_memspace_free): Support device pinning. (linux_memspace_realloc): Support device pinning. (MEMSPACE_ALLOC): Set init0 to false. * libgomp-plugin.h (GOMP_OFFLOAD_page_locked_host_alloc): New prototype. (GOMP_OFFLOAD_page_locked_host_free): Likewise. * libgomp.h (gomp_page_locked_host_alloc): Likewise. (gomp_page_locked_host_free): Likewise. (struct gomp_device_descr): Add page_locked_host_alloc_func and page_locked_host_free_func. * libgomp.texi: Adjust the docs for the pinned trait. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_page_locked_host_alloc): New function. (GOMP_OFFLOAD_page_locked_host_free): Likewise. * target.c (device_for_page_locked): New variable. (get_device_for_page_locked): New function. (gomp_page_locked_host_alloc): Likewise. (gomp_page_locked_host_free): Likewise. (gomp_load_plugin_for_device): Add page_locked_host_alloc and page_locked_host_free. * testsuite/libgomp.c/alloc-pinned-1.c: Change expectations for NVPTX devices. * testsuite/libgomp.c/alloc-pinned-2.c: Likewise. * testsuite/libgomp.c/alloc-pinned-3.c: Likewise. * testsuite/libgomp.c/alloc-pinned-4.c: Likewise. * testsuite/libgomp.c/alloc-pinned-5.c: Likewise. * testsuite/libgomp.c/alloc-pinned-6.c: Likewise. Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
129 lines
2.4 KiB
C
129 lines
2.4 KiB
C
/* { dg-do run } */
|
|
/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
|
|
|
|
/* Test that ompx_gnu_pinned_mem_alloc fails correctly. */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#ifdef __linux__
|
|
#include <sys/types.h>
|
|
#include <unistd.h>
|
|
|
|
#include <sys/mman.h>
|
|
#include <sys/resource.h>
|
|
|
|
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
|
|
|
|
/* Return the number found on the "VmLck:" line of /proc/<pid>/status for
   the calling process (the kernel documents this figure as the amount of
   locked memory, in kB; see proc(5)).

   Aborts if the status file cannot be opened or if no "VmLck:" line
   with a parsable integer is found.  */

int
get_pinned_mem ()
{
  char buf[100];

  /* Build the path with a bounded write and refuse a truncated result.  */
  int len = snprintf (buf, sizeof buf, "/proc/%d/status", (int) getpid ());
  if (len < 0 || (size_t) len >= sizeof buf)
    abort ();

  FILE *proc = fopen (buf, "r");
  if (!proc)
    abort ();

  /* From here on the path buffer is reused as the line buffer.  */
  while (fgets (buf, sizeof buf, proc))
    {
      int val;

      /* Require exactly one successful conversion.  sscanf returns EOF,
         which is non-zero, on an input failure before the first
         conversion; a plain truth test would then return VAL
         uninitialized.  */
      if (sscanf (buf, "VmLck: %d", &val) == 1)
        {
          fclose (proc);
          return val;
        }
    }

  /* Reached end of file without seeing the line we need.  */
  abort ();
}
|
|
|
|
/* Set the soft RLIMIT_MEMLOCK limit of the calling process to SIZE,
   clamped so that it never exceeds the current hard limit.  The hard
   limit itself is written back unchanged.

   Aborts if the limit cannot be read or cannot be updated.  */

void
set_pin_limit (int size)
{
  struct rlimit limit;
  if (getrlimit (RLIMIT_MEMLOCK, &limit))
    abort ();

  /* Convert SIZE to rlim_t once, so the comparison and the assignment
     below both operate on the same value.  */
  rlim_t wanted = size;
  limit.rlim_cur = (limit.rlim_max < wanted ? limit.rlim_max : wanted);

  if (setrlimit (RLIMIT_MEMLOCK, &limit))
    abort ();
}
|
|
#else
|
|
/* The real page size is unknown here, so use a large stand-in value.
   The expansion is parenthesized so that the macro behaves as a single
   operand inside any larger expression (e.g. 'x % PAGE_SIZE').  */
#define PAGE_SIZE (10000 * 1024)
|
|
|
|
/* Fallback used when __linux__ is not defined: there is no status file
   to consult, so always report zero.  */

int
get_pinned_mem ()
{
  return 0;
}
|
|
|
|
/* Fallback used when __linux__ is not defined: deliberately does
   nothing, so SIZE is ignored.  */

void
set_pin_limit (int size)
{
  (void) size;
}
|
|
#endif
|
|
|
|
#include <omp.h>
|
|
|
|
int
|
|
main ()
|
|
{
|
|
#ifdef OFFLOAD_DEVICE_NVPTX
|
|
/* Go big or go home.
|
|
The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
|
|
const int SIZE = 40 * 1024 * 1024;
|
|
#else
|
|
/* Allocate at least a page each time, but stay within the ulimit. */
|
|
const int SIZE = PAGE_SIZE * 4;
|
|
#endif
|
|
const int PIN_LIMIT = PAGE_SIZE*2;
|
|
|
|
/* Ensure that the limit is smaller than the allocation. */
|
|
set_pin_limit (PIN_LIMIT);
|
|
|
|
// Sanity check
|
|
if (get_pinned_mem () != 0)
|
|
abort ();
|
|
|
|
void *p = omp_alloc (SIZE, ompx_gnu_pinned_mem_alloc);
|
|
#ifdef OFFLOAD_DEVICE_NVPTX
|
|
// Doesn't care about 'set_pin_limit'.
|
|
if (!p)
|
|
abort ();
|
|
#else
|
|
// Should fail
|
|
if (p)
|
|
abort ();
|
|
#endif
|
|
|
|
p = omp_calloc (1, SIZE, ompx_gnu_pinned_mem_alloc);
|
|
#ifdef OFFLOAD_DEVICE_NVPTX
|
|
// Doesn't care about 'set_pin_limit'.
|
|
if (!p)
|
|
abort ();
|
|
#else
|
|
// Should fail
|
|
if (p)
|
|
abort ();
|
|
#endif
|
|
|
|
void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
|
|
p = omp_realloc (notpinned, SIZE, ompx_gnu_pinned_mem_alloc,
|
|
omp_default_mem_alloc);
|
|
#ifdef OFFLOAD_DEVICE_NVPTX
|
|
// Doesn't care about 'set_pin_limit'; does reallocate.
|
|
if (!notpinned || !p || p == notpinned)
|
|
abort ();
|
|
#else
|
|
// Should fail to realloc
|
|
if (!notpinned || p)
|
|
abort ();
|
|
#endif
|
|
|
|
// No memory should have been pinned
|
|
int amount = get_pinned_mem ();
|
|
if (amount != 0)
|
|
abort ();
|
|
|
|
return 0;
|
|
}
|