mirror of
https://gcc.gnu.org/git/gcc.git
synced 2026-02-22 20:01:22 -05:00
256 lines
7.6 KiB
C
256 lines
7.6 KiB
C
/* Copyright (C) 2022-2026 Free Software Foundation, Inc.
|
|
Contributed by Jakub Jelinek <jakub@redhat.com>.
|
|
|
|
This file is part of the GNU Offloading and Multi Processing Library
|
|
(libgomp).
|
|
|
|
Libgomp is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3, or (at your option)
|
|
any later version.
|
|
|
|
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
more details.
|
|
|
|
Under Section 7 of GPL version 3, you are granted additional
|
|
permissions described in the GCC Runtime Library Exception, version
|
|
3.1, as published by the Free Software Foundation.
|
|
|
|
You should have received a copy of the GNU General Public License and
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* This file contains wrappers for the system allocation routines.  Most
   places in the OpenMP API do not make any provision for failure, so in
   general we cannot allow memory allocation to fail.  */
|
|
|
|
#define _GNU_SOURCE
|
|
#include "libgomp.h"
|
|
#if defined(PLUGIN_SUPPORT) && defined(LIBGOMP_USE_PTHREADS)
|
|
#define LIBGOMP_USE_MEMKIND
|
|
#define LIBGOMP_USE_LIBNUMA
|
|
#endif
|
|
|
|
/* Implement malloc routines that can handle pinned memory on Linux.

   Given that pinned memory is typically used to help host <-> device memory
   transfers, we attempt to allocate such memory using a device (really:
   libgomp plugin), but fall back to mmap plus mlock if no suitable device is
   available.

   It's possible to use mlock on any heap memory, but using munlock is
   problematic if there are multiple pinned allocations on the same page.
   Tracking all that manually would be possible, but adds overhead.  This may
   be worth it if there are a lot of small allocations getting pinned, but
   this seems less likely in an HPC application.

   Instead we optimize for large pinned allocations, and use mmap to ensure
   that two pinned allocations don't share the same page.  This also means
   that large allocations don't pin extra pages by being poorly aligned.  */
|
|
|
|
#define _GNU_SOURCE
|
|
#include <sys/mman.h>
|
|
#include <unistd.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include "libgomp.h"
|
|
#ifdef HAVE_INTTYPES_H
|
|
# include <inttypes.h> /* For PRIu64. */
|
|
#endif
|
|
|
|
static int using_device_for_page_locked
|
|
= /* uninitialized */ -1;
|
|
|
|
|
|
static gomp_simple_alloc_ctx_p pin_ctx = NULL;
|
|
static pthread_once_t ctxlock = PTHREAD_ONCE_INIT;
|
|
|
|
static void
|
|
linux_init_pin_ctx ()
|
|
{
|
|
pin_ctx = gomp_simple_alloc_init_context ();
|
|
}
|
|
|
|
static void *
|
|
linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin,
|
|
bool init0)
|
|
{
|
|
void *addr = NULL;
|
|
|
|
if (memspace == ompx_gnu_managed_mem_space)
|
|
addr = gomp_managed_alloc (size);
|
|
else if (pin)
|
|
{
|
|
int using_device = __atomic_load_n (&using_device_for_page_locked,
|
|
MEMMODEL_RELAXED);
|
|
if (using_device != 0)
|
|
{
|
|
using_device = gomp_page_locked_host_alloc (&addr, size);
|
|
int using_device_old
|
|
= __atomic_exchange_n (&using_device_for_page_locked,
|
|
using_device, MEMMODEL_RELAXED);
|
|
assert (using_device_old == -1
|
|
/* We shouldn't have concurrently changed our mind. */
|
|
|| using_device_old == using_device);
|
|
}
|
|
if (using_device == 0)
|
|
{
|
|
static int pagesize = 0;
|
|
static void *addrhint = NULL;
|
|
|
|
if (!pagesize)
|
|
pagesize = sysconf(_SC_PAGE_SIZE);
|
|
|
|
while (1)
|
|
{
|
|
addr = gomp_simple_alloc (pin_ctx, size);
|
|
if (addr)
|
|
break;
|
|
|
|
/* Round up to a whole page. */
|
|
size_t misalignment = size % pagesize;
|
|
size_t mmap_size = (misalignment > 0
|
|
? size + pagesize - misalignment
|
|
: size);
|
|
void *newpage = mmap (addrhint, mmap_size, PROT_READ | PROT_WRITE,
|
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
if (newpage == MAP_FAILED)
|
|
break;
|
|
else
|
|
{
|
|
if (mlock (newpage, size))
|
|
{
|
|
#ifdef HAVE_INTTYPES_H
|
|
gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes"
|
|
" of memory (ulimit too low?)\n",
|
|
(uint64_t) size);
|
|
#else
|
|
gomp_debug (0, "libgomp: failed to pin %lu bytes of"
|
|
" memory (ulimit too low?)\n",
|
|
(unsigned long) size);
|
|
#endif
|
|
munmap (newpage, size);
|
|
break;
|
|
}
|
|
|
|
addrhint = newpage + mmap_size;
|
|
|
|
pthread_once (&ctxlock, linux_init_pin_ctx);
|
|
gomp_simple_alloc_register_memory (pin_ctx, newpage,
|
|
mmap_size);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
addr = malloc (size);
|
|
|
|
if (addr && init0)
|
|
memset (addr, 0, size);
|
|
|
|
return addr;
|
|
}
|
|
|
|
static void *
|
|
linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
|
|
{
|
|
if (memspace == ompx_gnu_managed_mem_space)
|
|
{
|
|
void *ret = gomp_managed_alloc (size);
|
|
if (!ret)
|
|
return NULL;
|
|
memset (ret, 0, size);
|
|
return ret;
|
|
}
|
|
else if (pin)
|
|
return linux_memspace_alloc (memspace, size, pin, true);
|
|
else
|
|
return calloc (1, size);
|
|
}
|
|
|
|
static void
|
|
linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size,
|
|
int pin)
|
|
{
|
|
if (memspace == ompx_gnu_managed_mem_space)
|
|
gomp_managed_free (addr);
|
|
else if (pin)
|
|
{
|
|
int using_device
|
|
= __atomic_load_n (&using_device_for_page_locked,
|
|
MEMMODEL_RELAXED);
|
|
if (using_device == 1)
|
|
gomp_page_locked_host_free (addr);
|
|
else
|
|
/* The "simple" allocator does not (currently) munmap locked pages
|
|
(meaning that the number of locked pages never decreases), but it
|
|
can reuse the freed memory in subsequent gomp_simple_alloc calls. */
|
|
gomp_simple_free (pin_ctx, addr);
|
|
}
|
|
else
|
|
free (addr);
|
|
}
|
|
|
|
static void *
|
|
linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
|
|
size_t oldsize, size_t size, int oldpin, int pin)
|
|
{
|
|
if (memspace == ompx_gnu_managed_mem_space)
|
|
/* Realloc is not implemented for device Managed Memory. */
|
|
;
|
|
else if (oldpin && pin)
|
|
{
|
|
int using_device
|
|
= __atomic_load_n (&using_device_for_page_locked,
|
|
MEMMODEL_RELAXED);
|
|
/* The device plugin API does not support realloc,
|
|
but the gomp_simple_alloc allocator does. */
|
|
if (using_device == 0)
|
|
{
|
|
/* This can fail if there is insufficient pinned memory free. */
|
|
void *newaddr = gomp_simple_realloc (pin_ctx, addr, size);
|
|
if (newaddr)
|
|
return newaddr;
|
|
}
|
|
}
|
|
else if (oldpin || pin)
|
|
/* Moving from pinned to unpinned memory cannot be done in-place. */
|
|
;
|
|
else
|
|
return realloc (addr, size);
|
|
|
|
/* In-place reallocation failed. Fall back to copy. */
|
|
void *newaddr = linux_memspace_alloc (memspace, size, pin, false);
|
|
if (newaddr)
|
|
{
|
|
memcpy (newaddr, addr, oldsize < size ? oldsize : size);
|
|
linux_memspace_free (memspace, addr, oldsize, oldpin);
|
|
}
|
|
|
|
return newaddr;
|
|
}
|
|
|
|
static int
|
|
linux_memspace_validate (omp_memspace_handle_t, unsigned, int)
|
|
{
|
|
/* Everything should be accepted on Linux, including pinning and
|
|
non-standard memspaces. */
|
|
return 1;
|
|
}
|
|
|
|
/* Map the MEMSPACE_* hook macros onto the Linux implementations above.
   They are defined ahead of the '#include "../../allocator.c"' that follows,
   which presumably is where they are expanded.  MEMSPACE_ALLOC never
   requests zero-initialization.  */

#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
  linux_memspace_alloc (MEMSPACE, SIZE, PIN, false)

#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
  linux_memspace_calloc (MEMSPACE, SIZE, PIN)

#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
  linux_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN)

#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \
  linux_memspace_free (MEMSPACE, ADDR, SIZE, PIN)

#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS, PIN) \
  linux_memspace_validate (MEMSPACE, ACCESS, PIN)
|
|
|
|
#include "../../allocator.c"
|