Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:26
erlang
2442-Add-an-option-to-map-mseg-pages-with-THP.p...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 2442-Add-an-option-to-map-mseg-pages-with-THP.patch of Package erlang
From aebc8f756b946baa92980af953efb7f1c7878547 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Wed, 20 Dec 2023 15:02:55 -0800 Subject: [PATCH 2/2] Add an option to map mseg pages with THP In order for mseg pages to be reliably mapped with pages lager than the default page size, the mapping must start and end at a multiple of the larger page size. To do this, this change adds an abstraction for performing a memory-mapping with a specified alignment. On an operating systems like SunOS 5.9 and later, this is done by passing some extra flags to mmap(2). On operating systems without such a capability, we must do this manually by over-allocating and freeing the excess. The logic in this change only affects super carrier allocations but it can be generalized to other mseg allocations. --- erts/doc/references/erts_alloc.md | 6 ++ erts/emulator/beam/erl_alloc.c | 17 ++++ erts/emulator/sys/common/erl_mmap.c | 140 +++++++++++++++++++++------- erts/emulator/sys/common/erl_mmap.h | 5 +- erts/etc/common/erlexec.c | 1 + 5 files changed, 132 insertions(+), 37 deletions(-) diff --git a/erts/doc/src/erts_alloc.xml b/erts/doc/src/erts_alloc.xml index e7199daa4f..2cdc48fa45 100644 --- a/erts/doc/src/erts_alloc.xml +++ b/erts/doc/src/erts_alloc.xml @@ -338,6 +338,14 @@ requested size with more than relative maximum cache bad fit percent of the requested size. Defaults to <c>20</c>.</p> </item> + <tag><marker id="MMlp"/><c><![CDATA[+MMlp on|off]]></c></tag> + <item> + <p>Enables the use of large pages, sometimes known + as huge pages or super pages, for mapping memory segment allocations. Large + pages improve performance by reducing TLB pressure but they can sometimes be + costly to allocate or can only be allocated on a best-effort basis. Currently + only affects memory segments allocated in a super carrier. Defaults to <c>off</c>.</p> + </item> <tag><marker id="MMsco"/><c><![CDATA[+MMsco true|false]]></c></tag> <item> <p>Sets <seecref marker="#MMscs">super carrier</seecref> only flag. diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 703d7bdc28..9fdba3e9a7 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -1650,6 +1650,23 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init) #endif get_amount_value(argv[i]+9, argv, &i); } + else if (has_prefix("lp", argv[i]+3)) { + char *param_end = argv[i]+5; + char *value = get_value(param_end, argv, &i); + if (sys_strcmp(value, "on") == 0) { +#if HAVE_ERTS_MSEG + init->mseg.dflt_mmap.lp = 1; + init->mseg.literal_mmap.lp = 1; +#endif + } else if (sys_strcmp(value, "off") == 0) { +#if HAVE_ERTS_MSEG + init->mseg.dflt_mmap.lp = 0; + init->mseg.literal_mmap.lp = 0; +#endif + } else { + bad_value(param, param_end, value); + } + } else { bad_param(param, param+2); } diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c index b45146a783..7a4919c65b 100644 --- a/erts/emulator/sys/common/erl_mmap.c +++ b/erts/emulator/sys/common/erl_mmap.c @@ -1272,18 +1272,13 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) #endif static ERTS_INLINE void * -os_mmap(void *hint_ptr, UWord size, int try_superalign) +os_mmap(void *hint_ptr, UWord size) { #if HAVE_MMAP void *res; -#ifdef MAP_ALIGN - if (try_superalign) - res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT, - ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0); - else -#endif - res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, - ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0); + + res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, + ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0); if (res == MAP_FAILED) return NULL; return res; @@ -1315,6 +1310,84 @@ os_munmap(void *ptr, UWord size) #endif } +#define ALIGN_UP(x, a) ((void*)((((UWord)(x)) + ((a) - 1)) & ~((a) - 1))) +#define IS_ALIGNED(x, a) ((((UWord)(x)) & ((a) - 1)) == 0) + +/* + * Just like os_mmap, but ensures that mapping is a multiple of the + * specified alignment. Alignment must be a power-of-2 multiple of + * the page size in bytes. + */ +static ERTS_INLINE void * +os_mmap_aligned(UWord size, UWord alignment) +{ + char *result; +#ifdef MAP_ALIGN + + /* + * On an operating systems that support MAP_ALIGN (SunOS >=5.9) we + * can directly ask mmap(2) to align the virtual memory mapping. + */ + result = mmap((void *) alignment, size, ERTS_MMAP_PROT, + ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0); + if (result == MAP_FAILED) { + return NULL; + } +#else + UWord diff; + + ASSERT((size % sys_page_size) == 0); + ASSERT((alignment % sys_page_size) == 0); + + /* + * Allocate and test for alignment. It is possible 1) the + * operating aligned the allocation based its length or 2) the + * previous allocation aligned the next available address. + */ + if ((result = os_mmap(NULL, size)) == NULL) { + return NULL; + } + + if (IS_ALIGNED(result, alignment)) { + return result; + } + + /* + * The virtual memory allocation was not aligned, clean-up the + * mapping so we can try a different strategy. + */ + os_munmap(result, size); + + /* + * Retry the virtual memory allocation adding padding to ensure + * the requested alignment. + */ + if ((result = os_mmap(NULL, size + alignment)) == NULL) { + return NULL; + } + + diff = (char *)ALIGN_UP(result, alignment) - result; + + /* + * Unmap any extra pages at the beginning of the allocation. If + * the allocation ended up being aligned, there will be nothing to + * unmap. + */ + if (diff != 0) { + os_munmap(result, diff); + result += diff; + } + + /* + * Unmap extra pages at the end of the allocation. There must + * always be at least one. + */ + os_munmap(result + size, alignment - diff); +#endif + + return result; +} + #ifdef ERTS_HAVE_OS_MREMAP # if HAVE_MREMAP # if defined(__NetBSD__) @@ -1324,7 +1397,7 @@ os_munmap(void *ptr, UWord size) # endif # endif static ERTS_INLINE void * -os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign) +os_mremap(void *ptr, UWord old_size, UWord new_size) { void *new_seg; #if HAVE_MREMAP @@ -1442,7 +1515,7 @@ alloc_desc_insert_free_seg(ErtsMemMapper* mm, #if ERTS_HAVE_OS_MMAP if (!mm->no_os_mmap) { - ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0); + ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE); if (ptr) { mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE; ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE); @@ -1650,35 +1723,15 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) /* Map using OS primitives */ if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) { if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) { - seg = os_mmap(NULL, asize, 0); + seg = os_mmap(NULL, asize); if (!seg) goto failure; } else { asize = ERTS_SUPERALIGNED_CEILING(*sizep); - seg = os_mmap(NULL, asize, 1); + seg = os_mmap_aligned(asize, ERTS_SUPERALIGNED_SIZE); if (!seg) goto failure; - - if (!ERTS_IS_SUPERALIGNED(seg)) { - char *ptr; - UWord sz; - - os_munmap(seg, asize); - - ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1); - if (!ptr) - goto failure; - - seg = (char *) ERTS_SUPERALIGNED_CEILING(ptr); - sz = (UWord) (seg - ptr); - ERTS_MMAP_ASSERT(sz <= ERTS_SUPERALIGNED_SIZE); - if (sz) - os_munmap(ptr, sz); - sz = ERTS_SUPERALIGNED_SIZE - sz; - if (sz) - os_munmap(seg+asize, sz); - } } ERTS_MMAP_OP_LCK(seg, *sizep, asize); @@ -1899,7 +1952,7 @@ erts_mremap(ErtsMemMapper* mm, if (superaligned) { return remap_move(mm, flags, ptr, old_size, sizep); } else { - new_ptr = os_mremap(ptr, old_size, asize, 0); + new_ptr = os_mremap(ptr, old_size, asize); if (!new_ptr) return NULL; if (asize > old_size) @@ -2232,13 +2285,30 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) * The whole supercarrier will by physically * reserved all the time. */ - start = os_mmap(NULL, sz, 1); + UWord alignment; + + if (init->lp) + alignment = MAX(sys_large_page_size, ERTS_SUPERALIGNED_SIZE); + else + alignment = ERTS_SUPERALIGNED_SIZE; + start = os_mmap_aligned(sz, alignment); } if (!start) erts_exit(1, "erts_mmap: Failed to create super carrier of size %bpu MB\n", init->scs/1024/1024); end = start + sz; +#ifdef HAVE_LINUX_THP + if (init->lp) { + /* + * Enable the Transparent Huge Pages for the virtual + * memory reservation. + */ + if (madvise(start, sz, MADV_HUGEPAGE) != 0) { + erts_exit(1, "erts_mmap: Failed to enable THP for the super carrier: %s\n", strerror(errno)); + } + } +#endif #ifdef ERTS_MMAP_DEBUG_FILL_AREAS if (!virtual_map) { Uint32 *uip; diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index ecb1ef6a13..4322324ace 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -88,15 +88,16 @@ typedef struct { int sco; /* super carrier only? */ UWord scrfsd; /* super carrier reserved free segment descriptors */ int scrpm; /* super carrier reserve physical memory */ + int lp; /* try to use large pages? */ }ErtsMMapInit; #define ERTS_MMAP_INIT_DEFAULT_INITER \ - {{NULL, NULL}, {NULL, NULL}, 0, 1, (1 << 16), 1} + {{NULL, NULL}, {NULL, NULL}, 0, 1, (1 << 16), 1, 0} #define ERTS_LITERAL_VIRTUAL_AREA_SIZE (UWORD_CONSTANT(1)*1024*1024*1024) #define ERTS_MMAP_INIT_LITERAL_INITER \ - {{NULL, NULL}, {NULL, NULL}, ERTS_LITERAL_VIRTUAL_AREA_SIZE, 1, (1 << 10), 0} + {{NULL, NULL}, {NULL, NULL}, ERTS_LITERAL_VIRTUAL_AREA_SIZE, 1, (1 << 10), 0, 0} #define ERTS_SUPERALIGNED_SIZE \ diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index 854e30406b..d05721dba8 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -111,6 +111,7 @@ static char *plusM_other_switches[] = { "Mscrfsd", "Msco", "Mscrpm", + "Mlp", "Ye", "Ym", "Ytp", -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor