diff options
Diffstat (limited to 'malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch')
-rw-r--r-- | malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch | 532 |
1 files changed, 532 insertions, 0 deletions
diff --git a/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch b/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch new file mode 100644 index 0000000..1fc292a --- /dev/null +++ b/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch @@ -0,0 +1,532 @@ +From 5f6d8d97c69748180f0031dfa385aff75062c4d5 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella <adhemerval.zanella@linaro.org> +Date: Fri, 13 Aug 2021 08:36:29 -0300 +Subject: [PATCH 1/7] malloc: Add madvise support for Transparent Huge Pages + +Linux Transparent Huge Pages (THP) currently supports three different +states: 'never', 'madvise', and 'always'. The 'never' is +self-explanatory and 'always' will enable THP for all anonymous +pages. However, 'madvise' is still the default for some systems and +in such cases THP will only be used if the memory range is explicitly +advertised by the program through a madvise(MADV_HUGEPAGE) call. + +To enable it a new tunable is provided, 'glibc.malloc.hugetlb', +where setting it to a value different than 0 enables the madvise call. + +This patch issues the madvise(MADV_HUGEPAGE) call after a successful +mmap() call at sysmalloc() with sizes larger than the default huge +page size. The madvise() call is disabled if the system does not support +THP or if it has the mode set to "never", and on Linux only +one page size is supported for THP, even if the architecture supports multiple +sizes. + +To test it, a new rule tests-malloc-hugetlb1 is added, which runs the +added tests with the required GLIBC_TUNABLES setting. + +Checked on x86_64-linux-gnu.
+ +Reviewed-by: DJ Delorie <dj@redhat.com> +--- + NEWS | 5 ++ + Rules | 19 ++++++ + elf/dl-tunables.list | 5 ++ + elf/tst-rtld-list-tunables.exp | 1 + + malloc/Makefile | 16 +++++ + malloc/arena.c | 5 ++ + malloc/malloc-internal.h | 1 + + malloc/malloc.c | 47 ++++++++++++++ + manual/tunables.texi | 10 +++ + sysdeps/generic/Makefile | 8 +++ + sysdeps/generic/malloc-hugepages.c | 31 +++++++++ + sysdeps/generic/malloc-hugepages.h | 37 +++++++++++ + sysdeps/unix/sysv/linux/malloc-hugepages.c | 74 ++++++++++++++++++++++ + 13 files changed, 259 insertions(+) + create mode 100644 sysdeps/generic/malloc-hugepages.c + create mode 100644 sysdeps/generic/malloc-hugepages.h + create mode 100644 sysdeps/unix/sysv/linux/malloc-hugepages.c + +diff --git a/NEWS b/NEWS +index 2532565d77..3b94dd209c 100644 +--- a/NEWS ++++ b/NEWS +@@ -92,6 +92,11 @@ Major new features: + variables. The GNU C Library manual has details on integration of + Restartable Sequences. + ++* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to ++ make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls. ++ Setting this might improve performance with Transparent Huge Pages madvise ++ mode depending on the workload.
++ + Deprecated and removed features, and other changes affecting compatibility: + + * The function pthread_mutex_consistent_np has been deprecated; programs +diff --git a/Rules b/Rules +index b1137afe71..5f5d9ba4cc 100644 +--- a/Rules ++++ b/Rules +@@ -157,6 +157,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \ + $(tests-container:%=$(objpfx)%.out) \ + $(tests-mcheck:%=$(objpfx)%-mcheck.out) \ + $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \ ++ $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \ + $(tests-special) $(tests-printers-out) + xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special) + endif +@@ -168,6 +169,7 @@ tests-expected = + else + tests-expected = $(tests) $(tests-internal) $(tests-printers) \ + $(tests-container) $(tests-malloc-check:%=%-malloc-check) \ ++ $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \ + $(tests-mcheck:%=%-mcheck) + endif + tests: +@@ -196,6 +198,7 @@ binaries-pie-notests = + endif + binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck) + binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check) ++binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) + else + binaries-all-notests = + binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs) +@@ -207,6 +210,7 @@ binaries-pie-tests = + binaries-pie-notests = + binaries-mcheck-tests = + binaries-malloc-check-tests = ++binaries-malloc-hugetlb1-tests = + endif + + binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests) +@@ -247,6 +251,14 @@ $(addprefix $(objpfx),$(binaries-malloc-check-tests)): %-malloc-check: %.o \ + $(+link-tests) + endif + ++ifneq "$(strip $(binaries-malloc-hugetlb1-tests))" "" ++$(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o \ ++ $(link-extra-libs-tests) \ ++ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ ++ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) ++ $(+link-tests) ++endif ++ + ifneq "$(strip 
$(binaries-pie-tests))" "" + $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \ + $(link-extra-libs-tests) \ +@@ -284,6 +296,13 @@ $(1)-malloc-check-ENV = MALLOC_CHECK_=3 \ + endef + $(foreach t,$(tests-malloc-check),$(eval $(call malloc-check-ENVS,$(t)))) + ++# All malloc-hugetlb1 tests will be run with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 ++define malloc-hugetlb1-ENVS ++$(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1 ++endef ++$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t)))) ++ ++ + # mcheck tests need the debug DSO to support -lmcheck. + define mcheck-ENVS + $(1)-mcheck-ENV = LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so +diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list +index ffcd7f18d4..d1fd3f3e91 100644 +--- a/elf/dl-tunables.list ++++ b/elf/dl-tunables.list +@@ -92,6 +92,11 @@ glibc { + minval: 0 + security_level: SXID_IGNORE + } ++ hugetlb { ++ type: INT_32 ++ minval: 0 ++ maxval: 1 ++ } + } + cpu { + hwcap_mask { +diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp +index 44e4834cfb..d8e363f2c5 100644 +--- a/elf/tst-rtld-list-tunables.exp ++++ b/elf/tst-rtld-list-tunables.exp +@@ -1,6 +1,7 @@ + glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+) + glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+) + glibc.malloc.check: 0 (min: 0, max: 3) ++glibc.malloc.hugetlb: 0 (min: 0, max: 1) + glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647) + glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+) + glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+) +diff --git a/malloc/Makefile b/malloc/Makefile +index 63cd7c0734..0137595e17 100644 +--- a/malloc/Makefile ++++ b/malloc/Makefile +@@ -78,6 +78,22 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \ + tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \ + $(tests-static),$(tests)) + ++# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the ++# Transparent Huge Pages support.
We need to exclude some tests that define ++# the ENV vars. ++tests-exclude-hugetlb1 = \ ++ tst-compathooks-off \ ++ tst-compathooks-on \ ++ tst-interpose-nothread \ ++ tst-interpose-thread \ ++ tst-interpose-static-nothread \ ++ tst-interpose-static-thread \ ++ tst-malloc-usable \ ++ tst-malloc-usable-tunables \ ++ tst-mallocstate ++tests-malloc-hugetlb1 = \ ++ $(filter-out $(tests-exclude-hugetlb1), $(tests)) ++ + # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24. + ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes) + # Tests that don't play well with mcheck. They are either bugs in mcheck or +diff --git a/malloc/arena.c b/malloc/arena.c +index 78ef4cf18c..cd00c7bef4 100644 +--- a/malloc/arena.c ++++ b/malloc/arena.c +@@ -230,6 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t) + TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t) + #endif + TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t) ++TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t) + #else + /* Initialization routine. */ + #include <string.h> +@@ -330,6 +331,7 @@ ptmalloc_init (void) + TUNABLE_CALLBACK (set_tcache_unsorted_limit)); + # endif + TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast)); ++ TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb)); + #else + if (__glibc_likely (_environ != NULL)) + { +@@ -508,6 +510,9 @@ new_heap (size_t size, size_t top_pad) + __munmap (p2, HEAP_MAX_SIZE); + return 0; + } ++ ++ madvise_thp (p2, size); ++ + h = (heap_info *) p2; + h->size = size; + h->mprotect_size = size; +diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h +index 0c7b5a183c..7493e34d86 100644 +--- a/malloc/malloc-internal.h ++++ b/malloc/malloc-internal.h +@@ -22,6 +22,7 @@ + #include <malloc-machine.h> + #include <malloc-sysdep.h> + #include <malloc-size.h> ++#include <malloc-hugepages.h> + + /* Called in the parent process before a fork.
*/ + void __malloc_fork_lock_parent (void) attribute_hidden; +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 095d97a3be..c75841b841 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -1880,6 +1880,11 @@ struct malloc_par + INTERNAL_SIZE_T arena_test; + INTERNAL_SIZE_T arena_max; + ++#if HAVE_TUNABLES ++ /* Transparent Huge Page support: huge page size consulted by madvise_thp; 0 disables the madvise call. */ ++ INTERNAL_SIZE_T thp_pagesize; ++#endif ++ + /* Memory map support */ + int n_mmaps; + int n_mmaps_max; +@@ -2008,6 +2013,20 @@ free_perturb (char *p, size_t n) + + #include <stap-probe.h> + ++/* ----------- Routines dealing with transparent huge pages ----------- */ ++ ++static inline void ++madvise_thp (void *p, INTERNAL_SIZE_T size) ++{ ++#if HAVE_TUNABLES && defined (MADV_HUGEPAGE) ++ /* Do not consider areas smaller than a huge page or if the tunable is ++ not active (mp_.thp_pagesize is 0 in that case). */ ++ if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize) ++ return; ++ __madvise (p, size, MADV_HUGEPAGE); /* The result of the call is not checked. */ ++#endif ++} ++ + /* ------------------- Support for multiple arenas -------------------- */ + #include "arena.c" + +@@ -2445,6 +2464,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + + if (mm != MAP_FAILED) + { ++ madvise_thp (mm, size); ++ + /* + The offset to the start of the mmapped region is stored + in the prev_size field of the chunk.
This allows us to adjust +@@ -2606,6 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + if (size > 0) + { + brk = (char *) (MORECORE (size)); ++ if (brk != (char *) (MORECORE_FAILURE)) ++ madvise_thp (brk, size); + LIBC_PROBE (memory_sbrk_more, 2, brk, size); + } + +@@ -2637,6 +2660,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + + if (mbrk != MAP_FAILED) + { ++ madvise_thp (mbrk, size); ++ + /* We do not need, and cannot use, another sbrk call to find end */ + brk = mbrk; + snd_brk = brk + size; +@@ -2748,6 +2773,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + correction = 0; + snd_brk = (char *) (MORECORE (0)); + } ++ else ++ madvise_thp (snd_brk, correction); + } + + /* handle non-contiguous cases */ +@@ -2988,6 +3015,8 @@ mremap_chunk (mchunkptr p, size_t new_size) + if (cp == MAP_FAILED) + return 0; + ++ madvise_thp (cp, new_size); ++ + p = (mchunkptr) (cp + offset); + + assert (aligned_OK (chunk2mem (p))); +@@ -5316,6 +5345,24 @@ do_set_mxfast (size_t value) + return 0; + } + ++#if HAVE_TUNABLES ++static __always_inline int ++do_set_hugetlb (int32_t value) ++{ ++ if (value == 1) ++ { ++ enum malloc_thp_mode_t thp_mode = __malloc_thp_mode (); ++ /* ++ Only enable THP madvise usage if the system supports THP and is in ++ 'madvise' mode. Otherwise the madvise() call is wasteful. ++ */ ++ if (thp_mode == malloc_thp_mode_madvise) ++ mp_.thp_pagesize = __malloc_default_thp_pagesize (); ++ } ++ return 0; /* Always returns 0; values other than 1 change nothing. */ ++} ++#endif ++ + int + __libc_mallopt (int param_number, int value) + { +diff --git a/manual/tunables.texi b/manual/tunables.texi +index 28ff502990..9ca6e3f603 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -270,6 +270,16 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size + passed to @code{malloc} for the largest bin size to enable. + @end deftp + ++@deftp Tunable glibc.malloc.hugetlb ++This tunable controls the usage of Huge Pages on @code{malloc} calls.
The ++default value is @code{0}, which disables any additional support on ++@code{malloc}. ++ ++Setting its value to @code{1} enables the use of @code{madvise} with ++@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled ++only if the system supports Transparent Huge Page (currently only on Linux). ++@end deftp ++ + @node Dynamic Linking Tunables + @section Dynamic Linking Tunables + @cindex dynamic linking tunables +diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile +index a209e85cc4..8eef83c94d 100644 +--- a/sysdeps/generic/Makefile ++++ b/sysdeps/generic/Makefile +@@ -27,3 +27,11 @@ sysdep_routines += framestate unwind-pe + shared-only-routines += framestate unwind-pe + endif + endif ++ ++ifeq ($(subdir),malloc) ++sysdep_malloc_debug_routines += malloc-hugepages ++endif ++ ++ifeq ($(subdir),misc) ++sysdep_routines += malloc-hugepages ++endif +diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c +new file mode 100644 +index 0000000000..8fb459a263 +--- /dev/null ++++ b/sysdeps/generic/malloc-hugepages.c +@@ -0,0 +1,31 @@ ++/* Huge Page support. Generic implementation. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see <https://www.gnu.org/licenses/>. 
*/ ++ ++#include <malloc-hugepages.h> ++ ++unsigned long int ++__malloc_default_thp_pagesize (void) ++{ ++ return 0; /* Generic fallback: no huge page size is reported. */ ++} ++ ++enum malloc_thp_mode_t ++__malloc_thp_mode (void) ++{ ++ return malloc_thp_mode_not_supported; /* Generic fallback: THP is reported as not supported. */ ++} +diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h +new file mode 100644 +index 0000000000..f5a442e328 +--- /dev/null ++++ b/sysdeps/generic/malloc-hugepages.h +@@ -0,0 +1,37 @@ ++/* Malloc huge page support. Generic implementation. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see <https://www.gnu.org/licenses/>. */ ++ ++#ifndef _MALLOC_HUGEPAGES_H ++#define _MALLOC_HUGEPAGES_H ++ ++#include <stddef.h> ++ ++/* Return the default transparent huge page size.
*/ ++unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden; ++ ++enum malloc_thp_mode_t ++{ ++ malloc_thp_mode_always, ++ malloc_thp_mode_madvise, ++ malloc_thp_mode_never, ++ malloc_thp_mode_not_supported /* Returned when the mode cannot be determined. */ ++}; ++ ++enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden; /* Return the transparent huge page mode. */ ++ ++#endif /* _MALLOC_HUGEPAGES_H */ +diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c +new file mode 100644 +index 0000000000..7497e07260 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c +@@ -0,0 +1,74 @@ ++/* Huge Page support. Linux implementation. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see <https://www.gnu.org/licenses/>.
*/ ++ ++#include <intprops.h> ++#include <malloc-hugepages.h> ++#include <not-cancel.h> ++ ++unsigned long int ++__malloc_default_thp_pagesize (void) /* Parse the decimal size from sysfs; 0 if it cannot be opened or read. */ ++{ ++ int fd = __open64_nocancel ( ++ "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY); ++ if (fd == -1) ++ return 0; ++ ++ char str[INT_BUFSIZE_BOUND (unsigned long int)]; ++ ssize_t s = __read_nocancel (fd, str, sizeof (str)); ++ __close_nocancel (fd); ++ if (s < 0) ++ return 0; ++ ++ unsigned long int r = 0; ++ for (ssize_t i = 0; i < s; i++) ++ { ++ if (str[i] < '0' || str[i] > '9') /* Stop at the newline or any other non-digit. */ ++ break; ++ r *= 10; ++ r += str[i] - '0'; ++ } ++ return r; ++} ++ ++enum malloc_thp_mode_t ++__malloc_thp_mode (void) /* Map the contents of the sysfs 'enabled' file to a malloc_thp_mode_t. */ ++{ ++ int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled", ++ O_RDONLY); ++ if (fd == -1) ++ return malloc_thp_mode_not_supported; ++ ++ static const char mode_always[] = "[always] madvise never\n"; ++ static const char mode_madvise[] = "always [madvise] never\n"; ++ static const char mode_never[] = "always madvise [never]\n"; ++ ++ char str[sizeof(mode_always)] = { 0 }; /* Zero-fill: read does not NUL-terminate, and strcmp below needs a terminator. */ ++ ssize_t s = __read_nocancel (fd, str, sizeof (str)); ++ __close_nocancel (fd); ++ ++ if (s == sizeof (mode_always) - 1) ++ { ++ if (strcmp (str, mode_always) == 0) ++ return malloc_thp_mode_always; ++ else if (strcmp (str, mode_madvise) == 0) ++ return malloc_thp_mode_madvise; ++ else if (strcmp (str, mode_never) == 0) ++ return malloc_thp_mode_never; ++ } ++ return malloc_thp_mode_not_supported; ++} +-- +2.33.0 + |