summaryrefslogtreecommitdiff
path: root/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch
diff options
context:
space:
mode:
Diffstat (limited to 'malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch')
-rw-r--r--malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch532
1 files changed, 532 insertions, 0 deletions
diff --git a/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch b/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch
new file mode 100644
index 0000000..1fc292a
--- /dev/null
+++ b/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch
@@ -0,0 +1,532 @@
+From 5f6d8d97c69748180f0031dfa385aff75062c4d5 Mon Sep 17 00:00:00 2001
+From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date: Fri, 13 Aug 2021 08:36:29 -0300
+Subject: [PATCH 1/7] malloc: Add madvise support for Transparent Huge Pages
+
+Linux Transparent Huge Pages (THP) currently supports three different
+states: 'never', 'madvise', and 'always'. The 'never' state is
+self-explanatory and 'always' will enable THP for all anonymous
+pages. However, 'madvise' is still the default for some systems and
+in that case THP will only be used if the memory range is explicitly
+advertised by the program through a madvise(MADV_HUGEPAGE) call.
+
+To enable it, a new tunable is provided, 'glibc.malloc.hugetlb',
+where setting a value different from 0 enables the madvise call.
+
+This patch issues the madvise(MADV_HUGEPAGE) call after a successful
+mmap() call at sysmalloc() with sizes larger than the default huge
+page size. The madvise() call is disabled if the system does not support
+THP or if it has the mode set to "never". Linux currently only supports
+one page size for THP, even if the architecture supports multiple
+sizes.
+
+To test it, a new rule is added, tests-malloc-hugetlb1, which runs the
+added tests with the required GLIBC_TUNABLES setting.
+
+Checked on x86_64-linux-gnu.
+
+Reviewed-by: DJ Delorie <dj@redhat.com>
+---
+ NEWS | 5 ++
+ Rules | 19 ++++++
+ elf/dl-tunables.list | 5 ++
+ elf/tst-rtld-list-tunables.exp | 1 +
+ malloc/Makefile | 16 +++++
+ malloc/arena.c | 5 ++
+ malloc/malloc-internal.h | 1 +
+ malloc/malloc.c | 47 ++++++++++++++
+ manual/tunables.texi | 10 +++
+ sysdeps/generic/Makefile | 8 +++
+ sysdeps/generic/malloc-hugepages.c | 31 +++++++++
+ sysdeps/generic/malloc-hugepages.h | 37 +++++++++++
+ sysdeps/unix/sysv/linux/malloc-hugepages.c | 74 ++++++++++++++++++++++
+ 13 files changed, 259 insertions(+)
+ create mode 100644 sysdeps/generic/malloc-hugepages.c
+ create mode 100644 sysdeps/generic/malloc-hugepages.h
+ create mode 100644 sysdeps/unix/sysv/linux/malloc-hugepages.c
+
+diff --git a/NEWS b/NEWS
+index 2532565d77..3b94dd209c 100644
+--- a/NEWS
++++ b/NEWS
+@@ -92,6 +92,11 @@ Major new features:
+ variables. The GNU C Library manual has details on integration of
+ Restartable Sequences.
+
++* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
++ make malloc issue madvise with MADV_HUGEPAGE on mmap and sbrk calls.
++ Setting this might improve performance with Transparent Huge Pages madvise
++ mode depending on the workload.
++
+ Deprecated and removed features, and other changes affecting compatibility:
+
+ * The function pthread_mutex_consistent_np has been deprecated; programs
+diff --git a/Rules b/Rules
+index b1137afe71..5f5d9ba4cc 100644
+--- a/Rules
++++ b/Rules
+@@ -157,6 +157,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
+ $(tests-container:%=$(objpfx)%.out) \
+ $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
+ $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
++ $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
+ $(tests-special) $(tests-printers-out)
+ xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
+ endif
+@@ -168,6 +169,7 @@ tests-expected =
+ else
+ tests-expected = $(tests) $(tests-internal) $(tests-printers) \
+ $(tests-container) $(tests-malloc-check:%=%-malloc-check) \
++ $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
+ $(tests-mcheck:%=%-mcheck)
+ endif
+ tests:
+@@ -196,6 +198,7 @@ binaries-pie-notests =
+ endif
+ binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
+ binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check)
++binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
+ else
+ binaries-all-notests =
+ binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
+@@ -207,6 +210,7 @@ binaries-pie-tests =
+ binaries-pie-notests =
+ binaries-mcheck-tests =
+ binaries-malloc-check-tests =
++binaries-malloc-hugetlb1-tests =
+ endif
+
+ binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
+@@ -247,6 +251,14 @@ $(addprefix $(objpfx),$(binaries-malloc-check-tests)): %-malloc-check: %.o \
+ $(+link-tests)
+ endif
+
++ifneq "$(strip $(binaries-malloc-hugetlb1-tests))" ""
++$(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o \
++ $(link-extra-libs-tests) \
++ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
++ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
++ $(+link-tests)
++endif
++
+ ifneq "$(strip $(binaries-pie-tests))" ""
+ $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
+ $(link-extra-libs-tests) \
+@@ -284,6 +296,13 @@ $(1)-malloc-check-ENV = MALLOC_CHECK_=3 \
+ endef
+ $(foreach t,$(tests-malloc-check),$(eval $(call malloc-check-ENVS,$(t))))
+
++# All malloc-hugetlb1 tests will be run with GLIBC_TUNABLES=glibc.malloc.hugetlb=1
++define malloc-hugetlb1-ENVS
++$(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
++endef
++$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))
++
++
+ # mcheck tests need the debug DSO to support -lmcheck.
+ define mcheck-ENVS
+ $(1)-mcheck-ENV = LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
+diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
+index ffcd7f18d4..d1fd3f3e91 100644
+--- a/elf/dl-tunables.list
++++ b/elf/dl-tunables.list
+@@ -92,6 +92,11 @@ glibc {
+ minval: 0
+ security_level: SXID_IGNORE
+ }
++ hugetlb {
++ type: INT_32
++ minval: 0
++ maxval: 1
++ }
+ }
+ cpu {
+ hwcap_mask {
+diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
+index 44e4834cfb..d8e363f2c5 100644
+--- a/elf/tst-rtld-list-tunables.exp
++++ b/elf/tst-rtld-list-tunables.exp
+@@ -1,6 +1,7 @@
+ glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
+ glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
+ glibc.malloc.check: 0 (min: 0, max: 3)
++glibc.malloc.hugetlb: 0 (min: 0, max: 1)
+ glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
+ glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
+ glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
+diff --git a/malloc/Makefile b/malloc/Makefile
+index 63cd7c0734..0137595e17 100644
+--- a/malloc/Makefile
++++ b/malloc/Makefile
+@@ -78,6 +78,22 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
+ tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
+ $(tests-static),$(tests))
+
++# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 to check the
++# Transparent Huge Pages support. We need to exclude some tests that define
++# the ENV vars.
++tests-exclude-hugetlb1 = \
++ tst-compathooks-off \
++ tst-compathooks-on \
++ tst-interpose-nothread \
++ tst-interpose-thread \
++ tst-interpose-static-nothread \
++ tst-interpose-static-thread \
++ tst-malloc-usable \
++ tst-malloc-usable-tunables \
++ tst-mallocstate
++tests-malloc-hugetlb1 = \
++ $(filter-out $(tests-exclude-hugetlb1), $(tests))
++
+ # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
+ ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
+ # Tests that don't play well with mcheck. They are either bugs in mcheck or
+diff --git a/malloc/arena.c b/malloc/arena.c
+index 78ef4cf18c..cd00c7bef4 100644
+--- a/malloc/arena.c
++++ b/malloc/arena.c
+@@ -230,6 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
+ TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
+ #endif
+ TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
++TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)
+ #else
+ /* Initialization routine. */
+ #include <string.h>
+@@ -330,6 +331,7 @@ ptmalloc_init (void)
+ TUNABLE_CALLBACK (set_tcache_unsorted_limit));
+ # endif
+ TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
++ TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));
+ #else
+ if (__glibc_likely (_environ != NULL))
+ {
+@@ -508,6 +510,9 @@ new_heap (size_t size, size_t top_pad)
+ __munmap (p2, HEAP_MAX_SIZE);
+ return 0;
+ }
++
++ madvise_thp (p2, size);
++
+ h = (heap_info *) p2;
+ h->size = size;
+ h->mprotect_size = size;
+diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h
+index 0c7b5a183c..7493e34d86 100644
+--- a/malloc/malloc-internal.h
++++ b/malloc/malloc-internal.h
+@@ -22,6 +22,7 @@
+ #include <malloc-machine.h>
+ #include <malloc-sysdep.h>
+ #include <malloc-size.h>
++#include <malloc-hugepages.h>
+
+ /* Called in the parent process before a fork. */
+ void __malloc_fork_lock_parent (void) attribute_hidden;
+diff --git a/malloc/malloc.c b/malloc/malloc.c
+index 095d97a3be..c75841b841 100644
+--- a/malloc/malloc.c
++++ b/malloc/malloc.c
+@@ -1880,6 +1880,11 @@ struct malloc_par
+ INTERNAL_SIZE_T arena_test;
+ INTERNAL_SIZE_T arena_max;
+
++#if HAVE_TUNABLES
++ /* Transparent Huge Page support. */
++ INTERNAL_SIZE_T thp_pagesize;
++#endif
++
+ /* Memory map support */
+ int n_mmaps;
+ int n_mmaps_max;
+@@ -2008,6 +2013,20 @@ free_perturb (char *p, size_t n)
+
+ #include <stap-probe.h>
+
++/* ----------- Routines dealing with transparent huge pages ----------- */
++
++static inline void
++madvise_thp (void *p, INTERNAL_SIZE_T size)
++{
++#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
++  /* Advise only when THP use is active (thp_pagesize != 0) and the area
++     holds at least one huge page; the madvise result is ignored.
++     NOTE(review): confirm every caller passes a page-aligned P.  */
++  if (mp_.thp_pagesize != 0 && size >= mp_.thp_pagesize)
++    __madvise (p, size, MADV_HUGEPAGE);
++#endif
++}
++
+ /* ------------------- Support for multiple arenas -------------------- */
+ #include "arena.c"
+
+@@ -2445,6 +2464,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+
+ if (mm != MAP_FAILED)
+ {
++ madvise_thp (mm, size);
++
+ /*
+ The offset to the start of the mmapped region is stored
+ in the prev_size field of the chunk. This allows us to adjust
+@@ -2606,6 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+ if (size > 0)
+ {
+ brk = (char *) (MORECORE (size));
++ if (brk != (char *) (MORECORE_FAILURE))
++ madvise_thp (brk, size);
+ LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+ }
+
+@@ -2637,6 +2660,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+
+ if (mbrk != MAP_FAILED)
+ {
++ madvise_thp (mbrk, size);
++
+ /* We do not need, and cannot use, another sbrk call to find end */
+ brk = mbrk;
+ snd_brk = brk + size;
+@@ -2748,6 +2773,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+ correction = 0;
+ snd_brk = (char *) (MORECORE (0));
+ }
++ else
++ madvise_thp (snd_brk, correction);
+ }
+
+ /* handle non-contiguous cases */
+@@ -2988,6 +3015,8 @@ mremap_chunk (mchunkptr p, size_t new_size)
+ if (cp == MAP_FAILED)
+ return 0;
+
++ madvise_thp (cp, new_size);
++
+ p = (mchunkptr) (cp + offset);
+
+ assert (aligned_OK (chunk2mem (p)));
+@@ -5316,6 +5345,24 @@ do_set_mxfast (size_t value)
+ return 0;
+ }
+
++#if HAVE_TUNABLES
++/* Apply the glibc.malloc.hugetlb tunable.  A VALUE of 1 requests the
++   madvise-based THP support; any other value changes nothing.
++   Always returns 0.  */
++static __always_inline int
++do_set_hugetlb (int32_t value)
++{
++  if (value != 1)
++    return 0;
++  /* Record the huge page size only when the system THP mode is
++     'madvise'.  In every other mode the madvise() call would be
++     wasteful, so thp_pagesize is left unchanged.  */
++  if (__malloc_thp_mode () == malloc_thp_mode_madvise)
++    mp_.thp_pagesize = __malloc_default_thp_pagesize ();
++  return 0;
++}
++#endif
++
+ int
+ __libc_mallopt (int param_number, int value)
+ {
+diff --git a/manual/tunables.texi b/manual/tunables.texi
+index 28ff502990..9ca6e3f603 100644
+--- a/manual/tunables.texi
++++ b/manual/tunables.texi
+@@ -270,6 +270,16 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
+ passed to @code{malloc} for the largest bin size to enable.
+ @end deftp
+
++@deftp Tunable glibc.malloc.hugetlb
++This tunable controls the usage of Huge Pages on @code{malloc} calls. The
++default value is @code{0}, which disables any additional support on
++@code{malloc}.
++
++Setting its value to @code{1} enables the use of @code{madvise} with
++@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled
++only if the system supports Transparent Huge Pages (currently only on Linux).
++@end deftp
++
+ @node Dynamic Linking Tunables
+ @section Dynamic Linking Tunables
+ @cindex dynamic linking tunables
+diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile
+index a209e85cc4..8eef83c94d 100644
+--- a/sysdeps/generic/Makefile
++++ b/sysdeps/generic/Makefile
+@@ -27,3 +27,11 @@ sysdep_routines += framestate unwind-pe
+ shared-only-routines += framestate unwind-pe
+ endif
+ endif
++
++ifeq ($(subdir),malloc)
++sysdep_malloc_debug_routines += malloc-hugepages
++endif
++
++ifeq ($(subdir),misc)
++sysdep_routines += malloc-hugepages
++endif
+diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
+new file mode 100644
+index 0000000000..8fb459a263
+--- /dev/null
++++ b/sysdeps/generic/malloc-hugepages.c
+@@ -0,0 +1,31 @@
++/* Huge Page support. Generic implementation.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public License as
++ published by the Free Software Foundation; either version 2.1 of the
++ License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; see the file COPYING.LIB. If
++ not, see <https://www.gnu.org/licenses/>. */
++
++#include <malloc-hugepages.h>
++
++unsigned long int
++__malloc_default_thp_pagesize (void)
++{
++ return 0; /* Generic implementation: no THP page size is reported.  */
++}
++
++enum malloc_thp_mode_t
++__malloc_thp_mode (void)
++{
++ return malloc_thp_mode_not_supported; /* Generic implementation.  */
++}
+diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
+new file mode 100644
+index 0000000000..f5a442e328
+--- /dev/null
++++ b/sysdeps/generic/malloc-hugepages.h
+@@ -0,0 +1,37 @@
++/* Malloc huge page support. Generic implementation.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public License as
++ published by the Free Software Foundation; either version 2.1 of the
++ License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; see the file COPYING.LIB. If
++ not, see <https://www.gnu.org/licenses/>. */
++
++#ifndef _MALLOC_HUGEPAGES_H
++#define _MALLOC_HUGEPAGES_H
++
++#include <stddef.h>
++
++/* Return the default transparent huge page size. */
++unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden;
++
++enum malloc_thp_mode_t
++{
++ malloc_thp_mode_always, /* THP is used for all anonymous pages.  */
++ malloc_thp_mode_madvise, /* THP is used only on madvise'd ranges.  */
++ malloc_thp_mode_never, /* THP is disabled.  */
++ malloc_thp_mode_not_supported /* The mode could not be determined.  */
++};
++/* Return the system THP mode.  */
++enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
++
++#endif /* _MALLOC_HUGEPAGES_H */
+diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
+new file mode 100644
+index 0000000000..7497e07260
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
+@@ -0,0 +1,74 @@
++/* Huge Page support. Linux implementation.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public License as
++ published by the Free Software Foundation; either version 2.1 of the
++ License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; see the file COPYING.LIB. If
++ not, see <https://www.gnu.org/licenses/>. */
++
++#include <intprops.h>
++#include <malloc-hugepages.h>
++#include <not-cancel.h>
++
++/* Return the THP size from sysfs, or 0 if it cannot be read or parsed.  */
++unsigned long int
++__malloc_default_thp_pagesize (void)
++{
++  int fd = __open64_nocancel (
++    "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY);
++  if (fd == -1)
++    return 0;
++  char str[INT_BUFSIZE_BOUND (unsigned long int)];
++  ssize_t s = __read_nocancel (fd, str, sizeof (str));
++  __close_nocancel (fd);
++  if (s < 0)
++    return 0;
++
++  /* Parse decimal digits up to the newline; reject anything else.  */
++  unsigned long int r = 0;
++  for (ssize_t i = 0; i < s && str[i] != '\n'; i++)
++    {
++      if (str[i] < '0' || str[i] > '9')
++        return 0;
++      r = r * 10 + (str[i] - '0');
++    }
++  return r;
++}
++
++/* Return the THP mode read from sysfs, or 'not supported' on any failure.  */
++enum malloc_thp_mode_t
++__malloc_thp_mode (void)
++{
++  static const char mode_always[] = "[always] madvise never\n";
++  static const char mode_madvise[] = "always [madvise] never\n";
++  static const char mode_never[] = "always madvise [never]\n";
++
++  int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled",
++                              O_RDONLY);
++  if (fd == -1)
++    return malloc_thp_mode_not_supported;
++  char str[sizeof(mode_always)];
++  ssize_t s = __read_nocancel (fd, str, sizeof (str));
++  __close_nocancel (fd);
++
++  /* read leaves STR without a NUL terminator, so compare by length.  */
++  if (s != sizeof (mode_always) - 1)
++    return malloc_thp_mode_not_supported;
++  if (memcmp (str, mode_always, s) == 0)
++    return malloc_thp_mode_always;
++  if (memcmp (str, mode_madvise, s) == 0)
++    return malloc_thp_mode_madvise;
++  if (memcmp (str, mode_never, s) == 0)
++    return malloc_thp_mode_never;
++  return malloc_thp_mode_not_supported;
++}
+--
+2.33.0
+