From 7adf5ee832d2649fa85f8f104523932dab64f12e Mon Sep 17 00:00:00 2001 From: Lv Ying Date: Tue, 7 Feb 2023 19:29:11 +0800 Subject: [PATCH 1/3] ld.so: support ld.so mmap hugetlb hugepage according to PT_LOAD segment granularity Only attempt to use hugepage to load PT_LOAD segments marked with the PF_HUGEPAGE flag. Even if the segment is marked with the PF_HUGEPAGE flag, the segment does not necessarily use the hugetlb huge page, either because the segment is too small, or because the segment is in an address space that is not suitable for using a huge page. We also added the -i option to hugepageedit to mark any PT_LOAD segment with the PF_HUGEPAGE flag. Signed-off-by: Lv Ying --- elf/dl-load.c | 15 ++++--- elf/dl-map-segments-hugepage.h | 72 ++++++++++++++++++++++++++++++---- elf/elf.h | 2 + elf/hugepageedit.c | 58 ++++++++++++++++++++++++--- 4 files changed, 128 insertions(+), 19 deletions(-) diff --git a/elf/dl-load.c b/elf/dl-load.c index f4b5c4a7..e0d4fa2e 100644 --- a/elf/dl-load.c +++ b/elf/dl-load.c @@ -1136,6 +1136,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, bool empty_dynamic = false; #ifdef HUGEPAGE_SHARED_LIB bool use_hugepage = false; + char hp_bitmap[l->l_phnum]; #endif /* The struct is initialized to zero so this is not necessary: @@ -1182,6 +1183,13 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, goto lose; } +#ifdef HUGEPAGE_SHARED_LIB + if (ph->p_flags & PF_HUGEPAGE) { + hp_bitmap[nloadcmds] = 1; + use_hugepage = true; + } else + hp_bitmap[nloadcmds] = 0; +#endif struct loadcmd *c = &loadcmds[nloadcmds++]; c->mapstart = ALIGN_DOWN (ph->p_vaddr, GLRO(dl_pagesize)); c->mapend = ALIGN_UP (ph->p_vaddr + ph->p_filesz, GLRO(dl_pagesize)); @@ -1194,11 +1202,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, if (nloadcmds > 1 && c[-1].mapend != c->mapstart) has_holes = true; -#ifdef HUGEPAGE_SHARED_LIB - if (ph->p_flags & PF_HUGEPAGE) - use_hugepage = true; -#endif - /* 
Optimize a common case. */ #if (PF_R | PF_W | PF_X) == 7 && (PROT_READ | PROT_WRITE | PROT_EXEC) == 7 c->prot = (PF_TO_PROT @@ -1297,7 +1300,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, ((GLRO(dl_hugepage_mask) & DL_HUGEPAGE_PROBE_FLAG) && use_hugepage)) { errstring = _dl_map_segments_largein (l, fd, header, type, loadcmds, nloadcmds, - maplength, has_holes); + maplength, hp_bitmap); if (__glibc_unlikely (errstring != NULL)) { hp_errcode = errno; diff --git a/elf/dl-map-segments-hugepage.h b/elf/dl-map-segments-hugepage.h index 37788ef9..e7202131 100644 --- a/elf/dl-map-segments-hugepage.h +++ b/elf/dl-map-segments-hugepage.h @@ -400,6 +400,55 @@ _extra_mmap(struct link_map *l, const struct loadcmd loadcmds[], size_t nloadcmd return extra_len; } +static __always_inline const char * +__mmap_segment_normalsz(const struct loadcmd *c, ElfW(Addr) mapstart, int fd, + size_t *mapseglen) /* Map segment C at MAPSTART with normal pages and store its mapped length in *MAPSEGLEN; return NULL on success, otherwise an error string. */ +{ + if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES)) + _dl_debug_printf("\tuse normal page mmap segment:[%lx-%lx)\n", mapstart, + mapstart + (c->allocend - c->mapstart)); + + if (c->mapend > c->mapstart && + (__mmap((void *)mapstart, c->mapend - c->mapstart, c->prot, + MAP_FILE|MAP_PRIVATE|MAP_FIXED, fd, c->mapoff) == MAP_FAILED)) + return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT; + + if (c->allocend > c->dataend) { /* zero-fill the range [dataend, allocend) */ + ElfW(Addr) zero, zeroend, zeropage; + + zero = mapstart + c->dataend - c->mapstart; + zeroend = mapstart + c->allocend - c->mapstart; + zeropage = ((zero + GLRO(dl_pagesize) - 1) + & ~(GLRO(dl_pagesize) - 1)); + + if (zeroend < zeropage) + zeropage = zeroend; + + if (zeropage > zero) { + if (__glibc_unlikely ((c->prot & PROT_WRITE) == 0)) { + if (__mprotect ((caddr_t) (zero & ~(GLRO(dl_pagesize) - 1)), + GLRO(dl_pagesize), c->prot|PROT_WRITE) < 0) + return DL_MAP_SEGMENTS_ERROR_MPROTECT; + } + + memset ((void *) zero, '\0', zeropage - zero); + + if (__glibc_unlikely ((c->prot & PROT_WRITE) == 0)) + __mprotect ((caddr_t) (zero & 
~(GLRO(dl_pagesize) - 1)), + GLRO(dl_pagesize), c->prot); + } + + if (zeroend > zeropage) { + if (__mmap ((caddr_t) zeropage, zeroend - zeropage, c->prot, + MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0) == MAP_FAILED) + return DL_MAP_SEGMENTS_ERROR_MAP_ZERO_FILL; + } + } + + *mapseglen = c->allocend - c->mapstart; + return NULL; +} + /* * PT_LOAD segment is described by p_filesz and p_memsz. * The bytes from the file are mapped to the beginning of the memory segment. @@ -409,11 +458,17 @@ _extra_mmap(struct link_map *l, const struct loadcmd loadcmds[], size_t nloadcmd */ static __always_inline const char * _mmap_segment(struct link_map *l, const struct loadcmd loadcmds[], size_t nloadcmds, - const struct loadcmd *c, ElfW(Addr) mapstart, int fd, size_t *mapseglen) + const struct loadcmd *c, ElfW(Addr) mapstart, int fd, + size_t *mapseglen, const char hp_bitmap[]) { const char * errstring = NULL; - size_t extra_len = _extra_mmap(l, loadcmds, nloadcmds, c, mapstart); - size_t memsz_len = 0; + size_t extra_len, memsz_len = 0; + + if (!hp_bitmap[c - loadcmds]) { /* element index of c, assuming c points into loadcmds[]; a zero entry means map with normal pages */ + return __mmap_segment_normalsz(c, mapstart, fd, mapseglen); + } + + extra_len = _extra_mmap(l, loadcmds, nloadcmds, c, mapstart); if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES)) _dl_debug_printf("\t%s(0x%lx): extra_len = 0x%lx\n\t{\n", __func__, (unsigned long)c, extra_len); @@ -448,7 +503,7 @@ static __always_inline const char * _dl_map_segments_largein (struct link_map *l, int fd, const ElfW(Ehdr) *header, int type, const struct loadcmd loadcmds[], size_t nloadcmds, - const size_t maplength, bool has_holes) + const size_t maplength, const char hp_bitmap[]) { if (__glibc_unlikely (type != ET_DYN)) return DL_MAP_SEGMENTS_ERROR_TYPE; @@ -470,7 +525,8 @@ _dl_map_segments_largein (struct link_map *l, int fd, const struct loadcmd * c = loadcmds; ElfW(Addr) text_addr = ALIGN_UP((ElfW(Addr))map_area_start + (text->mapstart - c->mapstart), SIZE_2MB); size_t mapseglen; 
- errstring = _mmap_segment(l, loadcmds, nloadcmds, text, text_addr, fd, &mapseglen); + errstring = _mmap_segment(l, loadcmds, nloadcmds, text, text_addr, fd, + &mapseglen, hp_bitmap); if (__glibc_unlikely(errstring != NULL)) goto unmap_reserved_area; @@ -493,7 +549,8 @@ _dl_map_segments_largein (struct link_map *l, int fd, } map_addr += c->mapstart - prev->mapstart; - errstring = _mmap_segment(l, loadcmds, nloadcmds, c, map_addr, fd, &mapseglen); + errstring = _mmap_segment(l, loadcmds, nloadcmds, c, map_addr, fd, + &mapseglen, hp_bitmap); if (__glibc_unlikely(errstring != NULL)) goto unmap_reserved_area; prev = c; @@ -514,7 +571,8 @@ _dl_map_segments_largein (struct link_map *l, int fd, } map_addr -= prev->mapstart - c->mapstart; - errstring = _mmap_segment(l, loadcmds, nloadcmds, c, map_addr, fd, &mapseglen); + errstring = _mmap_segment(l, loadcmds, nloadcmds, c, map_addr, fd, + &mapseglen, hp_bitmap); if (__glibc_unlikely(errstring != NULL)) goto unmap_reserved_area; diff --git a/elf/elf.h b/elf/elf.h index c5315d1b..a64576bb 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -730,8 +730,10 @@ typedef struct /* Legal values for p_flags (segment flags). 
*/ +#ifdef HUGEPAGE_SHARED_LIB /* libhugetlbfs's hugeedit use 0x00100000, here use another */ #define PF_HUGEPAGE (0x01000000) +#endif #define PF_X (1 << 0) /* Segment is executable */ #define PF_W (1 << 1) /* Segment is writable */ #define PF_R (1 << 2) /* Segment is readable */ diff --git a/elf/hugepageedit.c b/elf/hugepageedit.c index ab4247ad..0a44ece6 100644 --- a/elf/hugepageedit.c +++ b/elf/hugepageedit.c @@ -31,18 +31,52 @@ void print_usage(void) { - fprintf(stderr, "%s [-x] [-d] \n" \ + fprintf(stderr, "%s [-x] [-d] [-i index] \n" \ "\tdefault mark all PT_LOAD segment PF_HUGEPAGE flag\n" \ "\t-x option only mark executable PT_LOAD segment PF_HUGEPAGE flag\n" \ + "\t-i [index(start from 0)] option specifies the index that marks the PT_LOAD segment PF_HUGEPAGE flag\n" \ "\t-d option delete all the PT_LOAD segment PF_HUGEPAGE flag\n", TOOL_NAME); } + +static long parse_index(char *str) /* Parse STR as a non-negative decimal index; print a diagnostic and return -1 on any error. */ +{ + char *endptr; + + errno = 0; + long val = strtol(str, &endptr, 10); + + if (errno != 0) { + perror("strtol"); + return -1; + } + + if (endptr == str) { + fprintf(stderr, "No digits were found in -i option\n"); + return -1; + } + + if (*endptr != '\0') { + fprintf(stderr, "Invalid characters %s in -i %s option\n", endptr, str); + return -1; + } + + if (val < 0) { + fprintf(stderr, "Negative index %ld in -i %s option\n", val, str); + return -1; + } + + return val; +} + + int main(int argc, char *argv[]) { size_t length; int exit_status = -1; - int i, opt, delete = 0, exec_only = 0; - while ((opt = getopt(argc, argv, "dx")) != -1) + int i, opt, delete = 0, exec_only = 0, index_set = 0; + long index = -1; + while ((opt = getopt(argc, argv, "dxi:")) != -1) { switch (opt) { @@ -52,15 +86,21 @@ int main(int argc, char *argv[]) case 'x': exec_only = 1; break; + case 'i': + index = parse_index(optarg); + index_set = 1; + if (index < 0) + return -1; + break; default: print_usage(); return 0; } } - if (delete && exec_only) + if (delete + exec_only + index_set > 1) /* -d, -x and -i are mutually exclusive */ { - fprintf(stderr, 
"can not specify -x and -d option at the same time\n"); + fprintf(stderr, "can not specify -x, -d and -i option at the same time\n"); return -1; } @@ -81,6 +121,12 @@ int main(int argc, char *argv[]) if (ehdr == NULL) goto close_fd; + if (index_set && index >= ((ElfW(Ehdr) *)ehdr)->e_phnum) { + fprintf(stderr, "Index %ld in -i option out of PT_LOAD segment range of %s\n", + index, argv[optind]); /* argv[optind] is the first non-option argument, not the -i value */ + goto unmap; /* ehdr is non-NULL here: leave through unmap, like the phdr failure path below */ + } + ElfW(Phdr) *phdr = (ElfW(Phdr) *)get_phdr(ehdr, length); if (phdr == NULL) goto unmap; @@ -100,7 +146,7 @@ int main(int argc, char *argv[]) } else { - if (exec_only && !(phdr[i].p_flags & PF_X)) + if ((exec_only && !(phdr[i].p_flags & PF_X)) || (index_set && index != i)) continue; phdr[i].p_flags |= PF_HUGEPAGE; } -- 2.38.1