summaryrefslogtreecommitdiff
path: root/0001-ld.so-support-ld.so-mmap-hugetlb-hugepage-according-.patch
blob: 8d899e5f5dba39a826cb795da4e7726a552b8b4d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
From 7adf5ee832d2649fa85f8f104523932dab64f12e Mon Sep 17 00:00:00 2001
From: Lv Ying <lvying6@huawei.com>
Date: Tue, 7 Feb 2023 19:29:11 +0800
Subject: [PATCH 1/3] ld.so: support ld.so mmap hugetlb hugepage according to
 PT_LOAD segment granularity

Only attempt to use huge pages to load PT_LOAD segments marked with the PF_HUGEPAGE flag.
Even if the segment is marked with the PF_HUGEPAGE flag, the segment does not
necessarily use the hugetlb huge page, either because the segment is too small,
or because the segment is in an address space that is not suitable for using
a huge page. Also, we added the -i option to hugepageedit to mark any
PT_LOAD segment with the PF_HUGEPAGE flag.

Signed-off-by: Lv Ying <lvying6@huawei.com>
---
 elf/dl-load.c                  | 15 ++++---
 elf/dl-map-segments-hugepage.h | 72 ++++++++++++++++++++++++++++++----
 elf/elf.h                      |  2 +
 elf/hugepageedit.c             | 58 ++++++++++++++++++++++++---
 4 files changed, 128 insertions(+), 19 deletions(-)

diff --git a/elf/dl-load.c b/elf/dl-load.c
index f4b5c4a7..e0d4fa2e 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1136,6 +1136,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
     bool empty_dynamic = false;
 #ifdef HUGEPAGE_SHARED_LIB
     bool use_hugepage = false;
+    char hp_bitmap[l->l_phnum];
 #endif
 
     /* The struct is initialized to zero so this is not necessary:
@@ -1182,6 +1183,13 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
 	      goto lose;
 	    }
 
+#ifdef HUGEPAGE_SHARED_LIB
+  if (ph->p_flags & PF_HUGEPAGE) {
+              hp_bitmap[nloadcmds] = 1;
+              use_hugepage = true;
+          } else
+              hp_bitmap[nloadcmds] = 0;
+#endif
 	  struct loadcmd *c = &loadcmds[nloadcmds++];
 	  c->mapstart = ALIGN_DOWN (ph->p_vaddr, GLRO(dl_pagesize));
 	  c->mapend = ALIGN_UP (ph->p_vaddr + ph->p_filesz, GLRO(dl_pagesize));
@@ -1194,11 +1202,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
 	  if (nloadcmds > 1 && c[-1].mapend != c->mapstart)
 	    has_holes = true;
 
-#ifdef HUGEPAGE_SHARED_LIB
-    if (ph->p_flags & PF_HUGEPAGE)
-      use_hugepage = true;
-#endif
-
 	  /* Optimize a common case.  */
 #if (PF_R | PF_W | PF_X) == 7 && (PROT_READ | PROT_WRITE | PROT_EXEC) == 7
 	  c->prot = (PF_TO_PROT
@@ -1297,7 +1300,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
             ((GLRO(dl_hugepage_mask) & DL_HUGEPAGE_PROBE_FLAG) && use_hugepage))
       {
         errstring = _dl_map_segments_largein (l, fd, header, type, loadcmds, nloadcmds,
-            maplength, has_holes);
+            maplength, hp_bitmap);
         if (__glibc_unlikely (errstring != NULL))
           {
             hp_errcode = errno;
diff --git a/elf/dl-map-segments-hugepage.h b/elf/dl-map-segments-hugepage.h
index 37788ef9..e7202131 100644
--- a/elf/dl-map-segments-hugepage.h
+++ b/elf/dl-map-segments-hugepage.h
@@ -400,6 +400,55 @@ _extra_mmap(struct link_map *l, const struct loadcmd loadcmds[], size_t nloadcmd
   return extra_len;
 }
 
+static __always_inline const char *
+__mmap_segment_normalsz(const struct loadcmd *c, ElfW(Addr) mapstart, int fd,
+        size_t *mapseglen)
+{ /* Map segment C at MAPSTART with normal pages; NULL on success, else a DL_MAP_SEGMENTS_ERROR_* value.  */
+  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
+    _dl_debug_printf("\tuse normal page mmap segment:[%lx-%lx)\n", mapstart,
+            mapstart + (c->allocend - c->mapstart));
+  /* Map the file range starting at c->mapoff, of length mapend - mapstart, fixed at MAPSTART.  */
+  if (c->mapend > c->mapstart &&
+          (__mmap((void *)mapstart, c->mapend - c->mapstart, c->prot,
+                  MAP_FILE|MAP_PRIVATE|MAP_FIXED, fd, c->mapoff) == MAP_FAILED))
+    return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
+  /* The tail between dataend and allocend is cleared in place and/or remapped anonymously.  */
+  if (c->allocend > c->dataend) {
+    ElfW(Addr) zero, zeroend, zeropage;
+    /* zero/zeroend: dataend/allocend rebased onto MAPSTART; zeropage: zero rounded up to a page.  */
+    zero = mapstart + c->dataend - c->mapstart;
+    zeroend = mapstart + c->allocend - c->mapstart;
+    zeropage = ((zero + GLRO(dl_pagesize) - 1)
+                & ~(GLRO(dl_pagesize) - 1));
+    /* Clamp zeropage when the tail ends before the next page boundary.  */
+    if (zeroend < zeropage)
+      zeropage = zeroend;
+    /* Clear the bytes [zero, zeropage), adding PROT_WRITE temporarily if the segment lacks it.  */
+    if (zeropage > zero) {
+      if (__glibc_unlikely ((c->prot & PROT_WRITE) == 0)) {
+        if (__mprotect ((caddr_t) (zero & ~(GLRO(dl_pagesize) - 1)),
+                GLRO(dl_pagesize), c->prot|PROT_WRITE) < 0)
+          return DL_MAP_SEGMENTS_ERROR_MPROTECT;
+      }
+
+      memset ((void *) zero, '\0', zeropage - zero);
+      /* Put back the segment's own protection; the result of this call is not checked.  */
+      if (__glibc_unlikely ((c->prot & PROT_WRITE) == 0))
+        __mprotect ((caddr_t) (zero & ~(GLRO(dl_pagesize) - 1)),
+                GLRO(dl_pagesize), c->prot);
+    }
+    /* Whatever remains, [zeropage, zeroend), is covered by a fixed anonymous private mapping.  */
+    if (zeroend > zeropage) {
+      if (__mmap ((caddr_t) zeropage, zeroend - zeropage, c->prot,
+                  MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0) == MAP_FAILED)
+        return DL_MAP_SEGMENTS_ERROR_MAP_ZERO_FILL;
+    }
+  }
+  /* Hand the segment's in-memory length (allocend - mapstart) back through MAPSEGLEN.  */
+  *mapseglen = c->allocend - c->mapstart;
+  return NULL;
+}
+
 /*
  * PT_LOAD segment is described by p_filesz and p_memsz.
  * The bytes from the file are mapped to the beginning of the memory segment.
@@ -409,11 +458,17 @@ _extra_mmap(struct link_map *l, const struct loadcmd loadcmds[], size_t nloadcmd
  */
 static __always_inline const char *
 _mmap_segment(struct link_map *l, const struct loadcmd loadcmds[], size_t nloadcmds,
-              const struct loadcmd *c, ElfW(Addr) mapstart, int fd, size_t *mapseglen)
+              const struct loadcmd *c, ElfW(Addr) mapstart, int fd,
+              size_t *mapseglen, const char hp_bitmap[])
 {
   const char * errstring = NULL;
-  size_t extra_len = _extra_mmap(l, loadcmds, nloadcmds, c, mapstart);
-  size_t memsz_len = 0;
+  size_t extra_len, memsz_len = 0;
+
+  if (!hp_bitmap[((void *)c - (void *)loadcmds) / sizeof(struct loadcmd)]) {
+    return __mmap_segment_normalsz(c, mapstart, fd, mapseglen);
+  }
+
+  extra_len = _extra_mmap(l, loadcmds, nloadcmds, c, mapstart);
   if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
     _dl_debug_printf("\t%s(0x%lx): extra_len = 0x%lx\n\t{\n", __func__,
                     (unsigned long)c, extra_len);
@@ -448,7 +503,7 @@ static __always_inline const char *
 _dl_map_segments_largein (struct link_map *l, int fd,
                   const ElfW(Ehdr) *header, int type,
                   const struct loadcmd loadcmds[], size_t nloadcmds,
-                  const size_t maplength, bool has_holes)
+                  const size_t maplength, const char hp_bitmap[])
 {
   if (__glibc_unlikely (type != ET_DYN))
     return DL_MAP_SEGMENTS_ERROR_TYPE;
@@ -470,7 +525,8 @@ _dl_map_segments_largein (struct link_map *l, int fd,
   const struct loadcmd * c = loadcmds;
   ElfW(Addr) text_addr = ALIGN_UP((ElfW(Addr))map_area_start + (text->mapstart - c->mapstart), SIZE_2MB);
   size_t mapseglen;
-  errstring = _mmap_segment(l, loadcmds, nloadcmds, text, text_addr, fd, &mapseglen);
+  errstring = _mmap_segment(l, loadcmds, nloadcmds, text, text_addr, fd,
+          &mapseglen, hp_bitmap);
   if (__glibc_unlikely(errstring != NULL))
     goto unmap_reserved_area;
 
@@ -493,7 +549,8 @@ _dl_map_segments_largein (struct link_map *l, int fd,
         }
 
       map_addr += c->mapstart - prev->mapstart;
-      errstring = _mmap_segment(l, loadcmds, nloadcmds, c, map_addr, fd, &mapseglen);
+      errstring = _mmap_segment(l, loadcmds, nloadcmds, c, map_addr, fd,
+              &mapseglen, hp_bitmap);
       if (__glibc_unlikely(errstring != NULL))
         goto unmap_reserved_area;
       prev = c;
@@ -514,7 +571,8 @@ _dl_map_segments_largein (struct link_map *l, int fd,
         }
 
       map_addr -= prev->mapstart - c->mapstart;
-      errstring = _mmap_segment(l, loadcmds, nloadcmds, c, map_addr, fd, &mapseglen);
+      errstring = _mmap_segment(l, loadcmds, nloadcmds, c, map_addr, fd,
+              &mapseglen, hp_bitmap);
       if (__glibc_unlikely(errstring != NULL))
         goto unmap_reserved_area;
 
diff --git a/elf/elf.h b/elf/elf.h
index c5315d1b..a64576bb 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -730,8 +730,10 @@ typedef struct
 
 /* Legal values for p_flags (segment flags).  */
 
+#ifdef HUGEPAGE_SHARED_LIB
 /* libhugetlbfs's hugeedit use 0x00100000, here use another */
 #define PF_HUGEPAGE (0x01000000)
+#endif
 #define PF_X		(1 << 0)	/* Segment is executable */
 #define PF_W		(1 << 1)	/* Segment is writable */
 #define PF_R		(1 << 2)	/* Segment is readable */
diff --git a/elf/hugepageedit.c b/elf/hugepageedit.c
index ab4247ad..0a44ece6 100644
--- a/elf/hugepageedit.c
+++ b/elf/hugepageedit.c
@@ -31,18 +31,52 @@
 
 void print_usage(void)
 {
-  fprintf(stderr, "%s [-x] [-d]  <ELF file>\n"  \
+  fprintf(stderr, "%s [-x] [-d] [-i index] <ELF file>\n"  \
         "\tdefault mark all PT_LOAD segment PF_HUGEPAGE flag\n"  \
         "\t-x option only mark executable PT_LOAD segment PF_HUGEPAGE flag\n" \
+        "\t-i [index(start from 0)] option specifies the index that marks the PT_LOAD segment PF_HUGEPAGE flag\n" \
         "\t-d option delete all the PT_LOAD segment PF_HUGEPAGE flag\n", TOOL_NAME);
 }
 
+/* Parse STR (the -i argument) as a base-10 index; return it, or -1 after printing an error to stderr.  */
+static long parse_index(char *str)
+{
+    char *endptr;
+    /* Reset errno so that a non-zero value after strtol can be attributed to that call.  */
+    errno = 0;
+    long val = strtol(str, &endptr, 10);
+    /* errno was set during the conversion: report it via perror.  */
+    if (errno != 0) {
+        perror("strtol");
+        return -1;
+    }
+    /* endptr still at the start of STR: nothing was converted.  */
+    if (endptr == str) {
+        fprintf(stderr, "No digits were found in -i option\n");
+        return -1;
+    }
+    /* Text remains after the converted number: reject the whole argument.  */
+    if (*endptr != '\0') {
+        fprintf(stderr, "Invalid characters %s in -i %s option\n", endptr, str);
+        return -1;
+    }
+    /* Negative values are rejected; -1 doubles as this function's error return.  */
+    if (val < 0) {
+        fprintf(stderr, "Negative index %ld in -i %s option\n", val, str);
+        return -1;
+    }
+
+    return val;
+}
+
+
 int main(int argc, char *argv[])
 {
   size_t length;
   int exit_status = -1;
-  int i, opt, delete = 0, exec_only = 0;
-  while ((opt = getopt(argc, argv, "dx")) != -1)
+  int i, opt, delete = 0, exec_only = 0, index_set = 0;
+  long index = -1;
+  while ((opt = getopt(argc, argv, "dxi:")) != -1)
     {
       switch (opt)
         {
@@ -52,15 +86,21 @@ int main(int argc, char *argv[])
           case 'x':
             exec_only = 1;
             break;
+          case 'i':
+            index = parse_index(optarg);
+            index_set = 1;
+            if (index < 0)
+                return -1;
+            break;
           default:
             print_usage();
             return 0;
         }
     }
 
-  if (delete && exec_only)
+  if (delete + exec_only + index_set > 1)
     {
-      fprintf(stderr, "can not specify -x and -d option at the same time\n");
+      fprintf(stderr, "can not specify -x, -d and -i option at the same time\n");
       return -1;
     }
 
@@ -81,6 +121,12 @@ int main(int argc, char *argv[])
   if (ehdr == NULL)
     goto close_fd;
 
+  if (index_set && index >= ((ElfW(Ehdr) *)ehdr)->e_phnum) {
+      fprintf(stderr, "Index %ld in -i %s option out of PT_LOAD segment range\n",
+              index, argv[optind]);
+      goto close_fd;
+  }
+
   ElfW(Phdr) *phdr = (ElfW(Phdr) *)get_phdr(ehdr, length);
   if (phdr == NULL)
     goto unmap;
@@ -100,7 +146,7 @@ int main(int argc, char *argv[])
             }
           else
             {
-              if (exec_only && !(phdr[i].p_flags & PF_X))
+              if ((exec_only && !(phdr[i].p_flags & PF_X)) || (index_set && index != i))
                 continue;
               phdr[i].p_flags |= PF_HUGEPAGE;
             }
-- 
2.38.1