summaryrefslogtreecommitdiff
path: root/linux-Revert-the-use-of-sched_getaffinity-on-get_npr.patch
blob: 17a7a5ce7c3aa2d8f8168c0d7c71f9b99323452c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
From 342298278eabc75baabcaced110a11a02c3d3580 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Mon, 6 Sep 2021 14:19:51 -0300
Subject: [PATCH] linux: Revert the use of sched_getaffinity on get_nproc (BZ
 #28310)

The use of sched_getaffinity on get_nproc and
sysconf (_SC_NPROCESSORS_ONLN) done in 903bc7dcc2acafc40 (BZ #27645)
breaks the top command in common hypervisor configurations and also
other monitoring tools.

The main issue using sched_getaffinity changed the symbols semantic
from system-wide scope of online CPUs to per-process one (which can
be changed with kernel cpusets or book parameters in VM).

This patch reverts mostly of the 903bc7dcc2acafc40, with the
exceptions:

  * No more cached values and atomic updates, since they are inherent
    racy.

  * No /proc/cpuinfo fallback, since /proc/stat is already used and
    it would require to revert more arch-specific code.

  * The alloca is replace with a static buffer of 1024 bytes.

So the implementation first consult the sysfs, and fallbacks to procfs.

Checked on x86_64-linux-gnu.

Reviewed-by: Florian Weimer <fweimer@redhat.com>
---
 sysdeps/unix/sysv/linux/getsysstats.c | 139 ++++++++++++++++++++++++++++++++--
 1 file changed, 134 insertions(+), 5 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
index 1e3d886..15ad91c 100644
--- a/sysdeps/unix/sysv/linux/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/getsysstats.c
@@ -17,6 +17,8 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <array_length.h>
+#include <assert.h>
+#include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <ldsodefs.h>
@@ -29,7 +31,7 @@
 #include <sysdep.h>
 
 int
-__get_nprocs (void)
+__get_nprocs_sched (void)
 {
   enum
     {
@@ -52,14 +54,141 @@ __get_nprocs (void)
      atomics are needed). */
   return 2;
 }
-libc_hidden_def (__get_nprocs)
-weak_alias (__get_nprocs, get_nprocs)
+
+static char *
+next_line (int fd, char *const buffer, char **cp, char **re,
+           char *const buffer_end)
+{
+  char *res = *cp;
+  char *nl = memchr (*cp, '\n', *re - *cp);
+  if (nl == NULL)
+    {
+      if (*cp != buffer)
+        {
+          if (*re == buffer_end)
+            {
+              memmove (buffer, *cp, *re - *cp);
+              *re = buffer + (*re - *cp);
+              *cp = buffer;
+
+              ssize_t n = __read_nocancel (fd, *re, buffer_end - *re);
+              if (n < 0)
+                return NULL;
+
+              *re += n;
+
+              nl = memchr (*cp, '\n', *re - *cp);
+              while (nl == NULL && *re == buffer_end)
+                {
+                  /* Truncate too long lines.  */
+                  *re = buffer + 3 * (buffer_end - buffer) / 4;
+                  n = __read_nocancel (fd, *re, buffer_end - *re);
+                  if (n < 0)
+                    return NULL;
+
+                  nl = memchr (*re, '\n', n);
+                  **re = '\n';
+                  *re += n;
+                }
+            }
+          else
+            nl = memchr (*cp, '\n', *re - *cp);
+
+          res = *cp;
+        }
+
+      if (nl == NULL)
+        nl = *re - 1;
+    }
+
+  *cp = nl + 1;
+  assert (*cp <= *re);
+
+  return res == *re ? NULL : res;
+}
+
 
 int
-__get_nprocs_sched (void)
+__get_nprocs (void)
 {
-  return __get_nprocs ();
+  enum { buffer_size = 1024 };
+  char buffer[buffer_size];
+  char *buffer_end = buffer + buffer_size;
+  char *cp = buffer_end;
+  char *re = buffer_end;
+
+  const int flags = O_RDONLY | O_CLOEXEC;
+  /* This file contains comma-separated ranges.  */
+  int fd = __open_nocancel ("/sys/devices/system/cpu/online", flags);
+  char *l;
+  int result = 0;
+  if (fd != -1)
+    {
+      l = next_line (fd, buffer, &cp, &re, buffer_end);
+      if (l != NULL)
+	do
+	  {
+	    char *endp;
+	    unsigned long int n = strtoul (l, &endp, 10);
+	    if (l == endp)
+	      {
+		result = 0;
+		break;
+	      }
+
+	    unsigned long int m = n;
+	    if (*endp == '-')
+	      {
+		l = endp + 1;
+		m = strtoul (l, &endp, 10);
+		if (l == endp)
+		  {
+		    result = 0;
+		    break;
+		  }
+	      }
+
+	    result += m - n + 1;
+
+	    l = endp;
+	    if (l < re && *l == ',')
+	      ++l;
+	  }
+	while (l < re && *l != '\n');
+
+      __close_nocancel_nostatus (fd);
+
+      if (result > 0)
+	return result;
+    }
+
+  cp = buffer_end;
+  re = buffer_end;
+
+  /* Default to an SMP system in case we cannot obtain an accurate
+     number.  */
+  result = 2;
+
+  fd = __open_nocancel ("/proc/stat", flags);
+  if (fd != -1)
+    {
+      result = 0;
+
+      while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
+	/* The current format of /proc/stat has all the cpu* entries
+	   at the front.  We assume here that stays this way.  */
+	if (strncmp (l, "cpu", 3) != 0)
+	  break;
+	else if (isdigit (l[3]))
+	  ++result;
+
+      __close_nocancel_nostatus (fd);
+    }
+
+  return result;
 }
+libc_hidden_def (__get_nprocs)
+weak_alias (__get_nprocs, get_nprocs)
 
 
 /* On some architectures it is possible to distinguish between configured
-- 
1.8.3.1