1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
|
From 02b51963a37da893cc52a35562dd32f772e9e497 Mon Sep 17 00:00:00 2001
From: jikai <jikai11@huawei.com>
Date: Fri, 24 Nov 2023 17:36:50 +0800
Subject: [PATCH 184/198] do check process alive read pid ppid info in runtime
Signed-off-by: jikai <jikai11@huawei.com>
---
src/daemon/modules/api/runtime_api.h | 17 ++++++++
.../container/container_gc/containers_gc.c | 19 +++++++--
.../modules/container/restore/restore.c | 29 ++++++++++---
.../modules/container/supervisor/supervisor.c | 17 +++++++-
.../modules/runtime/engines/lcr/lcr_rt_ops.c | 11 +++++
.../modules/runtime/engines/lcr/lcr_rt_ops.h | 4 ++
.../modules/runtime/isula/isula_rt_ops.c | 13 +++++-
.../modules/runtime/isula/isula_rt_ops.h | 3 ++
src/daemon/modules/runtime/runtime.c | 42 +++++++++++++++++++
src/daemon/modules/runtime/shim/shim_rt_ops.c | 22 ++++++++++
src/daemon/modules/runtime/shim/shim_rt_ops.h | 3 ++
.../modules/service/service_container.c | 1 +
12 files changed, 169 insertions(+), 12 deletions(-)
diff --git a/src/daemon/modules/api/runtime_api.h b/src/daemon/modules/api/runtime_api.h
index 1f23efe3..b0d70493 100644
--- a/src/daemon/modules/api/runtime_api.h
+++ b/src/daemon/modules/api/runtime_api.h
@@ -208,6 +208,15 @@ typedef struct _rt_runtime_rebuild_config_params_t {
const char *rootpath;
} rt_rebuild_config_params_t;
+typedef struct _rt_runtime_read_pid_ppid_info_params_t {
+ int pid;
+} rt_read_pid_ppid_info_params_t;
+
+typedef struct _rt_runtime_detect_process_params_t {
+ int pid;
+ uint64_t start_time;
+} rt_detect_process_params_t;
+
struct rt_ops {
/* detect whether runtime is of this runtime type */
bool (*detect)(const char *runtime);
@@ -245,6 +254,10 @@ struct rt_ops {
int (*rt_resize)(const char *name, const char *runtime, const rt_resize_params_t *params);
int (*rt_exec_resize)(const char *name, const char *runtime, const rt_exec_resize_params_t *params);
int (*rt_rebuild_config)(const char *name, const char *runtime, const rt_rebuild_config_params_t *params);
+
+ int (*rt_read_pid_ppid_info)(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info);
+ int (*rt_detect_process)(const char *name, const char *runtime, const rt_detect_process_params_t *params);
};
int runtime_create(const char *name, const char *runtime, const rt_create_params_t *params);
@@ -269,6 +282,10 @@ int runtime_rebuild_config(const char *name, const char *runtime, const rt_rebui
void free_rt_listpids_out_t(rt_listpids_out_t *out);
int runtime_resize(const char *name, const char *runtime, const rt_resize_params_t *params);
int runtime_exec_resize(const char *name, const char *runtime, const rt_exec_resize_params_t *params);
+
+int runtime_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info);
+int runtime_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params);
bool is_default_runtime(const char *name);
int runtime_init(void);
diff --git a/src/daemon/modules/container/container_gc/containers_gc.c b/src/daemon/modules/container/container_gc/containers_gc.c
index 2d16dee8..3f2473d5 100644
--- a/src/daemon/modules/container/container_gc/containers_gc.c
+++ b/src/daemon/modules/container/container_gc/containers_gc.c
@@ -386,6 +386,7 @@ static void gc_monitor_process(const char *id, pid_t pid, unsigned long long sta
{
INFO("Received garbage collector monitor of %s with pid %d", id, pid);
+ // for shim-v2, here is a ppid, which is always 0
if (util_process_alive(pid, start_time)) {
int ret = kill(pid, SIGKILL);
if (ret < 0 && errno != ESRCH) {
@@ -461,14 +462,16 @@ static void gc_container_process(struct linked_list *it)
char *runtime = NULL;
char *id = NULL;
container_garbage_config_gc_containers_element *gc_cont = NULL;
+ rt_detect_process_params_t detect_params = {
+ .pid = gc_cont->pid,
+ .start_time = gc_cont->start_time,
+ };
gc_cont = (container_garbage_config_gc_containers_element *)it->elem;
id = gc_cont->id;
runtime = gc_cont->runtime;
- pid = gc_cont->pid;
- start_time = gc_cont->start_time;
- if (util_process_alive(pid, start_time) == false) {
+ if (runtime_detect_process(id, runtime, &detect_params) < 0) {
ret = clean_container_resource(id, runtime, pid);
if (ret != 0) {
WARN("Failed to clean resources of container %s", id);
@@ -495,7 +498,15 @@ static void gc_container_process(struct linked_list *it)
free(it);
} else {
try_to_resume_container(id, runtime);
- ret = kill(pid, SIGKILL);
+
+ rt_kill_params_t kill_params = {
+ .signal = SIGKILL,
+ .stop_signal = SIGKILL,
+ .pid = pid,
+ .start_time = start_time,
+ };
+
+ ret = runtime_kill(id, runtime, &kill_params);
if (ret < 0 && errno != ESRCH) {
ERROR("Can not kill process (pid=%d) with SIGKILL for container %s", pid, id);
}
diff --git a/src/daemon/modules/container/restore/restore.c b/src/daemon/modules/container/restore/restore.c
index 44ed14df..48a3ae4b 100644
--- a/src/daemon/modules/container/restore/restore.c
+++ b/src/daemon/modules/container/restore/restore.c
@@ -58,7 +58,12 @@ static int restore_supervisor(const container_t *cont)
char *exit_fifo = NULL;
char *id = cont->common_config->id;
char *statepath = cont->state_path;
+ char *runtime = cont->runtime;
pid_ppid_info_t pid_info = { 0 };
+ rt_detect_process_params_t params = {
+ .pid = cont->state->state->pid,
+ .start_time = cont->state->state->start_time,
+ };
nret = snprintf(container_state, sizeof(container_state), "%s/%s", statepath, id);
if (nret < 0 || (size_t)nret >= sizeof(container_state)) {
@@ -81,7 +86,7 @@ static int restore_supervisor(const container_t *cont)
goto out;
}
- if (!util_process_alive(cont->state->state->pid, cont->state->state->start_time)) {
+ if (runtime_detect_process(id, runtime, &params) != 0) {
ERROR("Container %s pid %d already dead, skip add supervisor", id, cont->state->state->pid);
close(exit_fifo_fd);
ret = -1;
@@ -112,8 +117,10 @@ static int post_stopped_container_to_gc(const char *id, const char *runtime, con
{
int ret = 0;
pid_ppid_info_t pid_info = { 0 };
+ rt_read_pid_ppid_info_params_t params = { 0 };
+ params.pid = old_pid_info->pid;
- (void)util_read_pid_ppid_info(old_pid_info->pid, &pid_info);
+ (void)runtime_read_pid_ppid_info(id, runtime, &params, &pid_info);
if (pid_info.ppid == 0) {
pid_info.ppid = old_pid_info->ppid;
pid_info.pstart_time = old_pid_info->pstart_time;
@@ -180,9 +187,15 @@ static void restore_stopped_container(Container_Status status, const container_t
pid_ppid_info_t pid_info = { 0 };
if (status != CONTAINER_STATUS_STOPPED && status != CONTAINER_STATUS_CREATED) {
- if (util_process_alive(cont->state->state->pid, cont->state->state->start_time)) {
+ rt_detect_process_params_t params = {
+ .pid = cont->state->state->pid,
+ .start_time = cont->state->state->start_time,
+ };
+ if (runtime_detect_process(id, cont->runtime, &params) == 0) {
pid_info.pid = cont->state->state->pid;
+ pid_info.start_time = cont->state->state->start_time;
}
+
if (util_process_alive(cont->state->state->p_pid, cont->state->state->p_start_time)) {
pid_info.ppid = cont->state->state->p_pid;
pid_info.pstart_time = cont->state->state->p_start_time;
@@ -204,8 +217,11 @@ static void restore_running_container(Container_Status status, container_t *cont
int nret = 0;
const char *id = cont->common_config->id;
pid_ppid_info_t pid_info = { 0 };
+ rt_read_pid_ppid_info_params_t params = {
+ .pid = info->pid,
+ };
- nret = util_read_pid_ppid_info(info->pid, &pid_info);
+ nret = runtime_read_pid_ppid_info(id, cont->runtime, &params, &pid_info);
if (nret == 0) {
try_to_set_container_running(status, cont, &pid_info);
container_state_reset_has_been_manual_stopped(cont->state);
@@ -234,10 +250,13 @@ static void restore_paused_container(Container_Status status, container_t *cont,
int nret = 0;
const char *id = cont->common_config->id;
pid_ppid_info_t pid_info = { 0 };
+ rt_read_pid_ppid_info_params_t params = {
+ .pid = info->pid,
+ };
container_state_set_paused(cont->state);
- nret = util_read_pid_ppid_info(info->pid, &pid_info);
+ nret = runtime_read_pid_ppid_info(id, cont->runtime, &params, &pid_info);
if (nret == 0) {
try_to_set_paused_container_pid(status, cont, &pid_info);
container_state_reset_has_been_manual_stopped(cont->state);
diff --git a/src/daemon/modules/container/supervisor/supervisor.c b/src/daemon/modules/container/supervisor/supervisor.c
index f77f58d7..b5ff6166 100644
--- a/src/daemon/modules/container/supervisor/supervisor.c
+++ b/src/daemon/modules/container/supervisor/supervisor.c
@@ -46,6 +46,7 @@
#include "cgroup.h"
#include "specs_api.h"
#endif
+#include "runtime_api.h"
pthread_mutex_t g_supervisor_lock = PTHREAD_MUTEX_INITIALIZER;
struct epoll_descr g_supervisor_descr;
@@ -177,6 +178,17 @@ static void *clean_resources_thread(void *arg)
char *runtime = data->runtime;
unsigned long long start_time = data->pid_info.start_time;
pid_t pid = data->pid_info.pid;
+ rt_detect_process_params_t detect_params = {
+ .pid = pid,
+ .start_time = start_time,
+ };
+ rt_kill_params_t kill_params = {
+ .signal = SIGKILL,
+ .stop_signal = SIGKILL,
+ .pid = pid,
+ .start_time = start_time
+ };
+
int retry_count = 0;
int max_retry = 10;
#ifdef ENABLE_CRI_API_V1
@@ -192,7 +204,7 @@ static void *clean_resources_thread(void *arg)
prctl(PR_SET_NAME, "Clean resource");
retry:
- if (false == util_process_alive(pid, start_time)) {
+ if (runtime_detect_process(name, runtime, &detect_params) < 0) {
ret = clean_container_resource(name, runtime, pid);
// clean_container_resource failed, do not log error message,
// just add to gc to retry clean resource.
@@ -200,7 +212,7 @@ retry:
ERROR("Failed to clean resources of container %s", name);
}
} else {
- ret = kill(pid, SIGKILL);
+ ret = runtime_kill(name, runtime, &kill_params);
if (ret < 0 && errno != ESRCH) {
ERROR("Can not kill process (pid=%d) with SIGKILL for container %s", pid, name);
}
@@ -212,6 +224,7 @@ retry:
}
// get info of init process in container for debug problem of container
+ // but for shim-v2, this might be a misleading debug info
proc_t *c_proc = util_get_process_proc_info(pid);
if (c_proc != NULL) {
ERROR("Container %s into GC with process state: {cmd: %s, state: %c, pid: %d}", name, c_proc->cmd, c_proc->state,
diff --git a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c
index 64a8adbc..eb3afb94 100644
--- a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c
+++ b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c
@@ -941,3 +941,14 @@ out:
free_oci_runtime_spec(oci_spec);
return ret;
}
+
+int rt_lcr_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info)
+{
+ return util_read_pid_ppid_info(params->pid, pid_info);
+}
+
+int rt_lcr_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params)
+{
+ return util_process_alive(params->pid, params->start_time) ? 0 : -1;
+}
diff --git a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h
index 7403544d..85ebe6f7 100644
--- a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h
+++ b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h
@@ -48,6 +48,10 @@ int rt_lcr_resize(const char *id, const char *runtime, const rt_resize_params_t
int rt_lcr_exec_resize(const char *id, const char *runtime, const rt_exec_resize_params_t *params);
int rt_lcr_kill(const char *id, const char *runtime, const rt_kill_params_t *params);
int rt_lcr_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params);
+
+int rt_lcr_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info);
+int rt_lcr_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params);
#ifdef __cplusplus
}
#endif
diff --git a/src/daemon/modules/runtime/isula/isula_rt_ops.c b/src/daemon/modules/runtime/isula/isula_rt_ops.c
index 0adb3858..35c09921 100644
--- a/src/daemon/modules/runtime/isula/isula_rt_ops.c
+++ b/src/daemon/modules/runtime/isula/isula_rt_ops.c
@@ -2173,4 +2173,15 @@ int rt_isula_kill(const char *id, const char *runtime, const rt_kill_params_t *p
int rt_isula_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params)
{
return 0;
-}
\ No newline at end of file
+}
+
+int rt_isula_read_pid_ppid_info(const char *id, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info)
+{
+ return util_read_pid_ppid_info(params->pid, pid_info);
+}
+
+int rt_isula_detect_process(const char *id, const char *runtime, const rt_detect_process_params_t *params)
+{
+ return util_process_alive(params->pid, params->start_time) ? 0 : -1;
+}
diff --git a/src/daemon/modules/runtime/isula/isula_rt_ops.h b/src/daemon/modules/runtime/isula/isula_rt_ops.h
index 1e5e049a..88236a1e 100644
--- a/src/daemon/modules/runtime/isula/isula_rt_ops.h
+++ b/src/daemon/modules/runtime/isula/isula_rt_ops.h
@@ -48,6 +48,9 @@ int rt_isula_exec_resize(const char *id, const char *runtime, const rt_exec_resi
int rt_isula_kill(const char *id, const char *runtime, const rt_kill_params_t *params);
int rt_isula_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params);
+int rt_isula_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info);
+int rt_isula_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params);
#ifdef __cplusplus
}
#endif
diff --git a/src/daemon/modules/runtime/runtime.c b/src/daemon/modules/runtime/runtime.c
index 43b78bca..7be140dc 100644
--- a/src/daemon/modules/runtime/runtime.c
+++ b/src/daemon/modules/runtime/runtime.c
@@ -46,6 +46,8 @@ static const struct rt_ops g_lcr_rt_ops = {
.rt_exec_resize = rt_lcr_exec_resize,
.rt_kill = rt_lcr_kill,
.rt_rebuild_config = rt_lcr_rebuild_config,
+ .rt_read_pid_ppid_info = rt_lcr_read_pid_ppid_info,
+ .rt_detect_process = rt_lcr_detect_process,
};
static const struct rt_ops g_isula_rt_ops = {
@@ -67,6 +69,8 @@ static const struct rt_ops g_isula_rt_ops = {
.rt_exec_resize = rt_isula_exec_resize,
.rt_kill = rt_isula_kill,
.rt_rebuild_config = rt_isula_rebuild_config,
+ .rt_read_pid_ppid_info = rt_isula_read_pid_ppid_info,
+ .rt_detect_process = rt_isula_detect_process,
};
#ifdef ENABLE_SHIM_V2
@@ -89,6 +93,8 @@ static const struct rt_ops g_shim_rt_ops = {
.rt_exec_resize = rt_shim_exec_resize,
.rt_kill = rt_shim_kill,
.rt_rebuild_config = rt_shim_rebuild_config,
+ .rt_read_pid_ppid_info = rt_shim_read_pid_ppid_info,
+ .rt_detect_process = rt_shim_detect_process,
};
#endif
@@ -534,6 +540,42 @@ out:
return ret;
}
+int runtime_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info)
+{
+ const struct rt_ops *ops = NULL;
+
+ if (name == NULL || runtime == NULL || params == NULL) {
+ ERROR("Invalid arguments for runtime exec resize");
+ return -1;
+ }
+
+ ops = rt_ops_query(runtime);
+ if (ops == NULL) {
+ ERROR("Failed to get runtime ops");
+ return -1;
+ }
+
+ return ops->rt_read_pid_ppid_info(name, runtime, params, pid_info);
+}
+
+int runtime_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params)
+{
+ const struct rt_ops *ops = NULL;
+
+ if (name == NULL || runtime == NULL || params == NULL) {
+ ERROR("Invalid arguments for runtime process alive");
+ return -1;
+ }
+
+ ops = rt_ops_query(runtime);
+ if (ops == NULL) {
+ return -1;
+ }
+
+ return ops->rt_detect_process(name, runtime, params);
+}
+
bool is_default_runtime(const char *name)
{
const char *runtimes[] = { "lcr", "runc", "kata-runtime" };
diff --git a/src/daemon/modules/runtime/shim/shim_rt_ops.c b/src/daemon/modules/runtime/shim/shim_rt_ops.c
index 30b5e442..268d66d9 100644
--- a/src/daemon/modules/runtime/shim/shim_rt_ops.c
+++ b/src/daemon/modules/runtime/shim/shim_rt_ops.c
@@ -847,3 +847,25 @@ int rt_shim_rebuild_config(const char *name, const char *runtime, const rt_rebui
{
return 0;
}
+
+int rt_shim_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info)
+{
+ if (pid_info == NULL) {
+ ERROR("Invalid input params");
+ return -1;
+ }
+
+ pid_info->pid = params->pid;
+ return 0;
+}
+
+int rt_shim_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params)
+{
+ if (shim_v2_kill(name, NULL, 0, false) != 0) {
+ ERROR("%s: detect process failed", name);
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/src/daemon/modules/runtime/shim/shim_rt_ops.h b/src/daemon/modules/runtime/shim/shim_rt_ops.h
index 2df34f4c..a3968cf5 100644
--- a/src/daemon/modules/runtime/shim/shim_rt_ops.h
+++ b/src/daemon/modules/runtime/shim/shim_rt_ops.h
@@ -64,6 +64,9 @@ bool is_valid_v2_runtime(const char* name);
int rt_shim_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params);
+int rt_shim_read_pid_ppid_info(const char *id, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
+ pid_ppid_info_t *pid_info);
+int rt_shim_detect_process(const char *id, const char *runtime, const rt_detect_process_params_t *params);
#ifdef __cplusplus
}
#endif
diff --git a/src/daemon/modules/service/service_container.c b/src/daemon/modules/service/service_container.c
index 250e8299..dbf56776 100644
--- a/src/daemon/modules/service/service_container.c
+++ b/src/daemon/modules/service/service_container.c
@@ -1513,6 +1513,7 @@ out:
static int send_signal_to_process(pid_t pid, unsigned long long start_time, uint32_t stop_signal, uint32_t signal)
{
+ // for shim-v2, here is a ppid, which is always 0
if (util_process_alive(pid, start_time) == false) {
if (signal == stop_signal || signal == SIGKILL) {
WARN("Process %d is not alive", pid);
--
2.34.1
|