summary | refs | log | tree | commit | diff
path: root/0184-do-check-process-alive-read-pid-ppid-info-in-runtime.patch
diff options
context:
space:
mode:
Diffstat (limited to '0184-do-check-process-alive-read-pid-ppid-info-in-runtime.patch')
-rw-r--r-- 0184-do-check-process-alive-read-pid-ppid-info-in-runtime.patch 454
1 files changed, 454 insertions, 0 deletions
diff --git a/0184-do-check-process-alive-read-pid-ppid-info-in-runtime.patch b/0184-do-check-process-alive-read-pid-ppid-info-in-runtime.patch
new file mode 100644
index 0000000..4fa315f
--- /dev/null
+++ b/0184-do-check-process-alive-read-pid-ppid-info-in-runtime.patch
@@ -0,0 +1,454 @@
+From 02b51963a37da893cc52a35562dd32f772e9e497 Mon Sep 17 00:00:00 2001
+From: jikai <jikai11@huawei.com>
+Date: Fri, 24 Nov 2023 17:36:50 +0800
+Subject: [PATCH 184/198] do check process alive read pid ppid info in runtime
+
+Signed-off-by: jikai <jikai11@huawei.com>
+---
+ src/daemon/modules/api/runtime_api.h | 17 ++++++++
+ .../container/container_gc/containers_gc.c | 19 +++++++--
+ .../modules/container/restore/restore.c | 29 ++++++++++---
+ .../modules/container/supervisor/supervisor.c | 17 +++++++-
+ .../modules/runtime/engines/lcr/lcr_rt_ops.c | 11 +++++
+ .../modules/runtime/engines/lcr/lcr_rt_ops.h | 4 ++
+ .../modules/runtime/isula/isula_rt_ops.c | 13 +++++-
+ .../modules/runtime/isula/isula_rt_ops.h | 3 ++
+ src/daemon/modules/runtime/runtime.c | 42 +++++++++++++++++++
+ src/daemon/modules/runtime/shim/shim_rt_ops.c | 22 ++++++++++
+ src/daemon/modules/runtime/shim/shim_rt_ops.h | 3 ++
+ .../modules/service/service_container.c | 1 +
+ 12 files changed, 169 insertions(+), 12 deletions(-)
+
+diff --git a/src/daemon/modules/api/runtime_api.h b/src/daemon/modules/api/runtime_api.h
+index 1f23efe3..b0d70493 100644
+--- a/src/daemon/modules/api/runtime_api.h
++++ b/src/daemon/modules/api/runtime_api.h
+@@ -208,6 +208,15 @@ typedef struct _rt_runtime_rebuild_config_params_t {
+ const char *rootpath;
+ } rt_rebuild_config_params_t;
+
++typedef struct _rt_runtime_read_pid_ppid_info_params_t {
++ int pid;
++} rt_read_pid_ppid_info_params_t;
++
++typedef struct _rt_runtime_detect_process_params_t {
++ int pid;
++ uint64_t start_time;
++} rt_detect_process_params_t;
++
+ struct rt_ops {
+ /* detect whether runtime is of this runtime type */
+ bool (*detect)(const char *runtime);
+@@ -245,6 +254,10 @@ struct rt_ops {
+ int (*rt_resize)(const char *name, const char *runtime, const rt_resize_params_t *params);
+ int (*rt_exec_resize)(const char *name, const char *runtime, const rt_exec_resize_params_t *params);
+ int (*rt_rebuild_config)(const char *name, const char *runtime, const rt_rebuild_config_params_t *params);
++
++ int (*rt_read_pid_ppid_info)(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++ pid_ppid_info_t *pid_info);
++ int (*rt_detect_process)(const char *name, const char *runtime, const rt_detect_process_params_t *params);
+ };
+
+ int runtime_create(const char *name, const char *runtime, const rt_create_params_t *params);
+@@ -269,6 +282,10 @@ int runtime_rebuild_config(const char *name, const char *runtime, const rt_rebui
+ void free_rt_listpids_out_t(rt_listpids_out_t *out);
+ int runtime_resize(const char *name, const char *runtime, const rt_resize_params_t *params);
+ int runtime_exec_resize(const char *name, const char *runtime, const rt_exec_resize_params_t *params);
++
++int runtime_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++ pid_ppid_info_t *pid_info);
++int runtime_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params);
+ bool is_default_runtime(const char *name);
+
+ int runtime_init(void);
+diff --git a/src/daemon/modules/container/container_gc/containers_gc.c b/src/daemon/modules/container/container_gc/containers_gc.c
+index 2d16dee8..3f2473d5 100644
+--- a/src/daemon/modules/container/container_gc/containers_gc.c
++++ b/src/daemon/modules/container/container_gc/containers_gc.c
+@@ -386,6 +386,7 @@ static void gc_monitor_process(const char *id, pid_t pid, unsigned long long sta
+ {
+ INFO("Received garbage collector monitor of %s with pid %d", id, pid);
+
++ // for shim-v2, here is a ppid, which is always 0
+ if (util_process_alive(pid, start_time)) {
+ int ret = kill(pid, SIGKILL);
+ if (ret < 0 && errno != ESRCH) {
+@@ -461,14 +462,16 @@ static void gc_container_process(struct linked_list *it)
+     char *runtime = NULL;
+     char *id = NULL;
+     container_garbage_config_gc_containers_element *gc_cont = NULL;
+
+     gc_cont = (container_garbage_config_gc_containers_element *)it->elem;
+     id = gc_cont->id;
+     runtime = gc_cont->runtime;
+     pid = gc_cont->pid;
+     start_time = gc_cont->start_time;
++    // gc_cont is NULL until it->elem is read above, so the detect params can only be built here
++    rt_detect_process_params_t detect_params = { .pid = pid, .start_time = start_time };
+
+-    if (util_process_alive(pid, start_time) == false) {
++    if (runtime_detect_process(id, runtime, &detect_params) < 0) {
+ ret = clean_container_resource(id, runtime, pid);
+ if (ret != 0) {
+ WARN("Failed to clean resources of container %s", id);
+@@ -495,7 +498,15 @@ static void gc_container_process(struct linked_list *it)
+ free(it);
+ } else {
+ try_to_resume_container(id, runtime);
+- ret = kill(pid, SIGKILL);
++
++ rt_kill_params_t kill_params = {
++ .signal = SIGKILL,
++ .stop_signal = SIGKILL,
++ .pid = pid,
++ .start_time = start_time,
++ };
++
++ ret = runtime_kill(id, runtime, &kill_params);
+ if (ret < 0 && errno != ESRCH) {
+ ERROR("Can not kill process (pid=%d) with SIGKILL for container %s", pid, id);
+ }
+diff --git a/src/daemon/modules/container/restore/restore.c b/src/daemon/modules/container/restore/restore.c
+index 44ed14df..48a3ae4b 100644
+--- a/src/daemon/modules/container/restore/restore.c
++++ b/src/daemon/modules/container/restore/restore.c
+@@ -58,7 +58,12 @@ static int restore_supervisor(const container_t *cont)
+ char *exit_fifo = NULL;
+ char *id = cont->common_config->id;
+ char *statepath = cont->state_path;
++ char *runtime = cont->runtime;
+ pid_ppid_info_t pid_info = { 0 };
++ rt_detect_process_params_t params = {
++ .pid = cont->state->state->pid,
++ .start_time = cont->state->state->start_time,
++ };
+
+ nret = snprintf(container_state, sizeof(container_state), "%s/%s", statepath, id);
+ if (nret < 0 || (size_t)nret >= sizeof(container_state)) {
+@@ -81,7 +86,7 @@ static int restore_supervisor(const container_t *cont)
+ goto out;
+ }
+
+- if (!util_process_alive(cont->state->state->pid, cont->state->state->start_time)) {
++ if (runtime_detect_process(id, runtime, &params) != 0) {
+ ERROR("Container %s pid %d already dead, skip add supervisor", id, cont->state->state->pid);
+ close(exit_fifo_fd);
+ ret = -1;
+@@ -112,8 +117,10 @@ static int post_stopped_container_to_gc(const char *id, const char *runtime, con
+ {
+ int ret = 0;
+ pid_ppid_info_t pid_info = { 0 };
++ rt_read_pid_ppid_info_params_t params = { 0 };
++ params.pid = old_pid_info->pid;
+
+- (void)util_read_pid_ppid_info(old_pid_info->pid, &pid_info);
++ (void)runtime_read_pid_ppid_info(id, runtime, &params, &pid_info);
+ if (pid_info.ppid == 0) {
+ pid_info.ppid = old_pid_info->ppid;
+ pid_info.pstart_time = old_pid_info->pstart_time;
+@@ -180,9 +187,15 @@ static void restore_stopped_container(Container_Status status, const container_t
+ pid_ppid_info_t pid_info = { 0 };
+
+ if (status != CONTAINER_STATUS_STOPPED && status != CONTAINER_STATUS_CREATED) {
+- if (util_process_alive(cont->state->state->pid, cont->state->state->start_time)) {
++ rt_detect_process_params_t params = {
++ .pid = cont->state->state->pid,
++ .start_time = cont->state->state->start_time,
++ };
++ if (runtime_detect_process(id, cont->runtime, &params) == 0) {
+ pid_info.pid = cont->state->state->pid;
++ pid_info.start_time = cont->state->state->start_time;
+ }
++
+ if (util_process_alive(cont->state->state->p_pid, cont->state->state->p_start_time)) {
+ pid_info.ppid = cont->state->state->p_pid;
+ pid_info.pstart_time = cont->state->state->p_start_time;
+@@ -204,8 +217,11 @@ static void restore_running_container(Container_Status status, container_t *cont
+ int nret = 0;
+ const char *id = cont->common_config->id;
+ pid_ppid_info_t pid_info = { 0 };
++ rt_read_pid_ppid_info_params_t params = {
++ .pid = info->pid,
++ };
+
+- nret = util_read_pid_ppid_info(info->pid, &pid_info);
++ nret = runtime_read_pid_ppid_info(id, cont->runtime, &params, &pid_info);
+ if (nret == 0) {
+ try_to_set_container_running(status, cont, &pid_info);
+ container_state_reset_has_been_manual_stopped(cont->state);
+@@ -234,10 +250,13 @@ static void restore_paused_container(Container_Status status, container_t *cont,
+ int nret = 0;
+ const char *id = cont->common_config->id;
+ pid_ppid_info_t pid_info = { 0 };
++ rt_read_pid_ppid_info_params_t params = {
++ .pid = info->pid,
++ };
+
+ container_state_set_paused(cont->state);
+
+- nret = util_read_pid_ppid_info(info->pid, &pid_info);
++ nret = runtime_read_pid_ppid_info(id, cont->runtime, &params, &pid_info);
+ if (nret == 0) {
+ try_to_set_paused_container_pid(status, cont, &pid_info);
+ container_state_reset_has_been_manual_stopped(cont->state);
+diff --git a/src/daemon/modules/container/supervisor/supervisor.c b/src/daemon/modules/container/supervisor/supervisor.c
+index f77f58d7..b5ff6166 100644
+--- a/src/daemon/modules/container/supervisor/supervisor.c
++++ b/src/daemon/modules/container/supervisor/supervisor.c
+@@ -46,6 +46,7 @@
+ #include "cgroup.h"
+ #include "specs_api.h"
+ #endif
++#include "runtime_api.h"
+
+ pthread_mutex_t g_supervisor_lock = PTHREAD_MUTEX_INITIALIZER;
+ struct epoll_descr g_supervisor_descr;
+@@ -177,6 +178,17 @@ static void *clean_resources_thread(void *arg)
+ char *runtime = data->runtime;
+ unsigned long long start_time = data->pid_info.start_time;
+ pid_t pid = data->pid_info.pid;
++ rt_detect_process_params_t detect_params = {
++ .pid = pid,
++ .start_time = start_time,
++ };
++ rt_kill_params_t kill_params = {
++ .signal = SIGKILL,
++ .stop_signal = SIGKILL,
++ .pid = pid,
++ .start_time = start_time
++ };
++
+ int retry_count = 0;
+ int max_retry = 10;
+ #ifdef ENABLE_CRI_API_V1
+@@ -192,7 +204,7 @@ static void *clean_resources_thread(void *arg)
+ prctl(PR_SET_NAME, "Clean resource");
+
+ retry:
+- if (false == util_process_alive(pid, start_time)) {
++ if (runtime_detect_process(name, runtime, &detect_params) < 0) {
+ ret = clean_container_resource(name, runtime, pid);
+ // clean_container_resource failed, do not log error message,
+ // just add to gc to retry clean resource.
+@@ -200,7 +212,7 @@ retry:
+ ERROR("Failed to clean resources of container %s", name);
+ }
+ } else {
+- ret = kill(pid, SIGKILL);
++ ret = runtime_kill(name, runtime, &kill_params);
+ if (ret < 0 && errno != ESRCH) {
+ ERROR("Can not kill process (pid=%d) with SIGKILL for container %s", pid, name);
+ }
+@@ -212,6 +224,7 @@ retry:
+ }
+
+ // get info of init process in container for debug problem of container
++ // but for shim-v2, this might be a misleading debug info
+ proc_t *c_proc = util_get_process_proc_info(pid);
+ if (c_proc != NULL) {
+ ERROR("Container %s into GC with process state: {cmd: %s, state: %c, pid: %d}", name, c_proc->cmd, c_proc->state,
+diff --git a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c
+index 64a8adbc..eb3afb94 100644
+--- a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c
++++ b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c
+@@ -941,3 +941,14 @@ out:
+ free_oci_runtime_spec(oci_spec);
+ return ret;
+ }
++
++int rt_lcr_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++ pid_ppid_info_t *pid_info)
++{
++ return util_read_pid_ppid_info(params->pid, pid_info);
++}
++
++int rt_lcr_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params)
++{
++ return util_process_alive(params->pid, params->start_time) ? 0 : -1;
++}
+diff --git a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h
+index 7403544d..85ebe6f7 100644
+--- a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h
++++ b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h
+@@ -48,6 +48,10 @@ int rt_lcr_resize(const char *id, const char *runtime, const rt_resize_params_t
+ int rt_lcr_exec_resize(const char *id, const char *runtime, const rt_exec_resize_params_t *params);
+ int rt_lcr_kill(const char *id, const char *runtime, const rt_kill_params_t *params);
+ int rt_lcr_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params);
++
++int rt_lcr_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++ pid_ppid_info_t *pid_info);
++int rt_lcr_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params);
+ #ifdef __cplusplus
+ }
+ #endif
+diff --git a/src/daemon/modules/runtime/isula/isula_rt_ops.c b/src/daemon/modules/runtime/isula/isula_rt_ops.c
+index 0adb3858..35c09921 100644
+--- a/src/daemon/modules/runtime/isula/isula_rt_ops.c
++++ b/src/daemon/modules/runtime/isula/isula_rt_ops.c
+@@ -2173,4 +2173,15 @@ int rt_isula_kill(const char *id, const char *runtime, const rt_kill_params_t *p
+ int rt_isula_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params)
+ {
+ return 0;
+-}
+\ No newline at end of file
++}
++
++int rt_isula_read_pid_ppid_info(const char *id, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++ pid_ppid_info_t *pid_info)
++{
++ return util_read_pid_ppid_info(params->pid, pid_info);
++}
++
++int rt_isula_detect_process(const char *id, const char *runtime, const rt_detect_process_params_t *params)
++{
++ return util_process_alive(params->pid, params->start_time) ? 0 : -1;
++}
+diff --git a/src/daemon/modules/runtime/isula/isula_rt_ops.h b/src/daemon/modules/runtime/isula/isula_rt_ops.h
+index 1e5e049a..88236a1e 100644
+--- a/src/daemon/modules/runtime/isula/isula_rt_ops.h
++++ b/src/daemon/modules/runtime/isula/isula_rt_ops.h
+@@ -48,6 +48,9 @@ int rt_isula_exec_resize(const char *id, const char *runtime, const rt_exec_resi
+ int rt_isula_kill(const char *id, const char *runtime, const rt_kill_params_t *params);
+ int rt_isula_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params);
+
++int rt_isula_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++ pid_ppid_info_t *pid_info);
++int rt_isula_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params);
+ #ifdef __cplusplus
+ }
+ #endif
+diff --git a/src/daemon/modules/runtime/runtime.c b/src/daemon/modules/runtime/runtime.c
+index 43b78bca..7be140dc 100644
+--- a/src/daemon/modules/runtime/runtime.c
++++ b/src/daemon/modules/runtime/runtime.c
+@@ -46,6 +46,8 @@ static const struct rt_ops g_lcr_rt_ops = {
+ .rt_exec_resize = rt_lcr_exec_resize,
+ .rt_kill = rt_lcr_kill,
+ .rt_rebuild_config = rt_lcr_rebuild_config,
++ .rt_read_pid_ppid_info = rt_lcr_read_pid_ppid_info,
++ .rt_detect_process = rt_lcr_detect_process,
+ };
+
+ static const struct rt_ops g_isula_rt_ops = {
+@@ -67,6 +69,8 @@ static const struct rt_ops g_isula_rt_ops = {
+ .rt_exec_resize = rt_isula_exec_resize,
+ .rt_kill = rt_isula_kill,
+ .rt_rebuild_config = rt_isula_rebuild_config,
++ .rt_read_pid_ppid_info = rt_isula_read_pid_ppid_info,
++ .rt_detect_process = rt_isula_detect_process,
+ };
+
+ #ifdef ENABLE_SHIM_V2
+@@ -89,6 +93,8 @@ static const struct rt_ops g_shim_rt_ops = {
+ .rt_exec_resize = rt_shim_exec_resize,
+ .rt_kill = rt_shim_kill,
+ .rt_rebuild_config = rt_shim_rebuild_config,
++ .rt_read_pid_ppid_info = rt_shim_read_pid_ppid_info,
++ .rt_detect_process = rt_shim_detect_process,
+ };
+ #endif
+
+@@ -534,6 +540,42 @@ out:
+ return ret;
+ }
+
++int runtime_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++                               pid_ppid_info_t *pid_info)
++{
++    const struct rt_ops *ops = NULL;
++    // Returns -1 on invalid arguments or an unknown runtime, otherwise the result of the runtime's own op.
++    if (name == NULL || runtime == NULL || params == NULL || pid_info == NULL) {
++        ERROR("Invalid arguments for runtime read pid ppid info");
++        return -1;
++    }
++    // look up the ops table registered for this runtime name
++    ops = rt_ops_query(runtime);
++    if (ops == NULL) {
++        ERROR("Failed to get runtime ops");
++        return -1;
++    }
++
++    return ops->rt_read_pid_ppid_info(name, runtime, params, pid_info);
++}
++
++int runtime_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params)
++{
++    const struct rt_ops *ops = NULL;
++    // Returns the runtime op's result (0 = process detected); bad arguments or an unknown runtime give -1.
++    if (name == NULL || runtime == NULL || params == NULL) {
++        ERROR("Invalid arguments for runtime process alive");
++        return -1;
++    }
++
++    ops = rt_ops_query(runtime);
++    if (ops == NULL) {
++        ERROR("Failed to get runtime ops");
++        return -1;
++    }
++    return ops->rt_detect_process(name, runtime, params);
++}
++
+ bool is_default_runtime(const char *name)
+ {
+ const char *runtimes[] = { "lcr", "runc", "kata-runtime" };
+diff --git a/src/daemon/modules/runtime/shim/shim_rt_ops.c b/src/daemon/modules/runtime/shim/shim_rt_ops.c
+index 30b5e442..268d66d9 100644
+--- a/src/daemon/modules/runtime/shim/shim_rt_ops.c
++++ b/src/daemon/modules/runtime/shim/shim_rt_ops.c
+@@ -847,3 +847,25 @@ int rt_shim_rebuild_config(const char *name, const char *runtime, const rt_rebui
+ {
+ return 0;
+ }
++
++int rt_shim_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++                               pid_ppid_info_t *pid_info)
++{
++    if (params == NULL || pid_info == NULL) {
++        ERROR("Invalid input params");
++        return -1;
++    }
++    // only the pid is echoed back; no ppid or start-time field of pid_info is filled in here
++    pid_info->pid = params->pid;
++    return 0;
++}
++
++int rt_shim_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params)
++{
++ if (shim_v2_kill(name, NULL, 0, false) != 0) {
++ ERROR("%s: detect process failed", name);
++ return -1;
++ }
++
++ return 0;
++}
+diff --git a/src/daemon/modules/runtime/shim/shim_rt_ops.h b/src/daemon/modules/runtime/shim/shim_rt_ops.h
+index 2df34f4c..a3968cf5 100644
+--- a/src/daemon/modules/runtime/shim/shim_rt_ops.h
++++ b/src/daemon/modules/runtime/shim/shim_rt_ops.h
+@@ -64,6 +64,9 @@ bool is_valid_v2_runtime(const char* name);
+
+ int rt_shim_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params);
+
++int rt_shim_read_pid_ppid_info(const char *id, const char *runtime, const rt_read_pid_ppid_info_params_t *params,
++ pid_ppid_info_t *pid_info);
++int rt_shim_detect_process(const char *id, const char *runtime, const rt_detect_process_params_t *params);
+ #ifdef __cplusplus
+ }
+ #endif
+diff --git a/src/daemon/modules/service/service_container.c b/src/daemon/modules/service/service_container.c
+index 250e8299..dbf56776 100644
+--- a/src/daemon/modules/service/service_container.c
++++ b/src/daemon/modules/service/service_container.c
+@@ -1513,6 +1513,7 @@ out:
+
+ static int send_signal_to_process(pid_t pid, unsigned long long start_time, uint32_t stop_signal, uint32_t signal)
+ {
++ // for shim-v2, here is a ppid, which is always 0
+ if (util_process_alive(pid, start_time) == false) {
+ if (signal == stop_signal || signal == SIGKILL) {
+ WARN("Process %d is not alive", pid);
+--
+2.34.1
+