From 02b51963a37da893cc52a35562dd32f772e9e497 Mon Sep 17 00:00:00 2001 From: jikai Date: Fri, 24 Nov 2023 17:36:50 +0800 Subject: [PATCH 184/198] do check process alive read pid ppid info in runtime Signed-off-by: jikai --- src/daemon/modules/api/runtime_api.h | 17 ++++++++ .../container/container_gc/containers_gc.c | 19 +++++++-- .../modules/container/restore/restore.c | 29 ++++++++++--- .../modules/container/supervisor/supervisor.c | 17 +++++++- .../modules/runtime/engines/lcr/lcr_rt_ops.c | 11 +++++ .../modules/runtime/engines/lcr/lcr_rt_ops.h | 4 ++ .../modules/runtime/isula/isula_rt_ops.c | 13 +++++- .../modules/runtime/isula/isula_rt_ops.h | 3 ++ src/daemon/modules/runtime/runtime.c | 42 +++++++++++++++++++ src/daemon/modules/runtime/shim/shim_rt_ops.c | 22 ++++++++++ src/daemon/modules/runtime/shim/shim_rt_ops.h | 3 ++ .../modules/service/service_container.c | 1 + 12 files changed, 169 insertions(+), 12 deletions(-) diff --git a/src/daemon/modules/api/runtime_api.h b/src/daemon/modules/api/runtime_api.h index 1f23efe3..b0d70493 100644 --- a/src/daemon/modules/api/runtime_api.h +++ b/src/daemon/modules/api/runtime_api.h @@ -208,6 +208,15 @@ typedef struct _rt_runtime_rebuild_config_params_t { const char *rootpath; } rt_rebuild_config_params_t; +typedef struct _rt_runtime_read_pid_ppid_info_params_t { + int pid; +} rt_read_pid_ppid_info_params_t; + +typedef struct _rt_runtime_detect_process_params_t { + int pid; + uint64_t start_time; +} rt_detect_process_params_t; + struct rt_ops { /* detect whether runtime is of this runtime type */ bool (*detect)(const char *runtime); @@ -245,6 +254,10 @@ struct rt_ops { int (*rt_resize)(const char *name, const char *runtime, const rt_resize_params_t *params); int (*rt_exec_resize)(const char *name, const char *runtime, const rt_exec_resize_params_t *params); int (*rt_rebuild_config)(const char *name, const char *runtime, const rt_rebuild_config_params_t *params); + + int (*rt_read_pid_ppid_info)(const char *name, const 
char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info); + int (*rt_detect_process)(const char *name, const char *runtime, const rt_detect_process_params_t *params); }; int runtime_create(const char *name, const char *runtime, const rt_create_params_t *params); @@ -269,6 +282,10 @@ int runtime_rebuild_config(const char *name, const char *runtime, const rt_rebui void free_rt_listpids_out_t(rt_listpids_out_t *out); int runtime_resize(const char *name, const char *runtime, const rt_resize_params_t *params); int runtime_exec_resize(const char *name, const char *runtime, const rt_exec_resize_params_t *params); + +int runtime_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info); +int runtime_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params); bool is_default_runtime(const char *name); int runtime_init(void); diff --git a/src/daemon/modules/container/container_gc/containers_gc.c b/src/daemon/modules/container/container_gc/containers_gc.c index 2d16dee8..3f2473d5 100644 --- a/src/daemon/modules/container/container_gc/containers_gc.c +++ b/src/daemon/modules/container/container_gc/containers_gc.c @@ -386,6 +386,7 @@ static void gc_monitor_process(const char *id, pid_t pid, unsigned long long sta { INFO("Received garbage collector monitor of %s with pid %d", id, pid); + // for shim-v2, here is a ppid, which is always 0 if (util_process_alive(pid, start_time)) { int ret = kill(pid, SIGKILL); if (ret < 0 && errno != ESRCH) { @@ -461,14 +462,18 @@ static void gc_container_process(struct linked_list *it) char *runtime = NULL; char *id = NULL; container_garbage_config_gc_containers_element *gc_cont = NULL; + rt_detect_process_params_t detect_params = { 0 }; gc_cont = (container_garbage_config_gc_containers_element *)it->elem; id = gc_cont->id; runtime = 
gc_cont->runtime; pid = gc_cont->pid; start_time = gc_cont->start_time; + /* fill detect_params only after gc_cont points at the list element */ + detect_params.pid = pid; + detect_params.start_time = start_time; - if (util_process_alive(pid, start_time) == false) { + if (runtime_detect_process(id, runtime, &detect_params) < 0) { ret = clean_container_resource(id, runtime, pid); if (ret != 0) { WARN("Failed to clean resources of container %s", id); @@ -495,7 +500,15 @@ static void gc_container_process(struct linked_list *it) free(it); } else { try_to_resume_container(id, runtime); - ret = kill(pid, SIGKILL); + + rt_kill_params_t kill_params = { + .signal = SIGKILL, + .stop_signal = SIGKILL, + .pid = pid, + .start_time = start_time, + }; + + ret = runtime_kill(id, runtime, &kill_params); if (ret < 0 && errno != ESRCH) { ERROR("Can not kill process (pid=%d) with SIGKILL for container %s", pid, id); } diff --git a/src/daemon/modules/container/restore/restore.c b/src/daemon/modules/container/restore/restore.c index 44ed14df..48a3ae4b 100644 --- a/src/daemon/modules/container/restore/restore.c +++ b/src/daemon/modules/container/restore/restore.c @@ -58,7 +58,12 @@ static int restore_supervisor(const container_t *cont) char *exit_fifo = NULL; char *id = cont->common_config->id; char *statepath = cont->state_path; + char *runtime = cont->runtime; pid_ppid_info_t pid_info = { 0 }; + rt_detect_process_params_t params = { + .pid = cont->state->state->pid, + .start_time = cont->state->state->start_time, + }; nret = snprintf(container_state, sizeof(container_state), "%s/%s", statepath, id); if (nret < 0 || (size_t)nret >= sizeof(container_state)) { @@ -81,7 +86,7 @@ static int restore_supervisor(const container_t *cont) goto out; } - if (!util_process_alive(cont->state->state->pid, cont->state->state->start_time)) { + if (runtime_detect_process(id, runtime, &params) != 0) { ERROR("Container %s pid %d already dead, skip add supervisor", id, cont->state->state->pid); close(exit_fifo_fd); ret = -1; @@ -112,8 +117,10 @@ static int post_stopped_container_to_gc(const char *id, const char *runtime, con { int ret = 0; 
pid_ppid_info_t pid_info = { 0 }; + rt_read_pid_ppid_info_params_t params = { 0 }; + params.pid = old_pid_info->pid; - (void)util_read_pid_ppid_info(old_pid_info->pid, &pid_info); + (void)runtime_read_pid_ppid_info(id, runtime, &params, &pid_info); if (pid_info.ppid == 0) { pid_info.ppid = old_pid_info->ppid; pid_info.pstart_time = old_pid_info->pstart_time; @@ -180,9 +187,15 @@ static void restore_stopped_container(Container_Status status, const container_t pid_ppid_info_t pid_info = { 0 }; if (status != CONTAINER_STATUS_STOPPED && status != CONTAINER_STATUS_CREATED) { - if (util_process_alive(cont->state->state->pid, cont->state->state->start_time)) { + rt_detect_process_params_t params = { + .pid = cont->state->state->pid, + .start_time = cont->state->state->start_time, + }; + if (runtime_detect_process(id, cont->runtime, &params) == 0) { pid_info.pid = cont->state->state->pid; + pid_info.start_time = cont->state->state->start_time; } + if (util_process_alive(cont->state->state->p_pid, cont->state->state->p_start_time)) { pid_info.ppid = cont->state->state->p_pid; pid_info.pstart_time = cont->state->state->p_start_time; @@ -204,8 +217,11 @@ static void restore_running_container(Container_Status status, container_t *cont int nret = 0; const char *id = cont->common_config->id; pid_ppid_info_t pid_info = { 0 }; + rt_read_pid_ppid_info_params_t params = { + .pid = info->pid, + }; - nret = util_read_pid_ppid_info(info->pid, &pid_info); + nret = runtime_read_pid_ppid_info(id, cont->runtime, &params, &pid_info); if (nret == 0) { try_to_set_container_running(status, cont, &pid_info); container_state_reset_has_been_manual_stopped(cont->state); @@ -234,10 +250,13 @@ static void restore_paused_container(Container_Status status, container_t *cont, int nret = 0; const char *id = cont->common_config->id; pid_ppid_info_t pid_info = { 0 }; + rt_read_pid_ppid_info_params_t params = { + .pid = info->pid, + }; container_state_set_paused(cont->state); - nret = 
util_read_pid_ppid_info(info->pid, &pid_info); + nret = runtime_read_pid_ppid_info(id, cont->runtime, &params, &pid_info); if (nret == 0) { try_to_set_paused_container_pid(status, cont, &pid_info); container_state_reset_has_been_manual_stopped(cont->state); diff --git a/src/daemon/modules/container/supervisor/supervisor.c b/src/daemon/modules/container/supervisor/supervisor.c index f77f58d7..b5ff6166 100644 --- a/src/daemon/modules/container/supervisor/supervisor.c +++ b/src/daemon/modules/container/supervisor/supervisor.c @@ -46,6 +46,7 @@ #include "cgroup.h" #include "specs_api.h" #endif +#include "runtime_api.h" pthread_mutex_t g_supervisor_lock = PTHREAD_MUTEX_INITIALIZER; struct epoll_descr g_supervisor_descr; @@ -177,6 +178,17 @@ static void *clean_resources_thread(void *arg) char *runtime = data->runtime; unsigned long long start_time = data->pid_info.start_time; pid_t pid = data->pid_info.pid; + rt_detect_process_params_t detect_params = { + .pid = pid, + .start_time = start_time, + }; + rt_kill_params_t kill_params = { + .signal = SIGKILL, + .stop_signal = SIGKILL, + .pid = pid, + .start_time = start_time + }; + int retry_count = 0; int max_retry = 10; #ifdef ENABLE_CRI_API_V1 @@ -192,7 +204,7 @@ static void *clean_resources_thread(void *arg) prctl(PR_SET_NAME, "Clean resource"); retry: - if (false == util_process_alive(pid, start_time)) { + if (runtime_detect_process(name, runtime, &detect_params) < 0) { ret = clean_container_resource(name, runtime, pid); // clean_container_resource failed, do not log error message, // just add to gc to retry clean resource. 
@@ -200,7 +212,7 @@ retry: ERROR("Failed to clean resources of container %s", name); } } else { - ret = kill(pid, SIGKILL); + ret = runtime_kill(name, runtime, &kill_params); if (ret < 0 && errno != ESRCH) { ERROR("Can not kill process (pid=%d) with SIGKILL for container %s", pid, name); } @@ -212,6 +224,7 @@ retry: } // get info of init process in container for debug problem of container + // but for shim-v2, this might be a misleading debug info proc_t *c_proc = util_get_process_proc_info(pid); if (c_proc != NULL) { ERROR("Container %s into GC with process state: {cmd: %s, state: %c, pid: %d}", name, c_proc->cmd, c_proc->state, diff --git a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c index 64a8adbc..eb3afb94 100644 --- a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c +++ b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.c @@ -941,3 +941,14 @@ out: free_oci_runtime_spec(oci_spec); return ret; } + +int rt_lcr_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info) +{ + return util_read_pid_ppid_info(params->pid, pid_info); +} + +int rt_lcr_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params) +{ + return util_process_alive(params->pid, params->start_time) ? 
0 : -1; +} diff --git a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h index 7403544d..85ebe6f7 100644 --- a/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h +++ b/src/daemon/modules/runtime/engines/lcr/lcr_rt_ops.h @@ -48,6 +48,10 @@ int rt_lcr_resize(const char *id, const char *runtime, const rt_resize_params_t int rt_lcr_exec_resize(const char *id, const char *runtime, const rt_exec_resize_params_t *params); int rt_lcr_kill(const char *id, const char *runtime, const rt_kill_params_t *params); int rt_lcr_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params); + +int rt_lcr_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info); +int rt_lcr_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params); #ifdef __cplusplus } #endif diff --git a/src/daemon/modules/runtime/isula/isula_rt_ops.c b/src/daemon/modules/runtime/isula/isula_rt_ops.c index 0adb3858..35c09921 100644 --- a/src/daemon/modules/runtime/isula/isula_rt_ops.c +++ b/src/daemon/modules/runtime/isula/isula_rt_ops.c @@ -2173,4 +2173,15 @@ int rt_isula_kill(const char *id, const char *runtime, const rt_kill_params_t *p int rt_isula_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params) { return 0; -} \ No newline at end of file +} + +int rt_isula_read_pid_ppid_info(const char *id, const char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info) +{ + return util_read_pid_ppid_info(params->pid, pid_info); +} + +int rt_isula_detect_process(const char *id, const char *runtime, const rt_detect_process_params_t *params) +{ + return util_process_alive(params->pid, params->start_time) ? 
0 : -1; +} diff --git a/src/daemon/modules/runtime/isula/isula_rt_ops.h b/src/daemon/modules/runtime/isula/isula_rt_ops.h index 1e5e049a..88236a1e 100644 --- a/src/daemon/modules/runtime/isula/isula_rt_ops.h +++ b/src/daemon/modules/runtime/isula/isula_rt_ops.h @@ -48,6 +48,9 @@ int rt_isula_exec_resize(const char *id, const char *runtime, const rt_exec_resi int rt_isula_kill(const char *id, const char *runtime, const rt_kill_params_t *params); int rt_isula_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params); +int rt_isula_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info); +int rt_isula_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params); #ifdef __cplusplus } #endif diff --git a/src/daemon/modules/runtime/runtime.c b/src/daemon/modules/runtime/runtime.c index 43b78bca..7be140dc 100644 --- a/src/daemon/modules/runtime/runtime.c +++ b/src/daemon/modules/runtime/runtime.c @@ -46,6 +46,8 @@ static const struct rt_ops g_lcr_rt_ops = { .rt_exec_resize = rt_lcr_exec_resize, .rt_kill = rt_lcr_kill, .rt_rebuild_config = rt_lcr_rebuild_config, + .rt_read_pid_ppid_info = rt_lcr_read_pid_ppid_info, + .rt_detect_process = rt_lcr_detect_process, }; static const struct rt_ops g_isula_rt_ops = { @@ -67,6 +69,8 @@ static const struct rt_ops g_isula_rt_ops = { .rt_exec_resize = rt_isula_exec_resize, .rt_kill = rt_isula_kill, .rt_rebuild_config = rt_isula_rebuild_config, + .rt_read_pid_ppid_info = rt_isula_read_pid_ppid_info, + .rt_detect_process = rt_isula_detect_process, }; #ifdef ENABLE_SHIM_V2 @@ -89,6 +93,8 @@ static const struct rt_ops g_shim_rt_ops = { .rt_exec_resize = rt_shim_exec_resize, .rt_kill = rt_shim_kill, .rt_rebuild_config = rt_shim_rebuild_config, + .rt_read_pid_ppid_info = rt_shim_read_pid_ppid_info, + .rt_detect_process = rt_shim_detect_process, }; #endif @@ -534,6 +540,42 @@ out: return ret; } 
+int runtime_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info) +{ + const struct rt_ops *ops = NULL; + + if (name == NULL || runtime == NULL || params == NULL || pid_info == NULL) { + ERROR("Invalid arguments for runtime read pid ppid info"); + return -1; + } + + ops = rt_ops_query(runtime); + if (ops == NULL) { + ERROR("Failed to get runtime ops"); + return -1; + } + + return ops->rt_read_pid_ppid_info(name, runtime, params, pid_info); +} + +int runtime_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params) +{ + const struct rt_ops *ops = NULL; + + if (name == NULL || runtime == NULL || params == NULL) { + ERROR("Invalid arguments for runtime process alive"); + return -1; + } + + ops = rt_ops_query(runtime); + if (ops == NULL) { + return -1; + } + + return ops->rt_detect_process(name, runtime, params); +} + bool is_default_runtime(const char *name) { const char *runtimes[] = { "lcr", "runc", "kata-runtime" }; diff --git a/src/daemon/modules/runtime/shim/shim_rt_ops.c b/src/daemon/modules/runtime/shim/shim_rt_ops.c index 30b5e442..268d66d9 100644 --- a/src/daemon/modules/runtime/shim/shim_rt_ops.c +++ b/src/daemon/modules/runtime/shim/shim_rt_ops.c @@ -847,3 +847,25 @@ int rt_shim_rebuild_config(const char *name, const char *runtime, const rt_rebui { return 0; } + +int rt_shim_read_pid_ppid_info(const char *name, const char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info) +{ + if (params == NULL || pid_info == NULL) { + ERROR("Invalid input params"); + return -1; + } + + pid_info->pid = params->pid; + return 0; +} + +int rt_shim_detect_process(const char *name, const char *runtime, const rt_detect_process_params_t *params) +{ + if (shim_v2_kill(name, NULL, 0, false) != 0) { + ERROR("%s: detect process failed", name); + return -1; + } + + return 0; +} diff --git a/src/daemon/modules/runtime/shim/shim_rt_ops.h 
b/src/daemon/modules/runtime/shim/shim_rt_ops.h index 2df34f4c..a3968cf5 100644 --- a/src/daemon/modules/runtime/shim/shim_rt_ops.h +++ b/src/daemon/modules/runtime/shim/shim_rt_ops.h @@ -64,6 +64,9 @@ bool is_valid_v2_runtime(const char* name); int rt_shim_rebuild_config(const char *name, const char *runtime, const rt_rebuild_config_params_t *params); +int rt_shim_read_pid_ppid_info(const char *id, const char *runtime, const rt_read_pid_ppid_info_params_t *params, + pid_ppid_info_t *pid_info); +int rt_shim_detect_process(const char *id, const char *runtime, const rt_detect_process_params_t *params); #ifdef __cplusplus } #endif diff --git a/src/daemon/modules/service/service_container.c b/src/daemon/modules/service/service_container.c index 250e8299..dbf56776 100644 --- a/src/daemon/modules/service/service_container.c +++ b/src/daemon/modules/service/service_container.c @@ -1513,6 +1513,7 @@ out: static int send_signal_to_process(pid_t pid, unsigned long long start_time, uint32_t stop_signal, uint32_t signal) { + // for shim-v2, here is a ppid, which is always 0 if (util_process_alive(pid, start_time) == false) { if (signal == stop_signal || signal == SIGKILL) { WARN("Process %d is not alive", pid); -- 2.34.1