summaryrefslogtreecommitdiff
path: root/0035-monitor-cgroup-oom-killed-event-and-update-to-cri-of.patch
diff options
context:
space:
mode:
Diffstat (limited to '0035-monitor-cgroup-oom-killed-event-and-update-to-cri-of.patch')
-rw-r--r--0035-monitor-cgroup-oom-killed-event-and-update-to-cri-of.patch868
1 files changed, 868 insertions, 0 deletions
diff --git a/0035-monitor-cgroup-oom-killed-event-and-update-to-cri-of.patch b/0035-monitor-cgroup-oom-killed-event-and-update-to-cri-of.patch
new file mode 100644
index 0000000..85f7f62
--- /dev/null
+++ b/0035-monitor-cgroup-oom-killed-event-and-update-to-cri-of.patch
@@ -0,0 +1,868 @@
+From 947cf87a87ec49409ae509e5142b8134454d1547 Mon Sep 17 00:00:00 2001
+From: jikai <jikai11@huawei.com>
+Date: Thu, 28 Mar 2024 12:51:09 +0000
+Subject: [PATCH 35/43] monitor cgroup oom killed event and update to cri of
+ container
+
+Signed-off-by: jikai <jikai11@huawei.com>
+---
+ src/daemon/common/cgroup/cgroup.c | 91 +++++++++-
+ src/daemon/common/cgroup/cgroup.h | 5 +
+ src/daemon/common/cgroup/cgroup_common.h | 13 ++
+ src/daemon/common/cgroup/cgroup_v1.c | 160 ++++++++++++++++++
+ src/daemon/common/cgroup/cgroup_v2.c | 138 ++++++++++++++-
+ .../v1/v1_cri_container_manager_service.cc | 3 +
+ src/daemon/modules/api/container_api.h | 5 +-
+ .../container/container_events_handler.c | 12 +-
+ .../modules/container/container_state.c | 15 ++
+ .../modules/container/restore/restore.c | 10 +-
+ .../modules/container/supervisor/supervisor.c | 54 +++++-
+ src/daemon/modules/events/collector.c | 7 +-
+ .../modules/service/service_container.c | 11 +-
+ 13 files changed, 498 insertions(+), 26 deletions(-)
+
+diff --git a/src/daemon/common/cgroup/cgroup.c b/src/daemon/common/cgroup/cgroup.c
+index 837b514a..d3f1445a 100644
+--- a/src/daemon/common/cgroup/cgroup.c
++++ b/src/daemon/common/cgroup/cgroup.c
+@@ -133,4 +133,93 @@ char *common_get_own_cgroup_path(const char *subsystem)
+ }
+
+ return g_cgroup_ops.get_own_cgroup_path(subsystem);
+-}
+\ No newline at end of file
++}
++
++// Convert a container cgroup path into a path relative to the cgroup mount:
++// cgroupfs paths are duplicated as-is, a systemd "slice:prefix:id" path becomes
++// e.g. test.slice/test-a.slice/isulad-id.scope. Returns NULL on invalid input.
++char *common_convert_cgroup_path(const char *cgroup_path)
++{
++    char *token = NULL;
++    char result[PATH_MAX + 1] = {0};
++    __isula_auto_array_t char **arr = NULL;
++
++    if (cgroup_path == NULL) {
++        ERROR("Invalid NULL cgroup path");
++        return NULL;
++    }
++
++    // for cgroup fs cgroup path, return directly
++    // NOTE(review): confirm util_has_suffix() accepts the "slice:prefix:id" form.
++    if (!util_has_suffix(cgroup_path, ".slice")) {
++        return util_strdup_s(cgroup_path);
++    }
++
++    // systemd cgroup: split "slice:prefix:id" into exactly three fields
++    arr = util_string_split_n(cgroup_path, ':', 3);
++    if (arr == NULL || util_array_len((const char **)arr) != 3) {
++        ERROR("Invalid systemd cgroup parent");
++        return NULL;
++    }
++
++    token = strchr(arr[0], '-');
++    while (token != NULL) {
++        *token = '\0'; // truncate arr[0] at this dash to name one ancestor slice
++        if (strlen(result) + 1 + strlen(arr[0]) + strlen(".slice") > PATH_MAX) {
++            ERROR("Invalid systemd cgroup parent: exceeds max length of path");
++            *token = '-';
++            return NULL;
++        }
++        if (result[0] != '\0') {
++            strcat(result, "/");
++        }
++        strcat(result, arr[0]);
++        strcat(result, ".slice");
++        *token = '-';
++        token = strchr(token + 1, '-');
++    }
++
++    // /arr[0]/arr[1]-arr[2].scope adds 3 separators; sum every length so the bound is exact
++    if (strlen(result) + 3 + strlen(arr[0]) + strlen(arr[1]) + strlen(arr[2]) +
++        strlen(".scope") > PATH_MAX) {
++        ERROR("Invalid systemd cgroup parent: exceeds max length of path");
++        return NULL;
++    }
++
++    (void)strcat(result, "/");
++    (void)strcat(result, arr[0]);
++    (void)strcat(result, "/");
++    (void)strcat(result, arr[1]);
++    (void)strcat(result, "-");
++    (void)strcat(result, arr[2]);
++    (void)strcat(result, ".scope");
++    return util_strdup_s(result);
++}
++
++cgroup_oom_handler_info_t *common_get_cgroup_oom_handler(int fd, const char *name, const char *cgroup_path, const char *exit_fifo)
++{
++    // Forward to the op registered in g_cgroup_ops; without one there is no OOM handler.
++    if (g_cgroup_ops.get_cgroup_oom_handler != NULL) {
++        return g_cgroup_ops.get_cgroup_oom_handler(fd, name, cgroup_path, exit_fifo);
++    }
++    ERROR("Unimplmented get_cgroup_oom_handler op");
++    return NULL;
++}
++
++void common_free_cgroup_oom_handler_info(cgroup_oom_handler_info_t *info)
++{
++ if (info == NULL) {
++ return;
++ }
++ // Release everything the handler info holds; negative fds were never opened.
++ if (info->oom_event_fd >= 0) {
++ close(info->oom_event_fd);
++ }
++ if (info->cgroup_file_fd >= 0) {
++ close(info->cgroup_file_fd);
++ }
++ // Free the owned strings before the struct itself.
++ free(info->name);
++ free(info->cgroup_memory_event_path);
++ free(info);
++}
+diff --git a/src/daemon/common/cgroup/cgroup.h b/src/daemon/common/cgroup/cgroup.h
+index 1efc3ca6..8c76d99d 100644
+--- a/src/daemon/common/cgroup/cgroup.h
++++ b/src/daemon/common/cgroup/cgroup.h
+@@ -41,6 +41,11 @@ int common_get_cgroup_mnt_and_root_path(const char *subsystem, char **mountpoint
+ char *common_get_init_cgroup_path(const char *subsystem);
+ char *common_get_own_cgroup_path(const char *subsystem);
+
++char *common_convert_cgroup_path(const char *cgroup_path);
++
++cgroup_oom_handler_info_t *common_get_cgroup_oom_handler(int fd, const char *name, const char *cgroup_path, const char *exit_fifo);
++void common_free_cgroup_oom_handler_info(cgroup_oom_handler_info_t *info);
++
+ #ifdef __cplusplus
+ }
+ #endif
+diff --git a/src/daemon/common/cgroup/cgroup_common.h b/src/daemon/common/cgroup/cgroup_common.h
+index 2a0935cb..e3912bf0 100644
+--- a/src/daemon/common/cgroup/cgroup_common.h
++++ b/src/daemon/common/cgroup/cgroup_common.h
+@@ -116,6 +116,17 @@ typedef struct {
+ cgroup_pids_metrics_t cgpids_metrics;
+ } cgroup_metrics_t;
+
++#define CGROUP_OOM_HANDLE_CONTINUE false
++#define CGROUP_OOM_HANDLE_CLOSE true
++
++typedef struct _cgroup_oom_handler_info_t {
++ int oom_event_fd; // fd polled for OOM notifications (eventfd on v1, inotify on v2); -1 if unset
++ int cgroup_file_fd; // v1: fd of memory.oom_control; left at -1 on v2
++ char *name; // copy of the name passed by the caller, used when sending the OOM event
++ char *cgroup_memory_event_path; // v1: cgroup.event_control path; v2: memory.events path
++ bool (*oom_event_handler)(int, void *); // returns CGROUP_OOM_HANDLE_CLOSE or CGROUP_OOM_HANDLE_CONTINUE
++} cgroup_oom_handler_info_t;
++
+ typedef struct {
+ int (*get_cgroup_version)(void);
+ int (*get_cgroup_info)(cgroup_mem_info_t *meminfo, cgroup_cpu_info_t *cpuinfo,
+@@ -128,6 +139,8 @@ typedef struct {
+
+ char *(*get_init_cgroup_path)(const char *subsystem);
+ char *(*get_own_cgroup_path)(const char *subsystem);
++
++ cgroup_oom_handler_info_t *(*get_cgroup_oom_handler)(int fd, const char *name, const char *cgroup_path, const char *exit_fifo);
+ } cgroup_ops;
+
+ #ifdef __cplusplus
+diff --git a/src/daemon/common/cgroup/cgroup_v1.c b/src/daemon/common/cgroup/cgroup_v1.c
+index 51cf7512..41f3110a 100644
+--- a/src/daemon/common/cgroup/cgroup_v1.c
++++ b/src/daemon/common/cgroup/cgroup_v1.c
+@@ -12,14 +12,20 @@
+ * Create: 2023-03-29
+ * Description: provide cgroup v1 functions
+ ******************************************************************************/
++#ifndef _GNU_SOURCE
++#define _GNU_SOURCE
++#endif
++
+ #include "cgroup.h"
+
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <sys/eventfd.h>
+
+ #include "utils.h"
+ #include "sysinfo.h"
+ #include "err_msg.h"
++#include "events_sender_api.h"
+
+ #define CGROUP_HUGETLB_LIMIT "hugetlb.%s.limit_in_bytes"
+ #define CGROUP_MOUNT_PATH_PREFIX "/sys/fs/cgroup/"
+@@ -1045,6 +1051,159 @@ static char *common_get_cgroup_path(const char *path, const char *subsystem)
+ return res;
+ }
+
++static bool oom_cb_cgroup_v1(int fd, void *cbdata)
++{
++ cgroup_oom_handler_info_t *info = (cgroup_oom_handler_info_t *)cbdata;
++ /* Callback for the cgroup v1 OOM eventfd: sends an OOM container event.
++ * cgroup.event_control is probed first: if the cgroup was removed and only
++ * one event was received, this is a cgroup removal notification rather
++ * than an oom event, and nothing is sent. */
++ bool cgroup_removed = false;
++ if (info == NULL) {
++ ERROR("Invalide callback data");
++ return CGROUP_OOM_HANDLE_CLOSE;
++ }
++
++ if (access(info->cgroup_memory_event_path, F_OK) < 0) {
++ DEBUG("Cgroup event path was removed");
++ cgroup_removed = true;
++ }
++ // Reading an eventfd yields its 64-bit notification counter.
++ uint64_t event_count;
++ ssize_t num_read = util_read_nointr(fd, &event_count, sizeof(uint64_t));
++ if (num_read < 0) {
++ ERROR("Failed to read oom event from eventfd");
++ return CGROUP_OOM_HANDLE_CLOSE;
++ }
++
++ if (num_read == 0) {
++ return CGROUP_OOM_HANDLE_CLOSE;
++ }
++
++ if (num_read != sizeof(uint64_t)) {
++ ERROR("Failed to read full oom event from eventfd");
++ return CGROUP_OOM_HANDLE_CLOSE;
++ }
++
++ if (event_count == 0) {
++ ERROR("Unexpected event count when reading for oom event");
++ return CGROUP_OOM_HANDLE_CLOSE;
++ }
++
++ if (event_count == 1 && cgroup_removed) {
++ return CGROUP_OOM_HANDLE_CLOSE;
++ }
++
++ INFO("OOM event detected");
++ (void)isulad_monitor_send_container_event(info->name, OOM, -1, 0, NULL, NULL);
++ // Every path through this callback asks the caller to close the handler.
++ return CGROUP_OOM_HANDLE_CLOSE;
++}
++
++static char *get_memory_cgroup_path_v1(const char *cgroup_path)
++{
++    int written = 0;
++    char base[PATH_MAX] = { 0 };
++    __isula_auto_free char *mount_point = NULL;
++    __isula_auto_free char *hierarchy_root = NULL;
++    __isula_auto_free char *relative = common_convert_cgroup_path(cgroup_path);
++
++    if (relative == NULL) {
++        ERROR("Failed to transfer cgroup path");
++        return NULL;
++    }
++
++    if (get_cgroup_mnt_and_root_path_v1("memory", &mount_point, &hierarchy_root) != 0 ||
++        mount_point == NULL || hierarchy_root == NULL) {
++        ERROR("Can not find cgroup mnt and root path for subsystem 'memory'");
++        return NULL;
++    }
++
++    // When iSulad runs inside docker, the reported root is relative to the host
++    // cgroup hierarchy, so collapse it to "/".
++    if (strncmp(hierarchy_root, "/docker/", strlen("/docker/")) == 0) {
++        hierarchy_root[1] = '\0';
++    }
++
++    written = snprintf(base, sizeof(base), "%s/%s", mount_point, hierarchy_root);
++    if (written < 0 || (size_t)written >= sizeof(base)) {
++        ERROR("Failed to print string");
++        return NULL;
++    }
++
++    // Result is <mount>/<root> joined with the converted container cgroup path.
++    return util_path_join(base, relative);
++}
++
++// Build the cgroup v1 OOM watcher: an eventfd registered against memory.oom_control. NULL on failure.
++static cgroup_oom_handler_info_t *get_cgroup_oom_handler_v1(int fd, const char *name, const char *cgroup_path, const char *exit_fifo)
++{
++    __isula_auto_free char *memory_cgroup_path = NULL;
++    __isula_auto_free char *memory_cgroup_oom_control_path = NULL;
++    __isula_auto_free char *data = NULL;
++    __isula_auto_close int cgroup_event_control_fd = -1;
++    if (name == NULL || cgroup_path == NULL || exit_fifo == NULL) {
++        ERROR("Invalid arguments");
++        return NULL;
++    }
++    cgroup_oom_handler_info_t *info = util_common_calloc_s(sizeof(cgroup_oom_handler_info_t));
++    if (info == NULL) {
++        ERROR("Out of memory");
++        return NULL;
++    }
++    info->name = util_strdup_s(name);
++    info->cgroup_file_fd = -1;
++    info->oom_event_fd = -1;
++    info->oom_event_handler = oom_cb_cgroup_v1;
++    // Resolve the container's directory in the v1 memory hierarchy.
++    memory_cgroup_path = get_memory_cgroup_path_v1(cgroup_path);
++    if (memory_cgroup_path == NULL) {
++        ERROR("Failed to get memory cgroup path");
++        goto cleanup;
++    }
++    info->cgroup_memory_event_path = util_path_join(memory_cgroup_path, "cgroup.event_control");
++    if (info->cgroup_memory_event_path == NULL) {
++        ERROR("Failed to join memory cgroup file path");
++        goto cleanup;
++    }
++
++    cgroup_event_control_fd = util_open(info->cgroup_memory_event_path, O_WRONLY | O_CLOEXEC, 0);
++    if (cgroup_event_control_fd < 0) {
++        ERROR("Failed to open %s", info->cgroup_memory_event_path);
++        goto cleanup;
++    }
++    memory_cgroup_oom_control_path = util_path_join(memory_cgroup_path, "memory.oom_control");
++    if (memory_cgroup_oom_control_path == NULL) {
++        ERROR("Failed to join memory cgroup file path");
++        goto cleanup;
++    }
++    info->cgroup_file_fd = util_open(memory_cgroup_oom_control_path, O_RDONLY | O_CLOEXEC, 0);
++    if (info->cgroup_file_fd < 0) {
++        ERROR("Failed to open %s", memory_cgroup_oom_control_path);
++        goto cleanup;
++    }
++
++    info->oom_event_fd = eventfd(0, EFD_CLOEXEC);
++    if (info->oom_event_fd < 0) {
++        ERROR("Failed to create oom eventfd");
++        goto cleanup;
++    }
++    // Register "<eventfd> <memory.oom_control fd>" through cgroup.event_control.
++    // asprintf() leaves data unspecified on failure; reset it so the auto-free stays safe.
++    if (asprintf(&data, "%d %d", info->oom_event_fd, info->cgroup_file_fd) < 0) {
++        data = NULL;
++    }
++    if (data == NULL || util_write_nointr(cgroup_event_control_fd, data, strlen(data)) < 0) {
++        ERROR("Failed to write to cgroup.event_control");
++        goto cleanup;
++    }
++
++    return info;
++cleanup:
++    common_free_cgroup_oom_handler_info(info);
++    return NULL;
++}
++
+ char *get_init_cgroup_path_v1(const char *subsystem)
+ {
+ return common_get_cgroup_path("/proc/1/cgroup", subsystem);
+@@ -1071,5 +1230,6 @@ int cgroup_v1_ops_init(cgroup_ops *ops)
+ ops->get_cgroup_mnt_and_root_path = get_cgroup_mnt_and_root_path_v1;
+ ops->get_init_cgroup_path = get_init_cgroup_path_v1;
+ ops->get_own_cgroup_path = get_own_cgroup_v1;
++ ops->get_cgroup_oom_handler = get_cgroup_oom_handler_v1;
+ return 0;
+ }
+\ No newline at end of file
+diff --git a/src/daemon/common/cgroup/cgroup_v2.c b/src/daemon/common/cgroup/cgroup_v2.c
+index 65cf90d8..a36258f0 100644
+--- a/src/daemon/common/cgroup/cgroup_v2.c
++++ b/src/daemon/common/cgroup/cgroup_v2.c
+@@ -17,12 +17,14 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <sys/stat.h>
++#include <sys/inotify.h>
+
+ #include <isula_libutils/auto_cleanup.h>
+
+ #include "utils.h"
+ #include "path.h"
+ #include "sysinfo.h"
++#include "events_sender_api.h"
+
+ // Cgroup V2 Item Definition
+ #define CGROUP2_CPU_WEIGHT "cpu.weight"
+@@ -408,10 +410,143 @@ static int get_cgroup_metrics_v2(const char *cgroup_path, cgroup_metrics_t *cgro
+
+ static int get_cgroup_mnt_and_root_v2(const char *subsystem, char **mountpoint, char **root)
+ {
+- *mountpoint = util_strdup_s(CGROUP_ISULAD_PATH);
++ if (mountpoint != NULL) {
++ *mountpoint = util_strdup_s(CGROUP_ISULAD_PATH);
++ }
+ return 0;
+ }
+
++// inotify callback for cgroup v2, fired when memory.events changes or the exit
++// fifo is deleted; sends an OOM event once an oom/oom_kill counter is non-zero.
++static bool oom_cb_cgroup_v2(int fd, void *cbdata)
++{
++    char events[sizeof(struct inotify_event) + NAME_MAX + 1]
++    __attribute__((aligned(__alignof__(struct inotify_event)))); // alignment per inotify(7)
++    cgroup_oom_handler_info_t *info = (cgroup_oom_handler_info_t *)cbdata;
++    bool deleted = false, changed = false;
++    if (info == NULL) {
++        ERROR("Invalid callback data");
++        return CGROUP_OOM_HANDLE_CLOSE;
++    }
++
++    ssize_t num_read = util_read_nointr(fd, events, sizeof(events));
++    if (num_read < (ssize_t)sizeof(struct inotify_event)) { // error, EOF or truncated header
++        ERROR("Failed to read oom event from eventfd in v2");
++        return CGROUP_OOM_HANDLE_CLOSE;
++    }
++
++    // One read can return several packed events; look at all of them.
++    for (ssize_t off = 0; off + (ssize_t)sizeof(struct inotify_event) <= num_read;) {
++        const struct inotify_event *ev = (const struct inotify_event *)(events + off);
++        deleted = deleted || (ev->mask & (IN_DELETE | IN_DELETE_SELF)) != 0;
++        changed = changed || (ev->mask & (IN_DELETE | IN_DELETE_SELF)) == 0;
++        off += (ssize_t)(sizeof(struct inotify_event) + ev->len);
++    }
++    if (!changed) {
++        return CGROUP_OOM_HANDLE_CLOSE;
++    }
++
++    __isula_auto_file FILE *fp = fopen(info->cgroup_memory_event_path, "re");
++    if (fp == NULL) {
++        ERROR("Failed to open cgroups file: %s", info->cgroup_memory_event_path);
++        return CGROUP_OOM_HANDLE_CLOSE;
++    }
++
++    __isula_auto_free char *line = NULL;
++    size_t cap = 0, skip = 0; // cap belongs to getline(); skip is the matched prefix length
++    ssize_t line_len;
++    while ((line_len = getline(&line, &cap, fp)) != -1) {
++        int count;
++        static const char oom_str[] = "oom ", oom_kill_str[] = "oom_kill ";
++        const size_t oom_len = sizeof(oom_str) - 1, oom_kill_len = sizeof(oom_kill_str) - 1;
++        if ((size_t)line_len >= oom_kill_len + 2 && memcmp(line, oom_kill_str, oom_kill_len) == 0) {
++            skip = oom_kill_len;
++        } else if ((size_t)line_len >= oom_len + 2 && memcmp(line, oom_str, oom_len) == 0) {
++            skip = oom_len;
++        } else {
++            continue;
++        }
++        line[strcspn(line, "\n")] = '\0'; // util_safe_int needs a '\0'-terminated number
++        if (util_safe_int(&line[skip], &count) < 0) {
++            ERROR("Failed to parse: %s", &line[skip]);
++            continue;
++        }
++        if (count != 0) {
++            INFO("OOM event detected in cgroup v2");
++            (void)isulad_monitor_send_container_event(info->name, OOM, -1, 0, NULL, NULL);
++            return CGROUP_OOM_HANDLE_CLOSE;
++        }
++    }
++    return deleted ? CGROUP_OOM_HANDLE_CLOSE : CGROUP_OOM_HANDLE_CONTINUE;
++}
++
++static char *get_real_cgroup_path_v2(const char *cgroup_path)
++{
++    // Join CGROUP_MOUNTPOINT with the converted form of the container cgroup path.
++    __isula_auto_free char *relative = common_convert_cgroup_path(cgroup_path);
++
++    if (relative != NULL) {
++        return util_path_join(CGROUP_MOUNTPOINT, relative);
++    }
++    ERROR("Failed to convert cgroup path");
++    return NULL;
++}
++
++// Build the cgroup v2 OOM watcher: one inotify fd watching memory.events for
++// modification and exit_fifo for deletion. Returns NULL on any failure.
++cgroup_oom_handler_info_t *get_cgroup_oom_handler_v2(int fd, const char *name, const char *cgroup_path, const char *exit_fifo)
++{
++    __isula_auto_free char *real_cgroup_path = NULL;
++    if (name == NULL || cgroup_path == NULL || exit_fifo == NULL) {
++        ERROR("Invalid arguments");
++        return NULL;
++    }
++    cgroup_oom_handler_info_t *info = util_common_calloc_s(sizeof(cgroup_oom_handler_info_t));
++    if (info == NULL) {
++        ERROR("Out of memory");
++        return NULL;
++    }
++
++    info->name = util_strdup_s(name);
++    info->oom_event_fd = -1;
++    info->cgroup_file_fd = -1;
++    info->oom_event_handler = oom_cb_cgroup_v2;
++
++    real_cgroup_path = get_real_cgroup_path_v2(cgroup_path);
++    if (real_cgroup_path == NULL) {
++        ERROR("Failed to transfer cgroup path: %s", cgroup_path);
++        goto cleanup;
++    }
++
++    info->cgroup_memory_event_path = util_path_join(real_cgroup_path, "memory.events");
++    if (info->cgroup_memory_event_path == NULL) {
++        ERROR("Failed to join path");
++        goto cleanup;
++    }
++    // IN_CLOEXEC keeps the inotify fd from being inherited across exec.
++    if ((info->oom_event_fd = inotify_init1(IN_CLOEXEC)) < 0) {
++        ERROR("Failed to init inotify fd");
++        goto cleanup;
++    }
++
++    if (inotify_add_watch(info->oom_event_fd, info->cgroup_memory_event_path, IN_MODIFY) < 0) {
++        ERROR("Failed to watch inotify fd for %s", info->cgroup_memory_event_path);
++        goto cleanup;
++    }
++
++    // watch exit fifo for container exit, so we can close the inotify fd
++    // because inotify cannot watch cgroup file delete event
++    if (inotify_add_watch(info->oom_event_fd, exit_fifo, IN_DELETE | IN_DELETE_SELF) < 0) {
++        ERROR("Failed to watch inotify fd for %s", exit_fifo);
++        goto cleanup;
++    }
++
++    return info;
++cleanup:
++    common_free_cgroup_oom_handler_info(info);
++    return NULL;
++}
++
+ int get_cgroup_version_v2()
+ {
+ return CGROUP_VERSION_2;
+@@ -426,5 +561,6 @@ int cgroup_v2_ops_init(cgroup_ops *ops)
+ ops->get_cgroup_info = get_cgroup_info_v2;
+ ops->get_cgroup_metrics = get_cgroup_metrics_v2;
+ ops->get_cgroup_mnt_and_root_path = get_cgroup_mnt_and_root_v2;
++ ops->get_cgroup_oom_handler = get_cgroup_oom_handler_v2;
+ return 0;
+ }
+\ No newline at end of file
+diff --git a/src/daemon/entry/cri/v1/v1_cri_container_manager_service.cc b/src/daemon/entry/cri/v1/v1_cri_container_manager_service.cc
+index 47a33c2c..cac5c0ba 100644
+--- a/src/daemon/entry/cri/v1/v1_cri_container_manager_service.cc
++++ b/src/daemon/entry/cri/v1/v1_cri_container_manager_service.cc
+@@ -1055,6 +1055,9 @@ void ContainerManagerService::UpdateBaseStatusFromInspect(
+ } else { // Case 3
+ state = runtime::v1::CONTAINER_CREATED;
+ }
++ if (inspect->state->oom_killed) {
++ reason = "OOMKilled";
++ }
+ if (inspect->state->error != nullptr) {
+ message = inspect->state->error;
+ }
+diff --git a/src/daemon/modules/api/container_api.h b/src/daemon/modules/api/container_api.h
+index 43d66d64..830fd696 100644
+--- a/src/daemon/modules/api/container_api.h
++++ b/src/daemon/modules/api/container_api.h
+@@ -221,6 +221,8 @@ void container_state_set_restarting(container_state_t *s, int exit_code);
+ void container_state_set_paused(container_state_t *s);
+ void container_state_reset_paused(container_state_t *s);
+
++void container_state_set_oom_killed(container_state_t *s);
++
+ void container_state_set_dead(container_state_t *s);
+
+ void container_state_increase_restart_count(container_state_t *s);
+@@ -269,8 +271,7 @@ bool container_is_valid_state_string(const char *state);
+
+ void container_update_health_monitor(const char *container_id);
+
+-extern int container_supervisor_add_exit_monitor(int fd, const pid_ppid_info_t *pid_info, const char *name,
+- const char *runtime, bool sandbox_container);
++extern int container_supervisor_add_exit_monitor(int fd, const char *exit_fifo, const pid_ppid_info_t *pid_info, const container_t *cont);
+
+ extern char *container_exit_fifo_create(const char *cont_state_path);
+
+diff --git a/src/daemon/modules/container/container_events_handler.c b/src/daemon/modules/container/container_events_handler.c
+index b84f1ad5..109a628c 100644
+--- a/src/daemon/modules/container/container_events_handler.c
++++ b/src/daemon/modules/container/container_events_handler.c
+@@ -114,7 +114,7 @@ static int container_state_changed(container_t *cont, const struct isulad_events
+ bool has_been_manually_stopped = false;
+
+ /* only handle Exit event */
+- if (events->type != EVENTS_TYPE_STOPPED1) {
++ if (events->type != EVENTS_TYPE_STOPPED1 && events->type != EVENTS_TYPE_OOM) {
+ return 0;
+ }
+
+@@ -187,6 +187,16 @@ static int container_state_changed(container_t *cont, const struct isulad_events
+ }
+
+ break;
++
++ case EVENTS_TYPE_OOM: {
++ container_lock(cont);
++ container_state_set_oom_killed(cont->state);
++ if (container_state_to_disk(cont)) {
++ WARN("Failed to save container \"%s\" to disk", id);
++ }
++ container_unlock(cont);
++ break;
++ }
+ default:
+ /* ignore garbage */
+ break;
+diff --git a/src/daemon/modules/container/container_state.c b/src/daemon/modules/container/container_state.c
+index f31959fa..452a2b26 100644
+--- a/src/daemon/modules/container/container_state.c
++++ b/src/daemon/modules/container/container_state.c
+@@ -154,6 +154,7 @@ void container_state_set_running(container_state_t *s, const pid_ppid_info_t *pi
+ state->paused = false;
+ }
+ state->exit_code = 0;
++ state->oom_killed = false;
+
+ if (pid_info != NULL) {
+ state->pid = pid_info->pid;
+@@ -222,6 +223,19 @@ void container_state_set_paused(container_state_t *s)
+ container_state_unlock(s);
+ }
+
++/* state set oom killed */
++void container_state_set_oom_killed(container_state_t *s)
++{
++    const bool usable = (s != NULL) && (s->state != NULL);
++
++    if (!usable) {
++        return;
++    }
++    container_state_lock(s);
++    s->state->oom_killed = true; // flag is written while the state lock is held
++    container_state_unlock(s);
++}
++
+ /* state reset paused */
+ void container_state_reset_paused(container_state_t *s)
+ {
+@@ -573,6 +587,7 @@ container_inspect_state *container_state_to_inspect_state(container_state_t *s)
+ state->running = s->state->running;
+ state->paused = s->state->paused;
+ state->restarting = s->state->restarting;
++ state->oom_killed = s->state->oom_killed;
+ state->pid = s->state->pid;
+
+ state->exit_code = s->state->exit_code;
+diff --git a/src/daemon/modules/container/restore/restore.c b/src/daemon/modules/container/restore/restore.c
+index 76868e28..52f68d21 100644
+--- a/src/daemon/modules/container/restore/restore.c
++++ b/src/daemon/modules/container/restore/restore.c
+@@ -24,6 +24,7 @@
+ #include <isula_libutils/container_config_v2.h>
+ #include <isula_libutils/host_config.h>
+ #include <isula_libutils/log.h>
++#include <isula_libutils/auto_cleanup.h>
+
+ #include "isulad_config.h"
+
+@@ -44,6 +45,8 @@
+ #include "utils_file.h"
+ #include "utils_timestamp.h"
+ #include "id_name_manager.h"
++#include "cgroup.h"
++#include "specs_api.h"
+
+ /* restore supervisor */
+ static int restore_supervisor(const container_t *cont)
+@@ -55,9 +58,7 @@ static int restore_supervisor(const container_t *cont)
+ char *exit_fifo = NULL;
+ char *id = cont->common_config->id;
+ char *statepath = cont->state_path;
+- char *runtime = cont->runtime;
+ pid_ppid_info_t pid_info = { 0 };
+- bool sandbox_container = false;
+
+ nret = snprintf(container_state, sizeof(container_state), "%s/%s", statepath, id);
+ if (nret < 0 || (size_t)nret >= sizeof(container_state)) {
+@@ -91,11 +92,8 @@ static int restore_supervisor(const container_t *cont)
+ pid_info.ppid = cont->state->state->p_pid;
+ pid_info.start_time = cont->state->state->start_time;
+ pid_info.pstart_time = cont->state->state->p_start_time;
+-#ifdef ENABLE_CRI_API_V1
+- sandbox_container = is_sandbox_container(cont->common_config->sandbox_info);
+-#endif
+
+- if (container_supervisor_add_exit_monitor(exit_fifo_fd, &pid_info, id, runtime, sandbox_container)) {
++ if (container_supervisor_add_exit_monitor(exit_fifo_fd, exit_fifo, &pid_info, cont)) {
+ ERROR("Failed to add exit monitor to supervisor");
+ ret = -1;
+ goto out;
+diff --git a/src/daemon/modules/container/supervisor/supervisor.c b/src/daemon/modules/container/supervisor/supervisor.c
+index 63289283..1b7da383 100644
+--- a/src/daemon/modules/container/supervisor/supervisor.c
++++ b/src/daemon/modules/container/supervisor/supervisor.c
+@@ -41,6 +41,8 @@
+ #ifdef ENABLE_CRI_API_V1
+ #include "sandbox_ops.h"
+ #endif
++#include "cgroup.h"
++#include "specs_api.h"
+
+ pthread_mutex_t g_supervisor_lock = PTHREAD_MUTEX_INITIALIZER;
+ struct epoll_descr g_supervisor_descr;
+@@ -269,24 +271,52 @@ static int supervisor_exit_cb(int fd, uint32_t events, void *cbdata, struct epol
+ return EPOLL_LOOP_HANDLE_CONTINUE;
+ }
+
++static int oom_handle_cb(int fd, uint32_t events, void *cbdata, struct epoll_descr *descr)
++{
++    cgroup_oom_handler_info_t *handler_info = (cgroup_oom_handler_info_t *)cbdata;
++    // Without a usable handler the verdict stays CLOSE, so the watch is torn down.
++    bool verdict = CGROUP_OOM_HANDLE_CLOSE;
++    if (handler_info != NULL && handler_info->oom_event_handler != NULL) {
++        verdict = handler_info->oom_event_handler(fd, handler_info);
++    }
++    if (verdict != CGROUP_OOM_HANDLE_CLOSE) {
++        return EPOLL_LOOP_HANDLE_CONTINUE;
++    }
++
++    // Unregister the fd from the supervisor loop before freeing the handler data.
++    supervisor_handler_lock();
++    epoll_loop_del_handler(&g_supervisor_descr, fd);
++    supervisor_handler_unlock();
++    common_free_cgroup_oom_handler_info(handler_info);
++    return EPOLL_LOOP_HANDLE_CONTINUE;
++}
++
+ /* supervisor add exit monitor */
+-int container_supervisor_add_exit_monitor(int fd, const pid_ppid_info_t *pid_info, const char *name,
+- const char *runtime, bool sandbox_container)
++int container_supervisor_add_exit_monitor(int fd, const char *exit_fifo, const pid_ppid_info_t *pid_info, const container_t *cont)
+ {
+ int ret = 0;
+ struct supervisor_handler_data *data = NULL;
++ cgroup_oom_handler_info_t *oom_handler_info = NULL;
++ __isula_auto_free char *cgroup_path = NULL;
+
+ if (fd < 0) {
+ ERROR("Invalid exit fifo fd");
+ return -1;
+ }
+
+- if (pid_info == NULL || name == NULL || runtime == NULL) {
++ if (pid_info == NULL || cont == NULL || cont->common_config == NULL) {
+ ERROR("Invalid input arguments");
+ close(fd);
+ return -1;
+ }
+
++ cgroup_path = merge_container_cgroups_path(cont->common_config->id, cont->hostconfig);
++ if (cgroup_path == NULL) {
++ ERROR("Failed to get cgroup path");
++ close(fd);
++ return -1;
++ }
++
+ data = util_common_calloc_s(sizeof(struct supervisor_handler_data));
+ if (data == NULL) {
+ ERROR("Memory out");
+@@ -295,15 +325,26 @@ int container_supervisor_add_exit_monitor(int fd, const pid_ppid_info_t *pid_inf
+ }
+
+ data->fd = fd;
+- data->name = util_strdup_s(name);
+- data->runtime = util_strdup_s(runtime);
+- data->is_sandbox_container = sandbox_container;
++ data->name = util_strdup_s(cont->common_config->id);
++ data->runtime = util_strdup_s(cont->runtime);
++#ifdef ENABLE_CRI_API_V1
++ data->is_sandbox_container = is_sandbox_container(cont->common_config->sandbox_info);
++#endif
+ data->pid_info.pid = pid_info->pid;
+ data->pid_info.start_time = pid_info->start_time;
+ data->pid_info.ppid = pid_info->ppid;
+ data->pid_info.pstart_time = pid_info->pstart_time;
++ oom_handler_info = common_get_cgroup_oom_handler(fd, cont->common_config->id, cgroup_path, exit_fifo);
+
+ supervisor_handler_lock();
++ if (oom_handler_info != NULL) {
++ ret = epoll_loop_add_handler(&g_supervisor_descr, oom_handler_info->oom_event_fd, oom_handle_cb, oom_handler_info);
++ if (ret != 0) {
++ ERROR("Failed to add handler for oom event");
++ goto err;
++ }
++ }
++
+ ret = epoll_loop_add_handler(&g_supervisor_descr, fd, supervisor_exit_cb, data);
+ if (ret != 0) {
+ ERROR("Failed to add handler for exit fifo");
+@@ -314,6 +355,7 @@ int container_supervisor_add_exit_monitor(int fd, const pid_ppid_info_t *pid_inf
+
+ err:
+ supervisor_handler_data_free(data);
++ common_free_cgroup_oom_handler_info(oom_handler_info);
+ out:
+ supervisor_handler_unlock();
+ return ret;
+diff --git a/src/daemon/modules/events/collector.c b/src/daemon/modules/events/collector.c
+index fb4a7fea..af688742 100644
+--- a/src/daemon/modules/events/collector.c
++++ b/src/daemon/modules/events/collector.c
+@@ -133,6 +133,9 @@ static container_events_type_t lcrsta2Evetype(int value)
+ case THAWED:
+ et = EVENTS_TYPE_THAWED;
+ break;
++ case OOM:
++ et = EVENTS_TYPE_OOM;
++ break;
+ default:
+ et = EVENTS_TYPE_EXIT;
+ break;
+@@ -822,8 +825,8 @@ static int post_event_to_events_hander(const struct isulad_events_format *events
+ return -1;
+ }
+
+- /* only post STOPPED event to events_hander */
+- if (events->type != EVENTS_TYPE_STOPPED1) {
++ /* only post STOPPED event and OOM event to events_hander */
++ if (events->type != EVENTS_TYPE_STOPPED1 && events->type != EVENTS_TYPE_OOM) {
+ return 0;
+ }
+
+diff --git a/src/daemon/modules/service/service_container.c b/src/daemon/modules/service/service_container.c
+index a8090d5a..eb7ce4f4 100644
+--- a/src/daemon/modules/service/service_container.c
++++ b/src/daemon/modules/service/service_container.c
+@@ -275,14 +275,13 @@ static void clean_resources_on_failure(const container_t *cont, const char *engi
+ return;
+ }
+
+-static int do_post_start_on_success(const char *id, const char *runtime, bool sandbox_container,
+- const char *pidfile, int exit_fifo_fd,
+- const pid_ppid_info_t *pid_info)
++static int do_post_start_on_success(container_t *cont, int exit_fifo_fd,
++ const char *exit_fifo, const pid_ppid_info_t *pid_info)
+ {
+ int ret = 0;
+
+ // exit_fifo_fd was closed in container_supervisor_add_exit_monitor
+- if (container_supervisor_add_exit_monitor(exit_fifo_fd, pid_info, id, runtime, sandbox_container)) {
++ if (container_supervisor_add_exit_monitor(exit_fifo_fd, exit_fifo, pid_info, cont)) {
+ ERROR("Failed to add exit monitor to supervisor");
+ ret = -1;
+ }
+@@ -750,7 +749,6 @@ static int do_start_container(container_t *cont, const char *console_fifos[], bo
+ oci_runtime_spec *oci_spec = NULL;
+ rt_create_params_t create_params = { 0 };
+ rt_start_params_t start_params = { 0 };
+- bool sandbox_container = false;
+
+ nret = snprintf(bundle, sizeof(bundle), "%s/%s", cont->root_path, id);
+ if (nret < 0 || (size_t)nret >= sizeof(bundle)) {
+@@ -899,7 +897,6 @@ static int do_start_container(container_t *cont, const char *console_fifos[], bo
+ if (cont->common_config->sandbox_info != NULL) {
+ create_params.task_addr = cont->common_config->sandbox_info->task_address;
+ }
+- sandbox_container = is_sandbox_container(cont->common_config->sandbox_info);
+ #endif
+
+ if (runtime_create(id, runtime, &create_params) != 0) {
+@@ -924,7 +921,7 @@ static int do_start_container(container_t *cont, const char *console_fifos[], bo
+
+ ret = runtime_start(id, runtime, &start_params, pid_info);
+ if (ret == 0) {
+- if (do_post_start_on_success(id, runtime, sandbox_container, pidfile, exit_fifo_fd, pid_info) != 0) {
++ if (do_post_start_on_success(cont, exit_fifo_fd, exit_fifo, pid_info) != 0) {
+ ERROR("Failed to do post start on runtime start success");
+ ret = -1;
+ goto clean_resources;
+--
+2.34.1
+