1 В избранное 0 Ответвления 0

OSCHINA-MIRROR/src-openeuler-iSulad

В этом репозитории не указан файл с открытой лицензией (LICENSE). При использовании обратитесь к конкретному описанию проекта и его зависимостям в коде.
Клонировать/Скачать
0035-monitor-cgroup-oom-killed-event-and-update-to-cri-of.patch 31 КБ
Копировать Редактировать Web IDE Исходные данные Просмотреть построчно История
jikai Отправлено 02.04.2024 14:50 0a4195b
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868
From 947cf87a87ec49409ae509e5142b8134454d1547 Mon Sep 17 00:00:00 2001
From: jikai <jikai11@huawei.com>
Date: Thu, 28 Mar 2024 12:51:09 +0000
Subject: [PATCH 35/43] monitor cgroup oom killed event and update to cri of
container
Signed-off-by: jikai <jikai11@huawei.com>
---
src/daemon/common/cgroup/cgroup.c | 91 +++++++++-
src/daemon/common/cgroup/cgroup.h | 5 +
src/daemon/common/cgroup/cgroup_common.h | 13 ++
src/daemon/common/cgroup/cgroup_v1.c | 160 ++++++++++++++++++
src/daemon/common/cgroup/cgroup_v2.c | 138 ++++++++++++++-
.../v1/v1_cri_container_manager_service.cc | 3 +
src/daemon/modules/api/container_api.h | 5 +-
.../container/container_events_handler.c | 12 +-
.../modules/container/container_state.c | 15 ++
.../modules/container/restore/restore.c | 10 +-
.../modules/container/supervisor/supervisor.c | 54 +++++-
src/daemon/modules/events/collector.c | 7 +-
.../modules/service/service_container.c | 11 +-
13 files changed, 498 insertions(+), 26 deletions(-)
diff --git a/src/daemon/common/cgroup/cgroup.c b/src/daemon/common/cgroup/cgroup.c
index 837b514a..d3f1445a 100644
--- a/src/daemon/common/cgroup/cgroup.c
+++ b/src/daemon/common/cgroup/cgroup.c
@@ -133,4 +133,93 @@ char *common_get_own_cgroup_path(const char *subsystem)
}
return g_cgroup_ops.get_own_cgroup_path(subsystem);
-}
\ No newline at end of file
+}
+
+/*
+ * Convert a container cgroup path into a cgroup directory path.
+ *
+ * - A path for which util_has_suffix(cgroup_path, ".slice") is false is treated as a
+ *   cgroupfs path and duplicated unchanged.
+ * - Otherwise the path is expected in the systemd form "slice:prefix:id" and is expanded
+ *   into nested slices plus a scope, e.g. "test-a.slice:isulad:id" becomes
+ *   "test.slice/test-a.slice/isulad-id.scope".
+ *
+ * Returns the result of util_strdup_s() (callers in this patch release it with
+ * __isula_auto_free), or NULL on NULL input, malformed systemd form, or when the
+ * expanded path would not fit in PATH_MAX.
+ *
+ * NOTE(review): the documented systemd form ends with the id, not with ".slice"; confirm
+ * what util_has_suffix() matches and what callers actually pass in.
+ */
+char *common_convert_cgroup_path(const char *cgroup_path)
+{
+    char *token = NULL;
+    char result[PATH_MAX + 1] = {0};
+    __isula_auto_array_t char **arr = NULL;
+
+    if (cgroup_path == NULL) {
+        ERROR("Invalid NULL cgroup path");
+        return NULL;
+    }
+
+    // for cgroup fs cgroup path, return directly
+    if (!util_has_suffix(cgroup_path, ".slice")) {
+        return util_strdup_s(cgroup_path);
+    }
+
+    // for systemd cgroup, cgroup_path should have the form slice:prefix:id,
+    // convert it to a true path, such as from test-a.slice:isulad:id
+    // to test.slice/test-a.slice/isulad-id.scope
+    arr = util_string_split_n(cgroup_path, ':', 3);
+    if (arr == NULL || util_array_len((const char **)arr) != 3) {
+        ERROR("Invalid systemd cgroup parent");
+        return NULL;
+    }
+
+    // For every '-' in the slice name, temporarily cut the name there and append
+    // "<prefix>.slice" as one more parent directory; the '-' is restored afterwards.
+    token = strchr(arr[0], '-');
+    while (token != NULL) {
+        *token = '\0';
+        // 1 accounts for the '/' separator that may be added below
+        if (strlen(result) + 1 + strlen(arr[0]) + strlen(".slice") > PATH_MAX) {
+            ERROR("Invalid systemd cgroup parent: exceeds max length of path");
+            *token = '-';
+            return NULL;
+        }
+        if (result[0] != '\0') {
+            strcat(result, "/");
+        }
+        strcat(result, arr[0]);
+        strcat(result, ".slice");
+        *token = '-';
+        token = strchr(token + 1, '-');
+    }
+
+    // Add /arr[0]/arr[1]-arr[2].scope, 3 include two slashes and one dash.
+    // The bound is written as a sum so that it can neither underflow nor, as the
+    // previous form did, apply pointer arithmetic to arr[0] inside strlen().
+    if (strlen(cgroup_path) > PATH_MAX ||
+        strlen(result) + 3 + strlen(".scope") + strlen(arr[0]) + strlen(arr[1]) + strlen(arr[2]) > PATH_MAX) {
+        ERROR("Invalid systemd cgroup parent: exceeds max length of path");
+        return NULL;
+    }
+
+    (void)strcat(result, "/");
+    (void)strcat(result, arr[0]);
+    (void)strcat(result, "/");
+    (void)strcat(result, arr[1]);
+    (void)strcat(result, "-");
+    (void)strcat(result, arr[2]);
+    (void)strcat(result, ".scope");
+
+    return util_strdup_s(result);
+}
+
+/*
+ * Forward to the get_cgroup_oom_handler op of the active cgroup backend (g_cgroup_ops).
+ * Returns whatever the backend returns, or NULL when the backend does not provide the op.
+ */
+cgroup_oom_handler_info_t *common_get_cgroup_oom_handler(int fd, const char *name, const char *cgroup_path, const char *exit_fifo)
+{
+    if (g_cgroup_ops.get_cgroup_oom_handler != NULL) {
+        return g_cgroup_ops.get_cgroup_oom_handler(fd, name, cgroup_path, exit_fifo);
+    }
+
+    ERROR("Unimplmented get_cgroup_oom_handler op");
+    return NULL;
+}
+
+/*
+ * Release an OOM handler descriptor: close both file descriptors that were opened
+ * (a negative value marks "not opened"), then free the owned strings and the struct.
+ * A NULL info is accepted and ignored.
+ */
+void common_free_cgroup_oom_handler_info(cgroup_oom_handler_info_t *info)
+{
+    if (info != NULL) {
+        if (info->oom_event_fd >= 0) {
+            close(info->oom_event_fd);
+        }
+
+        if (info->cgroup_file_fd >= 0) {
+            close(info->cgroup_file_fd);
+        }
+
+        free(info->name);
+        free(info->cgroup_memory_event_path);
+        free(info);
+    }
+}
diff --git a/src/daemon/common/cgroup/cgroup.h b/src/daemon/common/cgroup/cgroup.h
index 1efc3ca6..8c76d99d 100644
--- a/src/daemon/common/cgroup/cgroup.h
+++ b/src/daemon/common/cgroup/cgroup.h
@@ -41,6 +41,11 @@ int common_get_cgroup_mnt_and_root_path(const char *subsystem, char **mountpoint
char *common_get_init_cgroup_path(const char *subsystem);
char *common_get_own_cgroup_path(const char *subsystem);
+char *common_convert_cgroup_path(const char *cgroup_path);
+
+cgroup_oom_handler_info_t *common_get_cgroup_oom_handler(int fd, const char *name, const char *cgroup_path, const char *exit_fifo);
+void common_free_cgroup_oom_handler_info(cgroup_oom_handler_info_t *info);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/daemon/common/cgroup/cgroup_common.h b/src/daemon/common/cgroup/cgroup_common.h
index 2a0935cb..e3912bf0 100644
--- a/src/daemon/common/cgroup/cgroup_common.h
+++ b/src/daemon/common/cgroup/cgroup_common.h
@@ -116,6 +116,17 @@ typedef struct {
cgroup_pids_metrics_t cgpids_metrics;
} cgroup_metrics_t;
+/* Return values of cgroup_oom_handler_info_t.oom_event_handler:
+ * CONTINUE keeps the handler registered, CLOSE asks the caller to remove and free it. */
+#define CGROUP_OOM_HANDLE_CONTINUE false
+#define CGROUP_OOM_HANDLE_CLOSE true
+
+/* State of one per-container OOM monitor, released with common_free_cgroup_oom_handler_info(). */
+typedef struct _cgroup_oom_handler_info_t {
+ int oom_event_fd; /* fd polled for OOM notifications (eventfd for v1, inotify fd for v2); -1 if unset */
+ int cgroup_file_fd; /* v1: fd of memory.oom_control; left at -1 by the v2 handler */
+ char *name; /* owned copy of the container name passed when the handler was created */
+ char *cgroup_memory_event_path; /* owned path: cgroup.event_control (v1) or memory.events (v2) */
+ bool (*oom_event_handler)(int, void *); /* called as handler(fd, info); returns CGROUP_OOM_HANDLE_* */
+} cgroup_oom_handler_info_t;
+
typedef struct {
int (*get_cgroup_version)(void);
int (*get_cgroup_info)(cgroup_mem_info_t *meminfo, cgroup_cpu_info_t *cpuinfo,
@@ -128,6 +139,8 @@ typedef struct {
char *(*get_init_cgroup_path)(const char *subsystem);
char *(*get_own_cgroup_path)(const char *subsystem);
+
+ cgroup_oom_handler_info_t *(*get_cgroup_oom_handler)(int fd, const char *name, const char *cgroup_path, const char *exit_fifo);
} cgroup_ops;
#ifdef __cplusplus
diff --git a/src/daemon/common/cgroup/cgroup_v1.c b/src/daemon/common/cgroup/cgroup_v1.c
index 51cf7512..41f3110a 100644
--- a/src/daemon/common/cgroup/cgroup_v1.c
+++ b/src/daemon/common/cgroup/cgroup_v1.c
@@ -12,14 +12,20 @@
* Create: 2023-03-29
* Description: provide cgroup v1 functions
******************************************************************************/
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
#include "cgroup.h"
#include <stdio.h>
#include <stdlib.h>
+#include <sys/eventfd.h>
#include "utils.h"
#include "sysinfo.h"
#include "err_msg.h"
+#include "events_sender_api.h"
#define CGROUP_HUGETLB_LIMIT "hugetlb.%s.limit_in_bytes"
#define CGROUP_MOUNT_PATH_PREFIX "/sys/fs/cgroup/"
@@ -1045,6 +1051,159 @@ static char *common_get_cgroup_path(const char *path, const char *subsystem)
return res;
}
+/*
+ * OOM callback for cgroup v1.
+ *
+ * fd:     the eventfd to read the event counter from
+ * cbdata: the cgroup_oom_handler_info_t belonging to that eventfd
+ *
+ * Sends an OOM container event for info->name unless the wake-up is judged to be a
+ * cgroup removal. Every path returns CGROUP_OOM_HANDLE_CLOSE, i.e. the monitor is
+ * always one-shot.
+ */
+static bool oom_cb_cgroup_v1(int fd, void *cbdata)
+{
+ cgroup_oom_handler_info_t *info = (cgroup_oom_handler_info_t *)cbdata;
+ /* Check whether cgroup.event_control still exists to know if the cgroup was removed:
+ * if the cgroup was removed and only one event was received,
+ * we know that it is a cgroup removal event rather than an oom event
+ */
+ bool cgroup_removed = false;
+ if (info == NULL) {
+ ERROR("Invalide callback data");
+ return CGROUP_OOM_HANDLE_CLOSE;
+ }
+
+ // Probe the path before draining the eventfd, so the removal state is sampled first.
+ if (access(info->cgroup_memory_event_path, F_OK) < 0) {
+ DEBUG("Cgroup event path was removed");
+ cgroup_removed = true;
+ }
+
+ // The eventfd yields an 8-byte counter; anything but a full read is treated as fatal.
+ uint64_t event_count;
+ ssize_t num_read = util_read_nointr(fd, &event_count, sizeof(uint64_t));
+ if (num_read < 0) {
+ ERROR("Failed to read oom event from eventfd");
+ return CGROUP_OOM_HANDLE_CLOSE;
+ }
+
+ if (num_read == 0) {
+ return CGROUP_OOM_HANDLE_CLOSE;
+ }
+
+ if (num_read != sizeof(uint64_t)) {
+ ERROR("Failed to read full oom event from eventfd");
+ return CGROUP_OOM_HANDLE_CLOSE;
+ }
+
+ if (event_count == 0) {
+ ERROR("Unexpected event count when reading for oom event");
+ return CGROUP_OOM_HANDLE_CLOSE;
+ }
+
+ // A single event together with a vanished cgroup is the removal notification, not an OOM.
+ if (event_count == 1 && cgroup_removed) {
+ return CGROUP_OOM_HANDLE_CLOSE;
+ }
+
+ INFO("OOM event detected");
+ (void)isulad_monitor_send_container_event(info->name, OOM, -1, 0, NULL, NULL);
+
+ return CGROUP_OOM_HANDLE_CLOSE;
+}
+
+/*
+ * Build the path of a container's directory in the cgroup v1 "memory" hierarchy:
+ * "<mount point>/<root>" joined with the converted cgroup_path.
+ * Returns the util_path_join() result, or NULL when conversion, mount lookup or
+ * formatting fails.
+ */
+static char *get_memory_cgroup_path_v1(const char *cgroup_path)
+{
+    static const char docker_prefix[] = "/docker/";
+    char base_dir[PATH_MAX] = { 0 };
+    __isula_auto_free char *mount_point = NULL;
+    __isula_auto_free char *hierarchy_root = NULL;
+    __isula_auto_free char *relative_path = common_convert_cgroup_path(cgroup_path);
+
+    if (relative_path == NULL) {
+        ERROR("Failed to transfer cgroup path");
+        return NULL;
+    }
+
+    int rc = get_cgroup_mnt_and_root_path_v1("memory", &mount_point, &hierarchy_root);
+    if (rc != 0 || mount_point == NULL || hierarchy_root == NULL) {
+        ERROR("Can not find cgroup mnt and root path for subsystem 'memory'");
+        return NULL;
+    }
+
+    // When iSulad is run inside docker, the root is based of the host cgroup.
+    // Replace root to "/"
+    if (strncmp(hierarchy_root, docker_prefix, sizeof(docker_prefix) - 1) == 0) {
+        hierarchy_root[1] = '\0';
+    }
+
+    rc = snprintf(base_dir, sizeof(base_dir), "%s/%s", mount_point, hierarchy_root);
+    if (rc < 0 || (size_t)rc >= sizeof(base_dir)) {
+        ERROR("Failed to print string");
+        return NULL;
+    }
+
+    return util_path_join(base_dir, relative_path);
+}
+
+/*
+ * Create the cgroup v1 OOM monitor for a container.
+ *
+ * fd:          not used by this implementation
+ * name:        container name, copied into the returned info
+ * cgroup_path: container cgroup path, resolved via get_memory_cgroup_path_v1()
+ * exit_fifo:   only checked for NULL here
+ *
+ * On success returns an info whose oom_event_fd is an eventfd and whose cgroup_file_fd
+ * is an open memory.oom_control; on any failure everything acquired so far is released
+ * through common_free_cgroup_oom_handler_info() and NULL is returned.
+ */
+static cgroup_oom_handler_info_t *get_cgroup_oom_handler_v1(int fd, const char *name, const char *cgroup_path, const char *exit_fifo)
+{
+ __isula_auto_free char *memory_cgroup_path = NULL;
+ __isula_auto_free char *memory_cgroup_oom_control_path = NULL;
+ __isula_auto_free char *data = NULL;
+ // only needed while registering; presumably closed on scope exit by __isula_auto_close
+ __isula_auto_close int cgroup_event_control_fd = -1;
+ if (name == NULL || cgroup_path == NULL || exit_fifo == NULL) {
+ ERROR("Invalid arguments");
+ return NULL;
+ }
+
+ cgroup_oom_handler_info_t *info = util_common_calloc_s(sizeof(cgroup_oom_handler_info_t));
+ if (info == NULL) {
+ ERROR("Out of memory");
+ return NULL;
+ }
+ info->name = util_strdup_s(name);
+ // mark both descriptors as unopened so the cleanup path does not close fd 0
+ info->cgroup_file_fd = -1;
+ info->oom_event_fd = -1;
+ info->oom_event_handler = oom_cb_cgroup_v1;
+
+ memory_cgroup_path = get_memory_cgroup_path_v1(cgroup_path);
+ if (memory_cgroup_path == NULL) {
+ ERROR("Failed to get memory cgroup path");
+ goto cleanup;
+ }
+
+ info->cgroup_memory_event_path = util_path_join(memory_cgroup_path, "cgroup.event_control");
+ if (info->cgroup_memory_event_path == NULL) {
+ ERROR("Failed to join memory cgroup file path");
+ goto cleanup;
+ }
+
+ cgroup_event_control_fd = util_open(info->cgroup_memory_event_path, O_WRONLY | O_CLOEXEC, 0);
+ if (cgroup_event_control_fd < 0) {
+ ERROR("Failed to open %s", info->cgroup_memory_event_path);
+ goto cleanup;
+ }
+
+ memory_cgroup_oom_control_path = util_path_join(memory_cgroup_path, "memory.oom_control");
+ if (memory_cgroup_oom_control_path == NULL) {
+ ERROR("Failed to join memory cgroup file path");
+ goto cleanup;
+ }
+
+ info->cgroup_file_fd = util_open(memory_cgroup_oom_control_path, O_RDONLY | O_CLOEXEC, 0);
+ if (info->cgroup_file_fd < 0) {
+ ERROR("Failed to open %s", memory_cgroup_oom_control_path);
+ goto cleanup;
+ }
+
+ info->oom_event_fd = eventfd(0, EFD_CLOEXEC);
+ if (info->oom_event_fd < 0) {
+ ERROR("Failed to create oom eventfd");
+ goto cleanup;
+ }
+
+ // Register the notification by writing "<eventfd> <memory.oom_control fd>" to cgroup.event_control.
+ if (asprintf(&data, "%d %d", info->oom_event_fd, info->cgroup_file_fd) < 0 ||
+ util_write_nointr(cgroup_event_control_fd, data, strlen(data)) < 0) {
+ ERROR("Failed to write to cgroup.event_control");
+ goto cleanup;
+ }
+
+ return info;
+cleanup:
+ common_free_cgroup_oom_handler_info(info);
+ return NULL;
+}
+
char *get_init_cgroup_path_v1(const char *subsystem)
{
return common_get_cgroup_path("/proc/1/cgroup", subsystem);
@@ -1071,5 +1230,6 @@ int cgroup_v1_ops_init(cgroup_ops *ops)
ops->get_cgroup_mnt_and_root_path = get_cgroup_mnt_and_root_path_v1;
ops->get_init_cgroup_path = get_init_cgroup_path_v1;
ops->get_own_cgroup_path = get_own_cgroup_v1;
+ ops->get_cgroup_oom_handler = get_cgroup_oom_handler_v1;
return 0;
}
\ No newline at end of file
diff --git a/src/daemon/common/cgroup/cgroup_v2.c b/src/daemon/common/cgroup/cgroup_v2.c
index 65cf90d8..a36258f0 100644
--- a/src/daemon/common/cgroup/cgroup_v2.c
+++ b/src/daemon/common/cgroup/cgroup_v2.c
@@ -17,12 +17,14 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
+#include <sys/inotify.h>
#include <isula_libutils/auto_cleanup.h>
#include "utils.h"
#include "path.h"
#include "sysinfo.h"
+#include "events_sender_api.h"
// Cgroup V2 Item Definition
#define CGROUP2_CPU_WEIGHT "cpu.weight"
@@ -408,10 +410,143 @@ static int get_cgroup_metrics_v2(const char *cgroup_path, cgroup_metrics_t *cgro
static int get_cgroup_mnt_and_root_v2(const char *subsystem, char **mountpoint, char **root)
{
- *mountpoint = util_strdup_s(CGROUP_ISULAD_PATH);
+ if (mountpoint != NULL) {
+ *mountpoint = util_strdup_s(CGROUP_ISULAD_PATH);
+ }
return 0;
}
+/*
+ * OOM callback for cgroup v2.
+ *
+ * fd:     inotify descriptor to read the pending event from
+ * cbdata: the cgroup_oom_handler_info_t belonging to that descriptor
+ *
+ * Returns CGROUP_OOM_HANDLE_CLOSE when the monitor should go away (bad callback data,
+ * failed or short read, a delete event, memory.events cannot be opened, or an OOM was
+ * found and reported) and CGROUP_OOM_HANDLE_CONTINUE when memory.events was modified
+ * but carries no non-zero "oom"/"oom_kill" counter.
+ */
+static bool oom_cb_cgroup_v2(int fd, void *cbdata)
+{
+    // Room for one event carrying the longest possible name. The bytes are read through
+    // a struct inotify_event pointer, so the buffer gets that type's alignment as
+    // recommended by inotify(7). A fixed-size array replaces the former VLA.
+    char events[sizeof(struct inotify_event) + NAME_MAX + 1]
+    __attribute__((aligned(__alignof__(struct inotify_event))));
+    const struct inotify_event *first_event = (const struct inotify_event *)events;
+    cgroup_oom_handler_info_t *info = (cgroup_oom_handler_info_t *)cbdata;
+
+    if (info == NULL) {
+        ERROR("Invalid callback data");
+        return CGROUP_OOM_HANDLE_CLOSE;
+    }
+
+    ssize_t num_read = util_read_nointr(fd, events, sizeof(events));
+    if (num_read < 0) {
+        ERROR("Failed to read oom event from eventfd in v2");
+        return CGROUP_OOM_HANDLE_CLOSE;
+    }
+
+    // Without a complete event header the mask below would be indeterminate; handle it
+    // like the v1 callback handles an empty or partial read.
+    if ((size_t)num_read < sizeof(struct inotify_event)) {
+        ERROR("Failed to read full oom event from inotify fd in v2");
+        return CGROUP_OOM_HANDLE_CLOSE;
+    }
+
+    // NOTE(review): only the first event of a batch is inspected (unchanged behaviour);
+    // a delete event queued behind a modify event in the same read is not seen here.
+    if (first_event->mask & (IN_DELETE | IN_DELETE_SELF)) {
+        return CGROUP_OOM_HANDLE_CLOSE;
+    }
+
+    __isula_auto_file FILE *fp = fopen(info->cgroup_memory_event_path, "re");
+    if (fp == NULL) {
+        ERROR("Failed to open cgroups file: %s", info->cgroup_memory_event_path);
+        return CGROUP_OOM_HANDLE_CLOSE;
+    }
+
+    __isula_auto_free char *line = NULL;
+    size_t line_cap = 0; // owned by getline(); never repurposed
+    ssize_t line_len;
+    while ((line_len = getline(&line, &line_cap, fp)) != -1) {
+        int count;
+        int key_len;
+        const char *oom_str = "oom ";
+        const char *oom_kill_str = "oom_kill ";
+        const int oom_len = strlen(oom_str), oom_kill_len = strlen(oom_kill_str);
+
+        // "+ 2": at least one digit and the trailing newline must follow the key
+        if (line_len >= oom_kill_len + 2 && memcmp(line, oom_kill_str, oom_kill_len) == 0) {
+            key_len = oom_kill_len;
+        } else if (line_len >= oom_len + 2 && memcmp(line, oom_str, oom_len) == 0) {
+            key_len = oom_len;
+        } else {
+            continue;
+        }
+
+        // to make use of util_safe_int, it requires it ends with '\0'
+        line[strcspn(line, "\n")] = '\0';
+        if (util_safe_int(&line[key_len], &count) < 0) {
+            ERROR("Failed to parse: %s", &line[key_len]);
+            continue;
+        }
+
+        if (count == 0) {
+            continue;
+        }
+
+        INFO("OOM event detected in cgroup v2");
+        (void)isulad_monitor_send_container_event(info->name, OOM, -1, 0, NULL, NULL);
+
+        return CGROUP_OOM_HANDLE_CLOSE;
+    }
+
+    return CGROUP_OOM_HANDLE_CONTINUE;
+}
+
+/*
+ * Resolve a container cgroup path to its directory below CGROUP_MOUNTPOINT.
+ * Returns the util_path_join() result, or NULL if the path cannot be converted.
+ */
+static char *get_real_cgroup_path_v2(const char *cgroup_path)
+{
+    __isula_auto_free char *relative_path = common_convert_cgroup_path(cgroup_path);
+
+    if (relative_path != NULL) {
+        return util_path_join(CGROUP_MOUNTPOINT, relative_path);
+    }
+
+    ERROR("Failed to convert cgroup path");
+    return NULL;
+}
+
+/*
+ * Create the cgroup v2 OOM monitor for a container.
+ *
+ * fd:          not used by this implementation
+ * name:        container name, copied into the returned info
+ * cgroup_path: container cgroup path, resolved via get_real_cgroup_path_v2()
+ * exit_fifo:   path watched for deletion so the monitor can be closed on container exit
+ *
+ * On success returns an info whose oom_event_fd is an inotify descriptor watching the
+ * container's memory.events (IN_MODIFY) and exit_fifo (IN_DELETE | IN_DELETE_SELF);
+ * cgroup_file_fd stays -1. On failure everything acquired so far is released through
+ * common_free_cgroup_oom_handler_info() and NULL is returned.
+ */
+cgroup_oom_handler_info_t *get_cgroup_oom_handler_v2(int fd, const char *name, const char *cgroup_path, const char *exit_fifo)
+{
+    __isula_auto_free char *real_cgroup_path = NULL;
+    if (name == NULL || cgroup_path == NULL || exit_fifo == NULL) {
+        ERROR("Invalid arguments");
+        return NULL;
+    }
+
+    cgroup_oom_handler_info_t *info = util_common_calloc_s(sizeof(cgroup_oom_handler_info_t));
+    if (info == NULL) {
+        ERROR("Out of memory");
+        return NULL;
+    }
+
+    info->name = util_strdup_s(name);
+    // mark both descriptors as unopened so the cleanup path does not close fd 0
+    info->oom_event_fd = -1;
+    info->cgroup_file_fd = -1;
+    info->oom_event_handler = oom_cb_cgroup_v2;
+
+    real_cgroup_path = get_real_cgroup_path_v2(cgroup_path);
+    if (real_cgroup_path == NULL) {
+        ERROR("Failed to transfer cgroup path: %s", cgroup_path);
+        goto cleanup;
+    }
+
+    info->cgroup_memory_event_path = util_path_join(real_cgroup_path, "memory.events");
+    if (info->cgroup_memory_event_path == NULL) {
+        ERROR("Failed to join path");
+        goto cleanup;
+    }
+
+    // Create the descriptor close-on-exec so it is not inherited across exec, in line
+    // with the O_CLOEXEC / EFD_CLOEXEC descriptors opened by the v1 handler.
+    info->oom_event_fd = inotify_init1(IN_CLOEXEC);
+    if (info->oom_event_fd < 0) {
+        ERROR("Failed to init inotify fd");
+        goto cleanup;
+    }
+
+    if (inotify_add_watch(info->oom_event_fd, info->cgroup_memory_event_path, IN_MODIFY) < 0) {
+        ERROR("Failed to watch inotify fd for %s", info->cgroup_memory_event_path);
+        goto cleanup;
+    }
+
+    // watch exit fifo for container exit, so we can close the inotify fd
+    // because inotify cannot watch cgroup file delete event
+    if (inotify_add_watch(info->oom_event_fd, exit_fifo, IN_DELETE | IN_DELETE_SELF) < 0) {
+        ERROR("Failed to watch inotify fd for %s", exit_fifo);
+        goto cleanup;
+    }
+
+    return info;
+
+cleanup:
+    common_free_cgroup_oom_handler_info(info);
+    return NULL;
+}
+
int get_cgroup_version_v2()
{
return CGROUP_VERSION_2;
@@ -426,5 +561,6 @@ int cgroup_v2_ops_init(cgroup_ops *ops)
ops->get_cgroup_info = get_cgroup_info_v2;
ops->get_cgroup_metrics = get_cgroup_metrics_v2;
ops->get_cgroup_mnt_and_root_path = get_cgroup_mnt_and_root_v2;
+ ops->get_cgroup_oom_handler = get_cgroup_oom_handler_v2;
return 0;
}
\ No newline at end of file
diff --git a/src/daemon/entry/cri/v1/v1_cri_container_manager_service.cc b/src/daemon/entry/cri/v1/v1_cri_container_manager_service.cc
index 47a33c2c..cac5c0ba 100644
--- a/src/daemon/entry/cri/v1/v1_cri_container_manager_service.cc
+++ b/src/daemon/entry/cri/v1/v1_cri_container_manager_service.cc
@@ -1055,6 +1055,9 @@ void ContainerManagerService::UpdateBaseStatusFromInspect(
} else { // Case 3
state = runtime::v1::CONTAINER_CREATED;
}
+ if (inspect->state->oom_killed) {
+ reason = "OOMKilled";
+ }
if (inspect->state->error != nullptr) {
message = inspect->state->error;
}
diff --git a/src/daemon/modules/api/container_api.h b/src/daemon/modules/api/container_api.h
index 43d66d64..830fd696 100644
--- a/src/daemon/modules/api/container_api.h
+++ b/src/daemon/modules/api/container_api.h
@@ -221,6 +221,8 @@ void container_state_set_restarting(container_state_t *s, int exit_code);
void container_state_set_paused(container_state_t *s);
void container_state_reset_paused(container_state_t *s);
+void container_state_set_oom_killed(container_state_t *s);
+
void container_state_set_dead(container_state_t *s);
void container_state_increase_restart_count(container_state_t *s);
@@ -269,8 +271,7 @@ bool container_is_valid_state_string(const char *state);
void container_update_health_monitor(const char *container_id);
-extern int container_supervisor_add_exit_monitor(int fd, const pid_ppid_info_t *pid_info, const char *name,
- const char *runtime, bool sandbox_container);
+extern int container_supervisor_add_exit_monitor(int fd, const char *exit_fifo, const pid_ppid_info_t *pid_info, const container_t *cont);
extern char *container_exit_fifo_create(const char *cont_state_path);
diff --git a/src/daemon/modules/container/container_events_handler.c b/src/daemon/modules/container/container_events_handler.c
index b84f1ad5..109a628c 100644
--- a/src/daemon/modules/container/container_events_handler.c
+++ b/src/daemon/modules/container/container_events_handler.c
@@ -114,7 +114,7 @@ static int container_state_changed(container_t *cont, const struct isulad_events
bool has_been_manually_stopped = false;
/* only handle Exit event */
- if (events->type != EVENTS_TYPE_STOPPED1) {
+ if (events->type != EVENTS_TYPE_STOPPED1 && events->type != EVENTS_TYPE_OOM) {
return 0;
}
@@ -187,6 +187,16 @@ static int container_state_changed(container_t *cont, const struct isulad_events
}
break;
+
+ case EVENTS_TYPE_OOM: {
+ container_lock(cont);
+ container_state_set_oom_killed(cont->state);
+ if (container_state_to_disk(cont)) {
+ WARN("Failed to save container \"%s\" to disk", id);
+ }
+ container_unlock(cont);
+ break;
+ }
default:
/* ignore garbage */
break;
diff --git a/src/daemon/modules/container/container_state.c b/src/daemon/modules/container/container_state.c
index f31959fa..452a2b26 100644
--- a/src/daemon/modules/container/container_state.c
+++ b/src/daemon/modules/container/container_state.c
@@ -154,6 +154,7 @@ void container_state_set_running(container_state_t *s, const pid_ppid_info_t *pi
state->paused = false;
}
state->exit_code = 0;
+ state->oom_killed = false;
if (pid_info != NULL) {
state->pid = pid_info->pid;
@@ -222,6 +223,19 @@ void container_state_set_paused(container_state_t *s)
container_state_unlock(s);
}
+/* state set oom killed: flag the container state as OOM-killed under the state lock */
+void container_state_set_oom_killed(container_state_t *s)
+{
+    if (s != NULL && s->state != NULL) {
+        container_state_lock(s);
+        s->state->oom_killed = true;
+        container_state_unlock(s);
+    }
+}
+
/* state reset paused */
void container_state_reset_paused(container_state_t *s)
{
@@ -573,6 +587,7 @@ container_inspect_state *container_state_to_inspect_state(container_state_t *s)
state->running = s->state->running;
state->paused = s->state->paused;
state->restarting = s->state->restarting;
+ state->oom_killed = s->state->oom_killed;
state->pid = s->state->pid;
state->exit_code = s->state->exit_code;
diff --git a/src/daemon/modules/container/restore/restore.c b/src/daemon/modules/container/restore/restore.c
index 76868e28..52f68d21 100644
--- a/src/daemon/modules/container/restore/restore.c
+++ b/src/daemon/modules/container/restore/restore.c
@@ -24,6 +24,7 @@
#include <isula_libutils/container_config_v2.h>
#include <isula_libutils/host_config.h>
#include <isula_libutils/log.h>
+#include <isula_libutils/auto_cleanup.h>
#include "isulad_config.h"
@@ -44,6 +45,8 @@
#include "utils_file.h"
#include "utils_timestamp.h"
#include "id_name_manager.h"
+#include "cgroup.h"
+#include "specs_api.h"
/* restore supervisor */
static int restore_supervisor(const container_t *cont)
@@ -55,9 +58,7 @@ static int restore_supervisor(const container_t *cont)
char *exit_fifo = NULL;
char *id = cont->common_config->id;
char *statepath = cont->state_path;
- char *runtime = cont->runtime;
pid_ppid_info_t pid_info = { 0 };
- bool sandbox_container = false;
nret = snprintf(container_state, sizeof(container_state), "%s/%s", statepath, id);
if (nret < 0 || (size_t)nret >= sizeof(container_state)) {
@@ -91,11 +92,8 @@ static int restore_supervisor(const container_t *cont)
pid_info.ppid = cont->state->state->p_pid;
pid_info.start_time = cont->state->state->start_time;
pid_info.pstart_time = cont->state->state->p_start_time;
-#ifdef ENABLE_CRI_API_V1
- sandbox_container = is_sandbox_container(cont->common_config->sandbox_info);
-#endif
- if (container_supervisor_add_exit_monitor(exit_fifo_fd, &pid_info, id, runtime, sandbox_container)) {
+ if (container_supervisor_add_exit_monitor(exit_fifo_fd, exit_fifo, &pid_info, cont)) {
ERROR("Failed to add exit monitor to supervisor");
ret = -1;
goto out;
diff --git a/src/daemon/modules/container/supervisor/supervisor.c b/src/daemon/modules/container/supervisor/supervisor.c
index 63289283..1b7da383 100644
--- a/src/daemon/modules/container/supervisor/supervisor.c
+++ b/src/daemon/modules/container/supervisor/supervisor.c
@@ -41,6 +41,8 @@
#ifdef ENABLE_CRI_API_V1
#include "sandbox_ops.h"
#endif
+#include "cgroup.h"
+#include "specs_api.h"
pthread_mutex_t g_supervisor_lock = PTHREAD_MUTEX_INITIALIZER;
struct epoll_descr g_supervisor_descr;
@@ -269,24 +271,52 @@ static int supervisor_exit_cb(int fd, uint32_t events, void *cbdata, struct epol
return EPOLL_LOOP_HANDLE_CONTINUE;
}
+/*
+ * epoll callback for a container's OOM notification fd.
+ * Runs the backend handler stored in cbdata; unless that handler asks to continue,
+ * the fd is removed from the supervisor epoll loop and the handler info is freed.
+ * Missing callback data or a missing handler is treated as "close".
+ * Always returns EPOLL_LOOP_HANDLE_CONTINUE.
+ */
+static int oom_handle_cb(int fd, uint32_t events, void *cbdata, struct epoll_descr *descr)
+{
+    cgroup_oom_handler_info_t *handler_info = (cgroup_oom_handler_info_t *)cbdata;
+    bool can_dispatch = (handler_info != NULL && handler_info->oom_event_handler != NULL);
+
+    if (can_dispatch && handler_info->oom_event_handler(fd, handler_info) == CGROUP_OOM_HANDLE_CONTINUE) {
+        return EPOLL_LOOP_HANDLE_CONTINUE;
+    }
+
+    // supervisor only handle one oom event, so we remove the handler directly
+    supervisor_handler_lock();
+    epoll_loop_del_handler(&g_supervisor_descr, fd);
+    supervisor_handler_unlock();
+
+    common_free_cgroup_oom_handler_info(handler_info);
+
+    return EPOLL_LOOP_HANDLE_CONTINUE;
+}
+
/* supervisor add exit monitor */
-int container_supervisor_add_exit_monitor(int fd, const pid_ppid_info_t *pid_info, const char *name,
- const char *runtime, bool sandbox_container)
+int container_supervisor_add_exit_monitor(int fd, const char *exit_fifo, const pid_ppid_info_t *pid_info, const container_t *cont)
{
int ret = 0;
struct supervisor_handler_data *data = NULL;
+ cgroup_oom_handler_info_t *oom_handler_info = NULL;
+ __isula_auto_free char *cgroup_path = NULL;
if (fd < 0) {
ERROR("Invalid exit fifo fd");
return -1;
}
- if (pid_info == NULL || name == NULL || runtime == NULL) {
+ if (pid_info == NULL || cont == NULL || cont->common_config == NULL) {
ERROR("Invalid input arguments");
close(fd);
return -1;
}
+ cgroup_path = merge_container_cgroups_path(cont->common_config->id, cont->hostconfig);
+ if (cgroup_path == NULL) {
+ ERROR("Failed to get cgroup path");
+ close(fd);
+ return -1;
+ }
+
data = util_common_calloc_s(sizeof(struct supervisor_handler_data));
if (data == NULL) {
ERROR("Memory out");
@@ -295,15 +325,26 @@ int container_supervisor_add_exit_monitor(int fd, const pid_ppid_info_t *pid_inf
}
data->fd = fd;
- data->name = util_strdup_s(name);
- data->runtime = util_strdup_s(runtime);
- data->is_sandbox_container = sandbox_container;
+ data->name = util_strdup_s(cont->common_config->id);
+ data->runtime = util_strdup_s(cont->runtime);
+#ifdef ENABLE_CRI_API_V1
+ data->is_sandbox_container = is_sandbox_container(cont->common_config->sandbox_info);
+#endif
data->pid_info.pid = pid_info->pid;
data->pid_info.start_time = pid_info->start_time;
data->pid_info.ppid = pid_info->ppid;
data->pid_info.pstart_time = pid_info->pstart_time;
+ oom_handler_info = common_get_cgroup_oom_handler(fd, cont->common_config->id, cgroup_path, exit_fifo);
supervisor_handler_lock();
+ if (oom_handler_info != NULL) {
+ ret = epoll_loop_add_handler(&g_supervisor_descr, oom_handler_info->oom_event_fd, oom_handle_cb, oom_handler_info);
+ if (ret != 0) {
+ ERROR("Failed to add handler for oom event");
+ goto err;
+ }
+ }
+
ret = epoll_loop_add_handler(&g_supervisor_descr, fd, supervisor_exit_cb, data);
if (ret != 0) {
ERROR("Failed to add handler for exit fifo");
@@ -314,6 +355,7 @@ int container_supervisor_add_exit_monitor(int fd, const pid_ppid_info_t *pid_inf
err:
supervisor_handler_data_free(data);
+ common_free_cgroup_oom_handler_info(oom_handler_info);
out:
supervisor_handler_unlock();
return ret;
diff --git a/src/daemon/modules/events/collector.c b/src/daemon/modules/events/collector.c
index fb4a7fea..af688742 100644
--- a/src/daemon/modules/events/collector.c
+++ b/src/daemon/modules/events/collector.c
@@ -133,6 +133,9 @@ static container_events_type_t lcrsta2Evetype(int value)
case THAWED:
et = EVENTS_TYPE_THAWED;
break;
+ case OOM:
+ et = EVENTS_TYPE_OOM;
+ break;
default:
et = EVENTS_TYPE_EXIT;
break;
@@ -822,8 +825,8 @@ static int post_event_to_events_hander(const struct isulad_events_format *events
return -1;
}
- /* only post STOPPED event to events_hander */
- if (events->type != EVENTS_TYPE_STOPPED1) {
+ /* only post STOPPED event and OOM event to events_hander */
+ if (events->type != EVENTS_TYPE_STOPPED1 && events->type != EVENTS_TYPE_OOM) {
return 0;
}
diff --git a/src/daemon/modules/service/service_container.c b/src/daemon/modules/service/service_container.c
index a8090d5a..eb7ce4f4 100644
--- a/src/daemon/modules/service/service_container.c
+++ b/src/daemon/modules/service/service_container.c
@@ -275,14 +275,13 @@ static void clean_resources_on_failure(const container_t *cont, const char *engi
return;
}
-static int do_post_start_on_success(const char *id, const char *runtime, bool sandbox_container,
- const char *pidfile, int exit_fifo_fd,
- const pid_ppid_info_t *pid_info)
+static int do_post_start_on_success(container_t *cont, int exit_fifo_fd,
+ const char *exit_fifo, const pid_ppid_info_t *pid_info)
{
int ret = 0;
// exit_fifo_fd was closed in container_supervisor_add_exit_monitor
- if (container_supervisor_add_exit_monitor(exit_fifo_fd, pid_info, id, runtime, sandbox_container)) {
+ if (container_supervisor_add_exit_monitor(exit_fifo_fd, exit_fifo, pid_info, cont)) {
ERROR("Failed to add exit monitor to supervisor");
ret = -1;
}
@@ -750,7 +749,6 @@ static int do_start_container(container_t *cont, const char *console_fifos[], bo
oci_runtime_spec *oci_spec = NULL;
rt_create_params_t create_params = { 0 };
rt_start_params_t start_params = { 0 };
- bool sandbox_container = false;
nret = snprintf(bundle, sizeof(bundle), "%s/%s", cont->root_path, id);
if (nret < 0 || (size_t)nret >= sizeof(bundle)) {
@@ -899,7 +897,6 @@ static int do_start_container(container_t *cont, const char *console_fifos[], bo
if (cont->common_config->sandbox_info != NULL) {
create_params.task_addr = cont->common_config->sandbox_info->task_address;
}
- sandbox_container = is_sandbox_container(cont->common_config->sandbox_info);
#endif
if (runtime_create(id, runtime, &create_params) != 0) {
@@ -924,7 +921,7 @@ static int do_start_container(container_t *cont, const char *console_fifos[], bo
ret = runtime_start(id, runtime, &start_params, pid_info);
if (ret == 0) {
- if (do_post_start_on_success(id, runtime, sandbox_container, pidfile, exit_fifo_fd, pid_info) != 0) {
+ if (do_post_start_on_success(cont, exit_fifo_fd, exit_fifo, pid_info) != 0) {
ERROR("Failed to do post start on runtime start success");
ret = -1;
goto clean_resources;
--
2.34.1

Опубликовать ( 0 )

Вы можете оставить комментарий после Вход в систему

1
https://api.gitlife.ru/oschina-mirror/src-openeuler-iSulad.git
git@api.gitlife.ru:oschina-mirror/src-openeuler-iSulad.git
oschina-mirror
src-openeuler-iSulad
src-openeuler-iSulad
master