summaryrefslogtreecommitdiff
path: root/bugfix-also-stop-machine-when-a-machine-un.patch
blob: 5d93a9103f8d98818b40b404302982c5227ae614 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
From 89110c823f246d3d2c398652999826107da446bf Mon Sep 17 00:00:00 2001
From: yangbin <robin.yb@huawei.com>
Date: Tue, 7 Apr 2020 12:01:39 +0800
Subject: [PATCH] systemd-machined: Also stop machine when a machine unit is
 active but the leader process is exited

When a VM machine is created in the scenario below, it remains registered in systemd-machined even though it has already been terminated by libvirtd.
1. libvirtd sends a request to systemd-machined with the leader (the PID of the VM) to create a machine.
2. systemd-machined forwards the request to systemd.
3. systemd constructs a scope and creates a cgroup for the machine. The scope unit is then added to the job queue and will be started later.
4. The leader process (the PID of the VM) is terminated by libvirtd (for some reason) before the scope is started.
5. Since the scope unit has not been started yet, systemd does not destroy the scope although it is notified by the signal event.
6. systemd starts the scope; now the scope and the machine are active, but no leader process exists.
7. systemd-machined does not stop and destroy the machine, so it remains in the system until the scope is stopped by someone else or the OS is restarted.

This patch fixes the problem by answering yes to stopping the machine in machine_may_gc
when the machine unit is active but the leader process has already exited and the unit's cgroup is empty.

Change-Id: I80e3c32832f4ecf08b6cb149735978730ce1d1c0
---
 src/machine/machine.c       | 37 ++++++++++++++++++++++++++++++++++++-
 src/machine/machined-dbus.c | 35 +++++++++++++++++++++++++++++++++++
 src/machine/machined.h      |  1 +
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/src/machine/machine.c b/src/machine/machine.c
index 44ff5c1..2519fd7 100644
--- a/src/machine/machine.c
+++ b/src/machine/machine.c
@@ -34,6 +34,7 @@
 #include "tmpfile-util.h"
 #include "unit-name.h"
 #include "user-util.h"
+#include "cgroup-util.h"
 
 DEFINE_TRIVIAL_CLEANUP_FUNC(Machine*, machine_free);
 
@@ -534,6 +535,40 @@ int machine_finalize(Machine *m) {
         return 0;
 }
 
+/* Tells whether an active machine unit still backs a live machine: true keeps the machine,
+ * false (unit cgroup holds no process anymore) lets machine_may_gc() collect it. */
+static bool machine_validate_unit(Machine *m) {
+        _cleanup_free_ char *leader_unit = NULL, *cgroup_path = NULL;
+        int r;
+
+        r = cg_pid_get_unit(m->leader.pid, &leader_unit);
+        if (r == 0 && streq(m->unit, leader_unit))
+                return true;
+
+        /* A mismatch is only logged, not acted upon: the original leader may exit and be replaced
+         * with a new leader when qemu hotreplace is performed. The cgroup check below decides. */
+        if (r == -ESRCH)
+                log_info("Machine unit is in active, but the leader process is exited. "
+                        "machine: %s, leader: "PID_FMT", unit: %s.", m->name, m->leader.pid, m->unit);
+        else if (r != 0)
+                log_info_errno(r, "Can not get unit from cgroup. "
+                        "machine: %s, leader: "PID_FMT", unit: %s, error: %m", m->name, m->leader.pid, m->unit);
+        else if (leader_unit && !streq(m->unit, leader_unit))
+                log_info("Machine unit name not match. "
+                        "machine: %s, leader: "PID_FMT", machine unit: %s, real unit: %s", m->name, m->leader.pid, m->unit, leader_unit);
+
+        r = manager_get_unit_cgroup_path(m->manager, m->unit, &cgroup_path);
+        if (r != 0 || isempty(cgroup_path))
+                return true;
+        if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup_path) <= 0)
+                return true;
+
+        /* The machine is handed to the gc only when no process at all is left in the scope. */
+        log_info("Cgroup is empty in the machine unit. "
+                "machine: %s, leader: "PID_FMT", machine unit: %s.", m->name, m->leader.pid, m->unit);
+        return false;
+}
+
 bool machine_may_gc(Machine *m, bool drop_not_started) {
         assert(m);
 
@@ -546,7 +581,7 @@ bool machine_may_gc(Machine *m, bool drop_not_started) {
         if (m->scope_job && manager_job_is_active(m->manager, m->scope_job))
                 return false;
 
-        if (m->unit && manager_unit_is_active(m->manager, m->unit))
+        if (m->unit && manager_unit_is_active(m->manager, m->unit) && machine_validate_unit(m))
                 return false;
 
         return true;
diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c
index 9fec047..938f42b 100644
--- a/src/machine/machined-dbus.c
+++ b/src/machine/machined-dbus.c
@@ -1514,3 +1514,38 @@ int manager_add_machine(Manager *m, const char *name, Machine **_machine) {
 
         return 0;
 }
+
+/* Asks systemd over the bus for the "ControlGroup" property of the given unit.
+ * Returns 0 and stores a newly allocated copy in *cgroup (the caller must free it),
+ * or a negative errno-style value when the lookup, the parsing or the allocation fails. */
+int manager_get_unit_cgroup_path(Manager *manager, const char *unit, char **cgroup) {
+        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+        _cleanup_free_ char *path = NULL;
+        const char *cgroup_path = NULL;
+        int r;
+
+        assert(manager);
+        assert(unit);
+        assert(cgroup);
+
+        path = unit_dbus_path_from_name(unit);
+        if (!path)
+                return -ENOMEM;
+
+        r = sd_bus_get_property(manager->bus, "org.freedesktop.systemd1", path,
+                        endswith(unit, ".scope") ? "org.freedesktop.systemd1.Scope" : "org.freedesktop.systemd1.Service",
+                        "ControlGroup", &error, &reply, "s");
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_read(reply, "s", &cgroup_path);
+        if (r < 0)
+                return -EINVAL;
+
+        /* A failed strdup() must not be reported as success with *cgroup left NULL. */
+        *cgroup = strdup(cgroup_path);
+        if (!*cgroup)
+                return -ENOMEM;
+        return 0;
+}
diff --git a/src/machine/machined.h b/src/machine/machined.h
index 280c32b..6b8d98b 100644
--- a/src/machine/machined.h
+++ b/src/machine/machined.h
@@ -58,6 +58,7 @@ int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_erro
 int manager_unref_unit(Manager *m, const char *unit, sd_bus_error *error);
 int manager_unit_is_active(Manager *manager, const char *unit);
 int manager_job_is_active(Manager *manager, const char *path);
+int manager_get_unit_cgroup_path(Manager *manager, const char *unit, char **cgroup);
 
 #if ENABLE_NSCD
 int manager_enqueue_nscd_cache_flush(Manager *m);
-- 
2.33.0