1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
From: Raghavendra Bhat <raghavendra@redhat.com>
Date: Mon, 15 Apr 2019 14:09:34 -0400
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
cancelling the signer thread
When bit-rot feature is disabled, the signer thread from the bit-rot-stub
xlator (the thread which performs the setxattr of the signature on to the
disk) is cancelled. But, if the cancelled signer thread had already held
the mutex (&priv->lock) which it uses to monitor the queue of files to
be signed, then the mutex is never released. This creates problems in
future when the feature is enabled again. Both the new instance of the
signer thread and the regular thread which enqueues the files to be
signed will be blocked on this mutex.
So, as part of cancelling the signer thread, unlock the mutex associated
with it as well using pthread_cleanup_push and pthread_cleanup_pop.
Upstream patch:
> patch: https://review.gluster.org/22572
> fixes: #bz1700078
> Change-Id: Ib761910caed90b268e69794ddeb108165487af40
Change-Id: Ib761910caed90b268e69794ddeb108165487af40
BUG: 1851424
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
.../bit-rot/src/stub/bit-rot-stub-messages.h | 4 +-
xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 +++++++++++++++++++---
2 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
index 7f07f29..155802b 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
+ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
+ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
#endif /* !_BITROT_STUB_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 3f48a4b..c3f81bc 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -26,6 +26,15 @@
#define BR_STUB_REQUEST_COOKIE 0x1
+void
+br_stub_lock_cleaner(void *arg)
+{
+ pthread_mutex_t *clean_mutex = arg;
+
+ pthread_mutex_unlock(clean_mutex);
+ return;
+}
+
void *
br_stub_signth(void *);
@@ -166,8 +175,11 @@ init(xlator_t *this)
ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
"brssign");
- if (ret != 0)
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+ "failed to create the new thread for signer");
goto cleanup_lock;
+ }
ret = br_stub_bad_object_container_init(this, priv);
if (ret) {
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
priv = this->private;
GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
- if (priv->do_versioning) {
+ if (priv->do_versioning && !priv->signth) {
ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
"brssign");
- if (ret != 0)
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+ "failed to create the new thread for signer");
goto err;
+ }
ret = br_stub_bad_object_container_init(this, priv);
if (ret) {
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
gf_msg(this->name, GF_LOG_ERROR, 0,
BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
"Could not cancel sign serializer thread");
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
+ "killed the signer thread");
+ priv->signth = 0;
}
- priv->signth = 0;
}
if (priv->container.thread) {
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
THIS = this;
while (1) {
+ /*
+ * Disabling bit-rot feature leads to this particular thread
+ * getting cleaned up by reconfigure via a call to the function
+ * gf_thread_cleanup_xint (which in turn calls pthread_cancel
+ * and pthread_join). But, if this thread had held the mutex
+ * &priv->lock at the time of cancellation, then it leads to
+ * deadlock in future when bit-rot feature is enabled (which
+ * again spawns this thread which cant hold the lock as the
+ * mutex is still held by the previous instance of the thread
+ * which got killed). Also, the br_stub_handle_object_signature
+ * function which is called whenever file has to be signed
+ * also gets blocked as it too attempts to acquire &priv->lock.
+ *
+ * So, arrange for the lock to be unlocked as part of the
+ * cleanup of this thread using pthread_cleanup_push and
+ * pthread_cleanup_pop.
+ */
+ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
pthread_mutex_lock(&priv->lock);
{
while (list_empty(&priv->squeue))
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
list_del_init(&sigstub->list);
}
pthread_mutex_unlock(&priv->lock);
+ pthread_cleanup_pop(0);
call_resume(sigstub->stub);
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- if (frame->root->pid != GF_CLIENT_PID_BITD)
+ if (frame->root->pid != GF_CLIENT_PID_BITD) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
+ "PID %d from where signature request"
+ "came, does not belong to bit-rot daemon."
+ "Unwinding the fop",
+ frame->root->pid);
goto dofop;
+ }
ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
- if (ret)
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "failed to prepare the signature for %s. Unwinding the fop",
+ uuid_utoa(fd->inode->gfid));
goto dofop;
+ }
if (fakesuccess) {
op_ret = op_errno = 0;
goto dofop;
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
/* object signature request */
ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
if (!ret) {
+ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
+ uuid_utoa(fd->inode->gfid));
br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
goto done;
}
--
1.8.3.1
|