1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
|
From ea7f11b989896d76b8d091d26bc0241bce9413f8 Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Thu, 4 Jul 2019 13:21:33 +0200
Subject: [PATCH 253/255] core: fix deadlock between statedump and
fd_anonymous()
There exists a deadlock between statedump generation and fd_anonymous()
function because they are acquiring inode table lock and inode lock in
reverse order.
This patch modifies fd_anonymous() so that it takes inode lock only when
it's really necessary, avoiding the deadlock.
Upstream patch:
> Change-Id: I24355447f0ea1b39e2546782ad07f0512cc381e7
> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22995
> BUG: 1727068
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Change-Id: I24355447f0ea1b39e2546782ad07f0512cc381e7
Fixes: bz#1722209
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/176096
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/fd.c | 137 ++++++++++++++++++++++----------------------------
1 file changed, 61 insertions(+), 76 deletions(-)
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index b8aac72..314546a 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -532,7 +532,7 @@ fd_unref(fd_t *fd)
return;
}
-fd_t *
+static fd_t *
__fd_bind(fd_t *fd)
{
list_del_init(&fd->inode_list);
@@ -562,9 +562,9 @@ fd_bind(fd_t *fd)
}
static fd_t *
-__fd_create(inode_t *inode, uint64_t pid)
+fd_allocate(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd;
if (inode == NULL) {
gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
@@ -573,64 +573,67 @@ __fd_create(inode_t *inode, uint64_t pid)
}
fd = mem_get0(inode->table->fd_mem_pool);
- if (!fd)
- goto out;
+ if (fd == NULL) {
+ return NULL;
+ }
fd->xl_count = inode->table->xl->graph->xl_count + 1;
fd->_ctx = GF_CALLOC(1, (sizeof(struct _fd_ctx) * fd->xl_count),
gf_common_mt_fd_ctx);
- if (!fd->_ctx)
- goto free_fd;
+ if (fd->_ctx == NULL) {
+ goto failed;
+ }
fd->lk_ctx = fd_lk_ctx_create();
- if (!fd->lk_ctx)
- goto free_fd_ctx;
-
- fd->inode = inode_ref(inode);
- fd->pid = pid;
- INIT_LIST_HEAD(&fd->inode_list);
-
- LOCK_INIT(&fd->lock);
-out:
- return fd;
+ if (fd->lk_ctx != NULL) {
+ /* We need to take a reference from the inode, but we cannot do it
+ * here because this function can be called with the inode lock taken
+ * and inode_ref() takes the inode's table lock. This is the reverse
+ * of the logical lock acquisition order and can cause a deadlock. So
+ * we simply assign the inode here and we delefate the inode reference
+ * responsibility to the caller (when this function succeeds and the
+ * inode lock is released). This is safe because the caller must hold
+ * a reference of the inode to use it, so it's guaranteed that the
+ * number of references won't reach 0 before the caller finishes.
+ *
+ * TODO: minimize use of locks in favor of atomic operations to avoid
+ * these dependencies. */
+ fd->inode = inode;
+ fd->pid = pid;
+ INIT_LIST_HEAD(&fd->inode_list);
+ LOCK_INIT(&fd->lock);
+ GF_ATOMIC_INIT(fd->refcount, 1);
+ return fd;
+ }
-free_fd_ctx:
GF_FREE(fd->_ctx);
-free_fd:
+
+failed:
mem_put(fd);
return NULL;
}
fd_t *
-fd_create(inode_t *inode, pid_t pid)
+fd_create_uint64(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
-
- fd = __fd_create(inode, (uint64_t)pid);
- if (!fd)
- goto out;
+ fd_t *fd;
- fd = fd_ref(fd);
+ fd = fd_allocate(inode, pid);
+ if (fd != NULL) {
+ /* fd_allocate() doesn't get a reference from the inode. We need to
+ * take it here in case of success. */
+ inode_ref(inode);
+ }
-out:
return fd;
}
fd_t *
-fd_create_uint64(inode_t *inode, uint64_t pid)
+fd_create(inode_t *inode, pid_t pid)
{
- fd_t *fd = NULL;
-
- fd = __fd_create(inode, pid);
- if (!fd)
- goto out;
-
- fd = fd_ref(fd);
-
-out:
- return fd;
+ return fd_create_uint64(inode, (uint64_t)pid);
}
static fd_t *
@@ -719,10 +722,13 @@ __fd_lookup_anonymous(inode_t *inode, int32_t flags)
return fd;
}
-static fd_t *
-__fd_anonymous(inode_t *inode, int32_t flags)
+fd_t *
+fd_anonymous_with_flags(inode_t *inode, int32_t flags)
{
fd_t *fd = NULL;
+ bool ref = false;
+
+ LOCK(&inode->lock);
fd = __fd_lookup_anonymous(inode, flags);
@@ -730,54 +736,33 @@ __fd_anonymous(inode_t *inode, int32_t flags)
__fd_lookup_anonymous(), so no need of one more fd_ref().
if (!fd); then both create and bind won't bump up the ref
count, so we have to call fd_ref() after bind. */
- if (!fd) {
- fd = __fd_create(inode, 0);
-
- if (!fd)
- return NULL;
-
- fd->anonymous = _gf_true;
- fd->flags = GF_ANON_FD_FLAGS | flags;
+ if (fd == NULL) {
+ fd = fd_allocate(inode, 0);
+ if (fd != NULL) {
+ fd->anonymous = _gf_true;
+ fd->flags = GF_ANON_FD_FLAGS | (flags & O_DIRECT);
- __fd_bind(fd);
+ __fd_bind(fd);
- __fd_ref(fd);
+ ref = true;
+ }
}
- return fd;
-}
-
-fd_t *
-fd_anonymous(inode_t *inode)
-{
- fd_t *fd = NULL;
+ UNLOCK(&inode->lock);
- LOCK(&inode->lock);
- {
- fd = __fd_anonymous(inode, GF_ANON_FD_FLAGS);
+ if (ref) {
+ /* fd_allocate() doesn't get a reference from the inode. We need to
+ * take it here in case of success. */
+ inode_ref(inode);
}
- UNLOCK(&inode->lock);
return fd;
}
fd_t *
-fd_anonymous_with_flags(inode_t *inode, int32_t flags)
+fd_anonymous(inode_t *inode)
{
- fd_t *fd = NULL;
-
- if (flags & O_DIRECT)
- flags = GF_ANON_FD_FLAGS | O_DIRECT;
- else
- flags = GF_ANON_FD_FLAGS;
-
- LOCK(&inode->lock);
- {
- fd = __fd_anonymous(inode, flags);
- }
- UNLOCK(&inode->lock);
-
- return fd;
+ return fd_anonymous_with_flags(inode, 0);
}
fd_t *
--
1.8.3.1
|