summaryrefslogtreecommitdiff
path: root/0419-cluster-afr-fix-race-when-bricks-come-up.patch
diff options
context:
space:
mode:
Diffstat (limited to '0419-cluster-afr-fix-race-when-bricks-come-up.patch')
-rw-r--r--0419-cluster-afr-fix-race-when-bricks-come-up.patch104
1 files changed, 104 insertions, 0 deletions
diff --git a/0419-cluster-afr-fix-race-when-bricks-come-up.patch b/0419-cluster-afr-fix-race-when-bricks-come-up.patch
new file mode 100644
index 0000000..ea8c2ea
--- /dev/null
+++ b/0419-cluster-afr-fix-race-when-bricks-come-up.patch
@@ -0,0 +1,104 @@
+From b9b479de2a7fd1c5eefa7aa1142e0a39e0c96ca9 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Sun, 1 Mar 2020 19:49:04 +0100
+Subject: [PATCH 419/449] cluster/afr: fix race when bricks come up
+
+The was a problem when self-heal was sending lookups at the same time
+that one of the bricks was coming up. In this case there was a chance
+that the number of 'up' bricks changes in the middle of sending the
+requests to subvolumes which caused a discrepancy in the expected
+number of replies and the actual number of sent requests.
+
+This discrepancy caused that AFR continued executing requests before
+all requests were complete. Eventually, the frame of the pending
+request was destroyed when the operation terminated, causing a use-
+after-free issue when the answer was finally received.
+
+In theory the same thing could happen in the reverse way, i.e. AFR
+tries to wait for more replies than sent requests, causing a hang.
+
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/24191
+> Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+> Fixes: bz#1808875
+
+BUG: 1794663
+Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202489
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heal-common.c | 6 +++---
+ xlators/cluster/afr/src/afr-self-heal-name.c | 4 +++-
+ xlators/cluster/afr/src/afr-self-heal.h | 7 +++++--
+ 3 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index ce1ea50..d942ccf 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -1869,12 +1869,12 @@ int
+ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies)
+ {
+- afr_private_t *priv = NULL;
++ afr_local_t *local = NULL;
+
+- priv = frame->this->private;
++ local = frame->local;
+
+ return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
+- priv->child_up);
++ local->child_up);
+ }
+
+ unsigned int
+diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
+index 7d4f208..dace071 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-name.c
++++ b/xlators/cluster/afr/src/afr-self-heal-name.c
+@@ -560,13 +560,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+ int first_idx = -1;
++ afr_local_t *local = NULL;
+
+ priv = this->private;
++ local = frame->local;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+- priv->child_up, NULL);
++ local->child_up, NULL);
+ if (!inode)
+ return -ENOMEM;
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
+index 8234cec..f7ecf5d 100644
+--- a/xlators/cluster/afr/src/afr-self-heal.h
++++ b/xlators/cluster/afr/src/afr-self-heal.h
+@@ -46,13 +46,16 @@
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+- int __count = AFR_COUNT(list, __priv->child_count); \
++ int __count = 0; \
++ unsigned char *__list = alloca(__priv->child_count); \
+ \
++ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
++ __count = AFR_COUNT(__list, __priv->child_count); \
+ __local->barrier.waitfor = __count; \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+- if (!list[__i]) \
++ if (!__list[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+--
+1.8.3.1
+