summaryrefslogtreecommitdiff
path: root/0514-afr-event-gen-changes.patch
diff options
context:
space:
mode:
Diffstat (limited to '0514-afr-event-gen-changes.patch')
-rw-r--r--0514-afr-event-gen-changes.patch308
1 files changed, 308 insertions, 0 deletions
diff --git a/0514-afr-event-gen-changes.patch b/0514-afr-event-gen-changes.patch
new file mode 100644
index 0000000..9f9562e
--- /dev/null
+++ b/0514-afr-event-gen-changes.patch
@@ -0,0 +1,308 @@
+From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Wed, 23 Dec 2020 14:57:51 +0530
+Subject: [PATCH 514/517] afr: event gen changes
+
+The general idea of the changes is to prevent resetting event generation
+to zero in the inode ctx, since event gen is something that should
+follow 'causal order'.
+
+Change #1:
+For a read txn, in inode refresh cbk, if event_generation is
+found zero, we are failing the read fop. This is not needed
+because change in event gen is only a marker for the next inode refresh to
+happen and should not be taken into account by the current read txn.
+
+Change #2:
+The event gen being zero above can happen if there is a racing lookup,
+which resets even get (in afr_lookup_done) if there are non zero afr
+xattrs. The resetting is done only to trigger an inode refresh and a
+possible client side heal on the next lookup. That can be acheived by
+setting the need_refresh flag in the inode ctx. So replaced all
+occurences of resetting even gen to zero with a call to
+afr_inode_need_refresh_set().
+
+Change #3:
+In both lookup and discover path, we are doing an inode refresh which is
+not required since all 3 essentially do the same thing- update the inode
+ctx with the good/bad copies from the brick replies. Inode refresh also
+triggers background heals, but I think it is okay to do it when we call
+refresh during the read and write txns and not in the lookup path.
+
+The .ts which relied on inode refresh in lookup path to trigger heals are
+now changed to do read txn so that inode refresh and the heal happens.
+
+Upstream patch details:
+> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
+> Fixes: #1179
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+> Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/
+
+BUG: 1640148
+Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/222074
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ ...fid-mismatch-resolution-with-fav-child-policy.t | 8 +-
+ xlators/cluster/afr/src/afr-common.c | 92 +++++-----------------
+ xlators/cluster/afr/src/afr-dir-write.c | 6 +-
+ xlators/cluster/afr/src/afr.h | 5 +-
+ 4 files changed, 29 insertions(+), 82 deletions(-)
+
+diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
+index f4aa351..12af0c8 100644
+--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
++++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
+@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
+ #We know that second brick has the bigger size file
+ BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1)
+
+-TEST ls $M0/f3
+-TEST cat $M0/f3
++TEST ls $M0 #Trigger entry heal via readdir inode refresh
++TEST cat $M0/f3 #Trigger data heal via readv inode refresh
+ EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+ #gfid split-brain should be resolved
+@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+-TEST ls $M0/f4
+-TEST cat $M0/f4
++TEST ls $M0 #Trigger entry heal via readdir inode refresh
++TEST cat $M0/f4 #Trigger data heal via readv inode refresh
+ EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+ #gfid split-brain should be resolved
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index fca2cd5..90b4f14 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
+ metadatamap |= (1 << index);
+ }
+ if (metadatamap_old != metadatamap) {
+- event = 0;
++ __afr_inode_need_refresh_set(inode, this);
+ }
+ break;
+
+@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
+ datamap |= (1 << index);
+ }
+ if (datamap_old != datamap)
+- event = 0;
++ __afr_inode_need_refresh_set(inode, this);
+ break;
+
+ default:
+@@ -461,34 +461,6 @@ out:
+ }
+
+ int
+-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
+-{
+- int ret = -1;
+- uint16_t datamap = 0;
+- uint16_t metadatamap = 0;
+- uint32_t event = 0;
+- uint64_t val = 0;
+- afr_inode_ctx_t *ctx = NULL;
+-
+- ret = __afr_inode_ctx_get(this, inode, &ctx);
+- if (ret)
+- return ret;
+-
+- val = ctx->read_subvol;
+-
+- metadatamap = (val & 0x000000000000ffff) >> 0;
+- datamap = (val & 0x00000000ffff0000) >> 16;
+- event = 0;
+-
+- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+- (((uint64_t)event) << 32);
+-
+- ctx->read_subvol = val;
+-
+- return ret;
+-}
+-
+-int
+ __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
+ {
+@@ -559,22 +531,6 @@ out:
+ }
+
+ int
+-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+-{
+- afr_private_t *priv = NULL;
+- int ret = -1;
+-
+- priv = this->private;
+-
+- if (priv->child_count <= 16)
+- ret = __afr_inode_event_gen_reset_small(inode, this);
+- else
+- ret = -1;
+-
+- return ret;
+-}
+-
+-int
+ afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
+ {
+@@ -723,30 +679,22 @@ out:
+ return need_refresh;
+ }
+
+-static int
+-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
++int
++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+ {
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+
+- GF_VALIDATE_OR_GOTO(this->name, inode, out);
+-
+- LOCK(&inode->lock);
+- {
+- ret = __afr_inode_ctx_get(this, inode, &ctx);
+- if (ret)
+- goto unlock;
+-
++ ret = __afr_inode_ctx_get(this, inode, &ctx);
++ if (ret == 0) {
+ ctx->need_refresh = _gf_true;
+ }
+-unlock:
+- UNLOCK(&inode->lock);
+-out:
++
+ return ret;
+ }
+
+ int
+-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+ {
+ int ret = -1;
+
+@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+
+ LOCK(&inode->lock);
+ {
+- ret = __afr_inode_event_gen_reset(inode, this);
++ ret = __afr_inode_need_refresh_set(inode, this);
+ }
+ UNLOCK(&inode->lock);
+ out:
+@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
+ ret = afr_inode_get_readable(frame, inode, this, local->readable,
+ &event_generation, local->transaction.type);
+
+- if (ret == -EIO || (local->is_read_txn && !event_generation)) {
++ if (ret == -EIO) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = EIO;
+@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
+ if (read_subvol == -1)
+ goto cant_interpret;
+ if (ret) {
+- afr_inode_event_gen_reset(local->inode, this);
++ afr_inode_need_refresh_set(local->inode, this);
+ dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
+ }
+ } else {
+@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
++ int ret = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *success_replies = NULL;
+
+@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
+ if (!afr_has_quorum(success_replies, this, frame))
+ goto unwind;
+
+- afr_replies_interpret(frame, this, local->inode, NULL);
++ ret = afr_replies_interpret(frame, this, local->inode, NULL);
++ if (ret) {
++ afr_inode_need_refresh_set(local->inode, this);
++ }
+
+ read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
+ data_readable);
+@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+ afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+
+- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
+- local->event_generation))
+- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
+- else
+- afr_discover_do(frame, this, 0);
++ afr_discover_do(frame, this, 0);
+
+ return 0;
+ out:
+@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+ afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+
+- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
+- local->event_generation))
+- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
+- else
+- afr_lookup_do(frame, this, 0);
++ afr_lookup_do(frame, this, 0);
+
+ return 0;
+ out:
+diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
+index 416c19d..d419bfc 100644
+--- a/xlators/cluster/afr/src/afr-dir-write.c
++++ b/xlators/cluster/afr/src/afr-dir-write.c
+@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+- afr_inode_event_gen_reset(local->inode, this);
++ afr_inode_need_refresh_set(local->inode, this);
+ if (local->parent)
+- afr_inode_event_gen_reset(local->parent, this);
++ afr_inode_need_refresh_set(local->parent, this);
+ if (local->parent2)
+- afr_inode_event_gen_reset(local->parent2, this);
++ afr_inode_need_refresh_set(local->parent2, this);
+ continue;
+ }
+
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index ed5096e..3a2b26d 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
+ int event_generation);
+
+ int
+-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
++
++int
++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
+
+ int
+ afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+--
+1.8.3.1
+