summaryrefslogtreecommitdiff
path: root/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch
diff options
context:
space:
mode:
Diffstat (limited to '0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch')
-rw-r--r--0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch216
1 files changed, 216 insertions, 0 deletions
diff --git a/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch b/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch
new file mode 100644
index 0000000..a96b66e
--- /dev/null
+++ b/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch
@@ -0,0 +1,216 @@
+From 91936fe5ef854bd9d2f91e643795d0e7791b97ba Mon Sep 17 00:00:00 2001
+From: Harpreet Kaur <hlalwani@redhat.com>
+Date: Mon, 7 Jan 2019 16:38:25 +0530
+Subject: [PATCH 426/449] geo-rep: Fix for "Transport End Point not connected"
+ issue
+
+problem: Geo-rep gsyncd process mounts the master and slave volume
+ on master nodes and slave nodes respectively and starts
+ the sync. But it doesn't wait for the mount to be in ready
+ state to accept I/O. The gluster mount is considered to be
+ ready when all the distribute sub-volumes is up. If the all
+ the distribute subvolumes are not up, it can cause ENOTCONN
+ error, when lookup on file comes and file is on the subvol
+ that is down.
+
+solution: Added a Virtual Xattr "dht.subvol.status" which returns "1"
+ if all subvols are up and "0" if all subvols are not up.
+ Geo-rep then uses this virtual xattr after a fresh mount, to
+ check whether all subvols are up or not and then starts the
+ I/O.
+
+>fixes: bz#1664335
+>Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
+>Signed-off-by: Harpreet Kaur <hlalwani@redhat.com>
+>Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/22001/
+BUG: 1640573
+Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202554
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/resource.py | 11 ++++++
+ geo-replication/syncdaemon/syncdutils.py | 20 +++++++++--
+ xlators/cluster/dht/src/dht-common.c | 59 ++++++++++++++++++++++++++++++++
+ xlators/cluster/dht/src/dht-common.h | 4 +++
+ 4 files changed, 91 insertions(+), 3 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index 189d8a1..0c61de9 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -37,6 +37,7 @@ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
+ from syncdutils import get_changelog_log_level, get_rsync_version
+ from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
+ from syncdutils import GX_GFID_CANONICAL_LEN
++from syncdutils import gf_mount_ready
+ from gsyncdstatus import GeorepStatus
+ from syncdutils import lf, Popen, sup
+ from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt
+@@ -950,6 +951,16 @@ class Mounter(object):
+ logging.exception('mount cleanup failure:')
+ rv = 200
+ os._exit(rv)
++
++ #Polling the dht.subvol.status value.
++ RETRIES = 10
++ while not gf_mount_ready():
++ if RETRIES < 0:
++ logging.error('Subvols are not up')
++ break
++ RETRIES -= 1
++ time.sleep(0.2)
++
+ logging.debug('auxiliary glusterfs mount prepared')
+
+
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index b08098e..7560fa1 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -21,8 +21,8 @@ import subprocess
+ import socket
+ from subprocess import PIPE
+ from threading import Lock, Thread as baseThread
+-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED
+-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode
++from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED
++from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode
+ from signal import signal, SIGTERM
+ import select as oselect
+ from os import waitpid as owaitpid
+@@ -55,6 +55,8 @@ from rconf import rconf
+
+ from hashlib import sha256 as sha256
+
++ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
++
+ # auxiliary gfid based access prefix
+ _CL_AUX_GFID_PFX = ".gfid/"
+ ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+@@ -100,6 +102,19 @@ def unescape_space_newline(s):
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
++# gf_mount_ready() returns 1 if all subvols are up, else 0
++def gf_mount_ready():
++ ret = errno_wrap(Xattr.lgetxattr,
++ ['.', 'dht.subvol.status', 16],
++ [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
++
++ if isinstance(ret, int):
++ logging.error("failed to get the xattr value")
++ return 1
++ ret = ret.rstrip('\x00')
++ if ret == "1":
++ return 1
++ return 0
+
+ def norm(s):
+ if s:
+@@ -564,7 +579,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
+ def lstat(e):
+ return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
+
+-
+ def get_gfid_from_mnt(gfidpath):
+ return errno_wrap(Xattr.lgetxattr,
+ [gfidpath, 'glusterfs.gfid.string',
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 6aa18f3..23cc80c 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -4858,6 +4858,60 @@ out:
+ return 0;
+ }
+
++/* Virtual Xattr which returns 1 if all subvols are up,
++ else returns 0. Geo-rep then uses this virtual xattr
++ after a fresh mount and starts the I/O.
++*/
++
++enum dht_vxattr_subvol {
++ DHT_VXATTR_SUBVOLS_UP = 1,
++ DHT_VXATTR_SUBVOLS_DOWN = 0,
++};
++
++int
++dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
++ const char *key)
++{
++ dht_local_t *local = NULL;
++ int ret = -1;
++ int op_errno = ENODATA;
++ int value = DHT_VXATTR_SUBVOLS_UP;
++ int i = 0;
++ dht_conf_t *conf = NULL;
++
++ conf = this->private;
++ local = frame->local;
++
++ if (!key) {
++ op_errno = EINVAL;
++ goto out;
++ }
++ local->xattr = dict_new();
++ if (!local->xattr) {
++ op_errno = ENOMEM;
++ goto out;
++ }
++ for (i = 0; i < conf->subvolume_cnt; i++) {
++ if (!conf->subvolume_status[i]) {
++ value = DHT_VXATTR_SUBVOLS_DOWN;
++ gf_msg_debug(this->name, 0, "subvol %s is down ",
++ conf->subvolumes[i]->name);
++ break;
++ }
++ }
++ ret = dict_set_int8(local->xattr, (char *)key, value);
++ if (ret < 0) {
++ op_errno = -ret;
++ ret = -1;
++ goto out;
++ }
++ ret = 0;
++
++out:
++ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
++ return 0;
++}
++
+ int
+ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+@@ -4915,6 +4969,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ goto err;
+ }
+
++ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
++ dht_vgetxattr_subvol_status(frame, this, key);
++ return 0;
++ }
++
+ /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
+ if (!DHT_IS_DIR(layout))
+ goto no_dht_is_dir;
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index 1b3e826..9ec5b51 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -45,6 +45,10 @@
+ #define DHT_DIR_STAT_BLOCKS 8
+ #define DHT_DIR_STAT_SIZE 4096
+
++/* Virtual xattr for subvols status */
++
++#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
++
+ /* Virtual xattrs for debugging */
+
+ #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
+--
+1.8.3.1
+