summaryrefslogtreecommitdiff
path: root/0196-posix-ctime-Fix-ctime-upgrade-issue.patch
diff options
context:
space:
mode:
Diffstat (limited to '0196-posix-ctime-Fix-ctime-upgrade-issue.patch')
-rw-r--r--0196-posix-ctime-Fix-ctime-upgrade-issue.patch384
1 files changed, 384 insertions, 0 deletions
diff --git a/0196-posix-ctime-Fix-ctime-upgrade-issue.patch b/0196-posix-ctime-Fix-ctime-upgrade-issue.patch
new file mode 100644
index 0000000..1a7b68d
--- /dev/null
+++ b/0196-posix-ctime-Fix-ctime-upgrade-issue.patch
@@ -0,0 +1,384 @@
+From 584ee2dbb8158ee3d3c3f055f1b06ff3d9177192 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Thu, 13 Jun 2019 16:23:21 +0530
+Subject: [PATCH 196/221] posix/ctime: Fix ctime upgrade issue
+
+Problem:
+On a EC volume, during upgrade from the older version where
+ctime feature is not enabled(or not present) to the newer
+version where the ctime feature is available (enabled default),
+the self heal hangs and doesn't complete.
+
+Cause:
+The ctime feature has both client side code (utime) and
+server side code (posix). The feature is driven from client.
+Only if the client side sets the time in the frame, should
+the server side sets the time attributes in xattr. But posix
+setattr/fseattr was not doing that. When one of the server
+nodes is updated, since ctime is enabled by default, it
+starts setting xattr on setattr/fseattr on the updated node/brick.
+
+On a EC volume the first two updated nodes(bricks) are not a
+problem because there are 4 other bricks with consistent data.
+However once the third brick is updated, the new attribute(mdata xattr)
+will cause an inconsistency on metadata on 3 bricks, which
+prevents the file to be repaired.
+
+Fix:
+Don't create mdata xattr with utimes/utimensat system call.
+Only update if already present.
+
+Backport of:
+ > Patch: https://review.gluster.org/22858
+ > Change-Id: Ieacedecb8a738bb437283ef3e0f042fd49dc4c8c
+ > fixes: bz#1720201
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: Ieacedecb8a738bb437283ef3e0f042fd49dc4c8c
+BUG: 1713664
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174238
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/afr/split-brain-healing.t | 36 ++++---
+ tests/utils/get-mdata-xattr.c | 152 +++++++++++++++++++++++++++++
+ tests/volume.rc | 30 ++++++
+ xlators/storage/posix/src/posix-metadata.c | 21 ++++
+ 4 files changed, 223 insertions(+), 16 deletions(-)
+ create mode 100644 tests/utils/get-mdata-xattr.c
+
+diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
+index c80f900..78553e6 100644
+--- a/tests/basic/afr/split-brain-healing.t
++++ b/tests/basic/afr/split-brain-healing.t
+@@ -20,11 +20,14 @@ function get_replicate_subvol_number {
+ cleanup;
+
+ AREQUAL_PATH=$(dirname $0)/../../utils
++GET_MDATA_PATH=$(dirname $0)/../../utils
+ CFLAGS=""
+ test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+ }
+ build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
++build_tester $GET_MDATA_PATH/get-mdata-xattr.c
++
+ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+@@ -152,13 +155,13 @@ EXPECT $SMALLER_FILE_SIZE stat -c %s file4
+ subvolume=$(get_replicate_subvol_number file5)
+ if [ $subvolume == 0 ]
+ then
+- mtime1=$(stat -c %Y $B0/${V0}1/file5)
+- mtime2=$(stat -c %Y $B0/${V0}2/file5)
++ mtime1=$(get_mtime $B0/${V0}1/file5)
++ mtime2=$(get_mtime $B0/${V0}2/file5)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+ elif [ $subvolume == 1 ]
+ then
+- mtime1=$(stat -c %Y $B0/${V0}3/file5)
+- mtime2=$(stat -c %Y $B0/${V0}4/file5)
++ mtime1=$(get_mtime $B0/${V0}3/file5)
++ mtime2=$(get_mtime $B0/${V0}4/file5)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+ fi
+ $CLI volume heal $V0 split-brain latest-mtime /file5
+@@ -166,12 +169,12 @@ EXPECT "0" echo $?
+
+ if [ $subvolume == 0 ]
+ then
+- mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file5)
+- mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file5)
++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
+ elif [ $subvolume == 1 ]
+ then
+- mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file5)
+- mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file5)
++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
+ fi
+
+ #TODO: To below comparisons on full sub-second resolution
+@@ -188,14 +191,14 @@ subvolume=$(get_replicate_subvol_number file6)
+ if [ $subvolume == 0 ]
+ then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
+- mtime1=$(stat -c %Y $B0/${V0}1/file6)
+- mtime2=$(stat -c %Y $B0/${V0}2/file6)
++ mtime1=$(get_mtime $B0/${V0}1/file6)
++ mtime2=$(get_mtime $B0/${V0}2/file6)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+ elif [ $subvolume == 1 ]
+ then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
+- mtime1=$(stat -c %Y $B0/${V0}3/file6)
+- mtime2=$(stat -c %Y $B0/${V0}4/file6)
++ mtime1=$(get_mtime $B0/${V0}3/file6)
++ mtime2=$(get_mtime $B0/${V0}4/file6)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+ fi
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+@@ -204,12 +207,12 @@ EXPECT "0" echo $?
+
+ if [ $subvolume == 0 ]
+ then
+- mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file6)
+- mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file6)
++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
+ elif [ $subvolume == 1 ]
+ then
+- mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file6)
+- mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file6)
++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
+ fi
+
+ #TODO: To below comparisons on full sub-second resolution
+@@ -253,4 +256,5 @@ EXPECT "1" echo $?
+
+ cd -
+ TEST rm $AREQUAL_PATH/arequal-checksum
++TEST rm $GET_MDATA_PATH/get-mdata-xattr
+ cleanup
+diff --git a/tests/utils/get-mdata-xattr.c b/tests/utils/get-mdata-xattr.c
+new file mode 100644
+index 0000000..e9f5471
+--- /dev/null
++++ b/tests/utils/get-mdata-xattr.c
+@@ -0,0 +1,152 @@
++/*
++ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include <stdlib.h>
++#include <endian.h>
++#include <stdio.h>
++#include <time.h>
++#include <string.h>
++#include <inttypes.h>
++#include <sys/types.h>
++#include <sys/xattr.h>
++#include <errno.h>
++
++typedef struct gf_timespec_disk {
++ uint64_t tv_sec;
++ uint64_t tv_nsec;
++} gf_timespec_disk_t;
++
++/* posix_mdata_t on disk structure */
++typedef struct __attribute__((__packed__)) posix_mdata_disk {
++ /* version of structure, bumped up if any new member is added */
++ uint8_t version;
++ /* flags indicates valid fields in the structure */
++ uint64_t flags;
++ gf_timespec_disk_t ctime;
++ gf_timespec_disk_t mtime;
++ gf_timespec_disk_t atime;
++} posix_mdata_disk_t;
++
++/* In memory representation posix metadata xattr */
++typedef struct {
++ /* version of structure, bumped up if any new member is added */
++ uint8_t version;
++ /* flags indicates valid fields in the structure */
++ uint64_t flags;
++ struct timespec ctime;
++ struct timespec mtime;
++ struct timespec atime;
++} posix_mdata_t;
++
++#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata"
++
++/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order
++ */
++static inline void
++posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
++{
++ out->version = in->version;
++ out->flags = be64toh(in->flags);
++
++ out->ctime.tv_sec = be64toh(in->ctime.tv_sec);
++ out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec);
++
++ out->mtime.tv_sec = be64toh(in->mtime.tv_sec);
++ out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec);
++
++ out->atime.tv_sec = be64toh(in->atime.tv_sec);
++ out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
++}
++
++/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
++static int
++posix_fetch_mdata_xattr(const char *real_path, posix_mdata_t *metadata)
++{
++ size_t size = -1;
++ char *value = NULL;
++ char gfid_str[64] = {0};
++
++ char *key = GF_XATTR_MDATA_KEY;
++
++ if (!metadata || !real_path) {
++ goto err;
++ }
++
++ /* Get size */
++ size = lgetxattr(real_path, key, NULL, 0);
++ if (size == -1) {
++ goto err;
++ }
++
++ value = calloc(size + 1, sizeof(char));
++ if (!value) {
++ goto err;
++ }
++
++ /* Get xattr value */
++ size = lgetxattr(real_path, key, value, size);
++ if (size == -1) {
++ goto err;
++ }
++ posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value);
++
++out:
++ if (value)
++ free(value);
++ return 0;
++err:
++ if (value)
++ free(value);
++ return -1;
++}
++
++int
++main(int argc, char *argv[])
++{
++ posix_mdata_t metadata;
++ uint64_t result;
++
++ if (argc != 3) {
++ /*
++ Usage: get_mdata_xattr -c|-m|-a <file-name>
++ where -c --> ctime
++ -m --> mtime
++ -a --> atime
++ */
++ printf("-1");
++ goto err;
++ }
++
++ if (posix_fetch_mdata_xattr(argv[2], &metadata)) {
++ printf("-1");
++ goto err;
++ }
++
++ switch (argv[1][1]) {
++ case 'c':
++ result = metadata.ctime.tv_sec;
++ break;
++ case 'm':
++ result = metadata.mtime.tv_sec;
++ break;
++ case 'a':
++ result = metadata.atime.tv_sec;
++ break;
++ default:
++ printf("-1");
++ goto err;
++ }
++ printf("%" PRIu64, result);
++ fflush(stdout);
++ return 0;
++err:
++ fflush(stdout);
++ return -1;
++}
+diff --git a/tests/volume.rc b/tests/volume.rc
+index bb400cc..6a78c37 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -927,3 +927,33 @@ function number_healer_threads_shd {
+ local pid=$(get_shd_mux_pid $1)
+ pstack $pid | grep $2 | wc -l
+ }
++
++function get_mtime {
++ local time=$(get-mdata-xattr -m $1)
++ if [ $time == "-1" ];
++ then
++ echo $(stat -c %Y $1)
++ else
++ echo $time
++ fi
++}
++
++function get_ctime {
++ local time=$(get-mdata-xattr -c $1)
++ if [ $time == "-1" ];
++ then
++ echo $(stat -c %Z $2)
++ else
++ echo $time
++ fi
++}
++
++function get_atime {
++ local time=$(get-mdata-xattr -a $1)
++ if [ $time == "-1" ];
++ then
++ echo $(stat -c %X $1)
++ else
++ echo $time
++ fi
++}
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index e96f222..5a5e6cd 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -416,6 +416,22 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ * still fine as the times would get eventually
+ * accurate.
+ */
++
++ /* Don't create xattr with utimes/utimensat, only update if
++ * present. This otherwise causes issues during inservice
++ * upgrade. It causes inconsistent xattr values with in replica
++ * set. The scenario happens during upgrade where clients are
++ * older versions (without the ctime feature) and the server is
++ * upgraded to the new version (with the ctime feature which
++ * is enabled by default).
++ */
++
++ if (update_utime) {
++ UNLOCK(&inode->lock);
++ GF_FREE(mdata);
++ return 0;
++ }
++
+ mdata->version = 1;
+ mdata->flags = 0;
+ mdata->ctime.tv_sec = time->tv_sec;
+@@ -527,6 +543,11 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+
+ priv = this->private;
+
++ /* NOTE:
++ * This routine (utimes) is intentionally allowed for all internal and
++ * external clients even if ctime is not set. This is because AFR and
++ * WORM uses time attributes for it's internal operations
++ */
+ if (inode && priv->ctime) {
+ if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+ tv.tv_sec = stbuf->ia_atime;
+--
+1.8.3.1
+