summaryrefslogtreecommitdiff
path: root/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch
diff options
context:
space:
mode:
Diffstat (limited to '0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch')
-rw-r--r--0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch143
1 files changed, 143 insertions, 0 deletions
diff --git a/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch b/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch
new file mode 100644
index 0000000..0be69e8
--- /dev/null
+++ b/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch
@@ -0,0 +1,143 @@
+From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Mon, 29 Jul 2019 16:22:10 +0530
+Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
+
+Problem:
+Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
+if it's not present. In a multi client scenario, there is a race
+which results in updating the ctime xattr to older value.
+
+e.g. Let c1 and c2 be two clients and file1 be the file which
+doesn't have the ctime xattr. Let the ctime of file1 be t1.
+(from backend, ctime heals time attributes from backend when not present).
+
+Now following operations are done on mount
+c1 -> ls -l /mnt/file1 | c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
+
+The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
+so both of them tries to heal ctime to time 't1'. If c2 wins the race and
+appends the file before c1 heals it, it sets the time to 't1' and updates
+it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
+which is incorrect.
+
+Solution:
+Compare the times during heal and only update the larger time. This is the
+general approach used in ctime feature but got missed with healing legacy
+files.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
+
+>fixes: bz#1734299
+>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
+>Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+BUG: 1734305
+Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
+ 1 file changed, 58 insertions(+), 18 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 647c0bb..57791fa 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+ struct mdata_iatt *mdata_iatt, int *op_errno)
+ {
+ posix_mdata_t *mdata = NULL;
++ posix_mdata_t imdata = {
++ 0,
++ };
+ int ret = 0;
++ gf_boolean_t mdata_already_set = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("posix", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+- mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
+- if (!mdata) {
+- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
+- "Could not allocate mdata. gfid: %s",
+- uuid_utoa(inode->gfid));
+- ret = -1;
+- *op_errno = ENOMEM;
+- goto unlock;
+- }
++ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
++ if (ret == 0 && mdata) {
++ mdata_already_set = _gf_true;
++ } else if (ret == -1 || !mdata) {
++ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
++ if (!mdata) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
++ "Could not allocate mdata. gfid: %s",
++ uuid_utoa(inode->gfid));
++ ret = -1;
++ *op_errno = ENOMEM;
++ goto unlock;
++ }
++
++ ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
++ op_errno);
++ if (ret == 0) {
++ /* Got mdata from disk. This is a race, another client
++ * has healed the xattr during lookup. So set it in inode
++ * ctx */
++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++ mdata_already_set = _gf_true;
++ } else {
++ *op_errno = 0;
++ mdata->version = 1;
++ mdata->flags = 0;
++ mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
++ mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
++ mdata->atime.tv_sec = mdata_iatt->ia_atime;
++ mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
++ mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
++ mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
+
+- mdata->version = 1;
+- mdata->flags = 0;
+- mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
+- mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
+- mdata->atime.tv_sec = mdata_iatt->ia_atime;
+- mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
+- mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
+- mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++ }
++ }
+
+- __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++ if (mdata_already_set) {
++ /* Compare and update the larger time */
++ imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
++ imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
++ imdata.atime.tv_sec = mdata_iatt->ia_atime;
++ imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
++ imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
++ imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
++
++ if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
++ mdata->ctime = imdata.ctime;
++ }
++ if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
++ mdata->mtime = imdata.mtime;
++ }
++ if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
++ mdata->atime = imdata.atime;
++ }
++ }
+
+ ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
+ if (ret) {
+--
+1.8.3.1
+