diff options
Diffstat (limited to '0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch')
-rw-r--r-- | 0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch b/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch new file mode 100644 index 0000000..93bd3c9 --- /dev/null +++ b/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch @@ -0,0 +1,372 @@ +From 546f412c155dd5aca2b3cd4202f80c9977b215dc Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Wed, 4 Sep 2019 12:06:34 +0530 +Subject: [PATCH 287/297] cluster/ec: Fail fsync/flush for files on update + size/version failure + +Problem: +If update size/version is not successful on the file, updates on the +same stripe could lead to data corruptions if the earlier un-aligned +write is not successful on all the bricks. Application won't have +any knowledge of this because update size/version happens in the +background. + +Fix: +Fail fsync/flush on fds that are opened before update-size-version +went bad. + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23355 +fixes: bz#1745107 +Change-Id: I9d323eddcda703bd27d55f340c4079d76e06e492 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/180672 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/ec/ec-badfd.c | 124 +++++++++++++++++++++++++++++++++++ + tests/basic/ec/ec-badfd.t | 26 ++++++++ + xlators/cluster/ec/src/ec-common.c | 23 +++++++ + xlators/cluster/ec/src/ec-generic.c | 47 +++++++++++++ + xlators/cluster/ec/src/ec-helpers.c | 7 ++ + xlators/cluster/ec/src/ec-messages.h | 2 +- + xlators/cluster/ec/src/ec-types.h | 2 + + 7 files changed, 230 insertions(+), 1 deletion(-) + create mode 100644 tests/basic/ec/ec-badfd.c + create mode 100755 tests/basic/ec/ec-badfd.t + +diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c +new file mode 100644 +index 0000000..8be23c1 +--- /dev/null ++++ b/tests/basic/ec/ec-badfd.c +@@ -0,0 +1,124 @@ ++#include <stdio.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <time.h> ++#include <limits.h> ++#include <string.h> ++#include <stdlib.h> ++#include <errno.h> ++#include <glusterfs/api/glfs.h> ++#include <glusterfs/api/glfs-handles.h> ++ ++int ++fill_iov(struct iovec *iov, char fillchar, int count) ++{ ++ int ret = -1; ++ ++ iov->iov_base = malloc(count + 1); ++ if (iov->iov_base == NULL) { ++ return ret; ++ } else { ++ iov->iov_len = count; ++ ret = 0; ++ } ++ memset(iov->iov_base, fillchar, count); ++ memset(iov->iov_base + count, '\0', 1); ++ ++ return ret; ++} ++ ++int ++write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count) ++{ ++ ssize_t ret = -1; ++ int flags = O_RDWR; ++ struct iovec iov = {0}; ++ ++ ret = fill_iov(&iov, 'a', char_count); ++ if (ret) { ++ fprintf(stderr, "failed to create iov"); ++ goto out; ++ } ++ ++ ret = glfs_pwritev(glfd, &iov, 1, 0, flags); ++out: ++ if (ret < 0) { ++ fprintf(stderr, "glfs_pwritev failed, %d", errno); ++ } ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ glfs_t *fs = NULL; ++ glfs_fd_t *fd = NULL; ++ int ret = 1; ++ char volume_cmd[4096] = {0}; ++ ++ if (argc != 4) { ++ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]); ++ return 1; ++ } ++ ++ fs = glfs_new(argv[2]); ++ if (!fs) { ++ fprintf(stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret); ++ goto out; ++ } ++ ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_logging: returned %d\n", ret); ++ goto out; ++ } ++ ret = glfs_init(fs); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_init: returned %d\n", ret); ++ goto out; ++ } ++ ++ fd = glfs_open(fs, argv[3], O_RDWR); ++ if (fd == NULL) { ++ fprintf(stderr, "glfs_open: returned NULL\n"); ++ goto out; ++ } ++ ++ ret = write_sync(fs, fd, 16); ++ if (ret < 0) { ++ fprintf(stderr, "write_sync failed\n"); ++ } ++ ++ snprintf(volume_cmd, sizeof(volume_cmd), ++ "gluster --mode=script volume stop %s", argv[2]); ++ /*Stop the volume so that update-size-version fails*/ ++ system(volume_cmd); ++ sleep(8); /* 3 seconds more than eager-lock-timeout*/ ++ snprintf(volume_cmd, sizeof(volume_cmd), ++ "gluster --mode=script volume start %s", argv[2]); ++ system(volume_cmd); ++ sleep(8); /*wait for bricks to come up*/ ++ ret = glfs_fsync(fd, NULL, NULL); ++ if (ret == 0) { ++ fprintf(stderr, "fsync succeeded on a BADFD\n"); ++ exit(1); ++ } ++ ++ ret = glfs_close(fd); ++ if (ret == 0) { ++ fprintf(stderr, "flush succeeded on a BADFD\n"); ++ exit(1); ++ } ++ ret = 0; ++ ++out: ++ unlink("/tmp/ec-badfd.log"); ++ glfs_fini(fs); ++ ++ return ret; ++} +diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t +new file mode 100755 +index 0000000..56feb47 +--- /dev/null ++++ b/tests/basic/ec/ec-badfd.t +@@ -0,0 +1,26 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6} ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5 ++ ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++TEST $GFS -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++TEST touch $M0/file ++ ++TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2 ++TEST $(dirname $0)/ec-badfd $H0 $V0 /file ++cleanup_tester $(dirname ${0})/ec-badfd ++ ++cleanup; +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 5fb4610..92d4e5d 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2255,6 +2255,23 @@ ec_unlock_lock(ec_lock_link_t *link) + } + } + ++void ++ec_inode_bad_inc(inode_t *inode, xlator_t *xl) ++{ ++ ec_inode_t *ctx = NULL; ++ ++ LOCK(&inode->lock); ++ { ++ ctx = __ec_inode_get(inode, xl); ++ if (ctx == NULL) { ++ goto unlock; ++ } ++ ctx->bad_version++; ++ } ++unlock: ++ UNLOCK(&inode->lock); ++} ++ + int32_t + ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, +@@ -2270,6 +2287,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, + ctx = lock->ctx; + + if (op_ret < 0) { ++ if (link->lock->fd == NULL) { ++ ec_inode_bad_inc(link->lock->loc.inode, this); ++ } else { ++ ec_inode_bad_inc(link->lock->fd->inode, this); ++ } ++ + gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno, + EC_MSG_SIZE_VERS_UPDATE_FAIL, + "Failed to update version and size. %s", ec_msg_str(fop)); +diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c +index acc16b5..b019050 100644 +--- a/xlators/cluster/ec/src/ec-generic.c ++++ b/xlators/cluster/ec/src/ec-generic.c +@@ -150,6 +150,37 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state) + } + } + ++static int32_t ++ec_validate_fd(fd_t *fd, xlator_t *xl) ++{ ++ uint64_t iversion = 0; ++ uint64_t fversion = 0; ++ ec_inode_t *inode_ctx = NULL; ++ ec_fd_t *fd_ctx = NULL; ++ ++ LOCK(&fd->lock); ++ { ++ fd_ctx = __ec_fd_get(fd, xl); ++ if (fd_ctx) { ++ fversion = fd_ctx->bad_version; ++ } ++ } ++ UNLOCK(&fd->lock); ++ ++ LOCK(&fd->inode->lock); ++ { ++ inode_ctx = __ec_inode_get(fd->inode, xl); ++ if (inode_ctx) { ++ iversion = inode_ctx->bad_version; ++ } ++ } ++ UNLOCK(&fd->inode->lock); ++ if (fversion < iversion) { ++ return EBADF; ++ } ++ return 0; ++} ++ + void + ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, + uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd, +@@ -165,6 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH], ++ fd->inode ? uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } ++ + fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags, + ec_wind_flush, ec_manager_flush, callback, data); + if (fop == NULL) { +@@ -381,6 +420,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC], ++ fd->inode ? uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } ++ + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags, + ec_wind_fsync, ec_manager_fsync, callback, data); + if (fop == NULL) { +diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c +index 43f6e3b..baac001 100644 +--- a/xlators/cluster/ec/src/ec-helpers.c ++++ b/xlators/cluster/ec/src/ec-helpers.c +@@ -753,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl) + { + int i = 0; + ec_fd_t *ctx = NULL; ++ ec_inode_t *ictx = NULL; + uint64_t value = 0; + ec_t *ec = xl->private; + +@@ -775,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl) + GF_FREE(ctx); + return NULL; + } ++ /* Only refering bad-version so no need for lock ++ * */ ++ ictx = __ec_inode_get(fd->inode, xl); ++ if (ictx) { ++ ctx->bad_version = ictx->bad_version; ++ } + } + } else { + ctx = (ec_fd_t *)(uintptr_t)value; +diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h +index 7c28808..be86b37 100644 +--- a/xlators/cluster/ec/src/ec-messages.h ++++ b/xlators/cluster/ec/src/ec-messages.h +@@ -55,6 +55,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL, + EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE, + EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED, + EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED, +- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED); ++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED, EC_MSG_FD_BAD); + + #endif /* !_EC_MESSAGES_H_ */ +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 1c295c0..f27f2ec 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -150,6 +150,7 @@ struct _ec_fd { + loc_t loc; + uintptr_t open; + int32_t flags; ++ uint64_t bad_version; + ec_fd_status_t fd_status[0]; + }; + +@@ -180,6 +181,7 @@ struct _ec_inode { + uint64_t dirty[2]; + struct list_head heal; + ec_stripe_list_t stripe_cache; ++ uint64_t bad_version; + }; + + typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, +-- +1.8.3.1 + |