author    CoprDistGit <infra@openeuler.org>    2024-08-01 14:35:16 +0000
committer CoprDistGit <infra@openeuler.org>    2024-08-01 14:35:16 +0000
commit    2453fd874197f84e11ae70053cff7f56a32988f4 (patch)
tree      d6ce5f0f1defa8b7a9b070ba870a8b7f916578dc
parent    e47cbe682033e9df1530280ef7460c172c32961a (diff)
automatic import of glusterfs (openeuler24.03_LTS)
-rw-r--r-- .gitignore | 1
-rw-r--r-- 0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch | 1557
-rw-r--r-- 0003-rpc-set-bind-insecure-to-off-by-default.patch | 51
-rw-r--r-- 0004-glusterd-spec-fixing-autogen-issue.patch | 47
-rw-r--r-- 0005-libglusterfs-glusterd-Fix-compilation-errors.patch | 36
-rw-r--r-- 0006-build-remove-ghost-directory-entries.patch | 58
-rw-r--r-- 0007-build-add-RHGS-specific-changes.patch | 620
-rw-r--r-- 0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch | 35
-rw-r--r-- 0009-build-introduce-security-hardening-flags-in-gluster.patch | 57
-rw-r--r-- 0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch | 100
-rw-r--r-- 0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch | 138
-rw-r--r-- 0012-build-add-pretrans-check.patch | 73
-rw-r--r-- 0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch | 50
-rw-r--r-- 0014-build-spec-file-conflict-resolution.patch | 72
-rw-r--r-- 0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch | 198
-rw-r--r-- 0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch | 42
-rw-r--r-- 0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch | 37
-rw-r--r-- 0018-cli-Add-message-for-user-before-modifying-brick-mult.patch | 56
-rw-r--r-- 0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch | 99
-rw-r--r-- 0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch | 38
-rw-r--r-- 0021-cli-glusterfsd-remove-copyright-information.patch | 66
-rw-r--r-- 0022-cli-Remove-upstream-doc-reference.patch | 40
-rw-r--r-- 0023-hooks-remove-selinux-hooks.patch | 148
-rw-r--r-- 0024-glusterd-Make-localtime-logging-option-invisible-in-.patch | 50
-rw-r--r-- 0025-build-make-RHGS-version-available-for-server.patch | 45
-rw-r--r-- 0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch | 68
-rw-r--r-- 0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch | 50
-rw-r--r-- 0028-glusterd-Reset-op-version-for-features.shard-deletio.patch | 52
-rw-r--r-- 0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch | 39
-rw-r--r-- 0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch | 42
-rw-r--r-- 0031-glusterd-turn-off-selinux-feature-in-downstream.patch | 34
-rw-r--r-- 0032-glusterd-update-gd-op-version-to-3_7_0.patch | 29
-rw-r--r-- 0033-build-add-missing-explicit-package-dependencies.patch | 83
-rw-r--r-- 0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch | 59
-rw-r--r-- 0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch | 41
-rw-r--r-- 0036-build-add-conditional-dependency-on-server-for-devel.patch | 47
-rw-r--r-- 0037-cli-change-the-warning-message.patch | 35
-rw-r--r-- 0038-spec-avoid-creation-of-temp-file-in-lua-script.patch | 230
-rw-r--r-- 0039-cli-fix-query-to-user-during-brick-mux-selection.patch | 61
-rw-r--r-- 0040-build-Remove-unsupported-test-cases-failing-consiste.patch | 136
-rw-r--r-- 0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch | 43
-rw-r--r-- 0042-spec-client-server-Builds-are-failing-on-rhel-6.patch | 123
-rw-r--r-- 0043-inode-don-t-dump-the-whole-table-to-CLI.patch | 137
-rw-r--r-- 0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch | 360
-rw-r--r-- 0045-glusterd-fix-txn-id-mem-leak.patch | 126
-rw-r--r-- 0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch | 98
-rw-r--r-- 0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch | 1652
-rw-r--r-- 0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch | 115
-rw-r--r-- 0049-transport-socket-log-shutdown-msg-occasionally.patch | 49
-rw-r--r-- 0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch | 142
-rw-r--r-- 0051-spec-update-rpm-install-condition.patch | 67
-rw-r--r-- 0052-geo-rep-IPv6-support.patch | 299
-rw-r--r-- 0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch | 575
-rw-r--r-- 0054-Revert-glusterd-storhaug-remove-ganesha.patch | 1912
-rw-r--r-- 0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch | 1897
-rw-r--r-- 0056-common-ha-fixes-for-Debian-based-systems.patch | 229
-rw-r--r-- 0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch | 40
-rw-r--r-- 0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch | 62
-rw-r--r-- 0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch | 132
-rw-r--r-- 0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch | 61
-rw-r--r-- 0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch | 106
-rw-r--r-- 0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch | 59
-rw-r--r-- 0063-glusterd-ganesha-update-cache-invalidation-properly-.patch | 144
-rw-r--r-- 0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch | 52
-rw-r--r-- 0065-ganesha-scripts-remove-dependency-over-export-config.patch | 58
-rw-r--r-- 0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch | 41
-rw-r--r-- 0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch | 41
-rw-r--r-- 0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch | 58
-rw-r--r-- 0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch | 96
-rw-r--r-- 0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch | 76
-rw-r--r-- 0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch | 66
-rw-r--r-- 0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch | 60
-rw-r--r-- 0073-build-remove-ganesha-dependency-on-selinux-policy.patch | 45
-rw-r--r-- 0074-common-ha-enable-pacemaker-at-end-of-setup.patch | 67
-rw-r--r-- 0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch | 43
-rw-r--r-- 0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch | 44
-rw-r--r-- 0077-glusterd-ganesha-create-remove-export-file-only-from.patch | 73
-rw-r--r-- 0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch | 40
-rw-r--r-- 0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch | 93
-rw-r--r-- 0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch | 62
-rw-r--r-- 0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch | 50
-rw-r--r-- 0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch | 59
-rw-r--r-- 0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch | 82
-rw-r--r-- 0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch | 37
-rw-r--r-- 0085-Revert-all-remove-code-which-is-not-being-considered.patch | 8976
-rw-r--r-- 0086-Revert-tiering-remove-the-translator-from-build-and-.patch | 3194
-rw-r--r-- 0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch | 89
-rw-r--r-- 0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch | 74
-rw-r--r-- 0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch | 85
-rw-r--r-- 0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch | 307
-rw-r--r-- 0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch | 110
-rw-r--r-- 0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch | 41
-rw-r--r-- 0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch | 106
-rw-r--r-- 0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch | 45
-rw-r--r-- 0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch | 204
-rw-r--r-- 0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch | 94
-rw-r--r-- 0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch | 61
-rw-r--r-- 0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch | 4617
-rw-r--r-- 0099-client-fini-return-fini-after-rpc-cleanup.patch | 119
-rw-r--r-- 0100-clnt-rpc-ref-leak-during-disconnect.patch | 179
-rw-r--r-- 0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch | 162
-rw-r--r-- 0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch | 737
-rw-r--r-- 0103-dht-NULL-check-before-setting-error-flag.patch | 43
-rw-r--r-- 0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch | 151
-rw-r--r-- 0105-core-Log-level-changes-do-not-effect-on-running-clie.patch | 336
-rw-r--r-- 0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch | 111
-rw-r--r-- 0107-gfapi-add-function-to-set-client-pid.patch | 93
-rw-r--r-- 0108-afr-add-client-pid-to-all-gf_event-calls.patch | 225
-rw-r--r-- 0109-glusterd-Optimize-glusterd-handshaking-code-path.patch | 613
-rw-r--r-- 0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch | 108
-rw-r--r-- 0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch | 49
-rw-r--r-- 0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch | 45
-rw-r--r-- 0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch | 71
-rw-r--r-- 0114-core-fix-hang-issue-in-__gf_free.patch | 46
-rw-r--r-- 0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch | 56
-rw-r--r-- 0116-cluster-ec-fix-fd-reopen.patch | 1931
-rw-r--r-- 0117-spec-Remove-thin-arbiter-package.patch | 184
-rw-r--r-- 0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch | 31
-rw-r--r-- 0119-glusterd-provide-a-way-to-detach-failed-node.patch | 53
-rw-r--r-- 0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch | 62
-rw-r--r-- 0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch | 42
-rw-r--r-- 0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch | 312
-rw-r--r-- 0123-ctime-Fix-log-repeated-logging-during-open.patch | 79
-rw-r--r-- 0124-spec-remove-duplicate-references-to-files.patch | 39
-rw-r--r-- 0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch | 75
-rw-r--r-- 0126-cluster-dht-refactor-dht-lookup-functions.patch | 663
-rw-r--r-- 0127-cluster-dht-Refactor-dht-lookup-functions.patch | 200
-rw-r--r-- 0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch | 86
-rw-r--r-- 0129-core-handle-memory-accounting-correctly.patch | 401
-rw-r--r-- 0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch | 117
-rw-r--r-- 0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch | 113
-rw-r--r-- 0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch | 41
-rw-r--r-- 0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch | 62
-rw-r--r-- 0134-performance-write-behind-remove-request-from-wip-lis.patch | 79
-rw-r--r-- 0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch | 56
-rw-r--r-- 0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch | 51
-rw-r--r-- 0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch | 160
-rw-r--r-- 0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch | 64
-rw-r--r-- 0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch | 300
-rw-r--r-- 0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch | 40
-rw-r--r-- 0141-socket-ssl-fix-crl-handling.patch | 295
-rw-r--r-- 0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch | 36
-rw-r--r-- 0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch | 42
-rw-r--r-- 0144-cluster-ec-honor-contention-notifications-for-partia.patch | 114
-rw-r--r-- 0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch | 65
-rw-r--r-- 0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch | 146
-rw-r--r-- 0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch | 80
-rw-r--r-- 0148-cluster-dht-Lookup-all-files-when-processing-directo.patch | 70
-rw-r--r-- 0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch | 452
-rw-r--r-- 0150-libglusterfs-define-macros-needed-for-cloudsync.patch | 38
-rw-r--r-- 0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch | 156
-rw-r--r-- 0152-storage-posix-changes-with-respect-to-cloudsync.patch | 403
-rw-r--r-- 0153-features-cloudsync-Added-some-new-functions.patch | 1077
-rw-r--r-- 0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch | 1394
-rw-r--r-- 0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch | 114
-rw-r--r-- 0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch | 94
-rw-r--r-- 0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch | 68
-rw-r--r-- 0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch | 162
-rw-r--r-- 0159-glusterd-add-an-op-version-check.patch | 66
-rw-r--r-- 0160-geo-rep-Geo-rep-help-text-issue.patch | 41
-rw-r--r-- 0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch | 289
-rw-r--r-- 0162-geo-rep-Fix-sync-method-config.patch | 210
-rw-r--r-- 0163-geo-rep-Fix-sync-hang-with-tarssh.patch | 255
-rw-r--r-- 0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch | 165
-rw-r--r-- 0165-tests-shd-Add-test-coverage-for-shd-mux.patch | 442
-rw-r--r-- 0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch | 92
-rw-r--r-- 0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch | 64
-rw-r--r-- 0168-cluster-dht-Fix-directory-perms-during-selfheal.patch | 46
-rw-r--r-- 0169-Build-Fix-spec-to-enable-rhel8-client-build.patch | 62
-rw-r--r-- 0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch | 119
-rw-r--r-- 0171-posix-add-storage.reserve-size-option.patch | 295
-rw-r--r-- 0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch | 141
-rw-r--r-- 0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch | 52
-rw-r--r-- 0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch | 53
-rw-r--r-- 0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch | 241
-rw-r--r-- 0176-features-shard-Fix-crash-during-background-shard-del.patch | 289
-rw-r--r-- 0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch | 161
-rw-r--r-- 0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch | 287
-rw-r--r-- 0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch | 72
-rw-r--r-- 0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch | 44
-rw-r--r-- 0181-afr-log-before-attempting-data-self-heal.patch | 59
-rw-r--r-- 0182-geo-rep-fix-mountbroker-setup.patch | 55
-rw-r--r-- 0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch | 47
-rw-r--r-- 0184-tests-Add-gating-configuration-file-for-rhel8.patch | 38
-rw-r--r-- 0185-gfapi-provide-an-api-for-setting-statedump-path.patch | 174
-rw-r--r-- 0186-cli-Remove-brick-warning-seems-unnecessary.patch | 57
-rw-r--r-- 0187-gfapi-statedump_path-add-proper-version-number.patch | 98
-rw-r--r-- 0188-features-shard-Fix-integer-overflow-in-block-count-a.patch | 38
-rw-r--r-- 0189-features-shard-Fix-block-count-accounting-upon-trunc.patch | 323
-rw-r--r-- 0190-Build-removing-the-hardcoded-usage-of-python3.patch | 49
-rw-r--r-- 0191-Build-Update-python-shebangs-based-on-version.patch | 49
-rw-r--r-- 0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch | 114
-rw-r--r-- 0193-spec-fixed-python-dependency-for-rhel6.patch | 42
-rw-r--r-- 0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch | 144
-rw-r--r-- 0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch | 89
-rw-r--r-- 0196-posix-ctime-Fix-ctime-upgrade-issue.patch | 384
-rw-r--r-- 0197-posix-fix-crash-in-posix_cs_set_state.patch | 71
-rw-r--r-- 0198-cluster-ec-Prevent-double-pre-op-xattrops.patch | 119
-rw-r--r-- 0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch | 80
-rw-r--r-- 0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch | 206
-rw-r--r-- 0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch | 44
-rw-r--r-- 0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch | 46
-rw-r--r-- 0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch | 233
-rw-r--r-- 0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch | 60
-rw-r--r-- 0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch | 195
-rw-r--r-- 0206-glusterd-ignore-user.-options-from-compatibility-che.patch | 41
-rw-r--r-- 0207-glusterd-fix-use-after-free-of-a-dict_t.patch | 44
-rw-r--r-- 0208-mem-pool-remove-dead-code.patch | 161
-rw-r--r-- 0209-core-avoid-dynamic-TLS-allocation-when-possible.patch | 1059
-rw-r--r-- 0210-mem-pool.-c-h-minor-changes.patch | 129
-rw-r--r-- 0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch | 41
-rw-r--r-- 0212-core-fix-memory-allocation-issues.patch | 169
-rw-r--r-- 0213-cluster-dht-Strip-out-dht-xattrs.patch | 42
-rw-r--r-- 0214-geo-rep-Upgrading-config-file-to-new-version.patch | 114
-rw-r--r-- 0215-posix-modify-storage.reserve-option-to-take-size-and.patch | 319
-rw-r--r-- 0216-Test-case-fixe-for-downstream-3.5.0.patch | 29
-rw-r--r-- 0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch | 75
-rw-r--r-- 0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch | 88
-rw-r--r-- 0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch | 35
-rw-r--r-- 0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch | 190
-rw-r--r-- 0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch | 75
-rw-r--r-- 0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch | 109
-rw-r--r-- 0223-change-get_real_filename-implementation-to-use-ENOAT.patch | 123
-rw-r--r-- 0224-core-replace-inet_addr-with-inet_pton.patch | 53
-rw-r--r-- 0225-tests-utils-Fix-py2-py3-util-python-scripts.patch | 448
-rw-r--r-- 0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch | 92
-rw-r--r-- 0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch | 914
-rw-r--r-- 0228-locks-enable-notify-contention-by-default.patch | 39
-rw-r--r-- 0229-glusterd-Show-the-correct-brick-status-in-get-state.patch | 113
-rw-r--r-- 0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch | 893
-rw-r--r-- 0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch | 180
-rw-r--r-- 0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch | 228
-rw-r--r-- 0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch | 78
-rw-r--r-- 0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch | 220
-rw-r--r-- 0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch | 38
-rw-r--r-- 0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch | 35
-rw-r--r-- 0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch | 227
-rw-r--r-- 0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch | 47
-rw-r--r-- 0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch | 128
-rw-r--r-- 0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch | 54
-rw-r--r-- 0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch | 82
-rw-r--r-- 0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch | 427
-rw-r--r-- 0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch | 154
-rw-r--r-- 0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch | 292
-rw-r--r-- 0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch | 151
-rw-r--r-- 0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch | 53
-rw-r--r-- 0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch | 144
-rw-r--r-- 0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch | 151
-rw-r--r-- 0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch | 95
-rw-r--r-- 0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch | 4572
-rw-r--r-- 0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch | 57
-rw-r--r-- 0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch | 63
-rw-r--r-- 0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch | 246
-rw-r--r-- 0254-Detach-iot_worker-to-release-its-resources.patch | 43
-rw-r--r-- 0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch | 104
-rw-r--r-- 0256-features-snapview-server-use-the-same-volfile-server.patch | 117
-rw-r--r-- 0257-geo-rep-Test-case-for-upgrading-config-file.patch | 80
-rw-r--r-- 0258-geo-rep-Fix-mount-broker-setup-issue.patch | 53
-rw-r--r-- 0259-gluster-block-tuning-perf-options.patch | 47
-rw-r--r-- 0260-ctime-Set-mdata-xattr-on-legacy-files.patch | 885
-rw-r--r-- 0261-features-utime-Fix-mem_put-crash.patch | 52
-rw-r--r-- 0262-glusterd-ctime-Disable-ctime-by-default.patch | 78
-rw-r--r-- 0263-tests-fix-ctime-related-tests.patch | 75
-rw-r--r-- 0264-gfapi-Fix-deadlock-while-processing-upcall.patch | 259
-rw-r--r-- 0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch | 47
-rw-r--r-- 0266-geo-rep-Fix-mount-broker-setup-issue.patch | 63
-rw-r--r-- 0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch | 143
-rw-r--r-- 0268-rpc-transport-have-default-listen-port.patch | 46
-rw-r--r-- 0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch | 58
-rw-r--r-- 0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch | 42
-rw-r--r-- 0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch | 116
-rw-r--r-- 0272-cluster-ec-Always-read-from-good-mask.patch | 90
-rw-r--r-- 0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch | 86
-rw-r--r-- 0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch | 49
-rw-r--r-- 0275-cluster-ec-Create-heal-task-with-heal-process-id.patch | 74
-rw-r--r-- 0276-features-utime-always-update-ctime-at-setattr.patch | 74
-rw-r--r-- 0277-geo-rep-Fix-Config-Get-Race.patch | 109
-rw-r--r-- 0278-geo-rep-Fix-worker-connection-issue.patch | 45
-rw-r--r-- 0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch | 253
-rw-r--r-- 0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch | 153
-rw-r--r-- 0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch | 105
-rw-r--r-- 0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch | 116
-rw-r--r-- 0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch | 285
-rw-r--r-- 0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch | 61
-rw-r--r-- 0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch | 43
-rw-r--r-- 0286-glusterfs.spec.in-added-script-files-for-machine-com.patch | 162
-rw-r--r-- 0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch | 372
-rw-r--r-- 0288-cluster-ec-Fix-coverity-issues.patch | 77
-rw-r--r-- 0289-cluster-ec-quorum-count-implementation.patch | 721
-rw-r--r-- 0290-glusterd-tag-disperse.quorum-count-for-31306.patch | 84
-rw-r--r-- 0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch | 106
-rw-r--r-- 0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch | 72
-rw-r--r-- 0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch | 69
-rw-r--r-- 0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch | 59
-rw-r--r-- 0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch | 1164
-rw-r--r-- 0296-glusterfind-pre-command-failure-on-a-modify.patch | 62
-rw-r--r-- 0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch | 89
-rw-r--r-- 0298-geo-rep-fix-sub-command-during-worker-connection.patch | 56
-rw-r--r-- 0299-geo-rep-performance-improvement-while-syncing-rename.patch | 156
-rw-r--r-- 0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch | 70
-rw-r--r-- 0301-posix-Brick-is-going-down-unexpectedly.patch | 61
-rw-r--r-- 0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch | 67
-rw-r--r-- 0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch | 45
-rw-r--r-- 0304-cluster-dht-Correct-fd-processing-loop.patch | 194
-rw-r--r-- 0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch | 56
-rw-r--r-- 0306-cli-fix-distCount-value.patch | 43
-rw-r--r-- 0307-ssl-fix-RHEL8-regression-failure.patch | 42
-rw-r--r-- 0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch | 347
-rw-r--r-- 0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch | 240
-rw-r--r-- 0310-tests-test-case-for-non-root-geo-rep-setup.patch | 284
-rw-r--r-- 0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch | 186
-rw-r--r-- 0312-Scripts-quota_fsck-script-KeyError-contri_size.patch | 59
-rw-r--r-- 0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch | 60
-rw-r--r-- 0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch | 38
-rw-r--r-- 0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch | 52
-rw-r--r-- 0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch | 51
-rw-r--r-- 0317-Update-rfc.sh-to-rhgs-3.5.1.patch | 43
-rw-r--r-- 0318-Update-rfc.sh-to-rhgs-3.5.1.patch | 114
-rw-r--r-- 0319-features-snapview-server-obtain-the-list-of-snapshot.patch | 48
-rw-r--r-- 0320-gf-event-Handle-unix-volfile-servers.patch | 58
-rw-r--r-- 0321-Adding-white-spaces-to-description-of-set-group.patch | 55
-rw-r--r-- 0322-glusterd-display-correct-rebalance-data-size-after-g.patch | 65
-rw-r--r-- 0323-cli-display-detailed-rebalance-info.patch | 101
-rw-r--r-- 0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch | 128
-rw-r--r-- 0325-extras-hooks-Install-and-package-newly-added-post-ad.patch | 52
-rw-r--r-- 0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch | 51
-rw-r--r-- 0327-glusterfind-integrate-with-gfid2path.patch | 93
-rw-r--r-- 0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch | 55
-rw-r--r-- 0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch | 165
-rw-r--r-- 0330-mount.glusterfs-change-the-error-message.patch | 59
-rw-r--r-- 0331-features-locks-Do-special-handling-for-op-version-3..patch | 44
-rw-r--r-- 0332-Removing-one-top-command-from-gluster-v-help.patch | 57
-rw-r--r-- 0333-rpc-Synchronize-slot-allocation-code.patch | 195
-rw-r--r-- 0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch | 54
-rw-r--r-- 0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch | 15991
-rw-r--r-- 0336-spec-check-and-return-exit-code-in-rpm-scripts.patch | 162
-rw-r--r-- 0337-fuse-Set-limit-on-invalidate-queue-size.patch | 455
-rw-r--r-- 0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch | 83
-rw-r--r-- 0339-geo-rep-fix-integer-config-validation.patch | 93
-rw-r--r-- 0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch | 46
-rw-r--r-- 0341-socket-fix-error-handling.patch | 742
-rw-r--r-- 0342-Revert-hooks-remove-selinux-hooks.patch | 120
-rw-r--r-- 0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch | 155
-rw-r--r-- 0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch | 412
-rw-r--r-- 0345-read-ahead-io-cache-turn-off-by-default.patch | 82
-rw-r--r-- 0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch | 223
-rw-r--r-- 0347-tools-glusterfind-handle-offline-bricks.patch | 236
-rw-r--r-- 0348-glusterfind-Fix-py2-py3-issues.patch | 113
-rw-r--r-- 0349-glusterfind-python3-compatibility.patch | 56
-rw-r--r-- 0350-tools-glusterfind-Remove-an-extra-argument.patch | 37
-rw-r--r-- 0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch | 131
-rw-r--r-- 0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch | 38
-rw-r--r-- 0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch | 47
-rw-r--r-- 0354-core-fix-memory-pool-management-races.patch | 466
-rw-r--r-- 0355-core-Prevent-crash-on-process-termination.patch | 74
-rw-r--r-- 0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch | 26
-rw-r--r-- 0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch | 268
-rw-r--r-- 0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch | 51
-rw-r--r-- 0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch | 41
-rw-r--r-- 0360-rpc-Make-ssl-log-more-useful.patch | 117
-rw-r--r-- 0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch | 122
-rw-r--r-- 0362-write-behind-fix-data-corruption.patch | 454
-rw-r--r-- 0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch | 47
-rw-r--r-- 0364-dht-fixing-rebalance-failures-for-files-with-holes.patch | 97
-rw-r--r-- 0365-build-geo-rep-requires-relevant-selinux-permission-f.patch | 70
-rw-r--r-- 0366-snapshot-fix-python3-issue-in-gcron.patch | 55
-rw-r--r-- 0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch | 95
-rw-r--r-- 0368-Update-rfc.sh-to-rhgs-3.5.2.patch | 26
-rw-r--r-- 0369-cluster-ec-Return-correct-error-code-and-log-message.patch | 53
-rw-r--r-- 0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch | 203
-rw-r--r-- 0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch | 53
-rw-r--r-- 0372-posix-fix-seek-functionality.patch | 49
-rw-r--r-- 0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch | 51
-rw-r--r-- 0374-open-behind-fix-missing-fd-reference.patch | 121
-rw-r--r-- 0375-features-shard-Send-correct-size-when-reads-are-sent.patch | 75
-rw-r--r-- 0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch | 70
-rw-r--r-- 0377-syncop-improve-scaling-and-implement-more-tools.patch | 862
-rw-r--r-- 0378-Revert-open-behind-fix-missing-fd-reference.patch | 120
-rw-r--r-- 0379-glusterd-add-missing-synccond_broadcast.patch | 45
-rw-r--r-- 0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch | 306
-rw-r--r-- 0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch | 48
-rw-r--r-- 0382-features-shard-Aggregate-file-size-block-count-befor.patch | 422
-rw-r--r-- 0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch | 38
-rw-r--r-- 0384-Update-rfc.sh-to-rhgs-3.5.3.patch | 26
-rw-r--r-- 0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch | 50
-rw-r--r-- 0386-glusterd-increase-the-StartLimitBurst.patch | 39
-rw-r--r-- 0387-To-fix-readdir-ahead-memory-leak.patch | 47
-rw-r--r-- 0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch | 142
-rw-r--r-- 0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch | 297
-rw-r--r-- 0390-glusterd-deafult-options-after-volume-reset.patch | 93
-rw-r--r-- 0391-glusterd-unlink-the-file-after-killing-the-process.patch | 39
-rw-r--r-- 0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch | 187
-rw-r--r-- 0393-afr-restore-timestamp-of-files-during-metadata-heal.patch | 129
-rw-r--r-- 0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch | 38
-rw-r--r-- 0395-Cli-Removing-old-log-rotate-command.patch | 111
-rw-r--r-- 0396-Updating-gluster-manual.patch | 56
-rw-r--r-- 0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch | 52
-rw-r--r-- 0398-ec-change-error-message-for-heal-commands-for-disper.patch | 75
-rw-r--r-- 0399-glusterd-coverity-fixes.patch | 79
-rw-r--r-- 0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch | 98
-rw-r--r-- 0401-cli-change-the-warning-message.patch | 70
-rw-r--r-- 0402-afr-wake-up-index-healer-threads.patch | 198
-rw-r--r-- 0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch | 84
-rw-r--r-- 0404-tests-Fix-spurious-failure.patch | 38
-rw-r--r-- 0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch | 175
-rw-r--r-- 0406-afr-support-split-brain-CLI-for-replica-3.patch | 185
-rw-r--r-- 0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch | 60
-rw-r--r-- 0408-geo-rep-Fix-ssh-port-validation.patch | 107
-rw-r--r-- 0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch | 52
-rw-r--r-- 0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch | 249
-rw-r--r-- 0411-tools-glusterfind-validate-session-name.patch | 116
-rw-r--r-- 0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch | 46
-rw-r--r-- 0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch | 46
-rw-r--r-- 0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch | 62
-rw-r--r-- 0415-dht-Fix-stale-layout-and-create-issue.patch | 523
-rw-r--r-- 0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch | 72
-rw-r--r-- 0417-events-fix-IPv6-memory-corruption.patch | 153
-rw-r--r-- 0418-md-cache-avoid-clearing-cache-when-not-necessary.patch | 439
-rw-r--r-- 0419-cluster-afr-fix-race-when-bricks-come-up.patch | 104
-rw-r--r-- 0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch | 46
-rw-r--r-- 0421-Improve-logging-in-EC-client-and-lock-translator.patch | 93
-rw-r--r-- 0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch | 236
-rw-r--r-- 0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch | 128
-rw-r--r-- 0424-afr-make-heal-info-lockless.patch | 884
-rw-r--r-- 0425-tests-Fix-spurious-self-heald.t-failure.patch | 187
-rw-r--r-- 0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch | 216
-rw-r--r-- 0427-storage-posix-Fixing-a-coverity-issue.patch | 38
-rw-r--r-- 0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch | 48
-rw-r--r-- 0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch | 61
-rw-r--r-- 0430-Fix-some-Null-pointer-dereference-coverity-issues.patch | 291
-rw-r--r-- 0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch | 638
-rw-r--r-- 0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch | 503
-rw-r--r-- 0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch | 54
-rw-r--r-- 0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch | 39
-rw-r--r-- 0435-glusterd-coverity-fix.patch | 38
-rw-r--r-- 0436-glusterd-coverity-fixes.patch | 187
-rw-r--r-- 0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch | 48
-rw-r--r-- 0438-dht-sparse-files-rebalance-enhancements.patch | 324
-rw-r--r-- 0439-cluster-afr-Delay-post-op-for-fsync.patch | 438
-rw-r--r-- 0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch | 62
-rw-r--r-- 0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch | 195
-rw-r--r-- 0442-fuse-correctly-handle-setxattr-values.patch | 139
-rw-r--r-- 0443-fuse-fix-high-sev-coverity-issue.patch | 55
-rw-r--r-- 0444-mount-fuse-Fixing-a-coverity-issue.patch | 40
-rw-r--r-- 0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch | 481
-rw-r--r-- 0446-bitrot-Make-number-of-signer-threads-configurable.patch | 594
-rw-r--r-- 0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch | 359
-rw-r--r-- 0448-Posix-Use-simple-approach-to-close-fd.patch | 341
-rw-r--r-- 0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch | 107
-rw-r--r-- 0450-tests-basic-ctime-enable-ctime-before-testing.patch | 35
-rw-r--r-- 0451-extras-Modify-group-virt-to-include-network-related-.patch | 44
-rw-r--r-- 0452-Tier-DHT-Handle-the-pause-case-missed-out.patch | 48
-rw-r--r-- 0453-glusterd-add-brick-command-failure.patch | 300
-rw-r--r-- 0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch | 152
-rw-r--r-- 0455-locks-prevent-deletion-of-locked-entries.patch | 1253
-rw-r--r-- 0456-add-clean-local-after-grant-lock.patch | 74
-rw-r--r-- 0457-cluster-ec-Improve-detection-of-new-heals.patch | 409
-rw-r--r-- 0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch | 182
-rw-r--r-- 0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch | 181
-rw-r--r-- 0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch | 152
-rw-r--r-- 0461-geo-replication-Fix-IPv6-parsing.patch | 127
-rw-r--r-- 0462-Issue-with-gf_fill_iatt_for_dirent.patch | 43
-rw-r--r-- 0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch | 87
-rw-r--r-- 0464-storage-posix-Remove-nr_files-usage.patch | 102
-rw-r--r-- 0465-posix-Implement-a-janitor-thread-to-close-fd.patch | 384
-rw-r--r-- 0466-cluster-ec-Change-stale-index-handling.patch | 68
-rw-r--r-- 0467-build-Added-dependency-for-glusterfs-selinux.patch | 38
-rw-r--r-- 0468-build-Update-the-glusterfs-selinux-version.patch | 36
-rw-r--r-- 0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch | 33
-rw-r--r-- 0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch | 63
-rw-r--r-- 0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch | 73
-rw-r--r-- 0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch | 98
-rw-r--r-- 0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch | 75
-rw-r--r-- 0474-features-locks-posixlk-clear-lock-should-set-error-a.patch | 49
-rw-r--r-- 0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch | 46
-rw-r--r-- 0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch | 114
-rw-r--r-- 0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch | 51
-rw-r--r-- 0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch | 119
-rw-r--r-- 0479-ganesha-ha-revised-regex-exprs-for-status.patch | 53
-rw-r--r-- 0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch | 255
-rw-r--r-- 0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch | 33
-rw-r--r-- 0482-logger-Always-print-errors-in-english.patch | 49
-rw-r--r-- 0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch | 150
-rw-r--r-- 0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch | 90
-rw-r--r-- 0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch | 87
-rw-r--r-- 0486-glusterd-brick-sock-file-deleted-log-error-1560.patch | 87
-rw-r--r-- 0487-Events-Log-file-not-re-opened-after-logrotate.patch | 56
-rw-r--r-- 0488-glusterd-afr-enable-granular-entry-heal-by-default.patch | 864
-rw-r--r-- 0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch | 141
-rw-r--r-- 0490-Segmentation-fault-occurs-during-truncate.patch | 57
-rw-r--r-- 0491-glusterd-mount-directory-getting-truncated-on-mounti.patch | 56
-rw-r--r-- 0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch | 188
-rw-r--r-- 0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch | 283
-rw-r--r-- 0494-glusterd-start-the-brick-on-a-different-port.patch | 54
-rw-r--r-- 0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch | 60
-rw-r--r-- 0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch | 139
-rw-r--r-- 0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch | 80
-rw-r--r-- 0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch | 69
-rw-r--r-- 0499-gfapi-give-appropriate-error-when-size-exceeds.patch | 63
-rw-r--r-- 0500-features-shard-Convert-shard-block-indices-to-uint64.patch | 104
-rw-r--r-- 0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch | 48
-rw-r--r-- 0502-dht-fixing-a-permission-update-issue.patch | 225
-rw-r--r-- 0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch | 179
-rw-r--r-- 0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch | 109
-rw-r--r-- 0505-trash-Create-inode_table-only-while-feature-is-enabl.patch | 107
-rw-r--r-- 0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch | 499
-rw-r--r-- 0507-inode-make-critical-section-smaller.patch | 764
-rw-r--r-- 0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch | 232
-rw-r--r-- 0509-core-configure-optimum-inode-table-hash_size-for-shd.patch | 407
-rw-r--r-- 0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch | 784
-rw-r--r-- 0511-features-shard-Missing-format-specifier.patch | 39
-rw-r--r-- 0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch | 105
-rw-r--r-- 0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch | 191
-rw-r--r-- 0514-afr-event-gen-changes.patch | 308
-rw-r--r-- 0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch | 2155
-rw-r--r-- 0516-afr-return-EIO-for-gfid-split-brains.patch | 338
-rw-r--r-- 0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch | 388
-rw-r--r-- 0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch | 41
-rw-r--r-- 0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch | 58
-rw-r--r-- 0520-performance-open-behind-seek-fop-should-open_and_res.patch | 70
-rw-r--r-- 0521-open-behind-fix-missing-fd-reference.patch | 121
-rw-r--r-- 0522-lcov-improve-line-coverage.patch | 746
-rw-r--r-- 0523-open-behind-rewrite-of-internal-logic.patch | 2720
-rw-r--r-- 0524-open-behind-fix-call_frame-leak.patch | 70
-rw-r--r-- 0525-open-behind-implement-create-fop.patch | 109
-rw-r--r-- 0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch | 44
-rw-r--r-- 0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch | 200
-rw-r--r-- 0528-Extras-Removing-xattr_analysis-script.patch | 134
-rw-r--r-- 0529-geo-rep-prompt-should-work-for-ignore_deletes.patch | 75
-rw-r--r-- 0530-gfapi-avoid-crash-while-logging-message.patch | 41
-rw-r--r-- 0531-Glustereventsd-Default-port-change-2091.patch | 69
-rw-r--r-- 0532-glusterd-fix-for-starting-brick-on-new-port.patch | 79
-rw-r--r-- 0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch | 250
-rw-r--r-- 0534-glusterd-Resolve-use-after-free-bug-2181.patch | 47
-rw-r--r-- 0535-multiple-files-use-dict_allocate_and_serialize-where.patch | 270
-rw-r--r-- 0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch | 102
-rw-r--r-- 0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch | 387
-rw-r--r-- 0538-afr-fix-coverity-issue-introduced-by-90cefde.patch | 46
-rw-r--r-- 0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch | 62
-rw-r--r-- 0540-extras-Disable-write-behind-for-group-samba.patch | 37
-rw-r--r-- 0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch | 545
-rw-r--r-- 0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch | 64
-rw-r--r-- 0543-glusterd-handle-custom-xlator-failure-cases.patch | 162
-rw-r--r-- 0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch | 2389
-rw-r--r-- 0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch | 250
-rw-r--r-- glusterfs.spec | 2823
-rw-r--r-- sources | 1
547 files changed, 136732 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index e69de29..e9935f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/glusterfs-6.0.tar.gz
diff --git a/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch b/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch
new file mode 100644
index 0000000..9ca880d
--- /dev/null
+++ b/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch
@@ -0,0 +1,1557 @@
+From 78060c16f88594b3424e512a9ef0e4a8f56e88c3 Mon Sep 17 00:00:00 2001
+From: Kaushal M <kmadappa@redhat.com>
+Date: Thu, 6 Dec 2018 15:04:16 +0530
+Subject: [PATCH 02/52] glusterd: fix op-versions for RHS backwards
+ compatability
+
+Backport of https://code.engineering.redhat.com/gerrit/#/c/60485/
+
+This change fixes the op-versions of different features and checks to maintain
+backwards compatibility with RHS-3.0 and earlier.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Icb282444da179b12fbd6ed9f491514602f1a38c2
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70348
+---
+ libglusterfs/src/glusterfs/globals.h | 45 +++--
+ rpc/rpc-transport/socket/src/socket.c | 4 +-
+ xlators/cluster/dht/src/dht-shared.c | 6 +-
+ xlators/debug/io-stats/src/io-stats.c | 16 +-
+ xlators/features/barrier/src/barrier.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 14 +-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 16 +-
+ xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 6 +-
+ xlators/mgmt/glusterd/src/glusterd-sm.c | 2 +-
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 12 +-
+ xlators/mgmt/glusterd/src/glusterd-snapshot.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-store.c | 27 +--
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-tier.c | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 12 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 192 +++++++++++----------
+ xlators/protocol/client/src/client.c | 4 +-
+ xlators/protocol/server/src/server.c | 6 +-
+ xlators/storage/posix/src/posix-common.c | 4 +-
+ 24 files changed, 214 insertions(+), 197 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 8d898c3..b9da872 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -23,23 +23,28 @@
+ #define GF_AVOID_OVERWRITE "glusterfs.avoid.overwrite"
+ #define GF_CLEAN_WRITE_PROTECTION "glusterfs.clean.writexattr"
+
+-/* Gluster versions - OP-VERSION mapping
++/* RHS versions - OP-VERSION mapping
+ *
+- * 3.3.x - 1
+- * 3.4.x - 2
+- * 3.5.0 - 3
+- * 3.5.1 - 30501
+- * 3.6.0 - 30600
+- * 3.7.0 - 30700
+- * 3.7.1 - 30701
+- * 3.7.2 - 30702
++ * RHS-2.0 Z - 1
++ * RHS-2.1 Z - 2
++ * RHS-2.1 u5 - 20105
++ * RHS-3.0 - 30000
++ * RHS-3.0.4 - 30004
++ * RHGS-3.1 - 30702
+ *
+- * Starting with Gluster v3.6, the op-version will be multi-digit integer values
+- * based on the Glusterfs version, instead of a simply incrementing integer
+- * value. The op-version for a given X.Y.Z release will be an integer XYZ, with
+- * Y and Z 2 digit always 2 digits wide and padded with 0 when needed. This
+- * should allow for some gaps between two Y releases for backports of features
+- * in Z releases.
++ *
++ * NOTE:
++ * Starting with RHS-3.0, the op-version will be multi-digit integer values
++ * based on the RHS version, instead of a simply incrementing integer value. The
++ * op-version for a given RHS X(Major).Y(Minor).Z(Update) release will be an
++ * integer with digits XYZ. The Y and Z values will be 2 digits wide always
++ * padded with 0 as needed. This should allow for some gaps between two Y
++ * releases for backports of features in Z releases.
++ *
++ * NOTE:
++ * Starting with RHGS-3.1, the op-version will be the same as the upstream
++ * GlusterFS op-versions. This is to allow proper access to upstream clients of
++ * version 3.7.x or greater, proper access to the RHGS volumes.
+ */
+ #define GD_OP_VERSION_MIN \
+ 1 /* MIN is the fresh start op-version, mostly \
+@@ -51,7 +56,13 @@
+ introduction of newer \
+ versions */
+
+-#define GD_OP_VERSION_3_6_0 30600 /* Op-Version for GlusterFS 3.6.0 */
++#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */
++
++#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_RHS_3_0
++
++#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */
++
++#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */
+
+ #define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */
+
+@@ -115,8 +126,6 @@
+
+ #define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */
+
+-#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0
+-
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+
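The globals.h hunk above is the heart of the patch: it swaps the upstream op-version table for RHS/RHGS values and pins macros such as GD_OP_VERSION_RHS_3_0 (30000) and GD_OP_VERSION_RHS_3_0_4 (30004). The encoding the comment describes is a plain positional scheme: an X.Y.Z release maps to the integer X*10000 + Y*100 + Z, i.e. Y and Z are always two digits, zero-padded. A minimal standalone C sketch of that mapping (encode_op_version() is an illustrative helper, not a glusterd function):

#include <stdio.h>

/* Illustrative helper, not part of glusterd: the XYZ op-version
 * encoding described in the globals.h comment above. */
static int
encode_op_version(int x, int y, int z)
{
    return x * 10000 + y * 100 + z;
}

int
main(void)
{
    /* RHS 3.0.4 -> 30004, matching GD_OP_VERSION_RHS_3_0_4 */
    printf("%d\n", encode_op_version(3, 0, 4));
    /* GlusterFS 3.7.0 -> 30700, matching GD_OP_VERSION_3_7_0 */
    printf("%d\n", encode_op_version(3, 7, 0));
    return 0;
}

The RHS 2.1 update 5 value, GD_OP_VERSION_RHS_2_1_5 = 20105, follows the same rule with X=2, Y=1, Z=5.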
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index fa0e0f2..121d46b 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -4704,7 +4704,7 @@ struct volume_options options[] = {
+ .description = "SSL CA list. Ignored if SSL is not enabled."},
+ {.key = {"ssl-cert-depth"},
+ .type = GF_OPTION_TYPE_INT,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Maximum certificate-chain depth. If zero, the "
+ "peer's certificate itself must be in the local "
+@@ -4713,7 +4713,7 @@ struct volume_options options[] = {
+ "local list. Ignored if SSL is not enabled."},
+ {.key = {"ssl-cipher-list"},
+ .type = GF_OPTION_TYPE_STR,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Allowed SSL ciphers. Ignored if SSL is not enabled."},
+ {.key = {"ssl-dh-param"},
+diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
+index c7ef2f1..ea4b7c6 100644
+--- a/xlators/cluster/dht/src/dht-shared.c
++++ b/xlators/cluster/dht/src/dht-shared.c
+@@ -1064,7 +1064,7 @@ struct volume_options dht_options[] = {
+ "When enabled, files will be allocated to bricks "
+ "with a probability proportional to their size. Otherwise, all "
+ "bricks will have the same probability (legacy behavior).",
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+@@ -1161,7 +1161,7 @@ struct volume_options dht_options[] = {
+ "from which hash ranges are allocated starting with 0. "
+ "Note that we still use a directory/file's name to determine the "
+ "subvolume to which it hashes",
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ },
+
+ {.key = {"rebal-throttle"},
+@@ -1174,7 +1174,7 @@ struct volume_options dht_options[] = {
+ "migrated at a time. Lazy will allow only one file to "
+ "be migrated at a time and aggressive will allow "
+ "max of [($(processing units) - 4) / 2), 4]",
+- .op_version = {GD_OP_VERSION_3_7_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+
+diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
+index f12191f..41b57c5 100644
+--- a/xlators/debug/io-stats/src/io-stats.c
++++ b/xlators/debug/io-stats/src/io-stats.c
+@@ -4333,7 +4333,7 @@ struct volume_options options[] = {
+ .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}},
+ {.key = {"client-logger"},
+ .type = GF_OPTION_TYPE_STR,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = GF_LOGGER_GLUSTER_LOG,
+@@ -4342,7 +4342,7 @@ struct volume_options options[] = {
+ .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}},
+ {.key = {"brick-logger"},
+ .type = GF_OPTION_TYPE_STR,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = GF_LOGGER_GLUSTER_LOG,
+@@ -4354,7 +4354,7 @@ struct volume_options options[] = {
+ .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}},
+ {.key = {"client-log-format"},
+ .type = GF_OPTION_TYPE_STR,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = GF_LOG_FORMAT_WITH_MSG_ID,
+@@ -4362,7 +4362,7 @@ struct volume_options options[] = {
+ .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}},
+ {.key = {"brick-log-format"},
+ .type = GF_OPTION_TYPE_STR,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = GF_LOG_FORMAT_WITH_MSG_ID,
+@@ -4377,7 +4377,7 @@ struct volume_options options[] = {
+ },
+ {.key = {"client-log-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = GF_LOG_LRU_BUFSIZE_MIN,
+@@ -4388,7 +4388,7 @@ struct volume_options options[] = {
+ " the value of the option client-log-flush-timeout."},
+ {.key = {"brick-log-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = GF_LOG_LRU_BUFSIZE_MIN,
+@@ -4406,7 +4406,7 @@ struct volume_options options[] = {
+ },
+ {.key = {"client-log-flush-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = GF_LOG_FLUSH_TIMEOUT_MIN,
+@@ -4417,7 +4417,7 @@ struct volume_options options[] = {
+ " the value of the option client-log-flush-timeout."},
+ {.key = {"brick-log-flush-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = GF_LOG_FLUSH_TIMEOUT_MIN,
+diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c
+index a601c7f..0923992 100644
+--- a/xlators/features/barrier/src/barrier.c
++++ b/xlators/features/barrier/src/barrier.c
+@@ -774,7 +774,7 @@ struct volume_options options[] = {
+ {.key = {"barrier"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "disable",
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "When \"enabled\", blocks acknowledgements to application "
+ "for file operations such as rmdir, rename, unlink, "
+@@ -784,7 +784,7 @@ struct volume_options options[] = {
+ {.key = {"barrier-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = BARRIER_TIMEOUT,
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "After 'timeout' seconds since the time 'barrier' "
+ "option was set to \"on\", acknowledgements to file "
+diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+index 38483a1..ad9a572 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+@@ -1195,7 +1195,7 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+- if (conf->op_version >= GD_OP_VERSION_3_6_0) {
++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
+ brick_mount_dir = NULL;
+
+ snprintf(key, sizeof(key), "brick%d.mount_dir", i);
+@@ -1729,7 +1729,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+- if (conf->op_version >= GD_OP_VERSION_3_6_0) {
++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
+ ret = glusterd_get_brick_mount_dir(
+ brickinfo->path, brickinfo->hostname, brickinfo->mount_dir);
+ if (ret) {
+@@ -2085,12 +2085,12 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
+ }
+
+ /* Check if the connected clients are all of version
+- * glusterfs-3.6 and higher. This is needed to prevent some data
++ * RHS-2.1u5 and higher. This is needed to prevent some data
+ * loss issues that could occur when older clients are connected
+ * when rebalance is run.
+ */
+ ret = glusterd_check_client_op_version_support(
+- volname, GD_OP_VERSION_3_6_0, NULL);
++ volname, GD_OP_VERSION_RHS_2_1_5, NULL);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume %s has one or "
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index f754b52..387643d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -763,7 +763,7 @@ glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ }
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+- if (priv->op_version < GD_OP_VERSION_3_6_0) {
++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = glusterd_lock(MY_UUID);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL,
+@@ -818,7 +818,7 @@ glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ local_locking_done:
+ /* If no volname is given as a part of the command, locks will
+ * not be held, hence sending stage event. */
+- if (volname || (priv->op_version < GD_OP_VERSION_3_6_0))
++ if (volname || (priv->op_version < GD_OP_VERSION_RHS_3_0))
+ event_type = GD_OP_EVENT_START_LOCK;
+ else {
+ txn_op_info.state.state = GD_OP_STATE_LOCK_SENT;
+@@ -849,7 +849,7 @@ out:
+ if (locked && ret) {
+ /* Based on the op-version, we release the
+ * cluster or mgmt_v3 lock */
+- if (priv->op_version < GD_OP_VERSION_3_6_0)
++ if (priv->op_version < GD_OP_VERSION_RHS_3_0)
+ glusterd_unlock(MY_UUID);
+ else {
+ ret = glusterd_mgmt_v3_unlock(volname, MY_UUID, "vol");
+@@ -4432,12 +4432,12 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req)
+ }
+
+ if ((cmd & GF_CLI_STATUS_SNAPD) &&
+- (conf->op_version < GD_OP_VERSION_3_6_0)) {
++ (conf->op_version < GD_OP_VERSION_RHS_3_0)) {
+ snprintf(err_str, sizeof(err_str),
+ "The cluster is operating "
+ "at a lesser version than %d. Getting the status of "
+ "snapd is not allowed in this state",
+- GD_OP_VERSION_3_6_0);
++ GD_OP_VERSION_RHS_3_0);
+ ret = -1;
+ goto out;
+ }
+@@ -4459,7 +4459,7 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req)
+ "The cluster is operating "
+ "at a lesser version than %d. Getting the status of "
+ "tierd is not allowed in this state",
+- GD_OP_VERSION_3_6_0);
++ GD_OP_VERSION_RHS_3_0);
+ ret = -1;
+ goto out;
+ }
+@@ -6430,7 +6430,7 @@ __glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata,
+ glusterd_friend_sm_state_name_get(peerinfo->state.state));
+
+ if (peerinfo->connected) {
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ glusterd_get_lock_owner(&uuid);
+ if (!gf_uuid_is_null(uuid) &&
+ !gf_uuid_compare(peerinfo->uuid, uuid))
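A pattern repeated through the glusterd-handler.c hunks above: a request is gated on the cluster's effective op-version, which only rises once every peer has been upgraded, so mixed-version clusters keep the old behavior (cluster locks instead of mgmt_v3 locks, no snapd status, and so on). A condensed standalone C sketch of that gate, with cluster_op_version standing in for glusterd's conf->op_version:

#include <stdio.h>

#define GD_OP_VERSION_RHS_3_0 30000

/* Hypothetical stand-in for conf->op_version; the real value is
 * negotiated cluster-wide and rises only after all peers upgrade. */
static int cluster_op_version = 20105; /* e.g. an RHS 2.1u5 cluster */

int
main(void)
{
    /* Mirrors the gate in __glusterd_handle_status_volume() above:
     * snapd status is refused below the RHS 3.0 op-version. */
    if (cluster_op_version < GD_OP_VERSION_RHS_3_0) {
        fprintf(stderr,
                "The cluster is operating at a lesser version than %d. "
                "Getting the status of snapd is not allowed in this "
                "state\n",
                GD_OP_VERSION_RHS_3_0);
        return 1;
    }
    return 0;
}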
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 6495a9d..dd3f9eb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -194,7 +194,7 @@ glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id)
+ if (!*txn_id)
+ goto out;
+
+- if (priv->op_version < GD_OP_VERSION_3_6_0)
++ if (priv->op_version < GD_OP_VERSION_RHS_3_0)
+ gf_uuid_copy(**txn_id, priv->global_txn_id);
+ else
+ gf_uuid_generate(**txn_id);
+@@ -1864,12 +1864,12 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
+ }
+
+ if ((cmd & GF_CLI_STATUS_SNAPD) &&
+- (priv->op_version < GD_OP_VERSION_3_6_0)) {
++ (priv->op_version < GD_OP_VERSION_RHS_3_0)) {
+ snprintf(msg, sizeof(msg),
+ "The cluster is operating at "
+ "version less than %d. Getting the "
+ "status of snapd is not allowed in this state.",
+- GD_OP_VERSION_3_6_0);
++ GD_OP_VERSION_RHS_3_0);
+ ret = -1;
+ goto out;
+ }
+@@ -3877,7 +3877,7 @@ glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx)
+ continue;
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+- if (priv->op_version < GD_OP_VERSION_3_6_0) {
++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) {
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK];
+ if (proc->fn) {
+ ret = proc->fn(NULL, this, peerinfo);
+@@ -3980,7 +3980,7 @@ glusterd_op_ac_send_unlock(glusterd_op_sm_event_t *event, void *ctx)
+ continue;
+ /* Based on the op_version,
+ * release the cluster or mgmt_v3 lock */
+- if (priv->op_version < GD_OP_VERSION_3_6_0) {
++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) {
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK];
+ if (proc->fn) {
+ ret = proc->fn(NULL, this, peerinfo);
+@@ -4957,7 +4957,7 @@ glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx)
+ count = brick_index_max + other_count + 1;
+
+ /*
+- * a glusterd lesser than version 3.7 will be sending the
++ * a glusterd lesser than version RHS-3.0.4 will be sending the
+ * rdma port in older key. Changing that value from here
+ * to support backward compatibility
+ */
+@@ -4977,7 +4977,7 @@ glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx)
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret)
+ goto out;
+- if (conf->op_version < GD_OP_VERSION_3_7_0 &&
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0_4 &&
+ volinfo->transport_type == GF_TRANSPORT_RDMA) {
+ ret = glusterd_op_modify_port_key(op_ctx, brick_index_max);
+ if (ret)
+@@ -5576,7 +5576,7 @@ glusterd_op_txn_complete(uuid_t *txn_id)
+ glusterd_op_clear_errstr();
+
+ /* Based on the op-version, we release the cluster or mgmt_v3 lock */
+- if (priv->op_version < GD_OP_VERSION_3_6_0) {
++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = glusterd_unlock(MY_UUID);
+ /* unlock can't/shouldn't fail here!! */
+ if (ret)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
+index 5b5959e..f24c86e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
+@@ -547,7 +547,7 @@ out:
+ * @prefix. All the parameters are compulsory.
+ *
+ * The complete address list is added to the dict only if the cluster op-version
+- * is >= GD_OP_VERSION_3_6_0
++ * is >= GD_OP_VERSION_3_7_0
+ */
+ int
+ gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict,
+@@ -593,7 +593,7 @@ gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict,
+ goto out;
+ }
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_3_7_0) {
+ ret = 0;
+ goto out;
+ }
+@@ -778,7 +778,7 @@ gd_update_peerinfo_from_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
+ GF_FREE(peerinfo->hostname);
+ peerinfo->hostname = gf_strdup(hostname);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_3_7_0) {
+ ret = 0;
+ goto out;
+ }
+@@ -894,7 +894,7 @@ gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_3_7_0) {
+ ret = 0;
+ goto out;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+index 34b0294..6365b6e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+@@ -792,13 +792,13 @@ glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr)
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ /* Check if the connected clients are all of version
+- * glusterfs-3.6 and higher. This is needed to prevent some data
++ * RHS-2.1u5 and higher. This is needed to prevent some data
+ * loss issues that could occur when older clients are connected
+ * when rebalance is run. This check can be bypassed by using
+ * 'force'
+ */
+ ret = glusterd_check_client_op_version_support(
+- volname, GD_OP_VERSION_3_6_0, NULL);
++ volname, GD_OP_VERSION_RHS_2_1_5, NULL);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume %s has one or "
+diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+index ca1de1a..0615081 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
++++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+@@ -297,7 +297,7 @@ glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr,
+ if (ret)
+ goto out;
+
+- } else if (priv->op_version >= GD_OP_VERSION_3_6_0) {
++ } else if (priv->op_version >= GD_OP_VERSION_RHS_3_0) {
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+@@ -396,7 +396,7 @@ glusterd_op_perform_replace_brick(glusterd_volinfo_t *volinfo, char *old_brick,
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+- if (conf->op_version >= GD_OP_VERSION_3_6_0) {
++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
+ ret = dict_get_strn(dict, "brick1.mount_dir", SLEN("brick1.mount_dir"),
+ &brick_mount_dir);
+ if (ret) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+index 728781d..4ec9700 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+@@ -288,11 +288,11 @@ __glusterd_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ * we need to add the new hostname to the peer.
+ *
+ * This addition should only be done for cluster op-version >=
+- * GD_OP_VERSION_3_6_0 as address lists are only supported from then on.
++ * GD_OP_VERSION_3_7_0 as address lists are only supported from then on.
+ * Also, this update should only be done when an explicit CLI probe
+ * command was used to begin the probe process.
+ */
+- if ((conf->op_version >= GD_OP_VERSION_3_6_0) &&
++ if ((conf->op_version >= GD_OP_VERSION_3_7_0) &&
+ (gf_uuid_compare(rsp.uuid, peerinfo->uuid) == 0)) {
+ ctx = ((call_frame_t *)myframe)->local;
+ /* Presence of ctx->req implies this probe was started by a cli
+@@ -1544,7 +1544,7 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
+ goto out;
+ }
+
+- if (priv->op_version >= GD_OP_VERSION_3_6_0) {
++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) {
+ ret = glusterd_add_missed_snaps_to_export_dict(peer_data);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
+index 73a11a3..54a7bd1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
+@@ -955,7 +955,7 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx)
+
+ /* Compare missed_snapshot list with the peer *
+ * if volume comparison is successful */
+- if ((op_ret == 0) && (conf->op_version >= GD_OP_VERSION_3_6_0)) {
++ if ((op_ret == 0) && (conf->op_version >= GD_OP_VERSION_RHS_3_0)) {
+ ret = glusterd_import_friend_missed_snap_list(ev_ctx->vols);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+index 1ece374..2958443 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+@@ -470,7 +470,7 @@ gd_add_brick_snap_details_to_dict(dict_t *dict, char *prefix,
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = 0;
+ goto out;
+ }
+@@ -547,7 +547,7 @@ gd_add_vol_snap_details_to_dict(dict_t *dict, char *prefix,
+ GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = 0;
+ goto out;
+ }
+@@ -846,7 +846,7 @@ gd_import_new_brick_snap_details(dict_t *dict, char *prefix,
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = 0;
+ goto out;
+ }
+@@ -903,8 +903,8 @@ out:
+ * Imports the snapshot details of a volume if required and available
+ *
+ * Snapshot details will be imported only if cluster.op_version is greater than
+- * or equal to GD_OP_VERSION_3_6_0, the op-version from which volume snapshot is
+- * supported.
++ * or equal to GD_OP_VERSION_RHS_3_0, the op-version from which volume snapshot
++ * is supported.
+ */
+ int
+ gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo,
+@@ -928,7 +928,7 @@ gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo,
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (volname != NULL), out);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = 0;
+ goto out;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+index 8f5cd6d..c56be91 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+@@ -9345,14 +9345,14 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req)
+ goto out;
+ }
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ snprintf(err_str, sizeof(err_str),
+ "Cluster operating version"
+ " is lesser than the supported version "
+ "for a snapshot");
+ op_errno = EG_OPNOTSUP;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+- "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_3_6_0);
++ "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_RHS_3_0);
+ ret = -1;
+ goto out;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 7acea05..64447e7 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -313,7 +313,7 @@ gd_store_brick_snap_details_write(int fd, glusterd_brickinfo_t *brickinfo)
+ GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out);
+ GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = 0;
+ goto out;
+ }
+@@ -813,7 +813,7 @@ glusterd_volume_write_snap_details(int fd, glusterd_volinfo_t *volinfo)
+ GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out);
+ GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = 0;
+ goto out;
+ }
+@@ -967,7 +967,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+ goto out;
+ }
+
+- if (conf->op_version >= GD_OP_VERSION_3_6_0) {
++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
+ snprintf(buf, sizeof(buf), "%d", volinfo->disperse_count);
+ ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, buf);
+ if (ret)
+@@ -2502,7 +2502,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo)
+ conf = THIS->private;
+ GF_ASSERT(volinfo);
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0) {
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
+ ret = 0;
+ goto out;
+ }
+@@ -2510,15 +2510,16 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo)
+ /*
+ * This is needed for upgrade situations. Say a volume is created with
+ * older version of glusterfs and upgraded to a glusterfs version equal
+- * to or greater than GD_OP_VERSION_3_6_0. The older glusterd would not
+- * have created the snapd.info file related to snapshot daemon for user
+- * serviceable snapshots. So as part of upgrade when the new glusterd
+- * starts, as part of restore (restoring the volume to be precise), it
+- * tries to snapd related info from snapd.info file. But since there was
+- * no such file till now, the restore operation fails. Thus, to prevent
+- * it from happening check whether user serviceable snapshots features
+- * is enabled before restoring snapd. If its disabled, then simply
+- * exit by returning success (without even checking for the snapd.info).
++ * to or greater than GD_OP_VERSION_RHS_3_0. The older glusterd would
++ * not have created the snapd.info file related to snapshot daemon for
++ * user serviceable snapshots. So as part of upgrade when the new
++ * glusterd starts, as part of restore (restoring the volume to be
++ * precise), it tries to fetch snapd related info from snapd.info. But
++ * since there was no such file till now, the restore operation fails.
++ * Thus, to prevent it from happening, check whether the user
++ * serviceable snapshots feature is enabled before restoring snapd. If
++ * it's disabled, then simply exit by returning success (without even
++ * checking for the snapd.info).
+ */
+
+ if (!dict_get_str_boolean(volinfo->dict, "features.uss", _gf_false)) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 45b221c..1741cf8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1827,7 +1827,7 @@ gd_sync_task_begin(dict_t *op_ctx, rpcsvc_request_t *req)
+ goto out;
+ }
+
+- if (conf->op_version < GD_OP_VERSION_3_6_0)
++ if (conf->op_version < GD_OP_VERSION_RHS_3_0)
+ cluster_lock = _gf_true;
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c
+index dd86cf5..4dc0d44 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-tier.c
++++ b/xlators/mgmt/glusterd/src/glusterd-tier.c
+@@ -867,7 +867,8 @@ glusterd_op_stage_tier(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ * when rebalance is run. This check can be bypassed by using
+ * 'force'
+ */
+- ret = glusterd_check_client_op_version_support(volname, GD_OP_VERSION_3_6_0,
++ ret = glusterd_check_client_op_version_support(volname,
++ GD_OP_VERSION_RHS_3_0,
+ NULL);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 8bbd795..52b83ec 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -12226,10 +12226,10 @@ gd_update_volume_op_versions(glusterd_volinfo_t *volinfo)
+ }
+
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+- if (volinfo->op_version < GD_OP_VERSION_3_6_0)
+- volinfo->op_version = GD_OP_VERSION_3_6_0;
+- if (volinfo->client_op_version < GD_OP_VERSION_3_6_0)
+- volinfo->client_op_version = GD_OP_VERSION_3_6_0;
++ if (volinfo->op_version < GD_OP_VERSION_3_7_0)
++ volinfo->op_version = GD_OP_VERSION_3_7_0;
++ if (volinfo->client_op_version < GD_OP_VERSION_3_7_0)
++ volinfo->client_op_version = GD_OP_VERSION_3_7_0;
+ }
+
+ return;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 7cfba3d..86ef470 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -1389,7 +1389,7 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+- if (priv->op_version >= GD_OP_VERSION_3_6_0) {
++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) {
+ ret = glusterd_get_brick_mount_dir(brick_info->path,
+ brick_info->hostname,
+ brick_info->mount_dir);
+@@ -1698,7 +1698,7 @@ glusterd_op_stage_start_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+- if (priv->op_version >= GD_OP_VERSION_3_6_0) {
++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) {
+ if (strlen(brickinfo->mount_dir) < 1) {
+ ret = glusterd_get_brick_mount_dir(
+ brickinfo->path, brickinfo->hostname, brickinfo->mount_dir);
+@@ -2395,10 +2395,10 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)
+ volname);
+ goto out;
+ }
+- if (priv->op_version < GD_OP_VERSION_3_6_0) {
++ if (priv->op_version < GD_OP_VERSION_3_7_0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+ "Disperse volume "
+- "needs op-version 3.6.0 or higher");
++ "needs op-version 30700 or higher");
+ ret = -1;
+ goto out;
+ }
+@@ -2494,7 +2494,7 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+- if (priv->op_version >= GD_OP_VERSION_3_6_0) {
++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) {
+ brick_mount_dir = NULL;
+ ret = snprintf(key, sizeof(key), "brick%d.mount_dir", i);
+ ret = dict_get_strn(dict, key, ret, &brick_mount_dir);
+@@ -2703,7 +2703,7 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr)
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+- if (conf->op_version >= GD_OP_VERSION_3_6_0) {
++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ brick_count++;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index dc58e11..d07fc10 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -807,7 +807,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "cluster.subvols-per-directory",
+ .voltype = "cluster/distribute",
+ .option = "directory-layout-spread",
+- .op_version = 2,
++ .op_version = 1,
+ .validate_fn = validate_subvols_per_directory,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "cluster.readdir-optimize",
+@@ -817,25 +817,25 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "cluster.rsync-hash-regex",
+ .voltype = "cluster/distribute",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = 2,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "cluster.extra-hash-regex",
+ .voltype = "cluster/distribute",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = 2,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "cluster.dht-xattr-name",
+ .voltype = "cluster/distribute",
+ .option = "xattr-name",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = 2,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "cluster.randomize-hash-range-by-gfid",
+ .voltype = "cluster/distribute",
+ .option = "randomize-hash-range-by-gfid",
+ .type = NO_DOC,
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT,
+ },
+ {
+@@ -877,12 +877,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "cluster/nufa",
+ .option = "local-volume-name",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "cluster.weighted-rebalance",
+ .voltype = "cluster/distribute",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_3_7_0,
+ },
+
+ /* Switch xlator options (Distribute special case) */
+@@ -890,13 +890,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "cluster/distribute",
+ .option = "!switch",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "cluster.switch-pattern",
+ .voltype = "cluster/switch",
+ .option = "pattern.switch.case",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+
+ /* AFR xlator options */
+@@ -1014,16 +1014,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "cluster.readdir-failover",
+ .voltype = "cluster/replicate",
+- .op_version = 2,
++ .op_version = 1,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "cluster.ensure-durability",
+ .voltype = "cluster/replicate",
+- .op_version = 3,
++ .op_version = 2,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "cluster.consistent-metadata",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+- .op_version = GD_OP_VERSION_3_7_0,
++ .op_version = GD_OP_VERSION_RHS_3_0_4,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "cluster.heal-wait-queue-length",
+ .voltype = "cluster/replicate",
+@@ -1080,45 +1080,45 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .key = "diagnostics.brick-logger",
+ .voltype = "debug/io-stats",
+ .option = "!logger",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {.key = "diagnostics.client-logger",
+ .voltype = "debug/io-stats",
+ .option = "!logger",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "diagnostics.brick-log-format",
+ .voltype = "debug/io-stats",
+ .option = "!log-format",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {.key = "diagnostics.client-log-format",
+ .voltype = "debug/io-stats",
+ .option = "!log-format",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "diagnostics.brick-log-buf-size",
+ .voltype = "debug/io-stats",
+ .option = "!log-buf-size",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {.key = "diagnostics.client-log-buf-size",
+ .voltype = "debug/io-stats",
+ .option = "!log-buf-size",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "diagnostics.brick-log-flush-timeout",
+ .voltype = "debug/io-stats",
+ .option = "!log-flush-timeout",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {.key = "diagnostics.client-log-flush-timeout",
+ .voltype = "debug/io-stats",
+ .option = "!log-flush-timeout",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "diagnostics.stats-dump-interval",
+ .voltype = "debug/io-stats",
+@@ -1203,6 +1203,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "performance/io-threads",
+ .option = "pass-through",
+ .op_version = GD_OP_VERSION_4_1_0},
++ {.key = "performance.least-rate-limit",
++ .voltype = "performance/io-threads",
++ .op_version = 1
++ },
+
+ /* Other perf xlators' options */
+ {.key = "performance.io-cache-pass-through",
+@@ -1237,12 +1241,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "performance.nfs.flush-behind",
+ .voltype = "performance/write-behind",
+ .option = "flush-behind",
+- .op_version = 1,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.write-behind-window-size",
+ .voltype = "performance/write-behind",
+ .option = "cache-size",
+- .op_version = 1,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "performance.resync-failed-syncs-after-fsync",
+@@ -1262,27 +1266,27 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "performance.nfs.write-behind-window-size",
+ .voltype = "performance/write-behind",
+ .option = "cache-size",
+- .op_version = 1,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.strict-o-direct",
+ .voltype = "performance/write-behind",
+ .option = "strict-O_DIRECT",
+- .op_version = 2,
++ .op_version = 1,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.nfs.strict-o-direct",
+ .voltype = "performance/write-behind",
+ .option = "strict-O_DIRECT",
+- .op_version = 2,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.strict-write-ordering",
+ .voltype = "performance/write-behind",
+ .option = "strict-write-ordering",
+- .op_version = 2,
++ .op_version = 1,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.nfs.strict-write-ordering",
+ .voltype = "performance/write-behind",
+ .option = "strict-write-ordering",
+- .op_version = 2,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.write-behind-trickling-writes",
+ .voltype = "performance/write-behind",
+@@ -1302,12 +1306,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "performance.lazy-open",
+ .voltype = "performance/open-behind",
+ .option = "lazy-open",
+- .op_version = 3,
++ .op_version = 2,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.read-after-open",
+ .voltype = "performance/open-behind",
+ .option = "read-after-open",
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "performance.open-behind-pass-through",
+@@ -1389,22 +1393,22 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "encryption/crypt",
+ .option = "!feat",
+ .value = "off",
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .description = "enable/disable client-side encryption for "
+ "the volume.",
+ .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT},
+
+ {.key = "encryption.master-key",
+ .voltype = "encryption/crypt",
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "encryption.data-key-size",
+ .voltype = "encryption/crypt",
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "encryption.block-size",
+ .voltype = "encryption/crypt",
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+
+ /* Client xlator options */
+@@ -1431,7 +1435,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "network.remote-dio",
+ .voltype = "protocol/client",
+ .option = "filter-O_DIRECT",
+- .op_version = 2,
++ .op_version = 1,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "client.own-thread",
+@@ -1443,7 +1447,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {
+ .key = "client.event-threads",
+ .voltype = "protocol/client",
+- .op_version = GD_OP_VERSION_3_7_0,
++ .op_version = GD_OP_VERSION_RHS_3_0_4,
+ },
+ {.key = "client.tcp-user-timeout",
+ .voltype = "protocol/client",
+@@ -1501,7 +1505,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "server.root-squash",
+ .voltype = "protocol/server",
+ .option = "root-squash",
+- .op_version = 2},
++ .op_version = 1},
+ {.key = "server.all-squash",
+ .voltype = "protocol/server",
+ .option = "all-squash",
+@@ -1509,11 +1513,11 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "server.anonuid",
+ .voltype = "protocol/server",
+ .option = "anonuid",
+- .op_version = 3},
++ .op_version = GD_OP_VERSION_RHS_3_0},
+ {.key = "server.anongid",
+ .voltype = "protocol/server",
+ .option = "anongid",
+- .op_version = 3},
++ .op_version = GD_OP_VERSION_RHS_3_0},
+ {.key = "server.statedump-path",
+ .voltype = "protocol/server",
+ .option = "statedump-path",
+@@ -1522,7 +1526,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "protocol/server",
+ .option = "rpc.outstanding-rpc-limit",
+ .type = GLOBAL_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "server.ssl",
+ .voltype = "protocol/server",
+ .value = "off",
+@@ -1540,12 +1544,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ "the clients that are allowed to access the server."
+ "By default, all TLS authenticated clients are "
+ "allowed to access the server.",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ {
+ .key = "server.manage-gids",
+ .voltype = "protocol/server",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {
+ .key = "server.dynamic-auth",
+@@ -1556,12 +1560,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .key = "client.send-gids",
+ .voltype = "protocol/client",
+ .type = NO_DOC,
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {
+ .key = "server.gid-timeout",
+ .voltype = "protocol/server",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {
+ .key = "server.own-thread",
+@@ -1573,7 +1577,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {
+ .key = "server.event-threads",
+ .voltype = "protocol/server",
+- .op_version = GD_OP_VERSION_3_7_0,
++ .op_version = GD_OP_VERSION_RHS_3_0_4,
+ },
+ {
+ .key = "server.tcp-user-timeout",
+@@ -1643,13 +1647,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .key = SSL_CERT_DEPTH_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-cert-depth",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ {
+ .key = SSL_CIPHER_LIST_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-cipher-list",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ {
+ .key = SSL_DH_PARAM_OPT,
+@@ -1690,8 +1694,8 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "performance.readdir-ahead",
+ .voltype = "performance/readdir-ahead",
+ .option = "!perf",
+- .value = "on",
+- .op_version = 3,
++ .value = "off",
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .description = "enable/disable readdir-ahead translator in the volume.",
+ .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT},
+ {.key = "performance.io-cache",
+@@ -1804,7 +1808,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ /* Feature translators */
+ {.key = "features.uss",
+ .voltype = "features/snapview-server",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .value = "off",
+ .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT,
+ .validate_fn = validate_uss,
+@@ -1813,7 +1817,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+
+ {.key = "features.snapshot-directory",
+ .voltype = "features/snapview-client",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .value = ".snaps",
+ .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT,
+ .validate_fn = validate_uss_dir,
+@@ -1823,7 +1827,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+
+ {.key = "features.show-snapshot-directory",
+ .voltype = "features/snapview-client",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .value = "off",
+ .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT,
+ .description = "show entry point in readdir output of "
+@@ -1847,30 +1851,30 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "features/cdc",
+ .option = "!feat",
+ .value = "off",
+- .op_version = 3,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ .description = "enable/disable network compression translator",
+ .flags = VOLOPT_FLAG_XLATOR_OPT},
+ {.key = "network.compression.window-size",
+ .voltype = "features/cdc",
+ .option = "window-size",
+- .op_version = 3},
++ .op_version = GD_OP_VERSION_RHS_3_0},
+ {.key = "network.compression.mem-level",
+ .voltype = "features/cdc",
+ .option = "mem-level",
+- .op_version = 3},
++ .op_version = GD_OP_VERSION_RHS_3_0},
+ {.key = "network.compression.min-size",
+ .voltype = "features/cdc",
+ .option = "min-size",
+- .op_version = 3},
++ .op_version = GD_OP_VERSION_RHS_3_0},
+ {.key = "network.compression.compression-level",
+ .voltype = "features/cdc",
+ .option = "compression-level",
+- .op_version = 3},
++ .op_version = GD_OP_VERSION_RHS_3_0},
+ {.key = "network.compression.debug",
+ .voltype = "features/cdc",
+ .option = "debug",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = GD_OP_VERSION_RHS_3_0},
+ #endif
+
+ /* Quota xlator options */
+@@ -1886,28 +1890,28 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "features/quota",
+ .option = "default-soft-limit",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = 2,
+ },
+ {
+ .key = "features.soft-timeout",
+ .voltype = "features/quota",
+ .option = "soft-timeout",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = 2,
+ },
+ {
+ .key = "features.hard-timeout",
+ .voltype = "features/quota",
+ .option = "hard-timeout",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = 2,
+ },
+ {
+ .key = "features.alert-time",
+ .voltype = "features/quota",
+ .option = "alert-time",
+ .type = NO_DOC,
+- .op_version = 3,
++ .op_version = 2,
+ },
+ {
+ .key = "features.quota-deem-statfs",
+@@ -2009,22 +2013,22 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "debug/error-gen",
+ .option = "failure",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "debug.error-number",
+ .voltype = "debug/error-gen",
+ .option = "error-no",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "debug.random-failure",
+ .voltype = "debug/error-gen",
+ .option = "random-failure",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "debug.error-fops",
+ .voltype = "debug/error-gen",
+ .option = "enable",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+
+ /* NFS xlator options */
+ {.key = "nfs.enable-ino32",
+@@ -2066,7 +2070,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "nfs/server",
+ .option = "rpc.outstanding-rpc-limit",
+ .type = GLOBAL_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "nfs.port",
+ .voltype = "nfs/server",
+ .option = "nfs.port",
+@@ -2128,7 +2132,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "nfs/server",
+ .option = "nfs.acl",
+ .type = GLOBAL_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "nfs.mount-udp",
+ .voltype = "nfs/server",
+ .option = "nfs.mount-udp",
+@@ -2144,14 +2148,14 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "nfs/server",
+ .option = "nfs.rpc-statd",
+ .type = NO_DOC,
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {
+ .key = "nfs.log-level",
+ .voltype = "nfs/server",
+ .option = "nfs.log-level",
+ .type = NO_DOC,
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {.key = "nfs.server-aux-gids",
+ .voltype = "nfs/server",
+@@ -2162,27 +2166,27 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "nfs/server",
+ .option = "nfs.drc",
+ .type = GLOBAL_DOC,
+- .op_version = 3},
++ .op_version = 1},
+ {.key = "nfs.drc-size",
+ .voltype = "nfs/server",
+ .option = "nfs.drc-size",
+ .type = GLOBAL_DOC,
+- .op_version = 3},
++ .op_version = 1},
+ {.key = "nfs.read-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.read-size",
+ .type = GLOBAL_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "nfs.write-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.write-size",
+ .type = GLOBAL_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "nfs.readdir-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.readdir-size",
+ .type = GLOBAL_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "nfs.rdirplus",
+ .voltype = "nfs/server",
+ .option = "nfs.rdirplus",
+@@ -2219,7 +2223,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "features.read-only",
+ .voltype = "features/read-only",
+ .option = "read-only",
+- .op_version = 1,
++ .op_version = 2,
+ .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT},
+ {.key = "features.worm",
+ .voltype = "features/worm",
+@@ -2266,14 +2270,14 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "storage.linux-aio", .voltype = "storage/posix", .op_version = 1},
+ {.key = "storage.batch-fsync-mode",
+ .voltype = "storage/posix",
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "storage.batch-fsync-delay-usec",
+ .voltype = "storage/posix",
+- .op_version = 3},
++ .op_version = 2},
+ {
+ .key = "storage.xattr-user-namespace-mode",
+ .voltype = "storage/posix",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {.key = "storage.owner-uid",
+ .voltype = "storage/posix",
+@@ -2285,15 +2289,15 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .op_version = 1},
+ {.key = "storage.node-uuid-pathinfo",
+ .voltype = "storage/posix",
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "storage.health-check-interval",
+ .voltype = "storage/posix",
+- .op_version = 3},
++ .op_version = 2},
+ {
+ .option = "update-link-count-parent",
+ .key = "storage.build-pgfid",
+ .voltype = "storage/posix",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {
+ .option = "gfid2path",
+@@ -2363,7 +2367,9 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "storage/posix",
+ .op_version = GD_OP_VERSION_4_1_0,
+ },
+- {.key = "storage.bd-aio", .voltype = "storage/bd", .op_version = 3},
++ {.key = "storage.bd-aio",
++ .voltype = "storage/bd",
++ .op_version = GD_OP_VERSION_RHS_3_0},
+ {.key = "config.memory-accounting",
+ .voltype = "mgmt/glusterd",
+ .option = "!config",
+@@ -2385,37 +2391,37 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = GLUSTERD_QUORUM_TYPE_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "off",
+- .op_version = 2},
++ .op_version = 1},
+ {.key = GLUSTERD_QUORUM_RATIO_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "0",
+- .op_version = 2},
++ .op_version = 1},
+ /* changelog translator - global tunables */
+ {.key = "changelog.changelog",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "changelog.changelog-dir",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "changelog.encoding",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "changelog.rollover-time",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {.key = "changelog.fsync-interval",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+- .op_version = 3},
++ .op_version = 2},
+ {
+ .key = "changelog.changelog-barrier-timeout",
+ .voltype = "features/changelog",
+ .value = BARRIER_TIMEOUT,
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {.key = "changelog.capture-del-path",
+ .voltype = "features/changelog",
+@@ -2426,18 +2432,18 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "features/barrier",
+ .value = "disable",
+ .type = NO_DOC,
+- .op_version = GD_OP_VERSION_3_7_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {
+ .key = "features.barrier-timeout",
+ .voltype = "features/barrier",
+ .value = BARRIER_TIMEOUT,
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {
+ .key = GLUSTERD_GLOBAL_OP_VERSION_KEY,
+ .voltype = "mgmt/glusterd",
+- .op_version = GD_OP_VERSION_3_6_0,
++ .op_version = GD_OP_VERSION_RHS_3_0,
+ },
+ {
+ .key = GLUSTERD_MAX_OP_VERSION_KEY,
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index c8e84f6..dea6c28 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -3002,7 +3002,7 @@ struct volume_options options[] = {
+ {.key = {"send-gids"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE},
+ {.key = {"event-threads"},
+ .type = GF_OPTION_TYPE_INT,
+@@ -3013,7 +3013,7 @@ struct volume_options options[] = {
+ "in parallel. Larger values would help process"
+ " responses faster, depending on available processing"
+ " power. Range 1-32 threads.",
+- .op_version = {GD_OP_VERSION_3_7_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {NULL}},
+ };
+diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
+index b4b447b..6ae63ba 100644
+--- a/xlators/protocol/server/src/server.c
++++ b/xlators/protocol/server/src/server.c
+@@ -1854,13 +1854,13 @@ struct volume_options server_options[] = {
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Resolve groups on the server-side.",
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"gid-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "300",
+ .description = "Timeout in seconds for the cached groups to expire.",
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"event-threads"},
+ .type = GF_OPTION_TYPE_INT,
+@@ -1871,7 +1871,7 @@ struct volume_options server_options[] = {
+ "in parallel. Larger values would help process"
+ " responses faster, depending on available processing"
+ " power.",
+- .op_version = {GD_OP_VERSION_3_7_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"dynamic-auth"},
+ .type = GF_OPTION_TYPE_BOOL,
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index f0d8e3f..ed82e35 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -1243,7 +1243,7 @@ struct volume_options posix_options[] = {
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enable placeholders for gfid to path conversion",
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"gfid2path"},
+ .type = GF_OPTION_TYPE_BOOL,
+@@ -1279,7 +1279,7 @@ struct volume_options posix_options[] = {
+ " The raw filesystem will not be compatible with OS X Finder.\n"
+ "\t- Strip: Will strip the user namespace before setting. The raw "
+ "filesystem will work in OS X.\n",
+- .op_version = {GD_OP_VERSION_3_6_0},
++ .op_version = {GD_OP_VERSION_RHS_3_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ #endif
+ {
+--
+1.8.3.1
+
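The hunks above all follow one gating pattern: glusterd compares the cluster's
operating version (conf->op_version or priv->op_version) against a feature's
minimum op-version macro and skips the feature when the cluster is too old.
The patch never changes that control flow; it only swaps the upstream
GD_OP_VERSION_3_6_0/3_7_0 constants for the downstream RHS macros. A minimal
sketch of the pattern, with a hypothetical wrapper function around the checks
seen above:

    /* Sketch only: the gate used throughout the patch above.
     * conf->op_version holds the cluster-wide operating version; the
     * GD_OP_VERSION_* macro is the minimum version the feature needs. */
    static int
    gate_feature_on_op_version(glusterd_conf_t *conf)
    {
        int ret = -1;

        if (conf->op_version < GD_OP_VERSION_RHS_3_0) {
            /* Cluster runs at a lesser version than the feature needs:
             * bail out early, exactly as the hunks above do. */
            ret = 0;
            goto out;
        }

        /* ... feature-specific work would happen here ... */
        ret = 0;
    out:
        return ret;
    }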
diff --git a/0003-rpc-set-bind-insecure-to-off-by-default.patch b/0003-rpc-set-bind-insecure-to-off-by-default.patch
new file mode 100644
index 0000000..639b62f
--- /dev/null
+++ b/0003-rpc-set-bind-insecure-to-off-by-default.patch
@@ -0,0 +1,51 @@
+From 9b58731c83bc1ee9c5f2a3cd58a8f845cf09ee82 Mon Sep 17 00:00:00 2001
+From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+Date: Mon, 21 Mar 2016 13:54:19 +0530
+Subject: [PATCH 03/52] rpc: set bind-insecure to off by default
+
+commit 243a5b429f225acb8e7132264fe0a0835ff013d5 turns 'ON'
+allow-insecure and bind-insecure by default.
+
+Problem:
+Now with newer versions we have bind-insecure 'ON' by default.
+So, while upgrading a subset of nodes in a trusted storage pool,
+nodes which have older versions of glusterfs will expect
+connections from secure ports only (since they still have
+bind-insecure off) and thus reject connections from upgraded
+nodes, which now use insecure ports.
+
+Hence we will run into connection issues between peers.
+
+Solution:
+This patch will turn bind-insecure 'OFF' by default to avoid
+the problem explained above.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Id7a19b4872399d3b019243b0857c9c7af75472f7
+Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70313
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ rpc/rpc-lib/src/rpc-transport.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
+index f9cbdf1..4beaaf9 100644
+--- a/rpc/rpc-lib/src/rpc-transport.c
++++ b/rpc/rpc-lib/src/rpc-transport.c
+@@ -269,8 +269,8 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
+ else
+ trans->bind_insecure = 0;
+ } else {
+- /* By default allow bind insecure */
+- trans->bind_insecure = 1;
++ /* Turning off bind-insecure by default */
++ trans->bind_insecure = 0;
+ }
+
+ ret = dict_get_str(options, "transport-type", &type);
+--
+1.8.3.1
+
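With this default flipped, insecure binds become strictly opt-in. A sketch of
the resulting precedence in rpc_transport_load (the option key and the
boolean-parsing helper shown here are assumptions about the code just above
the hunk's context lines, not a quote of it):

    /* Sketch only: an explicit option still wins; without one, the new
     * downstream default keeps bind-insecure off. */
    char *value = NULL;
    gf_boolean_t insecure = _gf_false;

    if (dict_get_str(options, "transport.socket.bind-insecure", &value) == 0) {
        if (gf_string2boolean(value, &insecure) == 0)
            trans->bind_insecure = insecure ? 1 : 0;
    } else {
        /* No explicit setting: stay on secure ports by default. */
        trans->bind_insecure = 0;
    }

In a mixed-version pool this is the safer default: upgraded peers keep
connecting from secure ports, so not-yet-upgraded peers (which still enforce
secure ports) continue to accept them.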
diff --git a/0004-glusterd-spec-fixing-autogen-issue.patch b/0004-glusterd-spec-fixing-autogen-issue.patch
new file mode 100644
index 0000000..f3cb2ec
--- /dev/null
+++ b/0004-glusterd-spec-fixing-autogen-issue.patch
@@ -0,0 +1,47 @@
+From aa73240892a7072be68772370fd95173e6e77d10 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 21 Mar 2016 17:07:00 +0530
+Subject: [PATCH 04/52] glusterd/spec: fixing autogen issue
+
+Backport of https://code.engineering.redhat.com/gerrit/#/c/59463/
+
+Because of the incorrect build section, autogen.sh wasn't re-run during the rpm
+build process. The `extras/Makefile.in` was not regenerated with the changes
+made to `extras/Makefile.am` in the firewalld patch. This meant that
+`extras/Makefile` was generated without the firewalld changes. So the firewalld
+config wasn't installed during `make install`, and rpmbuild later failed when
+it could not find `/usr/lib/firewalld/glusterfs.xml`
+
+Label: DOWNSTREAM ONLY
+
+>Reviewed-on: https://code.engineering.redhat.com/gerrit/59463
+
+Change-Id: I498bcceeacbd839640282eb6467c9f1464505697
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70343
+Reviewed-by: Milind Changire <mchangir@redhat.com>
+---
+ glusterfs.spec.in | 7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index c655f16..f5c1f79 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -700,12 +700,7 @@ done
+
+ %build
+
+-# RHEL6 and earlier need to manually replace config.guess and config.sub
+-%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+-./autogen.sh
+-%endif
+-
+-%configure \
++./autogen.sh && %configure \
+ %{?_with_asan} \
+ %{?_with_cmocka} \
+ %{?_with_debug} \
+--
+1.8.3.1
+
diff --git a/0005-libglusterfs-glusterd-Fix-compilation-errors.patch b/0005-libglusterfs-glusterd-Fix-compilation-errors.patch
new file mode 100644
index 0000000..5aa4f20
--- /dev/null
+++ b/0005-libglusterfs-glusterd-Fix-compilation-errors.patch
@@ -0,0 +1,36 @@
+From 44f758a56c5c5ad340ebc6d6a6478e8712c2c101 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 21 Mar 2016 22:31:02 +0530
+Subject: [PATCH 05/52] libglusterfs/glusterd: Fix compilation errors
+
+1. Removed duplicate definition of GD_OP_VER_PERSISTENT_AFR_XATTRS introduced in
+d367a88 where GD_OP_VER_PERSISTENT_AFR_XATTRS was redefined
+
+2. Fixed incorrect op-version
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Icfa3206e8a41a11875641f57523732b80837f8f6
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70384
+Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 64447e7..51ca3d1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -967,7 +967,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+ goto out;
+ }
+
+- if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
++ if (conf->op_version >= GD_OP_VERSION_3_7_0) {
+ snprintf(buf, sizeof(buf), "%d", volinfo->disperse_count);
+ ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, buf);
+ if (ret)
+--
+1.8.3.1
+
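For context on item 1 of the message above: the C preprocessor only permits
redefining a macro when the replacement text is identical, so a second,
differing definition of GD_OP_VER_PERSISTENT_AFR_XATTRS is diagnosed by the
compiler (a warning by default, an error under -Werror). A contrived
illustration with made-up values:

    /* Illustration only: two conflicting definitions of one macro. */
    #define GD_OP_VER_PERSISTENT_AFR_XATTRS 30600
    #define GD_OP_VER_PERSISTENT_AFR_XATTRS 30700
    /* gcc: warning: "GD_OP_VER_PERSISTENT_AFR_XATTRS" redefined
     * (promoted to an error when the tree builds with -Werror) */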
diff --git a/0006-build-remove-ghost-directory-entries.patch b/0006-build-remove-ghost-directory-entries.patch
new file mode 100644
index 0000000..68dd8f3
--- /dev/null
+++ b/0006-build-remove-ghost-directory-entries.patch
@@ -0,0 +1,58 @@
+From 1f28e008825ae291208a9e6c714dd642f715a2a1 Mon Sep 17 00:00:00 2001
+From: "Bala.FA" <barumuga@redhat.com>
+Date: Mon, 7 Apr 2014 15:24:10 +0530
+Subject: [PATCH 06/52] build: remove ghost directory entries
+
+oVirt requires hook directories for gluster management, and the ghost
+directories are no longer ghost entries
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Iaf1066ba0655619024f87eaaa039f0010578c567
+Signed-off-by: Bala.FA <barumuga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60133
+Tested-by: Milind Changire <mchangir@redhat.com>
+---
+ glusterfs.spec.in | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index f5c1f79..6be492e 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -799,15 +799,30 @@ install -D -p -m 0644 extras/glusterfs-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
+
+ %if ( 0%{!?_without_georeplication:1} )
+-# geo-rep ghosts
+ mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
+ touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+ install -D -p -m 0644 extras/glusterfs-georep-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
+ %endif
+
++%if ( 0%{!?_without_syslog:1} )
++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
++install -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \
++ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
++%endif
++
++%if ( 0%{?rhel} && 0%{?rhel} == 6 )
++install -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \
++ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
++%endif
++
++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
++install -D -p -m 0644 extras/logger.conf.example \
++ %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example
++%endif
++%endif
++
+ %if ( 0%{!?_without_server:1} )
+-# the rest of the ghosts
+ touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
+ touch %{buildroot}%{_sharedstatedir}/glusterd/options
+ subdirs=(add-brick create copy-file delete gsync-create remove-brick reset set start stop)
+--
+1.8.3.1
+
diff --git a/0007-build-add-RHGS-specific-changes.patch b/0007-build-add-RHGS-specific-changes.patch
new file mode 100644
index 0000000..ac092bd
--- /dev/null
+++ b/0007-build-add-RHGS-specific-changes.patch
@@ -0,0 +1,620 @@
+From 7744475550cd27f58f536741e9c50c639d3b02d8 Mon Sep 17 00:00:00 2001
+From: "Bala.FA" <barumuga@redhat.com>
+Date: Thu, 6 Dec 2018 20:06:27 +0530
+Subject: [PATCH 07/52] build: add RHGS specific changes
+
+Label: DOWNSTREAM ONLY
+
+Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1074947
+Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1097782
+Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1115267
+Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1221743
+Change-Id: I08333334745adf2350e772c6454ffcfe9c08cb89
+Reviewed-on: https://code.engineering.redhat.com/gerrit/24983
+Reviewed-on: https://code.engineering.redhat.com/gerrit/25451
+Reviewed-on: https://code.engineering.redhat.com/gerrit/25518
+Reviewed-on: https://code.engineering.redhat.com/gerrit/25983
+Signed-off-by: Bala.FA <barumuga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60134
+Tested-by: Milind Changire <mchangir@redhat.com>
+---
+ glusterfs.spec.in | 485 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 481 insertions(+), 4 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 6be492e..eb04491 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -95,9 +95,16 @@
+ %{?_without_server:%global _without_server --without-server}
+
+ # disable server components forcefully as rhel <= 6
+-%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
++%if ( 0%{?rhel} )
++%if ( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" )
++%global _without_server %{nil}
++%else
+ %global _without_server --without-server
+ %endif
++%endif
++
++%global _without_extra_xlators 1
++%global _without_regression_tests 1
+
+ # syslog
+ # if you wish to build rpms without syslog logging, compile like this
+@@ -229,7 +236,8 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
+ %else
+ Name: @PACKAGE_NAME@
+ Version: @PACKAGE_VERSION@
+-Release: 0.@PACKAGE_RELEASE@%{?dist}
++Release: @PACKAGE_RELEASE@%{?dist}
++ExcludeArch: i686
+ %endif
+ License: GPLv2 or LGPLv3+
+ URL: http://docs.gluster.org/
+@@ -243,8 +251,6 @@ Source8: glusterfsd.init
+ Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
+ %endif
+
+-BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
+-
+ Requires(pre): shadow-utils
+ %if ( 0%{?_with_systemd:1} )
+ BuildRequires: systemd
+@@ -384,7 +390,9 @@ This package provides cloudsync plugins for archival feature.
+ Summary: Development Libraries
+ Requires: %{name}%{?_isa} = %{version}-%{release}
+ # Needed for the Glupy examples to work
++%if ( 0%{!?_without_extra_xlators:1} )
+ Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release}
++%endif
+
+ %description devel
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -397,6 +405,7 @@ is in user space and easily manageable.
+
+ This package provides the development libraries and include files.
+
++%if ( 0%{!?_without_extra_xlators:1} )
+ %package extra-xlators
+ Summary: Extra Gluster filesystem Translators
+ # We need python-gluster rpm for gluster module's __init__.py in Python
+@@ -415,6 +424,7 @@ is in user space and easily manageable.
+
+ This package provides extra filesystem Translators, such as Glupy,
+ for GlusterFS.
++%endif
+
+ %package fuse
+ Summary: Fuse client
+@@ -440,6 +450,30 @@ is in user space and easily manageable.
+ This package provides support to FUSE based clients and inlcudes the
+ glusterfs(d) binary.
+
++%if ( 0%{!?_without_server:1} )
++%package ganesha
++Summary: NFS-Ganesha configuration
++Group: Applications/File
++
++Requires: %{name}-server%{?_isa} = %{version}-%{release}
++Requires: nfs-ganesha-gluster, pcs, dbus
++%if ( 0%{?rhel} && 0%{?rhel} == 6 )
++Requires: cman, pacemaker, corosync
++%endif
++
++%description ganesha
++GlusterFS is a distributed file-system capable of scaling to several
++petabytes. It aggregates various storage bricks over Infiniband RDMA
++or TCP/IP interconnect into one large parallel network file
++system. GlusterFS is one of the most sophisticated file systems in
++terms of features and extensibility. It borrows a powerful concept
++called Translators from GNU Hurd kernel. Much of the code in GlusterFS
++is in user space and easily manageable.
++
++This package provides the configuration and related files for using
++NFS-Ganesha as the NFS server with GlusterFS
++%endif
++
+ %if ( 0%{!?_without_georeplication:1} )
+ %package geo-replication
+ Summary: GlusterFS Geo-replication
+@@ -541,6 +575,7 @@ is in user space and easily manageable.
+ This package provides support to ib-verbs library.
+ %endif
+
++%if ( 0%{!?_without_regression_tests:1} )
+ %package regression-tests
+ Summary: Development Tools
+ Requires: %{name}%{?_isa} = %{version}-%{release}
+@@ -556,6 +591,7 @@ Requires: nfs-utils xfsprogs yajl psmisc bc
+ %description regression-tests
+ The Gluster Test Framework, is a suite of scripts used for
+ regression testing of Gluster.
++%endif
+
+ %if ( 0%{!?_without_ocf:1} )
+ %package resource-agents
+@@ -1092,6 +1128,16 @@ exit 0
+ %if 0%{?_tmpfilesdir:1} && 0%{!?_without_server:1}
+ %{_tmpfilesdir}/gluster.conf
+ %endif
++%if ( 0%{?_without_extra_xlators:1} )
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache.so
++%endif
++%if ( 0%{?_without_regression_tests:1} )
++%exclude %{_datadir}/glusterfs/run-tests.sh
++%exclude %{_datadir}/glusterfs/tests
++%endif
+
+ %files api
+ %exclude %{_libdir}/*.so
+@@ -1134,12 +1180,14 @@ exit 0
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/client.so
+
++%if ( 0%{!?_without_extra_xlators:1} )
+ %files extra-xlators
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
++%endif
+
+ %files fuse
+ # glusterfs is a symlink to glusterfsd, -server depends on -fuse.
+@@ -1239,11 +1287,13 @@ exit 0
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
+ %endif
+
++%if ( 0%{!?_without_regression_tests:1} )
+ %files regression-tests
+ %dir %{_datadir}/glusterfs
+ %{_datadir}/glusterfs/run-tests.sh
+ %{_datadir}/glusterfs/tests
+ %exclude %{_datadir}/glusterfs/tests/vagrant
++%endif
+
+ %if ( 0%{!?_without_ocf:1} )
+ %files resource-agents
+@@ -1424,6 +1474,433 @@ exit 0
+ %endif
+ %endif
+
++##-----------------------------------------------------------------------------
++## All %pretrans should be placed here and keep them sorted
++##
++%if 0%{!?_without_server:1}
++%pretrans -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ echo "ERROR: Distribute volumes detected. In-service rolling upgrade requires distribute volume(s) to be stopped."
++ echo "ERROR: Please stop distribute volume(s) before proceeding... exiting!"
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ echo "WARNING: Updating glusterfs requires its processes to be killed. This action does NOT incur downtime."
++ echo "WARNING: Ensure to wait for the upgraded server to finish healing before proceeding."
++ echo "WARNING: Refer upgrade section of install guide for more details"
++ echo "Please run # service glusterd stop; pkill glusterfs; pkill glusterfsd; pkill gsyncd.py;"
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++
++
++
++%pretrans api -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-api_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++
++
++
++%pretrans api-devel -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-api-devel_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++
++
++
++%pretrans devel -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-devel_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++
++
++
++%pretrans fuse -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-fuse_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++
++
++
++%if 0%{?_can_georeplicate}
++%if ( 0%{!?_without_georeplication:1} )
++%pretrans geo-replication -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-geo-replication_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++%endif
++%endif
++
++
++
++%pretrans libs -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-libs_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++
++
++
++%if ( 0%{!?_without_rdma:1} )
++%pretrans rdma -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-rdma_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++%endif
++
++
++
++%if ( 0%{!?_without_ocf:1} )
++%pretrans resource-agents -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-resource-agents_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++%endif
++
++
++
++%pretrans server -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-server_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++%endif
++
+ %changelog
+ * Wed Mar 6 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+ - remove unneeded ldconfig in scriptlets
+--
+1.8.3.1
+
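Every %pretrans scriptlet added by the hunk above embeds the same /bin/sh payload. As a hedged, standalone sketch of that guard (default glusterd layout assumed; in the volume info file "type=0" marks a plain distribute volume and "status=1" a started one), the logic boils down to:

    #!/bin/sh
    # Refuse to proceed while glusterfsd serves a started distribute volume.
    if pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1; then
        for volume in /var/lib/glusterd/vols/*; do
            [ -f "$volume/info" ] || continue
            vol_type=$(grep '^type=' "$volume/info" | cut -d= -f2)
            started=$(grep '^status=' "$volume/info" | cut -d= -f2)
            if [ "$vol_type" = "0" ] && [ "$started" = "1" ]; then
                echo "ERROR: stop distribute volume(s) before upgrading" >&2
                exit 1
            fi
        done
        exit 1   # any running glusterfsd blocks the transaction
    fi

The Lua wrapper only exists to write this payload to a temporary file and run it with /bin/bash, since %pretrans runs before any package files are unpacked.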
diff --git a/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch b/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch
new file mode 100644
index 0000000..66a39d2
--- /dev/null
+++ b/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch
@@ -0,0 +1,35 @@
+From 0ab54c5b274f29fcdd4787325c7183a84e875bbc Mon Sep 17 00:00:00 2001
+From: "Bala.FA" <barumuga@redhat.com>
+Date: Thu, 22 May 2014 08:37:27 +0530
+Subject: [PATCH 08/52] secalert: remove setuid bit for fusermount-glusterfs
+
+glusterfs-fuse: File /usr/bin/fusermount-glusterfs on x86_64 is setuid
+root but is not on the setxid whitelist
+
+Label: DOWNSTREAM ONLY
+
+Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=989480
+Change-Id: Icf6e5db72ae15ccc60b02be6713fb6c4f4c8a15f
+Signed-off-by: Bala.FA <barumuga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/25453
+Signed-off-by: Bala.FA <barumuga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60135
+Tested-by: Milind Changire <mchangir@redhat.com>
+---
+ contrib/fuse-util/Makefile.am | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/contrib/fuse-util/Makefile.am b/contrib/fuse-util/Makefile.am
+index abbc10e..a071c81 100644
+--- a/contrib/fuse-util/Makefile.am
++++ b/contrib/fuse-util/Makefile.am
+@@ -9,6 +9,5 @@ AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+ install-exec-hook:
+ -chown root $(DESTDIR)$(bindir)/fusermount-glusterfs
+- chmod u+s $(DESTDIR)$(bindir)/fusermount-glusterfs
+
+ CLEANFILES =
+--
+1.8.3.1
+
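A quick way to confirm the effect of this patch on an installed build (binary path assumed from the default bindir):

    stat -c '%A %U %n' /usr/bin/fusermount-glusterfs
    # expected after the patch: -rwxr-xr-x root ... (no 's' in the mode bits)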
diff --git a/0009-build-introduce-security-hardening-flags-in-gluster.patch b/0009-build-introduce-security-hardening-flags-in-gluster.patch
new file mode 100644
index 0000000..7cfe937
--- /dev/null
+++ b/0009-build-introduce-security-hardening-flags-in-gluster.patch
@@ -0,0 +1,57 @@
+From 2adb5d540e9344149ae2591811ad34928775e6fd Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Wed, 3 Jun 2015 11:09:21 +0530
+Subject: [PATCH 09/52] build: introduce security hardening flags in gluster
+
+This patch introduces two security hardening compiler flags, RELRO & PIE,
+into the gluster codebase. Setting _hardened_build to 1 doesn't guarantee the
+existence of these flags in the compilation, as different versions of RHEL ship
+different redhat-rpm-config macros. So the idea is to export these flags at the
+spec file level.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I0a1a56d0a8f54f110d306ba5e55e39b1b073dc84
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/49780
+Reviewed-by: Balamurugan Arumugam <barumuga@redhat.com>
+Tested-by: Balamurugan Arumugam <barumuga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60137
+Tested-by: Milind Changire <mchangir@redhat.com>
+---
+ glusterfs.spec.in | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index eb04491..8a31a98 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -736,6 +736,25 @@ done
+
+ %build
+
++# In RHEL7 a few hardening flags are available by default; however, the RELRO
++# default behaviour is partial, so convert it to full.
++%if ( 0%{?rhel} && 0%{?rhel} >= 7 )
++LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now"
++export LDFLAGS
++%else
++%if ( 0%{?rhel} && 0%{?rhel} == 6 )
++CFLAGS="$RPM_OPT_FLAGS -fPIE -DPIE"
++LDFLAGS="$RPM_LD_FLAGS -pie -Wl,-z,relro,-z,now"
++%else
++# It appears that with gcc-4.1.2 in RHEL5 there is an issue using both -fPIC
++# and -fPIE that makes -z relro not work; -fPIE seems to undo what -fPIC does
++CFLAGS="$CFLAGS $RPM_OPT_FLAGS"
++LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now"
++%endif
++export CFLAGS
++export LDFLAGS
++%endif
++
+ ./autogen.sh && %configure \
+ %{?_with_asan} \
+ %{?_with_cmocka} \
+--
+1.8.3.1
+
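Whether the exported flags actually landed in a binary can be checked with readelf; a hedged sketch (binary path assumed):

    readelf -l /usr/sbin/glusterfsd | grep GNU_RELRO          # RELRO segment present
    readelf -d /usr/sbin/glusterfsd | grep -E 'BIND_NOW|NOW'  # present => full RELRO
    readelf -h /usr/sbin/glusterfsd | grep 'Type:'            # 'DYN' suggests a PIE build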
diff --git a/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch b/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch
new file mode 100644
index 0000000..9226936
--- /dev/null
+++ b/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch
@@ -0,0 +1,100 @@
+From bf5906cbc9bf986c7495db792d098001e28c47e3 Mon Sep 17 00:00:00 2001
+From: Niels de Vos <ndevos@redhat.com>
+Date: Wed, 22 Apr 2015 15:39:59 +0200
+Subject: [PATCH 10/52] spec: fix/add pre-transaction scripts for geo-rep and
+ cli packages
+
+The cli subpackage never had a %pretrans script; one has been added
+now.
+
+The %pretrans script for geo-replication was never included in the RPM
+package because it was disabled by an undefined macro (_can_georeplicate).
+This macro is not used/set anywhere else and _without_georeplication
+should take care of it anyway.
+
+Note: This is a Red Hat Gluster Storage specific patch. Upstream
+ packaging guidelines do not allow these kind of 'features'.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I16aab5bba72f1ed178f3bcac47f9d8ef767cfcef
+Signed-off-by: Niels de Vos <ndevos@redhat.com>
+Signed-off-by: Bala.FA <barumuga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/50491
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60138
+Tested-by: Milind Changire <mchangir@redhat.com>
+---
+ glusterfs.spec.in | 43 +++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 41 insertions(+), 2 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 8a31a98..b70dbfc 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1626,6 +1626,47 @@ end
+
+
+
++%pretrans cli -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-cli_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++
++
++
+ %pretrans devel -p <lua>
+ if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+@@ -1708,7 +1749,6 @@ end
+
+
+
+-%if 0%{?_can_georeplicate}
+ %if ( 0%{!?_without_georeplication:1} )
+ %pretrans geo-replication -p <lua>
+ if not posix.access("/bin/bash", "x") then
+@@ -1749,7 +1789,6 @@ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+ %endif
+-%endif
+
+
+
+--
+1.8.3.1
+
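With _can_georeplicate gone, the geo-replication %pretrans is controlled only by the standard conditional. Assuming the spec's "0%{!?_without_georeplication:1}" test, it can be toggled at build time like so:

    rpmbuild -ba glusterfs.spec --without georeplication
    # --without X defines %_without_X, so the guarded %pretrans block is skipped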
diff --git a/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch b/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch
new file mode 100644
index 0000000..cc79317
--- /dev/null
+++ b/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch
@@ -0,0 +1,138 @@
+From 40eb62a8872ce061416e899fb6c0784b6253ab16 Mon Sep 17 00:00:00 2001
+From: Niels de Vos <ndevos@redhat.com>
+Date: Fri, 7 Dec 2018 14:05:21 +0530
+Subject: [PATCH 11/52] rpm: glusterfs-devel for client-builds should not
+ depend on -server
+
+glusterfs-devel for client-side packages should *not* include the
+libgfdb.so symlink and libgfdb.pc file or any of the libchangelog
+ones.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ifb4a9cf48841e5af5dd0a98b6de51e2ee469fc56
+Signed-off-by: Niels de Vos <ndevos@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/51019
+Reviewed-by: Balamurugan Arumugam <barumuga@redhat.com>
+Tested-by: Balamurugan Arumugam <barumuga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60139
+Tested-by: Milind Changire <mchangir@redhat.com>
+---
+ glusterfs.spec.in | 86 +++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 62 insertions(+), 24 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index b70dbfc..1c631db 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -450,30 +450,6 @@ is in user space and easily manageable.
+ This package provides support to FUSE based clients and includes the
+ glusterfs(d) binary.
+
+-%if ( 0%{!?_without_server:1} )
+-%package ganesha
+-Summary: NFS-Ganesha configuration
+-Group: Applications/File
+-
+-Requires: %{name}-server%{?_isa} = %{version}-%{release}
+-Requires: nfs-ganesha-gluster, pcs, dbus
+-%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+-Requires: cman, pacemaker, corosync
+-%endif
+-
+-%description ganesha
+-GlusterFS is a distributed file-system capable of scaling to several
+-petabytes. It aggregates various storage bricks over Infiniband RDMA
+-or TCP/IP interconnect into one large parallel network file
+-system. GlusterFS is one of the most sophisticated file systems in
+-terms of features and extensibility. It borrows a powerful concept
+-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+-is in user space and easily manageable.
+-
+-This package provides the configuration and related files for using
+-NFS-Ganesha as the NFS server using GlusterFS
+-%endif
+-
+ %if ( 0%{!?_without_georeplication:1} )
+ %package geo-replication
+ Summary: GlusterFS Geo-replication
+@@ -1157,6 +1133,62 @@ exit 0
+ %exclude %{_datadir}/glusterfs/run-tests.sh
+ %exclude %{_datadir}/glusterfs/tests
+ %endif
++%if 0%{?_without_server:1}
++%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-5.8.conf
++%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-7.2.conf
++%exclude %{_sysconfdir}/glusterfs/glusterd.vol
++%exclude %{_sysconfdir}/glusterfs/glusterfs-georep-logrotate
++%exclude %{_sysconfdir}/glusterfs/glusterfs-logrotate
++%exclude %{_sysconfdir}/glusterfs/group-db-workload
++%exclude %{_sysconfdir}/glusterfs/group-distributed-virt
++%exclude %{_sysconfdir}/glusterfs/group-gluster-block
++%exclude %{_sysconfdir}/glusterfs/group-metadata-cache
++%exclude %{_sysconfdir}/glusterfs/group-nl-cache
++%exclude %{_sysconfdir}/glusterfs/group-virt.example
++%exclude %{_sysconfdir}/glusterfs/logger.conf.example
++%exclude %{_sysconfdir}/rsyslog.d/gluster.conf.example
++%exclude %{_prefix}/bin/glusterfind
++%exclude %{_prefix}/lib/firewalld/services/glusterfs.xml
++%exclude %{_prefix}/lib/systemd/system/glusterd.service
++%exclude %{_prefix}/lib/systemd/system/glusterfssharedstorage.service
++%exclude %{_prefix}/lib/tmpfiles.d/gluster.conf
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix-locks.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quotad.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so
++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so
++%exclude %{_libexecdir}/glusterfs/*
++%exclude %{_sbindir}/conf.py
++%exclude %{_sbindir}/gcron.py
++%exclude %{_sbindir}/gf_attach
++%exclude %{_sbindir}/gfind_missing_files
++%exclude %{_sbindir}/glfsheal
++%exclude %{_sbindir}/gluster
++%exclude %{_sbindir}/gluster-setgfid2path
++%exclude %{_sbindir}/glusterd
++%exclude %{_sbindir}/snap_scheduler.py
++%exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh
++%exclude %{_datadir}/glusterfs/scripts/control-mem.sh
++%exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh
++%exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh
++%exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
++%exclude %{_sharedstatedir}/glusterd/*
++%endif
+
+ %files api
+ %exclude %{_libdir}/*.so
+@@ -1190,7 +1222,13 @@ exit 0
+ %exclude %{_includedir}/glusterfs/api
+ %exclude %{_libdir}/libgfapi.so
+ %{_libdir}/*.so
++%if ( 0%{?_without_server:1} )
++%exclude %{_libdir}/pkgconfig/libgfchangelog.pc
++%exclude %{_libdir}/libgfchangelog.so
++%else
+ %{_libdir}/pkgconfig/libgfchangelog.pc
++%{_libdir}/libgfchangelog.so
++%endif
+
+ %files client-xlators
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+--
+1.8.3.1
+
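One hedged way to verify that a client-only -devel build really dropped the server-side libraries (the package file name here is illustrative):

    rpm -qlp glusterfs-devel-*.x86_64.rpm | grep -E 'libgfdb|libgfchangelog' \
        || echo "no server-only libraries packaged"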
diff --git a/0012-build-add-pretrans-check.patch b/0012-build-add-pretrans-check.patch
new file mode 100644
index 0000000..efac62f
--- /dev/null
+++ b/0012-build-add-pretrans-check.patch
@@ -0,0 +1,73 @@
+From f054086daf4549a6227196fe37a57a7e49aa5849 Mon Sep 17 00:00:00 2001
+From: "Bala.FA" <barumuga@redhat.com>
+Date: Fri, 7 Dec 2018 14:13:40 +0530
+Subject: [PATCH 12/52] build: add pretrans check
+
+This patch adds a pretrans check for client-xlators.
+
+NOTE: ganesha and python-gluster sub-packages are now obsolete
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I454016319832c11902c0ca79a79fbbcf8ac0a121
+Signed-off-by: Bala.FA <barumuga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/50967
+Reviewed-on: https://code.engineering.redhat.com/gerrit/60140
+Tested-by: Milind Changire <mchangir@redhat.com>
+---
+ glusterfs.spec.in | 39 +++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 39 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 1c631db..a1ff6e0 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1704,6 +1704,45 @@ if not (ok == 0) then
+ end
+
+
++%pretrans client-xlators -p <lua>
++if not posix.access("/bin/bash", "x") then
++ -- initial installation, no shell, no running glusterfsd
++ return 0
++end
++
++-- TODO: move this completely to a lua script
++-- For now, we write a temporary bash script and execute that.
++
++script = [[#!/bin/sh
++pidof -c -o %PPID -x glusterfsd &>/dev/null
++
++if [ $? -eq 0 ]; then
++ pushd . > /dev/null 2>&1
++ for volume in /var/lib/glusterd/vols/*; do cd $volume;
++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
++ exit 1;
++ fi
++ done
++
++ popd > /dev/null 2>&1
++ exit 1;
++fi
++]]
++
++-- rpm in RHEL5 does not have os.tmpname()
++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/
++tmpname = "/tmp/glusterfs-client-xlators_pretrans_" .. os.date("%s")
++tmpfile = io.open(tmpname, "w")
++tmpfile:write(script)
++tmpfile:close()
++ok, how, val = os.execute("/bin/bash " .. tmpname)
++os.remove(tmpname)
++if not (ok == 0) then
++ error("Detected running glusterfs processes", ok)
++end
++
+
+ %pretrans devel -p <lua>
+ if not posix.access("/bin/bash", "x") then
+--
+1.8.3.1
+
diff --git a/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch b/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch
new file mode 100644
index 0000000..5873f3e
--- /dev/null
+++ b/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch
@@ -0,0 +1,50 @@
+From 39932e6bbc8de25813387bb1394cc7942b79ef46 Mon Sep 17 00:00:00 2001
+From: anand <anekkunt@redhat.com>
+Date: Wed, 18 Nov 2015 16:13:46 +0530
+Subject: [PATCH 13/52] glusterd: fix info file checksum mismatch during
+ upgrade
+
+Peers move to the rejected state when upgrading from RHS 2.1 to RHGS 3.1.2
+due to an info file checksum mismatch.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ifea6b7dfe8477c7f17eefc5ca87ced58aaa21c84
+Signed-off-by: anand <anekkunt@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/61774
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 51ca3d1..fb52957 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1009,10 +1009,18 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+ goto out;
+ }
+
+- snprintf(buf, sizeof(buf), "%d", volinfo->op_version);
+- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf);
+- if (ret)
+- goto out;
++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) {
++ snprintf (buf, sizeof (buf), "%d", volinfo->op_version);
++ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf);
++ if (ret)
++ goto out;
++
++ snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version);
++ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
++ buf);
++ if (ret)
++ goto out;
++ }
+
+ snprintf(buf, sizeof(buf), "%d", volinfo->client_op_version);
+ ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+--
+1.8.3.1
+
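The keys the patched store function writes end up in the per-volume info file; assuming the usual names behind the GLUSTERD_STORE_KEY_* constants, they can be inspected on a live node (volume name hypothetical):

    grep -E '^(op-version|client-op-version)=' /var/lib/glusterd/vols/myvol/info
    # with this patch both keys are written only when the cluster op-version
    # is at least GD_OP_VERSION_RHS_3_0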
diff --git a/0014-build-spec-file-conflict-resolution.patch b/0014-build-spec-file-conflict-resolution.patch
new file mode 100644
index 0000000..fb8aeba
--- /dev/null
+++ b/0014-build-spec-file-conflict-resolution.patch
@@ -0,0 +1,72 @@
+From f76d2370160c50a1f59d08a03a444254c289da60 Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Fri, 7 Dec 2018 16:18:07 +0530
+Subject: [PATCH 14/52] build: spec file conflict resolution
+
+Conflict resolution was missed when removing references to
+gluster.conf.example, as mentioned in the patch titled
+"packaging: gratuitous dependencies on rsyslog-mm{count,jsonparse}"
+by Kaleb.
+
+References to the hook scripts S31ganesha-start.sh and
+S31ganesha-reset.sh got lost in the downstream-only
+patch conflict resolution.
+
+Commented out the blanket reference to %{_sharedstatedir}/glusterd/*
+in the %files server section to avoid rpmbuild warnings about
+multiple references to hook scripts and other files under
+/var/lib/glusterd.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I9d409f1595ab985ed9f79d9d4f4298877609ba17
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/70535
+Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
+Tested-by: Rajesh Joseph <rjoseph@redhat.com>
+---
+ glusterfs.spec.in | 21 +--------------------
+ 1 file changed, 1 insertion(+), 20 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index a1ff6e0..8c57f57 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -96,9 +96,7 @@
+
+ # disable server components forcefully as rhel <= 6
+ %if ( 0%{?rhel} )
+-%if ( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" )
+-%global _without_server %{nil}
+-%else
++%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" )))
+ %global _without_server --without-server
+ %endif
+ %endif
+@@ -836,23 +834,6 @@ install -D -p -m 0644 extras/glusterfs-georep-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
+ %endif
+
+-%if ( 0%{!?_without_syslog:1} )
+-%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+-install -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \
+- %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
+-%endif
+-
+-%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+-install -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \
+- %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
+-%endif
+-
+-%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+-install -D -p -m 0644 extras/logger.conf.example \
+- %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example
+-%endif
+-%endif
+-
+ %if ( 0%{!?_without_server:1} )
+ touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
+ touch %{buildroot}%{_sharedstatedir}/glusterd/options
+--
+1.8.3.1
+
diff --git a/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch b/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch
new file mode 100644
index 0000000..b82e19b
--- /dev/null
+++ b/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch
@@ -0,0 +1,198 @@
+From 3d0e09400dc21dbb5f76fd9ca4bfce3edad0d626 Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Fri, 14 Oct 2016 12:53:27 +0530
+Subject: [PATCH 15/52] build: randomize temp file names in pretrans scriptlets
+
+Security issue CVE-2015-1795 mentions the possibility of a file name
+spoofing attack against the %pretrans server scriptlet.
+Since %pretrans scriptlets are executed only for server builds, we can
+use os.tmpname() to randomize temporary file names for all %pretrans
+scriptlets using this mechanism.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ic82433897432794b6d311d836355aa4bad886369
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/86187
+Reviewed-by: Siddharth Sharma <siddharth@redhat.com>
+Reviewed-by: Niels de Vos <ndevos@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 84 +++++++++++++++++++++++++++++++------------------------
+ 1 file changed, 48 insertions(+), 36 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 8c57f57..3a98822 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1549,9 +1549,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1590,9 +1591,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-api_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1631,9 +1633,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-api-devel_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1672,9 +1675,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-cli_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1712,9 +1716,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-client-xlators_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1752,9 +1757,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-devel_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1793,9 +1799,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-fuse_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1835,9 +1842,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-geo-replication_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1877,9 +1885,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-libs_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1919,9 +1928,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-rdma_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -1962,9 +1972,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-resource-agents_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+@@ -2004,9 +2015,10 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- rpm in RHEL5 does not have os.tmpname()
+--- io.tmpfile() can not be resolved to a filename to pass to bash :-/
+-tmpname = "/tmp/glusterfs-server_pretrans_" .. os.date("%s")
++-- Since we run pretrans scripts only for RPMs built for a server build,
++-- we can now use os.tmpname() since it is available on RHEL6 and later
++-- platforms which are server platforms.
++tmpname = os.tmpname()
+ tmpfile = io.open(tmpname, "w")
+ tmpfile:write(script)
+ tmpfile:close()
+--
+1.8.3.1
+
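The shell-level analogue of the fix: a fixed, epoch-stamped name under /tmp is predictable and can be pre-created by an attacker as a symlink, while a randomized template is not. A small illustration:

    # predictable (what the old scriptlets used): /tmp/glusterfs_pretrans_<epoch>
    # randomized (what os.tmpname() provides), shown here with mktemp:
    tmpname=$(mktemp /tmp/glusterfs_pretrans_XXXXXX)
    printf '%s\n' "$tmpname"
    rm -f "$tmpname"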
diff --git a/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch b/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch
new file mode 100644
index 0000000..402b835
--- /dev/null
+++ b/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch
@@ -0,0 +1,42 @@
+From c283f15ac9bfb1c98ce95ed0000ebed81cd3b318 Mon Sep 17 00:00:00 2001
+From: Poornima G <pgurusid@redhat.com>
+Date: Wed, 26 Apr 2017 14:07:58 +0530
+Subject: [PATCH 16/52] glusterd, parallel-readdir: Change the op-version of
+ parallel-readdir to 31100
+
+Issue: Downstream 3.2 was released with op-version 31001; the parallel-readdir
+feature was released upstream in 3.10 and hence with op-version 31000.
+With this, parallel-readdir would be allowed on 3.2 clusters/clients as well,
+but 3.2 didn't have the parallel-readdir feature backported.
+
+Fix:
+Increase the op-version of the parallel-readdir feature, only in downstream,
+to 31100 (the highest 3.3 op-version).
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I2640520985627f3a1cb4fb96e28350f8bb9b146c
+Signed-off-by: Poornima G <pgurusid@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/104403
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index d07fc10..a31ecda 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2718,7 +2718,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .option = "parallel-readdir",
+ .value = "off",
+ .type = DOC,
+- .op_version = GD_OP_VERSION_3_10_0,
++ .op_version = GD_OP_VERSION_3_11_0,
+ .validate_fn = validate_parallel_readdir,
+ .description = "If this option is enabled, the readdir operation "
+ "is performed in parallel on all the bricks, thus "
+--
+1.8.3.1
+
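On a build carrying this patch, the option stays rejected until the cluster runs at the bumped op-version; a hedged CLI check (volume name hypothetical, and "volume get all" exists only on recent builds):

    gluster volume get all cluster.op-version                 # must be >= 31100 here
    gluster volume set myvol performance.parallel-readdir on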
diff --git a/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch b/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch
new file mode 100644
index 0000000..f536c9c
--- /dev/null
+++ b/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch
@@ -0,0 +1,37 @@
+From 5d3315a53611f23a69f88bc8266448e258e2e10f Mon Sep 17 00:00:00 2001
+From: Samikshan Bairagya <sbairagy@redhat.com>
+Date: Mon, 10 Jul 2017 11:54:52 +0530
+Subject: [PATCH 17/52] glusterd: Revert op-version for
+ "cluster.max-brick-per-process"
+
+The op-version for the "cluster.max-brick-per-process" option was
+set to 3.12.0 in the upstream patch and was backported here:
+https://code.engineering.redhat.com/gerrit/#/c/111799. This commit
+reverts the op-version for this option to 3.11.1 instead.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I23639cef43d41915eea0394d019b1e0796a99d7b
+Signed-off-by: Samikshan Bairagya <sbairagy@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/111804
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index a31ecda..9a6fe9f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2794,7 +2794,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = GLUSTERD_BRICKMUX_LIMIT_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE,
+- .op_version = GD_OP_VERSION_3_12_0,
++ .op_version = GD_OP_VERSION_3_11_1,
+ .validate_fn = validate_mux_limit,
+ .type = GLOBAL_DOC,
+ .description = "This option can be used to limit the number of brick "
+--
+1.8.3.1
+
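Since the option is global (type GLOBAL_DOC), it is set cluster-wide rather than per volume; a hedged example:

    gluster volume set all cluster.max-brick-per-process 3
    # rejected unless the cluster op-version is at least the (reverted) 3.11.1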
diff --git a/0018-cli-Add-message-for-user-before-modifying-brick-mult.patch b/0018-cli-Add-message-for-user-before-modifying-brick-mult.patch
new file mode 100644
index 0000000..ab3530e
--- /dev/null
+++ b/0018-cli-Add-message-for-user-before-modifying-brick-mult.patch
@@ -0,0 +1,56 @@
+From 539626a64e5b8cfe05d42f5398073e8a57644073 Mon Sep 17 00:00:00 2001
+From: Samikshan Bairagya <sbairagy@redhat.com>
+Date: Wed, 9 Aug 2017 14:32:59 +0530
+Subject: [PATCH 18/52] cli: Add message for user before modifying
+ brick-multiplex option
+
+Users should be notified that the brick-multiplexing feature is
+supported only for container workloads (CNS/CRS). It should also be
+made known to users that it is advisable to either have all volumes
+in the stopped state or have no bricks running before modifying the
+"brick-multiplex" option. This commit makes sure these messages
+are displayed to the user before brick-multiplexing is enabled or
+disabled.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ic40294b26c691ea03185c4d1fce840ef23f95718
+Signed-off-by: Samikshan Bairagya <sbairagy@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/114793
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index d9913f6..f148c59 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -1698,6 +1698,24 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ }
+ }
+
++ if ((strcmp (key, "cluster.brick-multiplex") == 0)) {
++ question = "Brick-multiplexing is supported only for "
++ "container workloads (CNS/CRS). Also it is "
++ "advised to make sure that either all "
++ "volumes are in stopped state or no bricks "
++ "are running before this option is modified."
++ "Do you still want to continue?";
++
++ answer = cli_cmd_get_confirmation (state, question);
++ if (GF_ANSWER_NO == answer) {
++ gf_log ("cli", GF_LOG_ERROR, "Operation "
++ "cancelled, exiting");
++ *op_errstr = gf_strdup ("Aborted by user.");
++ ret = -1;
++ goto out;
++ }
++ }
++
+ ret = dict_set_int32(dict, "count", wordcount - 3);
+
+ if (ret)
+--
+1.8.3.1
+
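After this change a set operation on the option becomes interactive; the exchange looks roughly like this (output paraphrased, not verbatim):

    gluster volume set all cluster.brick-multiplex on
    # Brick-multiplexing is supported only for container workloads (CNS/CRS) ...
    # Do you still want to continue? (y/n)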
diff --git a/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch b/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch
new file mode 100644
index 0000000..e1287c9
--- /dev/null
+++ b/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch
@@ -0,0 +1,99 @@
+From 8a3035bf612943694a3cd1c6a857bd009e84f55d Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Tue, 10 Oct 2017 09:58:24 +0530
+Subject: [PATCH 19/52] build: launch glusterd upgrade after all new bits are
+ installed
+
+Problem:
+glusterd upgrade mode needs new bits from glusterfs-rdma, which is
+optional and causes the dependency graph to break since it is
+not tied into the glusterfs-server requirements.
+
+Solution:
+Run glusterd upgrade mode after all new bits are installed
+i.e. in %posttrans server section
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I356e02d0bf0eaaef43c20ce07b388262f63093a4
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/120094
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
+---
+ glusterfs.spec.in | 51 +++++++++++++++++++++++++++++----------------------
+ 1 file changed, 29 insertions(+), 22 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 3a98822..208a82d 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -946,28 +946,6 @@ fi
+ %firewalld_reload
+ %endif
+
+-pidof -c -o %PPID -x glusterd &> /dev/null
+-if [ $? -eq 0 ]; then
+- kill -9 `pgrep -f gsyncd.py` &> /dev/null
+-
+- killall --wait glusterd &> /dev/null
+- glusterd --xlator-option *.upgrade=on -N
+-
+- #Cleaning leftover glusterd socket file which is created by glusterd in
+- #rpm_script_t context.
+- rm -f %{_rundir}/glusterd.socket
+-
+- # glusterd _was_ running, we killed it, it exited after *.upgrade=on,
+- # so start it again
+- %service_start glusterd
+-else
+- glusterd --xlator-option *.upgrade=on -N
+-
+- #Cleaning leftover glusterd socket file which is created by glusterd in
+- #rpm_script_t context.
+- rm -f %{_rundir}/glusterd.socket
+-fi
+-exit 0
+ %endif
+
+ ##-----------------------------------------------------------------------------
+@@ -2027,6 +2005,35 @@ os.remove(tmpname)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
++
++%posttrans server
++pidof -c -o %PPID -x glusterd &> /dev/null
++if [ $? -eq 0 ]; then
++ kill -9 `pgrep -f gsyncd.py` &> /dev/null
++
++ killall --wait -SIGTERM glusterd &> /dev/null
++
++ if [ "$?" != "0" ]; then
++ echo "killall failed while killing glusterd"
++ fi
++
++ glusterd --xlator-option *.upgrade=on -N
++
++ #Cleaning leftover glusterd socket file which is created by glusterd in
++ #rpm_script_t context.
++ rm -rf /var/run/glusterd.socket
++
++ # glusterd _was_ running, we killed it, it exited after *.upgrade=on,
++ # so start it again
++ %service_start glusterd
++else
++ glusterd --xlator-option *.upgrade=on -N
++
++ #Cleaning leftover glusterd socket file which is created by glusterd in
++ #rpm_script_t context.
++ rm -rf /var/run/glusterd.socket
++fi
++
+ %endif
+
+ %changelog
+--
+1.8.3.1
+
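The migration step the new %posttrans performs can be reproduced by hand on a node, for example when debugging a failed upgrade (paths per the default layout):

    pkill -f gsyncd.py
    killall --wait glusterd
    glusterd --xlator-option '*.upgrade=on' -N   # regenerate volfiles, then exit
    rm -f /var/run/glusterd.socket               # socket left over from rpm_script_t
    systemctl start glusterd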
diff --git a/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch b/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch
new file mode 100644
index 0000000..c00c7f4
--- /dev/null
+++ b/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch
@@ -0,0 +1,38 @@
+From 968e5e698a070f9e6905a86c9c8338c36fcfa339 Mon Sep 17 00:00:00 2001
+From: moagrawa <moagrawa@redhat.com>
+Date: Mon, 15 Jan 2018 18:21:27 +0530
+Subject: [PATCH 20/52] spec: unpackaged files found for RHEL-7 client build
+
+Problem: unpackaged files found for the RHEL-7 client build
+
+Solution: Update glusterfs.spec.in to exclude the unpackaged files
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I761188a6a8447105b53bf3334ded963c645cab5b
+Signed-off-by: moagrawa <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/127758
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Milind Changire <mchangir@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 208a82d..ec06176 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1141,8 +1141,10 @@ exit 0
+ %exclude %{_sbindir}/gluster-setgfid2path
+ %exclude %{_sbindir}/glusterd
+ %exclude %{_sbindir}/snap_scheduler.py
++%if ( 0%{?_with_systemd:1} )
+ %exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+ %exclude %{_datadir}/glusterfs/scripts/control-mem.sh
++%endif
+ %exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh
+ %exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh
+ %exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
+--
+1.8.3.1
+
diff --git a/0021-cli-glusterfsd-remove-copyright-information.patch b/0021-cli-glusterfsd-remove-copyright-information.patch
new file mode 100644
index 0000000..0aa1d07
--- /dev/null
+++ b/0021-cli-glusterfsd-remove-copyright-information.patch
@@ -0,0 +1,66 @@
+From fbc7f0e5ac8c292b865a8e02ceed2efa101d145c Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 12 Mar 2018 19:47:11 +0530
+Subject: [PATCH 21/52] cli/glusterfsd: remove copyright information
+
+There's no point in dumping upstream copyright information in --version.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I3a10e30878698e1d53082936bbf22bca560a3896
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/132445
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Milind Changire <mchangir@redhat.com>
+---
+ cli/src/cli.c | 11 +----------
+ glusterfsd/src/glusterfsd.c | 11 +----------
+ 2 files changed, 2 insertions(+), 20 deletions(-)
+
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index 84ce0f4..08f117e 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -65,16 +65,7 @@ extern int connected;
+ /* using argp for command line parsing */
+
+ const char *argp_program_version =
+- "" PACKAGE_NAME " " PACKAGE_VERSION
+- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION
+- "\n"
+- "Copyright (c) 2006-2016 Red Hat, Inc. "
+- "<https://www.gluster.org/>\n"
+- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n"
+- "It is licensed to you under your choice of the GNU Lesser\n"
+- "General Public License, version 3 or any later version (LGPLv3\n"
+- "or later), or the GNU General Public License, version 2 (GPLv2),\n"
+- "in all cases as published by the Free Software Foundation.";
++ PACKAGE_NAME" "PACKAGE_VERSION;
+ const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
+
+ struct rpc_clnt *global_quotad_rpc;
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 5d46b3d..c983882 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -86,16 +86,7 @@ static char argp_doc[] =
+ "--volfile-server=SERVER [MOUNT-POINT]\n"
+ "--volfile=VOLFILE [MOUNT-POINT]";
+ const char *argp_program_version =
+- "" PACKAGE_NAME " " PACKAGE_VERSION
+- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION
+- "\n"
+- "Copyright (c) 2006-2016 Red Hat, Inc. "
+- "<https://www.gluster.org/>\n"
+- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n"
+- "It is licensed to you under your choice of the GNU Lesser\n"
+- "General Public License, version 3 or any later version (LGPLv3\n"
+- "or later), or the GNU General Public License, version 2 (GPLv2),\n"
+- "in all cases as published by the Free Software Foundation.";
++ PACKAGE_NAME" "PACKAGE_VERSION;
+ const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
+
+ static error_t
+--
+1.8.3.1
+
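The visible effect is a one-line version banner; for example (exact version string depends on the build):

    gluster --version
    # prints just: glusterfs <PACKAGE_VERSION>, with no copyright/licence text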
diff --git a/0022-cli-Remove-upstream-doc-reference.patch b/0022-cli-Remove-upstream-doc-reference.patch
new file mode 100644
index 0000000..5f9bf28
--- /dev/null
+++ b/0022-cli-Remove-upstream-doc-reference.patch
@@ -0,0 +1,40 @@
+From 00db0c44d109e6f3e394487bf76ff28ba2eee7de Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Thu, 15 Mar 2018 12:56:02 +0530
+Subject: [PATCH 22/52] cli: Remove upstream doc reference
+
+...that is displayed while creating replica 2 volumes.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I16b45c8ad3a33cdd2a464d84f51d006d8f568b23
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/132744
+Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index f148c59..760a10c 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -606,11 +606,8 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
+ "Replica 2 volumes are prone"
+ " to split-brain. Use "
+ "Arbiter or Replica 3 to "
+- "avoid this. See: "
+- "http://docs.gluster.org/en/latest/"
+- "Administrator%20Guide/"
+- "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+- "\nDo you still want to "
++ "avoid this.\n"
++ "Do you still want to "
+ "continue?\n";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+--
+1.8.3.1
+
diff --git a/0023-hooks-remove-selinux-hooks.patch b/0023-hooks-remove-selinux-hooks.patch
new file mode 100644
index 0000000..3d14855
--- /dev/null
+++ b/0023-hooks-remove-selinux-hooks.patch
@@ -0,0 +1,148 @@
+From 421743b7cfa6a249544f6abb4cca5a612bd20ea1 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 11 Dec 2018 16:21:43 +0530
+Subject: [PATCH 23/52] hooks: remove selinux hooks
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I810466a0ca99ab21f5a8eac8cdffbb18333d10ad
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/135800
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Jiffin Thottan <jthottan@redhat.com>
+Reviewed-by: Milind Changire <mchangir@redhat.com>
+---
+ configure.ac | 20 --------------------
+ extras/hook-scripts/Makefile.am | 2 +-
+ extras/hook-scripts/create/Makefile.am | 1 -
+ extras/hook-scripts/create/post/Makefile.am | 8 --------
+ extras/hook-scripts/delete/Makefile.am | 1 -
+ extras/hook-scripts/delete/pre/Makefile.am | 8 --------
+ glusterfs.spec.in | 2 --
+ 7 files changed, 1 insertion(+), 41 deletions(-)
+ delete mode 100644 extras/hook-scripts/create/Makefile.am
+ delete mode 100644 extras/hook-scripts/create/post/Makefile.am
+ delete mode 100644 extras/hook-scripts/delete/Makefile.am
+ delete mode 100644 extras/hook-scripts/delete/pre/Makefile.am
+
+diff --git a/configure.ac b/configure.ac
+index 2f341de..0d06f5a 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -214,10 +214,6 @@ AC_CONFIG_FILES([Makefile
+ extras/hook-scripts/add-brick/Makefile
+ extras/hook-scripts/add-brick/pre/Makefile
+ extras/hook-scripts/add-brick/post/Makefile
+- extras/hook-scripts/create/Makefile
+- extras/hook-scripts/create/post/Makefile
+- extras/hook-scripts/delete/Makefile
+- extras/hook-scripts/delete/pre/Makefile
+ extras/hook-scripts/start/Makefile
+ extras/hook-scripts/start/post/Makefile
+ extras/hook-scripts/set/Makefile
+@@ -909,21 +905,6 @@ fi
+ AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"])
+ dnl end cloudsync section
+
+-dnl SELinux feature enablement
+-case $host_os in
+- linux*)
+- AC_ARG_ENABLE([selinux],
+- AC_HELP_STRING([--disable-selinux],
+- [Disable SELinux features]),
+- [USE_SELINUX="${enableval}"], [USE_SELINUX="yes"])
+- ;;
+- *)
+- USE_SELINUX=no
+- ;;
+-esac
+-AM_CONDITIONAL(USE_SELINUX, test "x${USE_SELINUX}" = "xyes")
+-dnl end of SELinux feature enablement
+-
+ AC_CHECK_HEADERS([execinfo.h], [have_backtrace=yes])
+ if test "x${have_backtrace}" = "xyes"; then
+ AC_DEFINE(HAVE_BACKTRACE, 1, [define if found backtrace])
+@@ -1599,7 +1580,6 @@ echo "XML output : $BUILD_XML_OUTPUT"
+ echo "Unit Tests : $BUILD_UNITTEST"
+ echo "Track priv ports : $TRACK_PRIVPORTS"
+ echo "POSIX ACLs : $BUILD_POSIX_ACLS"
+-echo "SELinux features : $USE_SELINUX"
+ echo "firewalld-config : $BUILD_FIREWALLD"
+ echo "Events : $BUILD_EVENTS"
+ echo "EC dynamic support : $EC_DYNAMIC_SUPPORT"
+diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am
+index 26059d7..771b37e 100644
+--- a/extras/hook-scripts/Makefile.am
++++ b/extras/hook-scripts/Makefile.am
+@@ -1,5 +1,5 @@
+ EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh
+-SUBDIRS = add-brick create delete set start stop reset
++SUBDIRS = add-brick set start stop reset
+
+ scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/
+ if USE_GEOREP
+diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am
+deleted file mode 100644
+index b083a91..0000000
+--- a/extras/hook-scripts/create/Makefile.am
++++ /dev/null
+@@ -1 +0,0 @@
+-SUBDIRS = post
+diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am
+deleted file mode 100644
+index fd1892e..0000000
+--- a/extras/hook-scripts/create/post/Makefile.am
++++ /dev/null
+@@ -1,8 +0,0 @@
+-EXTRA_DIST = S10selinux-label-brick.sh
+-
+-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/
+-if WITH_SERVER
+-if USE_SELINUX
+-scripts_SCRIPTS = S10selinux-label-brick.sh
+-endif
+-endif
+diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am
+deleted file mode 100644
+index c98a05d..0000000
+--- a/extras/hook-scripts/delete/Makefile.am
++++ /dev/null
+@@ -1 +0,0 @@
+-SUBDIRS = pre
+diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am
+deleted file mode 100644
+index 4fbfbe7..0000000
+--- a/extras/hook-scripts/delete/pre/Makefile.am
++++ /dev/null
+@@ -1,8 +0,0 @@
+-EXTRA_DIST = S10selinux-del-fcontext.sh
+-
+-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/
+-if WITH_SERVER
+-if USE_SELINUX
+-scripts_SCRIPTS = S10selinux-del-fcontext.sh
+-endif
+-endif
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index ec06176..db50b8e 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1413,7 +1413,6 @@ exit 0
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
+- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+@@ -1422,7 +1421,6 @@ exit 0
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+ %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
+- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+--
+1.8.3.1
+
diff --git a/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch b/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch
new file mode 100644
index 0000000..59fe63f
--- /dev/null
+++ b/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch
@@ -0,0 +1,50 @@
+From 79c19f0c6d02228aa8cf4b9299afeb7e0b2ad0da Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 16 Apr 2018 17:44:19 +0530
+Subject: [PATCH 24/52] glusterd: Make localtime-logging option invisible in
+ downstream
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ie631edebb7e19152392bfd3c369a96e88796bd75
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/135754
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 3 ++-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index dd3f9eb..cbbb5d9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -86,7 +86,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {
+ * TBD: Discuss the default value for this. Maybe this should be a
+ * dynamic value depending on the memory specifications per node */
+ {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE},
+- {GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},
++ /*{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/
+ {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"},
+ {NULL},
+ };
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 9a6fe9f..fed2864 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2850,10 +2850,11 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ "to have enabled when clients and/or bricks support "
+ "SELinux."},
+ {.key = GLUSTERD_LOCALTIME_LOGGING_KEY,
++ /*{.key = GLUSTERD_LOCALTIME_LOGGING_KEY,
+ .voltype = "mgmt/glusterd",
+ .type = GLOBAL_DOC,
+ .op_version = GD_OP_VERSION_3_12_0,
+- .validate_fn = validate_boolean},
++ .validate_fn = validate_boolean},*/
+ {.key = GLUSTERD_DAEMON_LOG_LEVEL_KEY,
+ .voltype = "mgmt/glusterd",
+ .type = GLOBAL_NO_DOC,
+--
+1.8.3.1
+
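+A hedged usage sketch for the change above, assuming the CLI key behind
+GLUSTERD_LOCALTIME_LOGGING_KEY is "cluster.localtime-logging" (the key
+string itself is not visible in the hunk):
+
+ # With the entry commented out of valid_all_vol_opts and
+ # glusterd_volopt_map, setting the option should now be rejected
+ # with an "option does not exist" style error:
+ gluster volume set all cluster.localtime-logging enable
+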
diff --git a/0025-build-make-RHGS-version-available-for-server.patch b/0025-build-make-RHGS-version-available-for-server.patch
new file mode 100644
index 0000000..90f2592
--- /dev/null
+++ b/0025-build-make-RHGS-version-available-for-server.patch
@@ -0,0 +1,45 @@
+From 12ae1a9a62c2c94af44f55b03575ab8806bd22ee Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Mon, 23 Apr 2018 13:16:30 +0530
+Subject: [PATCH 25/52] build: make RHGS version available for server
+
+Make /usr/share/glusterfs/release available in the glusterfs-server
+package. This file contains the RHGS release number for this release.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I7485f77cfb8ca7f0f8363a20124900ae9ae8a528
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/137139
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index db50b8e..bdb47ba 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -862,6 +862,10 @@ install -p -m 0744 -D extras/command-completion/gluster.bash \
+ %{buildroot}%{_sysconfdir}/bash_completion.d/gluster
+ %endif
+
++%if ( 0%{!?_without_server:1} )
++echo "RHGS 3.5" > %{buildroot}%{_datadir}/glusterfs/release
++%endif
++
+ %clean
+ rm -rf %{buildroot}
+
+@@ -1452,6 +1456,7 @@ exit 0
+
+ # Extra utility script
+ %dir %{_libexecdir}/glusterfs
++ %{_datadir}/glusterfs/release
+ %dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
+ %if ( 0%{?_with_systemd:1} )
+--
+1.8.3.1
+
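+A quick way to verify this on an installed server build, assuming
+%{_datadir} expands to /usr/share as usual:
+
+ cat /usr/share/glusterfs/release
+ # expected content on this branch: RHGS 3.5
+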
diff --git a/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch b/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch
new file mode 100644
index 0000000..8aa9fde
--- /dev/null
+++ b/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch
@@ -0,0 +1,68 @@
+From a3538a7d1fb7674acdf0934847f4004d8fbc4709 Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Tue, 11 Dec 2018 17:57:50 +0530
+Subject: [PATCH 26/52] glusterd: Introduce daemon-log-level cluster wide
+ option
+
+This option, applicable to the node-level daemons, can be very helpful
+in controlling the log level of these services. Please note that any
+daemon started before this option is set to a specific value (other
+than INFO) will need a restart for the change to take effect.
+
+> upstream patch : https://review.gluster.org/#/c/20442/
+
+Please note there is a difference in the downstream delta: the
+op-version for this option is already tagged as 3_11_2 in RHGS 3.3.1,
+and hence the same is retained here. This difference is why the patch
+carries the DOWNSTREAM ONLY label.
+
+Label: DOWNSTREAM ONLY
+
+IMPORTANT:
+This patch only sets .op_version in glusterd-volume-set.c to
+GD_OP_VERSION_3_11_2 as per Atin's recommendation on
+Tue, Dec 11, 2018 5:46pm IST
+
+>Change-Id: I7f6d2620bab2b094c737f5cc816bc093e9c9c4c9
+>fixes: bz#1597473
+>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+Change-Id: I7f6d2620bab2b094c737f5cc816bc093e9c9c4c9
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/143137
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h | 2 ++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index b9da872..a278f18 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -104,6 +104,8 @@
+
+ #define GD_OP_VERSION_3_11_1 31101 /* Op-version for GlusterFS 3.11.1 */
+
++#define GD_OP_VERSION_3_11_2 31102 /* Op-version for GlusterFS 3.11.2 */
++
+ #define GD_OP_VERSION_3_12_0 31200 /* Op-version for GlusterFS 3.12.0 */
+
+ #define GD_OP_VERSION_3_12_2 31202 /* Op-version for GlusterFS 3.12.2 */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index fed2864..84f2705 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2859,7 +2859,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "mgmt/glusterd",
+ .type = GLOBAL_NO_DOC,
+ .value = "INFO",
+- .op_version = GD_OP_VERSION_5_0},
++ .op_version = GD_OP_VERSION_3_11_2},
+ {.key = "debug.delay-gen",
+ .voltype = "debug/delay-gen",
+ .option = "!debug",
+--
+1.8.3.1
+
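+The GD_OP_VERSION_* values above follow a simple pattern: release X.Y.Z
+appears to be encoded as X*10000 + Y*100 + Z. A minimal shell sketch of
+the arithmetic:
+
+ echo $(( 3 * 10000 + 11 * 100 + 2 ))  # 31102, i.e. GD_OP_VERSION_3_11_2
+ echo $(( 4 * 10000 ))                 # 40000, i.e. GD_OP_VERSION_4_0_0
+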
diff --git a/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch b/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch
new file mode 100644
index 0000000..76b430c
--- /dev/null
+++ b/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch
@@ -0,0 +1,50 @@
+From 9be3c4745b161f1815f77cd19b550ac9795845f5 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Thu, 20 Sep 2018 22:01:05 +0530
+Subject: [PATCH 27/52] glusterd: change op-version of fips-mode-rchecksum
+
+..to GD_OP_VERSION_3_13_3 since GD_OP_VERSION_4_0_0 is not present in
+rhgs-3.4.1
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I759272748177d174b15123faffc2305f7a5ec58f
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/150714
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h | 2 ++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index a278f18..4a82889 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -118,6 +118,8 @@
+
+ #define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */
+
++#define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */
++
+ #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
+
+ #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 84f2705..2bd0a9c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2329,7 +2329,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .key = "storage.fips-mode-rchecksum",
+ .type = NO_DOC,
+ .voltype = "storage/posix",
+- .op_version = GD_OP_VERSION_4_0_0,
++ .op_version = GD_OP_VERSION_3_13_3,
+ },
+ {
+ .option = "force-create-mode",
+--
+1.8.3.1
+
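+A hedged usage sketch: once every node in the cluster runs at
+op-version 31303 or higher, the option can be enabled per volume (the
+volume name is illustrative):
+
+ gluster volume set myvol storage.fips-mode-rchecksum on
+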
diff --git a/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch b/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch
new file mode 100644
index 0000000..b39c16b
--- /dev/null
+++ b/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch
@@ -0,0 +1,52 @@
+From 64ffcf770c5c0087f8937b5235ed0ad5b0efe7f2 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Wed, 12 Sep 2018 21:41:35 +0530
+Subject: [PATCH 28/52] glusterd: Reset op-version for
+ "features.shard-deletion-rate"
+
+The op-version for the "features.shard-deletion-rate" option was set to
+4.2.0 in the upstream patch and backported at
+e75be952569eb69325d5f505f7ab94aace31be52.
+This commit reverts the op-version for this option to 3.13.3.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ie3d12f3119ad7a4b40d81bd8bd6ed591658e8371
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/154865
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h | 2 ++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 4a82889..4d95f75 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -120,6 +120,8 @@
+
+ #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */
+
++#define GD_OP_VERSION_3_13_4 31304 /* Op-version for GlusterFS 3.13.4 */
++
+ #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
+
+ #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 2bd0a9c..2f3271f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2552,7 +2552,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ },
+ {.key = "features.shard-deletion-rate",
+ .voltype = "features/shard",
+- .op_version = GD_OP_VERSION_5_0,
++ .op_version = GD_OP_VERSION_3_13_4,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "features.scrub-throttle",
+--
+1.8.3.1
+
diff --git a/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch b/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch
new file mode 100644
index 0000000..752a81b
--- /dev/null
+++ b/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch
@@ -0,0 +1,39 @@
+From b504052d003aa41fbd44eec286d1733b6f2a168e Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Tue, 6 Nov 2018 18:44:55 +0530
+Subject: [PATCH 29/52] glusterd: Reset op-version for
+ "features.shard-lru-limit"
+
+The op-version for the "features.shard-lru-limit" option was set to
+4.2.0 in the upstream patch and backported at
+41e7e33c6512e98a1567e5a5532d3898b59cfa98
+
+This commit reverts the op-version for this option to 3.13.4.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I7d3ed6b373851267c78fc6815a83bee2c0906413
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/155127
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 2f3271f..4bf89a6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2546,7 +2546,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {
+ .key = "features.shard-lru-limit",
+ .voltype = "features/shard",
+- .op_version = GD_OP_VERSION_5_0,
++ .op_version = GD_OP_VERSION_3_13_4,
+ .flags = VOLOPT_FLAG_CLIENT_OPT,
+ .type = NO_DOC,
+ },
+--
+1.8.3.1
+
diff --git a/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch b/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch
new file mode 100644
index 0000000..b50236d
--- /dev/null
+++ b/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch
@@ -0,0 +1,42 @@
+From 1d2d29396ee25f09c7d379a992ac9bd244e89c39 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Thu, 13 Dec 2018 14:28:57 +0530
+Subject: [PATCH 30/52] selinux/glusterd : add "features.selinux" to
+ glusterd-volume-set.c
+
+updates: #593
+Change-Id: I38675ba4d47c8ba7f94cfb4734692683ddb3dcfd
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 4bf89a6..11265bf 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -1203,10 +1203,9 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "performance/io-threads",
+ .option = "pass-through",
+ .op_version = GD_OP_VERSION_4_1_0},
+- {.key = "performance.least-rate-limit",
+- .voltype = "performance/io-threads",
+- .op_version = 1
+- },
++ {.key = "performance.least-rate-limit",
++ .voltype = "performance/io-threads",
++ .op_version = 1},
+
+ /* Other perf xlators' options */
+ {.key = "performance.io-cache-pass-through",
+@@ -2849,7 +2848,6 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ "trusted.gluster.selinux on the bricks. Recommended "
+ "to have enabled when clients and/or bricks support "
+ "SELinux."},
+- {.key = GLUSTERD_LOCALTIME_LOGGING_KEY,
+ /*{.key = GLUSTERD_LOCALTIME_LOGGING_KEY,
+ .voltype = "mgmt/glusterd",
+ .type = GLOBAL_DOC,
+--
+1.8.3.1
+
diff --git a/0031-glusterd-turn-off-selinux-feature-in-downstream.patch b/0031-glusterd-turn-off-selinux-feature-in-downstream.patch
new file mode 100644
index 0000000..a7e1e26
--- /dev/null
+++ b/0031-glusterd-turn-off-selinux-feature-in-downstream.patch
@@ -0,0 +1,34 @@
+From c3176144e531e22bfe97d0fef3b0e3e449fb1d32 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 16 Apr 2018 13:47:12 +0530
+Subject: [PATCH 31/52] glusterd: turn off selinux feature in downstream
+
+In RHGS 3.4.0, the selinux feature was never meant to be qualified.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I0cd5eb5207a757c8b6ef789980c061f211410bd5
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/135716
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 11265bf..d1244e4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2842,7 +2842,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = VKEY_FEATURES_SELINUX,
+ .voltype = "features/selinux",
+ .type = NO_DOC,
+- .value = "on",
++ .value = "off",
+ .op_version = GD_OP_VERSION_3_11_0,
+ .description = "Convert security.selinux xattrs to "
+ "trusted.gluster.selinux on the bricks. Recommended "
+--
+1.8.3.1
+
diff --git a/0032-glusterd-update-gd-op-version-to-3_7_0.patch b/0032-glusterd-update-gd-op-version-to-3_7_0.patch
new file mode 100644
index 0000000..82b34e3
--- /dev/null
+++ b/0032-glusterd-update-gd-op-version-to-3_7_0.patch
@@ -0,0 +1,29 @@
+From bfa7055c3901b34a49f7933ea9edcf6465843de1 Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Wed, 23 Jan 2019 14:22:00 +0530
+Subject: [PATCH 32/52] glusterd: update gd-op-version to 3_7_0
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ia6456134cd7e544a415692d09cd1ccbb6e02dd82
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-rebalance.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+index 6365b6e..e20e3c4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+@@ -1174,7 +1174,7 @@ glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
+ * 'force'
+ */
+ ret = glusterd_check_client_op_version_support(
+- volname, GD_OP_VERSION_3_6_0, NULL);
++ volname, GD_OP_VERSION_3_7_0, NULL);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume %s has one or "
+--
+1.8.3.1
+
diff --git a/0033-build-add-missing-explicit-package-dependencies.patch b/0033-build-add-missing-explicit-package-dependencies.patch
new file mode 100644
index 0000000..57c2919
--- /dev/null
+++ b/0033-build-add-missing-explicit-package-dependencies.patch
@@ -0,0 +1,83 @@
+From 52e2d75c2c8e32d2e4f69840e34d21b39279284a Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Thu, 13 Dec 2018 12:46:56 +0530
+Subject: [PATCH 33/52] build: add missing explicit package dependencies
+
+Add explicit dependencies on glusterfs-libs and other packages.
+This is an Errata Tool whine.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ieaadb6e4ffa84d1811aa740f7891855568ecbcbb
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/158501
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index bdb47ba..9cd4372 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -323,6 +323,7 @@ and client framework.
+ Summary: GlusterFS api library
+ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
++Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+ %description api
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -340,6 +341,7 @@ Summary: Development Libraries
+ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Requires: %{name}-devel%{?_isa} = %{version}-%{release}
+ Requires: libacl-devel
++Requires: %{name}-api%{?_isa} = %{version}-%{release}
+
+ %description api-devel
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -391,6 +393,8 @@ Requires: %{name}%{?_isa} = %{version}-%{release}
+ %if ( 0%{!?_without_extra_xlators:1} )
+ Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release}
+ %endif
++Requires: %{name}-libs%{?_isa} = %{version}-%{release}
++Requires: %{name}-server%{?_isa} = %{version}-%{release}
+
+ %description devel
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -435,6 +439,7 @@ Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+
+ Obsoletes: %{name}-client < %{version}-%{release}
+ Provides: %{name}-client = %{version}-%{release}
++Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+ %description fuse
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -459,6 +464,7 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release}
+
+ Requires: rsync
+ Requires: util-linux
++Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+ %description geo-replication
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -536,6 +542,7 @@ BuildRequires: libibverbs-devel
+ BuildRequires: librdmacm-devel >= 1.0.15
+ %endif
+ Requires: %{name}%{?_isa} = %{version}-%{release}
++Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+ %description rdma
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -664,6 +671,7 @@ This package provides the glusterfs thin-arbiter translator.
+
+ %package client-xlators
+ Summary: GlusterFS client-side translators
++Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+ %description client-xlators
+ GlusterFS is a distributed file-system capable of scaling to several
+--
+1.8.3.1
+
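+A hedged way to verify the added dependencies on the built packages
+(the rpm file names are illustrative):
+
+ rpm -qp --requires glusterfs-api-*.rpm | grep glusterfs-libs
+ rpm -qp --requires glusterfs-fuse-*.rpm | grep glusterfs-libs
+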
diff --git a/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch b/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch
new file mode 100644
index 0000000..587a25a
--- /dev/null
+++ b/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch
@@ -0,0 +1,59 @@
+From 463a920541a7579f2407f22597e4014494422804 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Mon, 17 Dec 2018 14:07:01 +0530
+Subject: [PATCH 34/52] glusterd: introduce a new op-version for rhgs-3.4.3
+
+This patch introduces a new op-version 31305 for rhgs-3.4.3 and
+sets the max op-version to 31305.
+
+For migrating the profile commands (commit
+e68845ff7018e5d81d7979684b18e6eda449b088) we used GD_OP_VERSION_6_0
+upstream. We are changing it to GD_OP_VERSION_3_13_5 here.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ie3a05c70eb4e406889c468343f54e999b1218f19
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/158795
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h | 2 ++
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 4 ++--
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 4d95f75..6642ba0 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -122,6 +122,8 @@
+
+ #define GD_OP_VERSION_3_13_4 31304 /* Op-version for GlusterFS 3.13.4 */
+
++#define GD_OP_VERSION_3_13_5 31305 /* Op-version for GlusterFS 3.13.5 */
++
+ #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
+
+ #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 387643d..de44af7 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -3085,12 +3085,12 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
+ goto out;
+ }
+
+- if (conf->op_version < GD_OP_VERSION_6_0) {
++ if (conf->op_version < GD_OP_VERSION_3_13_5) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. Falling back "
+ "to op-sm framework.",
+- GD_OP_VERSION_6_0);
++ GD_OP_VERSION_3_13_5);
+ ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str));
+ glusterd_friend_sm();
+ glusterd_op_sm();
+--
+1.8.3.1
+
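+Whether the new mgmt_v3 path is taken depends on the cluster's
+operating version; a hedged sketch for inspecting it, and raising it
+once all nodes are upgraded:
+
+ gluster volume get all cluster.op-version
+ gluster volume set all cluster.op-version 31305
+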
diff --git a/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch b/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch
new file mode 100644
index 0000000..643ba3a
--- /dev/null
+++ b/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch
@@ -0,0 +1,41 @@
+From 254033a80d85460675c921c272fb94bb7e9f67d4 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 18 Dec 2018 17:57:25 +0530
+Subject: [PATCH 35/52] glusterd: tag rebalance mgmt_v3 command to op-version
+ 31305
+
+Upstream, the rebalance command's migration to mgmt_v3 is tagged to
+op-version 60000, but downstream the latest op-version is 31305.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I30bbad3efca29bf42b9a750581eb1aebc8a30ff9
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/158943
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+index e20e3c4..ed5ded5 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+@@ -573,12 +573,12 @@ __glusterd_handle_defrag_volume(rpcsvc_request_t *req)
+ } else
+ op = GD_OP_REBALANCE;
+
+- if (priv->op_version < GD_OP_VERSION_6_0) {
++ if (priv->op_version < GD_OP_VERSION_3_13_5) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. Falling back "
+ "to op-sm framework.",
+- GD_OP_VERSION_6_0);
++ GD_OP_VERSION_3_13_5);
+ ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg));
+ glusterd_friend_sm();
+ glusterd_op_sm();
+--
+1.8.3.1
+
diff --git a/0036-build-add-conditional-dependency-on-server-for-devel.patch b/0036-build-add-conditional-dependency-on-server-for-devel.patch
new file mode 100644
index 0000000..352078b
--- /dev/null
+++ b/0036-build-add-conditional-dependency-on-server-for-devel.patch
@@ -0,0 +1,47 @@
+From d6458c40706d8886187bd9c2016087a3a1eee882 Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Wed, 19 Dec 2018 13:17:42 +0530
+Subject: [PATCH 36/52] build: add conditional dependency on server for devel
+
+Add a conditional dependency on the server package for glusterfs-devel.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Icc45df3db137dbc03d240c1ac774b5c8735c5f2f
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/159030
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 9cd4372..9db5a34 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -394,7 +394,9 @@ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release}
+ %endif
+ Requires: %{name}-libs%{?_isa} = %{version}-%{release}
++%if ( 0%{!?_without_server:1} )
+ Requires: %{name}-server%{?_isa} = %{version}-%{release}
++%endif
+
+ %description devel
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -2067,6 +2069,11 @@ fi
+ * Thu Feb 21 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+ - Obsoleting gluster-gnfs package
+
++* Wed Dec 19 2018 Milind Changire <mchangir@redhat.com>
++- Add explicit package dependencies (#1656357)
++- Remove absolute paths from spec file (#1350745)
++- Do not package crypt.so for FIPS compliance (#1653224)
++
+ * Wed Nov 28 2018 Krutika Dhananjay <kdhananj@redhat.com>
+ - Install /var/lib/glusterd/groups/distributed-virt by default
+
+--
+1.8.3.1
+
diff --git a/0037-cli-change-the-warning-message.patch b/0037-cli-change-the-warning-message.patch
new file mode 100644
index 0000000..e4a4544
--- /dev/null
+++ b/0037-cli-change-the-warning-message.patch
@@ -0,0 +1,35 @@
+From 7e0342e0d01204f136b0bd28931a6313ea216649 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Wed, 6 Feb 2019 19:06:45 +0530
+Subject: [PATCH 37/52] cli: change the warning message
+
+This patch changes the warning message the user gets when enabling
+brick multiplexing so that it refers to OCS instead of CNS/CRS.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Id5fd87955d5a692f8e57560245f8b0cf9882e1da
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/162405
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 760a10c..541dc62 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -1697,7 +1697,7 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+
+ if ((strcmp (key, "cluster.brick-multiplex") == 0)) {
+ question = "Brick-multiplexing is supported only for "
+- "container workloads (CNS/CRS). Also it is "
++ "OCS converged or independent mode. Also it is "
+ "advised to make sure that either all "
+ "volumes are in stopped state or no bricks "
+ "are running before this option is modified."
+--
+1.8.3.1
+
diff --git a/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch b/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch
new file mode 100644
index 0000000..00a5af3
--- /dev/null
+++ b/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch
@@ -0,0 +1,230 @@
+From a577dd0a3cbf435681f10d095a0dca0595c6a354 Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Sat, 9 Feb 2019 14:01:28 +0530
+Subject: [PATCH 38/52] spec: avoid creation of temp file in lua script
+
+Avoiding the creation of a temporary file to execute a bash shell
+script from a Lua scriptlet improves install-time security.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ie5b9035f292402b18dea768aca8bc82a1e7fa615
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/162621
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 120 ++++++------------------------------------------------
+ 1 file changed, 12 insertions(+), 108 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 9db5a34..df8d116 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1542,15 +1542,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1584,15 +1576,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1626,15 +1610,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1668,15 +1644,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1709,15 +1677,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1750,15 +1710,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1792,15 +1744,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1835,15 +1779,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1878,15 +1814,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1921,15 +1849,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -1965,15 +1885,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+@@ -2008,15 +1920,7 @@ if [ $? -eq 0 ]; then
+ fi
+ ]]
+
+--- Since we run pretrans scripts only for RPMs built for a server build,
+--- we can now use os.tmpname() since it is available on RHEL6 and later
+--- platforms which are server platforms.
+-tmpname = os.tmpname()
+-tmpfile = io.open(tmpname, "w")
+-tmpfile:write(script)
+-tmpfile:close()
+-ok, how, val = os.execute("/bin/bash " .. tmpname)
+-os.remove(tmpname)
++ok, how, val = os.execute(script)
+ if not (ok == 0) then
+ error("Detected running glusterfs processes", ok)
+ end
+--
+1.8.3.1
+
diff --git a/0039-cli-fix-query-to-user-during-brick-mux-selection.patch b/0039-cli-fix-query-to-user-during-brick-mux-selection.patch
new file mode 100644
index 0000000..82684cb
--- /dev/null
+++ b/0039-cli-fix-query-to-user-during-brick-mux-selection.patch
@@ -0,0 +1,61 @@
+From ec707e099d4e4338d1ea21560d367b02e6339532 Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <sheggodu@redhat.com>
+Date: Mon, 1 Apr 2019 16:16:47 +0530
+Subject: [PATCH 39/52] cli: fix query to user during brick-mux selection
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I59472066b917ea2b23de72bcd91dc3e275d5e055
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 33 +++++++++++++++++----------------
+ 1 file changed, 17 insertions(+), 16 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 541dc62..d9ccba1 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -1693,23 +1693,24 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ goto out;
+ }
+ }
+- }
+-
+- if ((strcmp (key, "cluster.brick-multiplex") == 0)) {
+- question = "Brick-multiplexing is supported only for "
+- "OCS converged or independent mode. Also it is "
+- "advised to make sure that either all "
+- "volumes are in stopped state or no bricks "
+- "are running before this option is modified."
+- "Do you still want to continue?";
+
+- answer = cli_cmd_get_confirmation (state, question);
+- if (GF_ANSWER_NO == answer) {
+- gf_log ("cli", GF_LOG_ERROR, "Operation "
+- "cancelled, exiting");
+- *op_errstr = gf_strdup ("Aborted by user.");
+- ret = -1;
+- goto out;
++ if ((strcmp (key, "cluster.brick-multiplex") == 0)) {
++ question =
++ "Brick-multiplexing is supported only for "
++ "OCS converged or independent mode. Also it is "
++ "advised to make sure that either all "
++ "volumes are in stopped state or no bricks "
++ "are running before this option is modified."
++ "Do you still want to continue?";
++
++ answer = cli_cmd_get_confirmation (state, question);
++ if (GF_ANSWER_NO == answer) {
++ gf_log ("cli", GF_LOG_ERROR, "Operation "
++ "cancelled, exiting");
++ *op_errstr = gf_strdup ("Aborted by user.");
++ ret = -1;
++ goto out;
++ }
+ }
+ }
+
+--
+1.8.3.1
+
diff --git a/0040-build-Remove-unsupported-test-cases-failing-consiste.patch b/0040-build-Remove-unsupported-test-cases-failing-consiste.patch
new file mode 100644
index 0000000..1d10507
--- /dev/null
+++ b/0040-build-Remove-unsupported-test-cases-failing-consiste.patch
@@ -0,0 +1,136 @@
+From 79c74009892804419dce264399f3fde357d5b1c3 Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Tue, 2 Apr 2019 11:07:03 +0530
+Subject: [PATCH 40/52] build: Remove unsupported test cases failing
+ consistently
+
+The following two test cases fail consistently in downstream regression
+runs. They are being removed since they are not supported downstream.
+
+tests/basic/cloudsync-sanity.t
+tests/bugs/distribute/bug-882278.t
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ie4b506639a017ec9910e44df1b721d9bfadf07b3
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166662
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/cloudsync-sanity.t | 22 ------------
+ tests/bugs/distribute/bug-882278.t | 73 --------------------------------------
+ 2 files changed, 95 deletions(-)
+ delete mode 100644 tests/basic/cloudsync-sanity.t
+ delete mode 100755 tests/bugs/distribute/bug-882278.t
+
+diff --git a/tests/basic/cloudsync-sanity.t b/tests/basic/cloudsync-sanity.t
+deleted file mode 100644
+index 3cf719d..0000000
+--- a/tests/basic/cloudsync-sanity.t
++++ /dev/null
+@@ -1,22 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../include.rc
+-. $(dirname $0)/../volume.rc
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+-TEST $CLI volume set $V0 features.cloudsync enable;
+-TEST $CLI volume start $V0;
+-
+-## Mount FUSE
+-TEST $GFS -s $H0 --volfile-id $V0 $M1;
+-
+-# This test covers lookup, mkdir, mknod, symlink, link, rename,
+-# create operations
+-TEST $(dirname $0)/rpc-coverage.sh $M1
+-
+-cleanup;
+diff --git a/tests/bugs/distribute/bug-882278.t b/tests/bugs/distribute/bug-882278.t
+deleted file mode 100755
+index 8cb5147..0000000
+--- a/tests/bugs/distribute/bug-882278.t
++++ /dev/null
+@@ -1,73 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-cleanup
+-
+-# Is there a good reason to require --fqdn elsewhere? It's worse than useless
+-# here.
+-H0=$(hostname -s)
+-
+-function recreate {
+- # The rm is necessary so we don't get fooled by leftovers from old runs.
+- rm -rf $1 && mkdir -p $1
+-}
+-
+-function count_lines {
+- grep "$1" $2/* | wc -l
+-}
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume info;
+-
+-## Start and create a volume
+-TEST recreate ${B0}/${V0}-0
+-TEST recreate ${B0}/${V0}-1
+-TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1}
+-TEST $CLI volume set $V0 cluster.nufa on
+-
+-function volinfo_field()
+-{
+- local vol=$1;
+- local field=$2;
+-
+- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+-}
+-
+-
+-## Verify volume is created
+-EXPECT "$V0" volinfo_field $V0 'Volume Name';
+-EXPECT 'Created' volinfo_field $V0 'Status';
+-
+-## Start volume and verify
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status';
+-
+-## Mount native
+-special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1"
+-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0
+-
+-## Create a bunch of test files.
+-for i in $(seq 0 99); do
+- echo hello > $(printf $M0/file%02d $i)
+-done
+-
+-## Make sure the files went to the right place. There might be link files in
+-## the other brick, but they won't have any contents.
+-EXPECT "0" count_lines hello ${B0}/${V0}-0
+-EXPECT "100" count_lines hello ${B0}/${V0}-1
+-
+-if [ "$EXIT_EARLY" = "1" ]; then
+- exit 0;
+-fi
+-
+-## Finish up
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+-TEST $CLI volume stop $V0;
+-EXPECT 'Stopped' volinfo_field $V0 'Status';
+-
+-TEST $CLI volume delete $V0;
+-TEST ! $CLI volume info $V0;
+-
+-cleanup;
+--
+1.8.3.1
+
diff --git a/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch b/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch
new file mode 100644
index 0000000..c1e1720
--- /dev/null
+++ b/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch
@@ -0,0 +1,43 @@
+From c8f0ac9b429e1ff73a3e87247193c35c66212540 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Tue, 2 Apr 2019 12:06:53 +0530
+Subject: [PATCH 41/52] tests/geo-rep: Build failed in Jenkins for test
+ bug-1600145.t
+
+Problem: the ((strcmp (key, "cluster.brick-multiplex") == 0))
+comparison in cli/src/cli-cmd-parser.c expects a yes or no
+confirmation from the CLI, which is not handled in bug-1600145.t,
+causing the test to wait until it times out and then fail.
+
+Solution: Pipe yes into
+`gluster v set all cluster.brick-multiplex on` in bug-1600145.t.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I1a6c2a992b65380cea145fd1c46d22ec1251c77e
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166694
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
+---
+ tests/00-geo-rep/bug-1600145.t | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tests/00-geo-rep/bug-1600145.t b/tests/00-geo-rep/bug-1600145.t
+index 1d38bf9..359bc4f 100644
+--- a/tests/00-geo-rep/bug-1600145.t
++++ b/tests/00-geo-rep/bug-1600145.t
+@@ -29,7 +29,7 @@ slave_mnt=$M1
+
+ ##create_and_start_master_volume
+ TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2};
+-gluster v set all cluster.brick-multiplex on
++yes | gluster v set all cluster.brick-multiplex on
+ TEST $CLI volume start $GMV0
+
+ ##create_and_start_slave_volume
+--
+1.8.3.1
+
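+The same pattern applies to any non-interactive caller: the
+brick-multiplex prompt added in the earlier CLI patches must be
+answered, e.g. by piping yes into the command as the test now does:
+
+ yes | gluster volume set all cluster.brick-multiplex on
+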
diff --git a/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch b/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch
new file mode 100644
index 0000000..7e3d69f
--- /dev/null
+++ b/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch
@@ -0,0 +1,123 @@
+From f25a92028ecc2018953a6375bba43a21d3a93566 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Thu, 4 Apr 2019 16:18:51 +0530
+Subject: [PATCH 42/52] spec: (client|server) Builds are failing on rhel-6
+
+Problem: 1) For the server rpm, the gluster build throws an
+ "undefined reference to `dlclose`" error on RHEL 6.
+ 2) For the server rpm, the build throws "File Not found"
+ errors for rot-13.so and symlink-cache.so.
+ 3) For the client rpms, the build throws "File Not found"
+ errors for all the files excluded in the without_server check.
+
+Solution: 1) For the server rpm, add the LIB_DL link in the Makefile
+ and remove the references to rot-13.so and symlink-cache.so
+ from glusterfs.spec.in.
+ 2) Remove the excluded-files list, as those files are not
+ being built.
+
+Label: DOWNSTREAM ONLY
+Change-Id: I2b41604cbc8525b91231b0c5caee588c5d5d6b08
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166962
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 54 -----------------------------------
+ xlators/mgmt/glusterd/src/Makefile.am | 2 +-
+ 2 files changed, 1 insertion(+), 55 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index df8d116..7c7f7c0 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1097,72 +1097,18 @@ exit 0
+ %{_tmpfilesdir}/gluster.conf
+ %endif
+ %if ( 0%{?_without_extra_xlators:1} )
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13.so
+ %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
+ %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache.so
+ %endif
+ %if ( 0%{?_without_regression_tests:1} )
+ %exclude %{_datadir}/glusterfs/run-tests.sh
+ %exclude %{_datadir}/glusterfs/tests
+ %endif
+ %if 0%{?_without_server:1}
+-%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-5.8.conf
+-%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-7.2.conf
+-%exclude %{_sysconfdir}/glusterfs/glusterd.vol
+-%exclude %{_sysconfdir}/glusterfs/glusterfs-georep-logrotate
+-%exclude %{_sysconfdir}/glusterfs/glusterfs-logrotate
+-%exclude %{_sysconfdir}/glusterfs/group-db-workload
+-%exclude %{_sysconfdir}/glusterfs/group-distributed-virt
+-%exclude %{_sysconfdir}/glusterfs/group-gluster-block
+-%exclude %{_sysconfdir}/glusterfs/group-metadata-cache
+-%exclude %{_sysconfdir}/glusterfs/group-nl-cache
+-%exclude %{_sysconfdir}/glusterfs/group-virt.example
+-%exclude %{_sysconfdir}/glusterfs/logger.conf.example
+-%exclude %{_sysconfdir}/rsyslog.d/gluster.conf.example
+-%exclude %{_prefix}/bin/glusterfind
+-%exclude %{_prefix}/lib/firewalld/services/glusterfs.xml
+-%exclude %{_prefix}/lib/systemd/system/glusterd.service
+-%exclude %{_prefix}/lib/systemd/system/glusterfssharedstorage.service
+-%exclude %{_prefix}/lib/tmpfiles.d/gluster.conf
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix-locks.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quotad.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so
+-%exclude %{_libexecdir}/glusterfs/*
+-%exclude %{_sbindir}/conf.py
+-%exclude %{_sbindir}/gcron.py
+-%exclude %{_sbindir}/gf_attach
+-%exclude %{_sbindir}/gfind_missing_files
+-%exclude %{_sbindir}/glfsheal
+-%exclude %{_sbindir}/gluster
+-%exclude %{_sbindir}/gluster-setgfid2path
+-%exclude %{_sbindir}/glusterd
+-%exclude %{_sbindir}/snap_scheduler.py
+ %if ( 0%{?_with_systemd:1} )
+ %exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+ %exclude %{_datadir}/glusterfs/scripts/control-mem.sh
+ %endif
+-%exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh
+-%exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh
+-%exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
+-%exclude %{_sharedstatedir}/glusterd/*
+ %endif
+
+ %files api
+diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
+index 6d09e37..c8dd8e3 100644
+--- a/xlators/mgmt/glusterd/src/Makefile.am
++++ b/xlators/mgmt/glusterd/src/Makefile.am
+@@ -6,7 +6,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
+ glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) \
+ -DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\" \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\"
+-glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
++glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL)
+ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
+ glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
+--
+1.8.3.1
+
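+A hedged illustration of why $(LIB_DL) is needed: on RHEL 6,
+dlopen()/dlclose() live in libdl rather than libc, so a module calling
+them must link it explicitly. Assuming LIB_DL expands to -ldl and with
+illustrative object names:
+
+ gcc -shared -o glusterd.so glusterd.o glusterd-utils.o -ldl
+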
diff --git a/0043-inode-don-t-dump-the-whole-table-to-CLI.patch b/0043-inode-don-t-dump-the-whole-table-to-CLI.patch
new file mode 100644
index 0000000..7e9d3c3
--- /dev/null
+++ b/0043-inode-don-t-dump-the-whole-table-to-CLI.patch
@@ -0,0 +1,137 @@
+From 416dfc70ef87400e1ddfd70e5b6e512d330b54a6 Mon Sep 17 00:00:00 2001
+From: Sheetal Pamecha <sheetal.pamecha08@gmail.com>
+Date: Tue, 2 Apr 2019 23:25:11 +0530
+Subject: [PATCH 43/52] inode: don't dump the whole table to CLI
+
+Dumping the whole inode table to the screen serves no purpose.
+The CLI should report only the top-level details; if one wants
+to debug further, they should take a 'statedump' to get the
+full details.
+
+Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22347/
+
+BUG: 1578703
+Change-Id: Ie7e7f5a67c1606e3c18ce21ee6df6c7e4550c211
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166768
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-rpc-ops.c | 23 ++++++++++++++++++++++-
+ libglusterfs/src/inode.c | 13 +++++++++++++
+ 2 files changed, 35 insertions(+), 1 deletion(-)
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 78043cd..12e7fcc 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -7606,15 +7606,24 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
++ uint32_t lru_limit = 0;
+ int i = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
++ snprintf(key, sizeof(key), "%s.lru_limit", prefix);
++ ret = dict_get_uint32(dict, key, &lru_limit);
++ if (ret)
++ goto out;
++ cli_out("LRU limit : %u", lru_limit);
++
+ snprintf(key, sizeof(key), "%s.active_size", prefix);
+ ret = dict_get_uint32(dict, key, &active_size);
+ if (ret)
+ goto out;
++
++#ifdef DEBUG
+ if (active_size != 0) {
+ cli_out("Active inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+@@ -7626,10 +7635,16 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
+ }
+ cli_out(" ");
+
++#else
++ cli_out("Active Inodes : %u", active_size);
++
++#endif
+ snprintf(key, sizeof(key), "%s.lru_size", prefix);
+ ret = dict_get_uint32(dict, key, &lru_size);
+ if (ret)
+ goto out;
++
++#ifdef DEBUG
+ if (lru_size != 0) {
+ cli_out("LRU inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+@@ -7640,11 +7655,15 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+ cli_out(" ");
++#else
++ cli_out("LRU Inodes : %u", lru_size);
++#endif
+
+ snprintf(key, sizeof(key), "%s.purge_size", prefix);
+ ret = dict_get_uint32(dict, key, &purge_size);
+ if (ret)
+ goto out;
++#ifdef DEBUG
+ if (purge_size != 0) {
+ cli_out("Purged inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+@@ -7654,7 +7673,9 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
+ snprintf(key, sizeof(key), "%s.purge%d", prefix, i);
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+-
++#else
++ cli_out("Purge Inodes : %u", purge_size);
++#endif
+ out:
+ return;
+ }
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 87f74e0..96ddea5 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -2598,6 +2598,11 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ if (ret)
+ return;
+
++ snprintf(key, sizeof(key), "%s.itable.lru_limit", prefix);
++ ret = dict_set_uint32(dict, key, itable->lru_limit);
++ if (ret)
++ goto out;
++
+ snprintf(key, sizeof(key), "%s.itable.active_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->active_size);
+ if (ret)
+@@ -2613,6 +2618,13 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ if (ret)
+ goto out;
+
++#ifdef DEBUG
++ /* Dumping the inode details into the dictionary and sending them to
++ the CLI is not required: when a developer (or the support team) asks
++ for this command output, they only want the top-level detail of the
++ inode table. If one wants to debug, let them take a statedump;
++ that full detail shouldn't be available in the CLI on a production
++ setup. */
+ list_for_each_entry(inode, &itable->active, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.active%d", prefix, count++);
+@@ -2632,6 +2644,7 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ snprintf(key, sizeof(key), "%s.itable.purge%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
++#endif
+
+ out:
+ pthread_mutex_unlock(&itable->lock);
+--
+1.8.3.1
+
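With the change above, a non-DEBUG build of the CLI prints only per-table counters instead of walking every inode. Going by the cli_out() format strings in the hunks, the inode section of 'gluster volume status <VOLNAME> inode' would reduce to something like the following per brick (the numbers are illustrative):

    LRU limit : 65536
    Active Inodes : 42
    LRU Inodes : 512
    Purge Inodes : 0

The full per-inode listing (GFID, lookups, refs, IA type) stays available in DEBUG builds and, as the new comment in inode.c notes, through a statedump.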
diff --git a/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch b/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch
new file mode 100644
index 0000000..30ab28b
--- /dev/null
+++ b/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch
@@ -0,0 +1,360 @@
+From bc6588890ce94101a63b861178cf38db5549d8a8 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Wed, 28 Nov 2018 11:22:52 +0530
+Subject: [PATCH 44/52] cluster/ec: Don't enqueue an entry if it is already
+ healing
+
+Problem:
+1 - heal-wait-qlength defaults to 128. If shd is disabled and
+files need healing, client-side heal is required, and accessing
+those files triggers the heal. However, it has been observed
+that a file can be enqueued multiple times in the heal wait
+queue, which in turn fills the queue and prevents other files
+from being enqueued.
+
+2 - While a file is being healed and a write fop arrives from
+the mount on that file, the write is sent to all the bricks,
+including the healing one. At the end, version and size are
+updated on all the bricks, but the dirty flag is not unset on
+all of them, even if the write fop succeeded everywhere. After
+healing completes, this dirty flag remains set and is never
+cleaned up if SHD is disabled.
+
+Solution:
+1 - If an entry is already queued or going through the heal process,
+don't enqueue the next client-side request to heal the same file.
+
+2 - Unset dirty on all the bricks at the end if the fop has succeeded
+on all the bricks, even if some of them are going through heal.
+
+backport of : https://review.gluster.org/#/c/glusterfs/+/21744/
+
+Change-Id: Ia61ffe230c6502ce6cb934425d55e2f40dd1a727
+BUG: 1600918
+Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166296
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/ec/bug-1236065.t | 1 -
+ xlators/cluster/ec/src/ec-common.c | 43 +++++++++------
+ xlators/cluster/ec/src/ec-common.h | 8 +++
+ xlators/cluster/ec/src/ec-heal.c | 104 +++++++++++++++++++++++++++++++-----
+ xlators/cluster/ec/src/ec-helpers.c | 1 +
+ xlators/cluster/ec/src/ec-types.h | 1 +
+ 6 files changed, 127 insertions(+), 31 deletions(-)
+
+diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t
+index 76d25d7..9181e73 100644
+--- a/tests/bugs/ec/bug-1236065.t
++++ b/tests/bugs/ec/bug-1236065.t
+@@ -85,7 +85,6 @@ TEST pidof glusterd
+ EXPECT "$V0" volinfo_field $V0 'Volume Name'
+ EXPECT 'Started' volinfo_field $V0 'Status'
+ EXPECT '7' online_brick_count
+-
+ ## cleanup
+ cd
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 8d65670..5183680 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -313,14 +313,15 @@ ec_check_status(ec_fop_data_t *fop)
+
+ gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+- "remaining=%s, good=%s, bad=%s)",
++ "remaining=%s, good=%s, bad=%s, %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+- ec->nodes));
++ ec->nodes),
++ ec_msg_str(fop));
+ if (fop->use_fd) {
+ if (fop->fd != NULL) {
+ ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
+@@ -2371,37 +2372,47 @@ ec_update_info(ec_lock_link_t *link)
+ uint64_t dirty[2] = {0, 0};
+ uint64_t size;
+ ec_t *ec = NULL;
++ uintptr_t mask;
+
+ lock = link->lock;
+ ctx = lock->ctx;
+ ec = link->fop->xl->private;
+
+ /* pre_version[*] will be 0 if have_version is false */
+- version[0] = ctx->post_version[0] - ctx->pre_version[0];
+- version[1] = ctx->post_version[1] - ctx->pre_version[1];
++ version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] -
++ ctx->pre_version[EC_DATA_TXN];
++ version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] -
++ ctx->pre_version[EC_METADATA_TXN];
+
+ size = ctx->post_size - ctx->pre_size;
+ /* If we set the dirty flag for update fop, we have to unset it.
+ * If fop has failed on some bricks, leave the dirty as marked. */
++
+ if (lock->unlock_now) {
++ if (version[EC_DATA_TXN]) {
++ /*A data fop will have difference in post and pre version
++ *and for data fop we send writes on healing bricks also */
++ mask = lock->good_mask | lock->healing;
++ } else {
++ mask = lock->good_mask;
++ }
+ /* Ensure that nodes are up while doing final
+ * metadata update.*/
+- if (!(ec->node_mask & ~lock->good_mask) &&
+- !(ec->node_mask & ~ec->xl_up)) {
+- if (ctx->dirty[0] != 0) {
+- dirty[0] = -1;
++ if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) {
++ if (ctx->dirty[EC_DATA_TXN] != 0) {
++ dirty[EC_DATA_TXN] = -1;
+ }
+- if (ctx->dirty[1] != 0) {
+- dirty[1] = -1;
++ if (ctx->dirty[EC_METADATA_TXN] != 0) {
++ dirty[EC_METADATA_TXN] = -1;
+ }
+ /*If everything is fine and we already
+ *have version xattr set on entry, there
+ *is no need to update version again*/
+- if (ctx->pre_version[0]) {
+- version[0] = 0;
++ if (ctx->pre_version[EC_DATA_TXN]) {
++ version[EC_DATA_TXN] = 0;
+ }
+- if (ctx->pre_version[1]) {
+- version[1] = 0;
++ if (ctx->pre_version[EC_METADATA_TXN]) {
++ version[EC_METADATA_TXN] = 0;
+ }
+ } else {
+ link->optimistic_changelog = _gf_false;
+@@ -2410,8 +2421,8 @@ ec_update_info(ec_lock_link_t *link)
+ memset(ctx->dirty, 0, sizeof(ctx->dirty));
+ }
+
+- if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) ||
+- (dirty[1] != 0)) {
++ if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) ||
++ (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) {
+ ec_update_size_version(link, version, size, dirty);
+ return _gf_true;
+ }
+diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
+index 115e147..54aaa77 100644
+--- a/xlators/cluster/ec/src/ec-common.h
++++ b/xlators/cluster/ec/src/ec-common.h
+@@ -190,4 +190,12 @@ ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
+ void
+ ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index,
+ int32_t ret_status);
++gf_boolean_t
++ec_is_entry_healing(ec_fop_data_t *fop);
++void
++ec_set_entry_healing(ec_fop_data_t *fop);
++void
++ec_reset_entry_healing(ec_fop_data_t *fop);
++char *
++ec_msg_str(ec_fop_data_t *fop);
+ #endif /* __EC_COMMON_H__ */
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index eaf80e0..1ca12c1 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -103,6 +103,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
+ }
+ /* FOP: heal */
+
++void
++ec_set_entry_healing(ec_fop_data_t *fop)
++{
++ ec_inode_t *ctx = NULL;
++ loc_t *loc = NULL;
++
++ if (!fop)
++ return;
++
++ loc = &fop->loc[0];
++ LOCK(&loc->inode->lock);
++ {
++ ctx = __ec_inode_get(loc->inode, fop->xl);
++ if (ctx) {
++ ctx->heal_count += 1;
++ }
++ }
++ UNLOCK(&loc->inode->lock);
++}
++
++void
++ec_reset_entry_healing(ec_fop_data_t *fop)
++{
++ ec_inode_t *ctx = NULL;
++ loc_t *loc = NULL;
++ int32_t heal_count = 0;
++ if (!fop)
++ return;
++
++ loc = &fop->loc[0];
++ LOCK(&loc->inode->lock);
++ {
++ ctx = __ec_inode_get(loc->inode, fop->xl);
++ if (ctx) {
++ ctx->heal_count += -1;
++ heal_count = ctx->heal_count;
++ }
++ }
++ UNLOCK(&loc->inode->lock);
++ GF_ASSERT(heal_count >= 0);
++}
++
+ uintptr_t
+ ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood)
+ {
+@@ -2507,17 +2549,6 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+ goto out;
+ }
+-
+- msources = alloca0(ec->nodes);
+- mhealed_sinks = alloca0(ec->nodes);
+- ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
+- if (ret == 0) {
+- mgood = ec_char_array_to_mask(msources, ec->nodes);
+- mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
+- } else {
+- op_ret = -1;
+- op_errno = -ret;
+- }
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ if (IA_ISREG(loc->inode->ia_type)) {
+@@ -2538,8 +2569,19 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ op_ret = -1;
+ op_errno = -ret;
+ }
++ msources = alloca0(ec->nodes);
++ mhealed_sinks = alloca0(ec->nodes);
++ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
++ if (ret == 0) {
++ mgood = ec_char_array_to_mask(msources, ec->nodes);
++ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
++ } else {
++ op_ret = -1;
++ op_errno = -ret;
++ }
+
+ out:
++ ec_reset_entry_healing(fop);
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
+ ec_char_array_to_mask(participants, ec->nodes),
+@@ -2650,11 +2692,33 @@ ec_handle_healers_done(ec_fop_data_t *fop)
+ ec_launch_heal(ec, heal_fop);
+ }
+
++gf_boolean_t
++ec_is_entry_healing(ec_fop_data_t *fop)
++{
++ ec_inode_t *ctx = NULL;
++ int32_t heal_count = 0;
++ loc_t *loc = NULL;
++
++ loc = &fop->loc[0];
++
++ LOCK(&loc->inode->lock);
++ {
++ ctx = __ec_inode_get(loc->inode, fop->xl);
++ if (ctx) {
++ heal_count = ctx->heal_count;
++ }
++ }
++ UNLOCK(&loc->inode->lock);
++ GF_ASSERT(heal_count >= 0);
++ return heal_count;
++}
++
+ void
+ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
+ {
+ gf_boolean_t can_heal = _gf_true;
+ ec_t *ec = this->private;
++ ec_fop_data_t *fop_rel = NULL;
+
+ if (fop->req_frame == NULL) {
+ LOCK(&ec->lock);
+@@ -2662,8 +2726,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
+ if ((ec->background_heals > 0) &&
+ (ec->heal_wait_qlen + ec->background_heals) >
+ (ec->heal_waiters + ec->healers)) {
+- list_add_tail(&fop->healer, &ec->heal_waiting);
+- ec->heal_waiters++;
++ if (!ec_is_entry_healing(fop)) {
++ list_add_tail(&fop->healer, &ec->heal_waiting);
++ ec->heal_waiters++;
++ ec_set_entry_healing(fop);
++ } else {
++ fop_rel = fop;
++ }
+ fop = __ec_dequeue_heals(ec);
+ } else {
+ can_heal = _gf_false;
+@@ -2673,8 +2742,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
+ }
+
+ if (can_heal) {
+- if (fop)
++ if (fop) {
++ if (fop->req_frame != NULL) {
++ ec_set_entry_healing(fop);
++ }
+ ec_launch_heal(ec, fop);
++ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Max number of heals are "
+@@ -2682,6 +2755,9 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
+ ec_fop_set_error(fop, EBUSY);
+ ec_heal_fail(ec, fop);
+ }
++ if (fop_rel) {
++ ec_heal_done(0, NULL, fop_rel);
++ }
+ }
+
+ void
+diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
+index e6b0359..43f6e3b 100644
+--- a/xlators/cluster/ec/src/ec-helpers.c
++++ b/xlators/cluster/ec/src/ec-helpers.c
+@@ -717,6 +717,7 @@ __ec_inode_get(inode_t *inode, xlator_t *xl)
+ memset(ctx, 0, sizeof(*ctx));
+ INIT_LIST_HEAD(&ctx->heal);
+ INIT_LIST_HEAD(&ctx->stripe_cache.lru);
++ ctx->heal_count = 0;
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__inode_ctx_set(inode, xl, &value) != 0) {
+ GF_FREE(ctx);
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index f3d63ca..6ae4a2b 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -171,6 +171,7 @@ struct _ec_inode {
+ gf_boolean_t have_config;
+ gf_boolean_t have_version;
+ gf_boolean_t have_size;
++ int32_t heal_count;
+ ec_config_t config;
+ uint64_t pre_version[2];
+ uint64_t post_version[2];
+--
+1.8.3.1
+
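The core of the fix above is the per-inode heal_count added to ec_inode_t, maintained under the inode lock by ec_set_entry_healing()/ec_reset_entry_healing() and consulted before enqueueing. A condensed view of the new decision inside ec_heal_throttle() (locking context and error paths trimmed, comments added):

    /* Enqueue an entry at most once; a duplicate client-side heal
     * request is released immediately instead of consuming one of the
     * heal-wait-qlength slots. */
    if (!ec_is_entry_healing(fop)) {
        list_add_tail(&fop->healer, &ec->heal_waiting);
        ec->heal_waiters++;
        ec_set_entry_healing(fop); /* bumps ctx->heal_count */
    } else {
        fop_rel = fop; /* completed later via ec_heal_done() */
    }

ec_reset_entry_healing() drops the count when ec_heal_do() finishes, so a later access to the file can trigger a fresh heal if needed.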
diff --git a/0045-glusterd-fix-txn-id-mem-leak.patch b/0045-glusterd-fix-txn-id-mem-leak.patch
new file mode 100644
index 0000000..b9b2b3e
--- /dev/null
+++ b/0045-glusterd-fix-txn-id-mem-leak.patch
@@ -0,0 +1,126 @@
+From 6c004c6c8b8f98f56e186740881520b8364e6f85 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 18 Mar 2019 16:08:04 +0530
+Subject: [PATCH 45/52] glusterd: fix txn-id mem leak
+
+This commit ensures the following:
+1. Don't send a commit op request to the remote nodes when 'gluster v
+status all' is executed: for the status-all transaction the local
+commit already gets the volume names, and the remote commit ops are
+technically a no-op, so no additional rpc requests are needed.
+2. In the op state machine flow, if the transaction is in staged state
+and op_info.skip_locking is true, don't set the txn id in the
+priv->glusterd_txn_opinfo dictionary, where it would never get freed.
+
+> Fixes: bz#1691164
+> Change-Id: Ib6a9300ea29633f501abac2ba53fb72ff648c822
+> Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22388/
+
+BUG: 1670415
+Change-Id: Ib6a9300ea29633f501abac2ba53fb72ff648c822
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166449
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 26 ++++++++++++++++++++------
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 16 ++++++++++++++++
+ 2 files changed, 36 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index cbbb5d9..12d857a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -5652,6 +5652,9 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx)
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
++ glusterd_op_info_t txn_op_info = {
++ {0},
++ };
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -5686,6 +5689,7 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx)
+ ret = -1;
+ goto out;
+ }
++ ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info);
+
+ ret = dict_set_bin(rsp_dict, "transaction_id", txn_id, sizeof(*txn_id));
+ if (ret) {
+@@ -5704,6 +5708,12 @@ out:
+
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
++ /* for no volname transactions, the txn_opinfo needs to be cleaned up
++ * as there's no unlock event triggered
++ */
++ if (txn_op_info.skip_locking)
++ ret = glusterd_clear_txn_opinfo(txn_id);
++
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+@@ -8159,12 +8169,16 @@ glusterd_op_sm()
+ "Unable to clear "
+ "transaction's opinfo");
+ } else {
+- ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+- if (ret)
+- gf_msg(this->name, GF_LOG_ERROR, 0,
+- GD_MSG_TRANS_OPINFO_SET_FAIL,
+- "Unable to set "
+- "transaction's opinfo");
++ if (!(event_type == GD_OP_EVENT_STAGE_OP &&
++ opinfo.state.state == GD_OP_STATE_STAGED &&
++ opinfo.skip_locking)) {
++ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
++ if (ret)
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ GD_MSG_TRANS_OPINFO_SET_FAIL,
++ "Unable to set "
++ "transaction's opinfo");
++ }
+ }
+
+ glusterd_destroy_op_event_ctx(event);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 1741cf8..618d8bc 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1392,6 +1392,8 @@ gd_commit_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char *errstr = NULL;
+ struct syncargs args = {0};
+ int type = GF_QUOTA_OPTION_TYPE_NONE;
++ uint32_t cmd = 0;
++ gf_boolean_t origin_glusterd = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -1449,6 +1451,20 @@ commit_done:
+ gd_syncargs_init(&args, op_ctx);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
++ origin_glusterd = is_origin_glusterd(req_dict);
++
++ if (op == GD_OP_STATUS_VOLUME) {
++ ret = dict_get_uint32(req_dict, "cmd", &cmd);
++ if (ret)
++ goto out;
++
++ if (origin_glusterd) {
++ if ((cmd & GF_CLI_STATUS_ALL)) {
++ ret = 0;
++ goto out;
++ }
++ }
++ }
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+--
+1.8.3.1
+
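Condensed, the stage-op half of the fix reads as below: a transaction with skip_locking set (the volname-less case) never receives the unlock event that would normally clean up its opinfo, so it has to be cleared on the way out. Excerpted from the hunks above, with the unchanged middle elided:

    /* In glusterd_op_ac_stage_op(): fetch the opinfo early so that
     * skip_locking can be inspected in the exit path. */
    glusterd_op_info_t txn_op_info = {
        {0},
    };
    ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info);

    /* ... stage the op and populate rsp_dict (unchanged) ... */

    /* No unlock event will fire for this txn-id; drop its opinfo now
     * or it stays in priv->glusterd_txn_opinfo forever. */
    if (txn_op_info.skip_locking)
        ret = glusterd_clear_txn_opinfo(txn_id);

The matching guard in glusterd_op_sm() ensures such staged transactions never get an opinfo entry written back in the first place.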
diff --git a/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch b/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch
new file mode 100644
index 0000000..5365515
--- /dev/null
+++ b/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch
@@ -0,0 +1,98 @@
+From a0661449cd8ba7b851fec473191733767f4541b8 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Thu, 28 Mar 2019 17:55:54 +0530
+Subject: [PATCH 46/52] protocol/client: Do not fallback to anon-fd if fd is
+ not open
+
+If a file is opened while a brick is down, and a fop comes on that fd
+after the brick comes back up, the client xlator would still wind the
+fop on an anon-fd, leading to wrong behavior of the fops in some cases.
+
+Example:
+If an lk fop is issued on the fd just after the brick is up in the above
+scenario, the lk fop is sent on an anon-fd instead of being failed on
+that client xlator. This lock is never freed upon close of the fd, as
+flush on an anon-fd is invalid and is not wound below the server xlator.
+
+As a fix, fail the fop unless the fd has the FALLBACK_TO_ANON_FD flag.
+
+ >Upstream-patch: https://review.gluster.org/c/glusterfs/+/15804
+ >Change-Id: I77692d056660b2858e323bdabdfe0a381807cccc
+ >fixes bz#1390914
+
+BUG: 1695057
+Change-Id: Id656bea8dde14327212fbe7ecc97519dc5b32098
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166833
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/protocol/bug-1390914.t | 36 ++++++++++++++++++++++++++++
+ xlators/protocol/client/src/client-helpers.c | 8 ++++++-
+ 2 files changed, 43 insertions(+), 1 deletion(-)
+ create mode 100644 tests/bugs/protocol/bug-1390914.t
+
+diff --git a/tests/bugs/protocol/bug-1390914.t b/tests/bugs/protocol/bug-1390914.t
+new file mode 100644
+index 0000000..e3dab92
+--- /dev/null
++++ b/tests/bugs/protocol/bug-1390914.t
+@@ -0,0 +1,36 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fileio.rc
++cleanup;
++
++#test that fops are not wound on anon-fd when fd is not open on that brick
++TEST glusterd;
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume start $V0
++TEST $CLI volume profile $V0 start
++TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
++
++TEST touch $M0/1
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST fd_open 200 'w' "$M0/1"
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++
++#lk should only happen on 2 bricks, if there is a bug, it will plant a lock
++#with anon-fd on first-brick which will never be released because flush won't
++#be wound below server xlator for anon-fd
++TEST flock -x -n 200
++TEST fd_close 200
++
++TEST fd_open 200 'w' "$M0/1"
++#this lock will fail if there is a stale lock
++TEST flock -x -n 200
++TEST fd_close 200
++cleanup;
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index 55e87b3..2dd7106 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -419,7 +419,13 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+ {
+ fdctx = this_fd_get_ctx(fd, this);
+ if (!fdctx) {
+- *remote_fd = GF_ANON_FD_NO;
++ if (fd->anonymous) {
++ *remote_fd = GF_ANON_FD_NO;
++ } else {
++ *remote_fd = -1;
++ gf_msg_debug(this->name, EBADF, "not a valid fd for gfid: %s",
++ uuid_utoa(fd->inode->gfid));
++ }
+ } else {
+ if (__is_fd_reopen_in_progress(fdctx))
+ *remote_fd = -1;
+--
+1.8.3.1
+
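The behavioral change above reduces to a single branch in client_get_remote_fd(): a missing fd-context may fall back to the anonymous fd only when the fd itself is anonymous. Condensed from the hunk, with a comment summarizing the intent:

    fdctx = this_fd_get_ctx(fd, this);
    if (!fdctx) {
        /* Only a genuinely anonymous fd may use GF_ANON_FD_NO; a regular
         * fd that was never (re)opened on this brick now fails with -1
         * (a debug message is logged with EBADF) instead of silently
         * planting locks on an anon-fd that no flush will ever release. */
        *remote_fd = fd->anonymous ? GF_ANON_FD_NO : -1;
    }

The new tests/bugs/protocol/bug-1390914.t exercises exactly the lk-after-brick-restart scenario described in the commit message.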
diff --git a/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch b/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch
new file mode 100644
index 0000000..cc71864
--- /dev/null
+++ b/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch
@@ -0,0 +1,1652 @@
+From 83d816370f7540d4065baac704df65c648a03125 Mon Sep 17 00:00:00 2001
+From: Poornima G <pgurusid@redhat.com>
+Date: Sun, 24 Mar 2019 09:40:50 +0530
+Subject: [PATCH 47/52] client-rpc: Fix the payload being sent on the wire
+
+The fops allocate 3 kinds of payload (buffers) in the client xlator:
+- fop payload: the buffer allocated by the write and put fops
+- rsphdr payload: the buffer required by the reply cbk of
+ some fops like lookup and readdir.
+- rsp_payload: the buffer required by the reply cbk of fops like
+ readv etc.
+
+Currently, in the lookup and readdir fops the rsphdr is sent as payload,
+so the allocated rsphdr buffer is also sent on the wire, needlessly
+increasing the bandwidth consumption.
+
+With this patch, the issue is fixed.
+
+>Fixes: bz#1692093
+>Change-Id: Ie8158921f4db319e60ad5f52d851fa5c9d4a269b
+>Signed-off-by: Poornima G <pgurusid@redhat.com>
+>Backport-of: https://review.gluster.org/22402/
+
+BUG: 1693935
+Change-Id: Id12746a4c9416288bc1387c8b018bbe9cc4b637d
+Signed-off-by: Poornima G <pgurusid@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166535
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/protocol/client/src/client-handshake.c | 29 ++-
+ xlators/protocol/client/src/client-helpers.c | 14 +-
+ xlators/protocol/client/src/client-rpc-fops.c | 235 ++++++++++++----------
+ xlators/protocol/client/src/client-rpc-fops_v2.c | 236 ++++++++++++-----------
+ xlators/protocol/client/src/client.c | 22 ++-
+ xlators/protocol/client/src/client.h | 16 +-
+ 6 files changed, 308 insertions(+), 244 deletions(-)
+
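The refactor threads all of these buffers through a single client_payload_t argument instead of seven positional parameters to client_submit_request(). Reconstructed from the cp.* assignments in the hunks below (the authoritative declaration is the one added to client.h and may differ in layout):

    typedef struct client_payload {
        struct iobref *iobref;     /* fop payload: write/put data */
        struct iovec *payload;
        int payload_cnt;
        struct iovec *rsphdr;      /* reply header: lookup, readdir, ... */
        int rsphdr_cnt;
        struct iovec *rsp_payload; /* reply payload: readv */
        int rsp_payload_cnt;
        struct iobref *rsp_iobref;
    } client_payload_t;

Call sites that need no payload simply pass NULL for the whole block, which is what shrinks most of the calls below.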
+diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
+index f9631c5..c43756a 100644
+--- a/xlators/protocol/client/src/client-handshake.c
++++ b/xlators/protocol/client/src/client-handshake.c
+@@ -34,7 +34,6 @@ typedef struct client_fd_lk_local {
+ clnt_fd_ctx_t *fdctx;
+ } clnt_fd_lk_local_t;
+
+-
+ int32_t
+ client3_getspec(call_frame_t *frame, xlator_t *this, void *data)
+ {
+@@ -201,8 +200,8 @@ clnt_release_reopen_fd(xlator_t *this, clnt_fd_ctx_t *fdctx)
+ req.fd = fdctx->remote_fd;
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RELEASE,
+- clnt_release_reopen_fd_cbk, NULL, NULL, 0, NULL,
+- 0, NULL, (xdrproc_t)xdr_gfs3_releasedir_req);
++ clnt_release_reopen_fd_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_releasedir_req);
+ out:
+ if (ret) {
+ clnt_fd_lk_reacquire_failed(this, fdctx, conf);
+@@ -486,8 +485,8 @@ protocol_client_reopendir(clnt_fd_ctx_t *fdctx, xlator_t *this)
+ frame->local = local;
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR,
+- client3_3_reopendir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_opendir_req);
++ client3_3_reopendir_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_opendir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED,
+ "failed to send the re-opendir request");
+@@ -547,8 +546,8 @@ protocol_client_reopenfile(clnt_fd_ctx_t *fdctx, xlator_t *this)
+ local->loc.path);
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN,
+- client3_3_reopen_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_open_req);
++ client3_3_reopen_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_open_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED,
+ "failed to send the re-open request");
+@@ -745,8 +744,8 @@ protocol_client_reopendir_v2(clnt_fd_ctx_t *fdctx, xlator_t *this)
+ frame->local = local;
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR,
+- client4_0_reopendir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_opendir_req);
++ client4_0_reopendir_cbk, NULL,
++ (xdrproc_t)xdr_gfx_opendir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED,
+ "failed to send the re-opendir request");
+@@ -806,8 +805,8 @@ protocol_client_reopenfile_v2(clnt_fd_ctx_t *fdctx, xlator_t *this)
+ local->loc.path);
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN,
+- client4_0_reopen_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_open_req);
++ client4_0_reopen_cbk, NULL,
++ (xdrproc_t)xdr_gfx_open_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED,
+ "failed to send the re-open request");
+@@ -1312,7 +1311,6 @@ client_setvolume(xlator_t *this, struct rpc_clnt *rpc)
+
+ ret = client_submit_request(this, &req, fr, conf->handshake,
+ GF_HNDSK_SETVOLUME, client_setvolume_cbk, NULL,
+- NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_setvolume_req);
+
+ fail:
+@@ -1522,8 +1520,7 @@ client_query_portmap(xlator_t *this, struct rpc_clnt *rpc)
+
+ ret = client_submit_request(this, &req, fr, &clnt_pmap_prog,
+ GF_PMAP_PORTBYBRICK, client_query_portmap_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_pmap_port_by_brick_req);
++ NULL, (xdrproc_t)xdr_pmap_port_by_brick_req);
+
+ fail:
+ return ret;
+@@ -1624,8 +1621,8 @@ client_handshake(xlator_t *this, struct rpc_clnt *rpc)
+
+ req.gfs_id = 0xbabe;
+ ret = client_submit_request(this, &req, frame, conf->dump, GF_DUMP_DUMP,
+- client_dump_version_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gf_dump_req);
++ client_dump_version_cbk, NULL,
++ (xdrproc_t)xdr_gf_dump_req);
+
+ out:
+ return ret;
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index 2dd7106..53b4484 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -3082,8 +3082,7 @@ send_release4_0_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx,
+ gf_msg_trace(this->name, 0, "sending releasedir on fd");
+ (void)client_submit_request(
+ this, &req, fr, conf->fops, GFS3_OP_RELEASEDIR,
+- client4_0_releasedir_cbk, NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_releasedir_req);
++ client4_0_releasedir_cbk, NULL, (xdrproc_t)xdr_gfx_releasedir_req);
+ } else {
+ gfx_release_req req = {
+ {
+@@ -3094,8 +3093,8 @@ send_release4_0_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx,
+ req.fd = fdctx->remote_fd;
+ gf_msg_trace(this->name, 0, "sending release on fd");
+ (void)client_submit_request(this, &req, fr, conf->fops, GFS3_OP_RELEASE,
+- client4_0_release_cbk, NULL, NULL, 0, NULL,
+- 0, NULL, (xdrproc_t)xdr_gfx_release_req);
++ client4_0_release_cbk, NULL,
++ (xdrproc_t)xdr_gfx_release_req);
+ }
+
+ return 0;
+@@ -3118,8 +3117,7 @@ send_release3_3_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx,
+ gf_msg_trace(this->name, 0, "sending releasedir on fd");
+ (void)client_submit_request(
+ this, &req, fr, conf->fops, GFS3_OP_RELEASEDIR,
+- client3_3_releasedir_cbk, NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfs3_releasedir_req);
++ client3_3_releasedir_cbk, NULL, (xdrproc_t)xdr_gfs3_releasedir_req);
+ } else {
+ gfs3_release_req req = {
+ {
+@@ -3130,8 +3128,8 @@ send_release3_3_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx,
+ req.fd = fdctx->remote_fd;
+ gf_msg_trace(this->name, 0, "sending release on fd");
+ (void)client_submit_request(this, &req, fr, conf->fops, GFS3_OP_RELEASE,
+- client3_3_release_cbk, NULL, NULL, 0, NULL,
+- 0, NULL, (xdrproc_t)xdr_gfs3_release_req);
++ client3_3_release_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_release_req);
+ }
+
+ return 0;
+diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
+index b7df7cc..1c8b31b 100644
+--- a/xlators/protocol/client/src/client-rpc-fops.c
++++ b/xlators/protocol/client/src/client-rpc-fops.c
+@@ -3234,11 +3234,13 @@ client3_3_lookup(call_frame_t *frame, xlator_t *this, void *data)
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ memset(vector, 0, sizeof(vector));
++ memset(&cp, 0, sizeof(client_payload_t));
+
+ conf = this->private;
+ args = data;
+@@ -3288,9 +3290,12 @@ client3_3_lookup(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = -ret;
+ goto unwind;
+ }
++
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LOOKUP,
+- client3_3_lookup_cbk, NULL, rsphdr, count, NULL,
+- 0, local->iobref,
++ client3_3_lookup_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_lookup_req);
+
+ if (ret) {
+@@ -3338,8 +3343,8 @@ client3_3_stat(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STAT,
+- client3_3_stat_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_stat_req);
++ client3_3_stat_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_stat_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3381,8 +3386,8 @@ client3_3_truncate(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_TRUNCATE,
+- client3_3_truncate_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_truncate_req);
++ client3_3_truncate_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_truncate_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3425,8 +3430,7 @@ client3_3_ftruncate(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_FTRUNCATE, client3_3_ftruncate_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfs3_ftruncate_req);
++ NULL, (xdrproc_t)xdr_gfs3_ftruncate_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3468,8 +3472,8 @@ client3_3_access(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ACCESS,
+- client3_3_access_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_access_req);
++ client3_3_access_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_access_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3505,10 +3509,12 @@ client3_3_readlink(call_frame_t *frame, xlator_t *this, void *data)
+ struct iovec vector[MAX_IOVEC] = {
+ {0},
+ };
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+
+ conf = this->private;
+@@ -3547,9 +3553,11 @@ client3_3_readlink(call_frame_t *frame, xlator_t *this, void *data)
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READLINK,
+- client3_3_readlink_cbk, NULL, rsphdr, count,
+- NULL, 0, local->iobref,
++ client3_3_readlink_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_readlink_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -3595,8 +3603,8 @@ client3_3_unlink(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_UNLINK,
+- client3_3_unlink_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_unlink_req);
++ client3_3_unlink_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_unlink_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3638,8 +3646,8 @@ client3_3_rmdir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RMDIR,
+- client3_3_rmdir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_rmdir_req);
++ client3_3_rmdir_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_rmdir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3697,8 +3705,8 @@ client3_3_symlink(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SYMLINK,
+- client3_3_symlink_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_symlink_req);
++ client3_3_symlink_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_symlink_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3743,8 +3751,8 @@ client3_3_rename(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RENAME,
+- client3_3_rename_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_rename_req);
++ client3_3_rename_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_rename_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3802,8 +3810,8 @@ client3_3_link(call_frame_t *frame, xlator_t *this, void *data)
+ loc_path(&local->loc2, NULL);
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LINK,
+- client3_3_link_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_link_req);
++ client3_3_link_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_link_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3857,8 +3865,8 @@ client3_3_mknod(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKNOD,
+- client3_3_mknod_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_mknod_req);
++ client3_3_mknod_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_mknod_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3924,8 +3932,8 @@ client3_3_mkdir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKDIR,
+- client3_3_mkdir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_mkdir_req);
++ client3_3_mkdir_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_mkdir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3982,8 +3990,8 @@ client3_3_create(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_CREATE,
+- client3_3_create_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_create_req);
++ client3_3_create_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_create_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4043,8 +4051,8 @@ client3_3_open(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN,
+- client3_3_open_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_open_req);
++ client3_3_open_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_open_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4079,10 +4087,12 @@ client3_3_readv(call_frame_t *frame, xlator_t *this, void *data)
+ };
+ struct iobuf *rsp_iobuf = NULL;
+ struct iobref *rsp_iobref = NULL;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -4130,9 +4140,12 @@ client3_3_readv(call_frame_t *frame, xlator_t *this, void *data)
+ local->iobref = rsp_iobref;
+ rsp_iobref = NULL;
+
++ cp.rsp_payload = &rsp_vec;
++ cp.rsp_payload_cnt = 1;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READ,
+- client3_3_readv_cbk, NULL, NULL, 0, &rsp_vec, 1,
+- local->iobref, (xdrproc_t)xdr_gfs3_read_req);
++ client3_3_readv_cbk, &cp,
++ (xdrproc_t)xdr_gfs3_read_req);
+ if (ret) {
+ // unwind is done in the cbk
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -4167,10 +4180,12 @@ client3_3_writev(call_frame_t *frame, xlator_t *this, void *data)
+ };
+ int op_errno = ESTALE;
+ int ret = 0;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -4187,9 +4202,12 @@ client3_3_writev(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = -ret;
+ goto unwind;
+ }
++
++ cp.iobref = args->iobref;
++ cp.payload = args->vector;
++ cp.payload_cnt = args->count;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_WRITE,
+- client3_3_writev_cbk, args->iobref,
+- args->vector, args->count, NULL, 0, NULL,
++ client3_3_writev_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_write_req);
+ if (ret) {
+ /*
+@@ -4248,8 +4266,8 @@ client3_3_flush(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
+- client3_3_flush_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_flush_req);
++ client3_3_flush_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_flush_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4291,8 +4309,8 @@ client3_3_fsync(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNC,
+- client3_3_fsync_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_fsync_req);
++ client3_3_fsync_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_fsync_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4336,8 +4354,8 @@ client3_3_fstat(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSTAT,
+- client3_3_fstat_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_fstat_req);
++ client3_3_fstat_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_fstat_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4391,8 +4409,8 @@ client3_3_opendir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR,
+- client3_3_opendir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_opendir_req);
++ client3_3_opendir_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_opendir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4435,8 +4453,8 @@ client3_3_fsyncdir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNCDIR,
+- client3_3_fsyncdir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_fsyncdir_req);
++ client3_3_fsyncdir_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_fsyncdir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4479,8 +4497,8 @@ client3_3_statfs(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STATFS,
+- client3_3_statfs_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_statfs_req);
++ client3_3_statfs_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_statfs_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4523,8 +4541,8 @@ client3_3_setxattr(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETXATTR,
+- client3_3_setxattr_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_setxattr_req);
++ client3_3_setxattr_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_setxattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4570,8 +4588,7 @@ client3_3_fsetxattr(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_FSETXATTR, client3_3_fsetxattr_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfs3_fsetxattr_req);
++ NULL, (xdrproc_t)xdr_gfs3_fsetxattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4611,10 +4628,12 @@ client3_3_fgetxattr(call_frame_t *frame, xlator_t *this, void *data)
+ struct iovec vector[MAX_IOVEC] = {
+ {0},
+ };
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -4654,9 +4673,12 @@ client3_3_fgetxattr(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = -ret;
+ goto unwind;
+ }
++
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops,
+- GFS3_OP_FGETXATTR, client3_3_fgetxattr_cbk,
+- NULL, rsphdr, count, NULL, 0, local->iobref,
++ GFS3_OP_FGETXATTR, client3_3_fgetxattr_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_fgetxattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -4699,11 +4721,14 @@ client3_3_getxattr(call_frame_t *frame, xlator_t *this, void *data)
+ struct iovec vector[MAX_IOVEC] = {
+ {0},
+ };
++ client_payload_t cp;
+
+ if (!frame || !this || !data) {
+ op_errno = 0;
+ goto unwind;
+ }
++
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+
+ local = mem_get0(this->local_pool);
+@@ -4775,9 +4800,12 @@ client3_3_getxattr(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = -ret;
+ goto unwind;
+ }
++
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_GETXATTR,
+- client3_3_getxattr_cbk, NULL, rsphdr, count,
+- NULL, 0, local->iobref,
++ client3_3_getxattr_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_getxattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -4822,10 +4850,12 @@ client3_3_xattrop(call_frame_t *frame, xlator_t *this, void *data)
+ struct iovec vector[MAX_IOVEC] = {
+ {0},
+ };
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+@@ -4871,9 +4901,12 @@ client3_3_xattrop(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = -ret;
+ goto unwind;
+ }
++
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_XATTROP,
+- client3_3_xattrop_cbk, NULL, rsphdr, count,
+- NULL, 0, local->iobref,
++ client3_3_xattrop_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_xattrop_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -4918,10 +4951,12 @@ client3_3_fxattrop(call_frame_t *frame, xlator_t *this, void *data)
+ struct iovec vector[MAX_IOVEC] = {
+ {0},
+ };
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -4962,9 +4997,11 @@ client3_3_fxattrop(call_frame_t *frame, xlator_t *this, void *data)
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FXATTROP,
+- client3_3_fxattrop_cbk, NULL, rsphdr, count,
+- NULL, 0, local->iobref,
++ client3_3_fxattrop_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_fxattrop_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -5016,8 +5053,7 @@ client3_3_removexattr(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_REMOVEXATTR, client3_3_removexattr_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfs3_removexattr_req);
++ NULL, (xdrproc_t)xdr_gfs3_removexattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5059,10 +5095,9 @@ client3_3_fremovexattr(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = -ret;
+ goto unwind;
+ }
+- ret = client_submit_request(this, &req, frame, conf->fops,
+- GFS3_OP_FREMOVEXATTR,
+- client3_3_fremovexattr_cbk, NULL, NULL, 0, NULL,
+- 0, NULL, (xdrproc_t)xdr_gfs3_fremovexattr_req);
++ ret = client_submit_request(
++ this, &req, frame, conf->fops, GFS3_OP_FREMOVEXATTR,
++ client3_3_fremovexattr_cbk, NULL, (xdrproc_t)xdr_gfs3_fremovexattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5104,8 +5139,8 @@ client3_3_lease(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LEASE,
+- client3_3_lease_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_lease_req);
++ client3_3_lease_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_lease_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5167,7 +5202,7 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
+- client3_3_lk_cbk, NULL, NULL, 0, NULL, 0, NULL,
++ client3_3_lk_cbk, NULL,
+ (xdrproc_t)xdr_gfs3_lk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -5210,8 +5245,8 @@ client3_3_inodelk(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_INODELK,
+- client3_3_inodelk_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_inodelk_req);
++ client3_3_inodelk_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_inodelk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5260,8 +5295,8 @@ client3_3_finodelk(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FINODELK,
+- client3_3_finodelk_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_finodelk_req);
++ client3_3_finodelk_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_finodelk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5305,8 +5340,8 @@ client3_3_entrylk(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ENTRYLK,
+- client3_3_entrylk_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_entrylk_req);
++ client3_3_entrylk_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_entrylk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5349,8 +5384,8 @@ client3_3_fentrylk(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FENTRYLK,
+- client3_3_fentrylk_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_fentrylk_req);
++ client3_3_fentrylk_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_fentrylk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5391,8 +5426,7 @@ client3_3_rchecksum(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_RCHECKSUM, client3_3_rchecksum_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfs3_rchecksum_req);
++ NULL, (xdrproc_t)xdr_gfs3_rchecksum_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5433,10 +5467,12 @@ client3_3_readdir(call_frame_t *frame, xlator_t *this, void *data)
+ {0},
+ };
+ int readdir_rsp_size = 0;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -5486,9 +5522,11 @@ client3_3_readdir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = rsp_iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIR,
+- client3_3_readdir_cbk, NULL, rsphdr, count,
+- NULL, 0, rsp_iobref,
++ client3_3_readdir_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_readdir_req);
+
+ if (ret) {
+@@ -5534,10 +5572,12 @@ client3_3_readdirp(call_frame_t *frame, xlator_t *this, void *data)
+ {0},
+ };
+ clnt_local_t *local = NULL;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -5587,9 +5627,11 @@ client3_3_readdirp(call_frame_t *frame, xlator_t *this, void *data)
+
+ local->fd = fd_ref(args->fd);
+
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = rsp_iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIRP,
+- client3_3_readdirp_cbk, NULL, rsphdr, count,
+- NULL, 0, rsp_iobref,
++ client3_3_readdirp_cbk, &cp,
+ (xdrproc_t)xdr_gfs3_readdirp_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -5637,8 +5679,8 @@ client3_3_setattr(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETATTR,
+- client3_3_setattr_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_setattr_req);
++ client3_3_setattr_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_setattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5679,8 +5721,8 @@ client3_3_fsetattr(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSETATTR,
+- client3_3_fsetattr_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_fsetattr_req);
++ client3_3_fsetattr_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_fsetattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5722,8 +5764,7 @@ client3_3_fallocate(call_frame_t *frame, xlator_t *this, void *data)
+
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_FALLOCATE, client3_3_fallocate_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfs3_fallocate_req);
++ NULL, (xdrproc_t)xdr_gfs3_fallocate_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5764,8 +5805,8 @@ client3_3_discard(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_DISCARD,
+- client3_3_discard_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_discard_req);
++ client3_3_discard_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_discard_req);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5807,8 +5848,8 @@ client3_3_zerofill(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ZEROFILL,
+- client3_3_zerofill_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_zerofill_req);
++ client3_3_zerofill_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_zerofill_req);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5850,7 +5891,7 @@ client3_3_ipc(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_IPC,
+- client3_3_ipc_cbk, NULL, NULL, 0, NULL, 0, NULL,
++ client3_3_ipc_cbk, NULL,
+ (xdrproc_t)xdr_gfs3_ipc_req);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -5895,8 +5936,8 @@ client3_3_seek(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SEEK,
+- client3_3_seek_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfs3_seek_req);
++ client3_3_seek_cbk, NULL,
++ (xdrproc_t)xdr_gfs3_seek_req);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -6083,8 +6124,7 @@ client3_3_getactivelk(call_frame_t *frame, xlator_t *this, void *data)
+
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_GETACTIVELK, client3_3_getactivelk_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfs3_getactivelk_req);
++ NULL, (xdrproc_t)xdr_gfs3_getactivelk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -6141,8 +6181,7 @@ client3_3_setactivelk(call_frame_t *frame, xlator_t *this, void *data)
+
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_SETACTIVELK, client3_3_setactivelk_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfs3_setactivelk_req);
++ NULL, (xdrproc_t)xdr_gfs3_setactivelk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
+index 8f3ee41..2673b6e 100644
+--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
+@@ -3005,11 +3005,13 @@ client4_0_lookup(call_frame_t *frame, xlator_t *this, void *data)
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ memset(vector, 0, sizeof(vector));
++ memset(&cp, 0, sizeof(client_payload_t));
+
+ conf = this->private;
+ args = data;
+@@ -3059,9 +3061,12 @@ client4_0_lookup(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = -ret;
+ goto unwind;
+ }
++
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LOOKUP,
+- client4_0_lookup_cbk, NULL, rsphdr, count, NULL,
+- 0, local->iobref,
++ client4_0_lookup_cbk, &cp,
+ (xdrproc_t)xdr_gfx_lookup_req);
+
+ if (ret) {
+@@ -3109,8 +3114,8 @@ client4_0_stat(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STAT,
+- client4_0_stat_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_stat_req);
++ client4_0_stat_cbk, NULL,
++ (xdrproc_t)xdr_gfx_stat_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3153,8 +3158,8 @@ client4_0_truncate(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_TRUNCATE,
+- client4_0_truncate_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_truncate_req);
++ client4_0_truncate_cbk, NULL,
++ (xdrproc_t)xdr_gfx_truncate_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3198,8 +3203,7 @@ client4_0_ftruncate(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_FTRUNCATE, client4_0_ftruncate_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_ftruncate_req);
++ NULL, (xdrproc_t)xdr_gfx_ftruncate_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3241,8 +3245,8 @@ client4_0_access(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ACCESS,
+- client4_0_access_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_access_req);
++ client4_0_access_cbk, NULL,
++ (xdrproc_t)xdr_gfx_access_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3295,8 +3299,8 @@ client4_0_readlink(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READLINK,
+- client4_0_readlink_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_readlink_req);
++ client4_0_readlink_cbk, NULL,
++ (xdrproc_t)xdr_gfx_readlink_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3338,8 +3342,8 @@ client4_0_unlink(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_UNLINK,
+- client4_0_unlink_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_unlink_req);
++ client4_0_unlink_cbk, NULL,
++ (xdrproc_t)xdr_gfx_unlink_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3381,8 +3385,8 @@ client4_0_rmdir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RMDIR,
+- client4_0_rmdir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_rmdir_req);
++ client4_0_rmdir_cbk, NULL,
++ (xdrproc_t)xdr_gfx_rmdir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3440,8 +3444,8 @@ client4_0_symlink(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SYMLINK,
+- client4_0_symlink_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_symlink_req);
++ client4_0_symlink_cbk, NULL,
++ (xdrproc_t)xdr_gfx_symlink_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3486,8 +3490,8 @@ client4_0_rename(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RENAME,
+- client4_0_rename_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_rename_req);
++ client4_0_rename_cbk, NULL,
++ (xdrproc_t)xdr_gfx_rename_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3546,8 +3550,8 @@ client4_0_link(call_frame_t *frame, xlator_t *this, void *data)
+ loc_path(&local->loc2, NULL);
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LINK,
+- client4_0_link_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_link_req);
++ client4_0_link_cbk, NULL,
++ (xdrproc_t)xdr_gfx_link_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3601,8 +3605,8 @@ client4_0_mknod(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKNOD,
+- client4_0_mknod_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_mknod_req);
++ client4_0_mknod_cbk, NULL,
++ (xdrproc_t)xdr_gfx_mknod_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3668,8 +3672,8 @@ client4_0_mkdir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKDIR,
+- client4_0_mkdir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_mkdir_req);
++ client4_0_mkdir_cbk, NULL,
++ (xdrproc_t)xdr_gfx_mkdir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3726,8 +3730,8 @@ client4_0_create(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_CREATE,
+- client4_0_create_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_create_req);
++ client4_0_create_cbk, NULL,
++ (xdrproc_t)xdr_gfx_create_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3787,8 +3791,8 @@ client4_0_open(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN,
+- client4_0_open_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_open_req);
++ client4_0_open_cbk, NULL,
++ (xdrproc_t)xdr_gfx_open_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3823,10 +3827,12 @@ client4_0_readv(call_frame_t *frame, xlator_t *this, void *data)
+ };
+ struct iobuf *rsp_iobuf = NULL;
+ struct iobref *rsp_iobref = NULL;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -3872,9 +3878,12 @@ client4_0_readv(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+
++ cp.rsp_payload = &rsp_vec;
++ cp.rsp_payload_cnt = 1;
++ cp.rsp_iobref = local->iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READ,
+- client4_0_readv_cbk, NULL, NULL, 0, &rsp_vec, 1,
+- local->iobref, (xdrproc_t)xdr_gfx_read_req);
++ client4_0_readv_cbk, &cp,
++ (xdrproc_t)xdr_gfx_read_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -3905,10 +3914,12 @@ client4_0_writev(call_frame_t *frame, xlator_t *this, void *data)
+ };
+ int op_errno = ESTALE;
+ int ret = 0;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -3926,9 +3937,11 @@ client4_0_writev(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+
++ cp.iobref = args->iobref;
++ cp.payload = args->vector;
++ cp.payload_cnt = args->count;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_WRITE,
+- client4_0_writev_cbk, args->iobref,
+- args->vector, args->count, NULL, 0, NULL,
++ client4_0_writev_cbk, &cp,
+ (xdrproc_t)xdr_gfx_write_req);
+ if (ret) {
+ /*
+@@ -3987,8 +4000,8 @@ client4_0_flush(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
+- client4_0_flush_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_flush_req);
++ client4_0_flush_cbk, NULL,
++ (xdrproc_t)xdr_gfx_flush_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4030,8 +4043,8 @@ client4_0_fsync(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNC,
+- client4_0_fsync_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_fsync_req);
++ client4_0_fsync_cbk, NULL,
++ (xdrproc_t)xdr_gfx_fsync_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4075,8 +4088,8 @@ client4_0_fstat(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSTAT,
+- client4_0_fstat_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_fstat_req);
++ client4_0_fstat_cbk, NULL,
++ (xdrproc_t)xdr_gfx_fstat_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4130,8 +4143,8 @@ client4_0_opendir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR,
+- client4_0_opendir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_opendir_req);
++ client4_0_opendir_cbk, NULL,
++ (xdrproc_t)xdr_gfx_opendir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4175,8 +4188,8 @@ client4_0_fsyncdir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNCDIR,
+- client4_0_fsyncdir_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_fsyncdir_req);
++ client4_0_fsyncdir_cbk, NULL,
++ (xdrproc_t)xdr_gfx_fsyncdir_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4219,8 +4232,8 @@ client4_0_statfs(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STATFS,
+- client4_0_statfs_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_statfs_req);
++ client4_0_statfs_cbk, NULL,
++ (xdrproc_t)xdr_gfx_statfs_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4263,8 +4276,8 @@ client4_0_setxattr(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETXATTR,
+- client4_0_setxattr_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_setxattr_req);
++ client4_0_setxattr_cbk, NULL,
++ (xdrproc_t)xdr_gfx_setxattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4310,8 +4323,7 @@ client4_0_fsetxattr(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_FSETXATTR, client4_0_fsetxattr_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_fsetxattr_req);
++ NULL, (xdrproc_t)xdr_gfx_fsetxattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4364,8 +4376,7 @@ client4_0_fgetxattr(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_FGETXATTR, client4_0_fgetxattr_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_fgetxattr_req);
++ NULL, (xdrproc_t)xdr_gfx_fgetxattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4452,8 +4463,8 @@ client4_0_getxattr(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_GETXATTR,
+- client4_0_getxattr_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_getxattr_req);
++ client4_0_getxattr_cbk, NULL,
++ (xdrproc_t)xdr_gfx_getxattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4514,8 +4525,8 @@ client4_0_xattrop(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_XATTROP,
+- client4_0_xattrop_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_xattrop_req);
++ client4_0_xattrop_cbk, NULL,
++ (xdrproc_t)xdr_gfx_xattrop_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4566,8 +4577,8 @@ client4_0_fxattrop(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FXATTROP,
+- client4_0_fxattrop_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_fxattrop_req);
++ client4_0_fxattrop_cbk, NULL,
++ (xdrproc_t)xdr_gfx_fxattrop_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4613,8 +4624,7 @@ client4_0_removexattr(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_REMOVEXATTR, client4_0_removexattr_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_removexattr_req);
++ NULL, (xdrproc_t)xdr_gfx_removexattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4656,10 +4666,9 @@ client4_0_fremovexattr(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = -ret;
+ goto unwind;
+ }
+- ret = client_submit_request(this, &req, frame, conf->fops,
+- GFS3_OP_FREMOVEXATTR,
+- client4_0_fremovexattr_cbk, NULL, NULL, 0, NULL,
+- 0, NULL, (xdrproc_t)xdr_gfx_fremovexattr_req);
++ ret = client_submit_request(
++ this, &req, frame, conf->fops, GFS3_OP_FREMOVEXATTR,
++ client4_0_fremovexattr_cbk, NULL, (xdrproc_t)xdr_gfx_fremovexattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4701,8 +4710,8 @@ client4_0_lease(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LEASE,
+- client4_0_lease_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_lease_req);
++ client4_0_lease_cbk, NULL,
++ (xdrproc_t)xdr_gfx_lease_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4764,7 +4773,7 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
+- client4_0_lk_cbk, NULL, NULL, 0, NULL, 0, NULL,
++ client4_0_lk_cbk, NULL,
+ (xdrproc_t)xdr_gfx_lk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -4807,8 +4816,8 @@ client4_0_inodelk(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_INODELK,
+- client4_0_inodelk_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_inodelk_req);
++ client4_0_inodelk_cbk, NULL,
++ (xdrproc_t)xdr_gfx_inodelk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4857,8 +4866,8 @@ client4_0_finodelk(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FINODELK,
+- client4_0_finodelk_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_finodelk_req);
++ client4_0_finodelk_cbk, NULL,
++ (xdrproc_t)xdr_gfx_finodelk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4902,8 +4911,8 @@ client4_0_entrylk(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ENTRYLK,
+- client4_0_entrylk_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_entrylk_req);
++ client4_0_entrylk_cbk, NULL,
++ (xdrproc_t)xdr_gfx_entrylk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4946,8 +4955,8 @@ client4_0_fentrylk(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FENTRYLK,
+- client4_0_fentrylk_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_fentrylk_req);
++ client4_0_fentrylk_cbk, NULL,
++ (xdrproc_t)xdr_gfx_fentrylk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -4988,10 +4997,12 @@ client4_0_readdir(call_frame_t *frame, xlator_t *this, void *data)
+ {0},
+ };
+ int readdir_rsp_size = 0;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -5041,9 +5052,11 @@ client4_0_readdir(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = rsp_iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIR,
+- client4_0_readdir_cbk, NULL, rsphdr, count,
+- NULL, 0, rsp_iobref,
++ client4_0_readdir_cbk, &cp,
+ (xdrproc_t)xdr_gfx_readdir_req);
+
+ if (ret) {
+@@ -5089,10 +5102,12 @@ client4_0_readdirp(call_frame_t *frame, xlator_t *this, void *data)
+ {0},
+ };
+ clnt_local_t *local = NULL;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -5142,9 +5157,11 @@ client4_0_readdirp(call_frame_t *frame, xlator_t *this, void *data)
+
+ local->fd = fd_ref(args->fd);
+
++ cp.rsphdr = rsphdr;
++ cp.rsphdr_cnt = count;
++ cp.rsp_iobref = rsp_iobref;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIRP,
+- client4_0_readdirp_cbk, NULL, rsphdr, count,
+- NULL, 0, rsp_iobref,
++ client4_0_readdirp_cbk, &cp,
+ (xdrproc_t)xdr_gfx_readdirp_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -5192,8 +5209,8 @@ client4_0_setattr(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETATTR,
+- client4_0_setattr_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_setattr_req);
++ client4_0_setattr_cbk, NULL,
++ (xdrproc_t)xdr_gfx_setattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5235,8 +5252,7 @@ client4_0_fallocate(call_frame_t *frame, xlator_t *this, void *data)
+
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_FALLOCATE, client4_0_fallocate_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_fallocate_req);
++ NULL, (xdrproc_t)xdr_gfx_fallocate_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5277,8 +5293,8 @@ client4_0_discard(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_DISCARD,
+- client4_0_discard_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_discard_req);
++ client4_0_discard_cbk, NULL,
++ (xdrproc_t)xdr_gfx_discard_req);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5320,8 +5336,8 @@ client4_0_zerofill(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ZEROFILL,
+- client4_0_zerofill_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_zerofill_req);
++ client4_0_zerofill_cbk, NULL,
++ (xdrproc_t)xdr_gfx_zerofill_req);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5363,7 +5379,7 @@ client4_0_ipc(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_IPC,
+- client4_0_ipc_cbk, NULL, NULL, 0, NULL, 0, NULL,
++ client4_0_ipc_cbk, NULL,
+ (xdrproc_t)xdr_gfx_ipc_req);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+@@ -5408,8 +5424,8 @@ client4_0_seek(call_frame_t *frame, xlator_t *this, void *data)
+ }
+
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SEEK,
+- client4_0_seek_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_seek_req);
++ client4_0_seek_cbk, NULL,
++ (xdrproc_t)xdr_gfx_seek_req);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5458,8 +5474,7 @@ client4_0_getactivelk(call_frame_t *frame, xlator_t *this, void *data)
+
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_GETACTIVELK, client4_0_getactivelk_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_getactivelk_req);
++ NULL, (xdrproc_t)xdr_gfx_getactivelk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5514,8 +5529,7 @@ client4_0_setactivelk(call_frame_t *frame, xlator_t *this, void *data)
+
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_SETACTIVELK, client4_0_setactivelk_cbk,
+- NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_setactivelk_req);
++ NULL, (xdrproc_t)xdr_gfx_setactivelk_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5785,8 +5799,8 @@ client4_0_namelink(call_frame_t *frame, xlator_t *this, void *data)
+
+ dict_to_xdr(args->xdata, &req.xdata);
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_NAMELINK,
+- client4_namelink_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_namelink_req);
++ client4_namelink_cbk, NULL,
++ (xdrproc_t)xdr_gfx_namelink_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -5837,8 +5851,8 @@ client4_0_icreate(call_frame_t *frame, xlator_t *this, void *data)
+ op_errno = ESTALE;
+ dict_to_xdr(args->xdata, &req.xdata);
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ICREATE,
+- client4_icreate_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_icreate_req);
++ client4_icreate_cbk, NULL,
++ (xdrproc_t)xdr_gfx_icreate_req);
+ if (ret)
+ goto free_reqdata;
+ GF_FREE(req.xdata.pairs.pairs_val);
+@@ -5864,10 +5878,12 @@ client4_0_put(call_frame_t *frame, xlator_t *this, void *data)
+ int op_errno = ESTALE;
+ int ret = 0;
+ clnt_local_t *local = NULL;
++ client_payload_t cp;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
++ memset(&cp, 0, sizeof(client_payload_t));
+ args = data;
+ conf = this->private;
+
+@@ -5890,9 +5906,11 @@ client4_0_put(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+
++ cp.iobref = args->iobref;
++ cp.payload = args->vector;
++ cp.payload_cnt = args->count;
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_PUT,
+- client4_0_put_cbk, args->iobref, args->vector,
+- args->count, NULL, 0, NULL,
++ client4_0_put_cbk, &cp,
+ (xdrproc_t)xdr_gfx_put_req);
+ if (ret) {
+ /*
+@@ -5959,10 +5977,10 @@ client4_0_copy_file_range(call_frame_t *frame, xlator_t *this, void *data)
+ local->attempt_reopen_out = client_is_reopen_needed(args->fd_out, this,
+ req.fd_out);
+
+- ret = client_submit_request(
+- this, &req, frame, conf->fops, GFS3_OP_COPY_FILE_RANGE,
+- client4_0_copy_file_range_cbk, NULL, NULL, 0, NULL, 0, NULL,
+- (xdrproc_t)xdr_gfx_copy_file_range_req);
++ ret = client_submit_request(this, &req, frame, conf->fops,
++ GFS3_OP_COPY_FILE_RANGE,
++ client4_0_copy_file_range_cbk, NULL,
++ (xdrproc_t)xdr_gfx_copy_file_range_req);
+ if (ret) {
+ /*
+ * If the lower layers fail to submit a request, they'll also
+@@ -6009,8 +6027,8 @@ client4_0_fsetattr(call_frame_t *frame, xlator_t *this, void *data)
+ goto unwind;
+ }
+ ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSETATTR,
+- client4_0_fsetattr_cbk, NULL, NULL, 0, NULL, 0,
+- NULL, (xdrproc_t)xdr_gfx_fsetattr_req);
++ client4_0_fsetattr_cbk, NULL,
++ (xdrproc_t)xdr_gfx_fsetattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+@@ -6054,9 +6072,9 @@ client4_0_rchecksum(call_frame_t *frame, xlator_t *this, void *data)
+
+ dict_to_xdr(args->xdata, &req.xdata);
+
+- ret = client_submit_request(
+- this, &req, frame, conf->fops, GFS3_OP_RCHECKSUM, client4_rchecksum_cbk,
+- NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfx_rchecksum_req);
++ ret = client_submit_request(this, &req, frame, conf->fops,
++ GFS3_OP_RCHECKSUM, client4_rchecksum_cbk, NULL,
++ (xdrproc_t)xdr_gfx_rchecksum_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index dea6c28..2d75714 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -131,10 +131,7 @@ client_type_to_gf_type(short l_type)
+ int
+ client_submit_request(xlator_t *this, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+- struct iobref *iobref, struct iovec *payload,
+- int payloadcnt, struct iovec *rsp_payload,
+- int rsp_payload_count, struct iobref *rsp_iobref,
+- xdrproc_t xdrproc)
++ client_payload_t *cp, xdrproc_t xdrproc)
+ {
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+@@ -180,8 +177,8 @@ client_submit_request(xlator_t *this, void *req, call_frame_t *frame,
+ goto out;
+ }
+
+- if (iobref != NULL) {
+- ret = iobref_merge(new_iobref, iobref);
++ if (cp && cp->iobref != NULL) {
++ ret = iobref_merge(new_iobref, cp->iobref);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, PC_MSG_NO_MEMORY,
+ "cannot merge "
+@@ -224,9 +221,16 @@ client_submit_request(xlator_t *this, void *req, call_frame_t *frame,
+ }
+
+ /* Send the msg */
+- ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count, payload,
+- payloadcnt, new_iobref, frame, payload, payloadcnt,
+- rsp_payload, rsp_payload_count, rsp_iobref);
++ if (cp) {
++ ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count,
++ cp->payload, cp->payload_cnt, new_iobref, frame,
++ cp->rsphdr, cp->rsphdr_cnt, cp->rsp_payload,
++ cp->rsp_payload_cnt, cp->rsp_iobref);
++ } else {
++ ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count,
++ NULL, 0, new_iobref, frame, NULL, 0, NULL, 0,
++ NULL);
++ }
+
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "rpc_clnt_submit failed");
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index 71f84f3..f12fa61 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -345,6 +345,17 @@ typedef struct client_args {
+ lock_migration_info_t *locklist;
+ } clnt_args_t;
+
++typedef struct client_payload {
++ struct iobref *iobref;
++ struct iovec *payload;
++ struct iovec *rsphdr;
++ struct iovec *rsp_payload;
++ struct iobref *rsp_iobref;
++ int payload_cnt;
++ int rsphdr_cnt;
++ int rsp_payload_cnt;
++} client_payload_t;
++
+ typedef ssize_t (*gfs_serialize_t)(struct iovec outmsg, void *args);
+
+ clnt_fd_ctx_t *
+@@ -359,10 +370,7 @@ client_local_wipe(clnt_local_t *local);
+ int
+ client_submit_request(xlator_t *this, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbk,
+- struct iobref *iobref, struct iovec *rsphdr,
+- int rsphdr_count, struct iovec *rsp_payload,
+- int rsp_count, struct iobref *rsp_iobref,
+- xdrproc_t xdrproc);
++ client_payload_t *cp, xdrproc_t xdrproc);
+
+ int
+ client_submit_compound_request(xlator_t *this, void *req, call_frame_t *frame,
+--
+1.8.3.1
+
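The patch above replaces half a dozen payload-related arguments of client_submit_request() with a single optional client_payload_t. The toy C program below is a minimal, self-contained sketch of that parameter-object pattern, written for this note rather than taken from the patch; only the struct and field names mirror the ones added in client.h.

    #include <stdio.h>
    #include <string.h>

    /* Toy model of the refactor: one optional parameter struct replaces
     * a long positional argument list.  Fields mirror client_payload_t;
     * everything else is simplified for the demo. */
    typedef struct client_payload {
        const char *payload;   /* request-side payload (writev/put)  */
        int payload_cnt;
        const char *rsphdr;    /* response header vector (readdirp)  */
        int rsphdr_cnt;
    } client_payload_t;

    static int submit_request(const char *op, const client_payload_t *cp)
    {
        if (cp)   /* fops that carry buffers pass a filled struct */
            printf("%s: payload_cnt=%d rsphdr_cnt=%d\n", op,
                   cp->payload_cnt, cp->rsphdr_cnt);
        else      /* the common case passes NULL, as most hunks above do */
            printf("%s: no payload\n", op);
        return 0;
    }

    int main(void)
    {
        client_payload_t cp;

        memset(&cp, 0, sizeof(cp)); /* callers memset cp, as in the hunks */
        cp.payload = "data";
        cp.payload_cnt = 1;
        submit_request("WRITE", &cp); /* writev-style call */
        submit_request("STAT", NULL); /* stat-style call   */
        return 0;
    }

Centralising the NULL check in one place mirrors the new branch in client_submit_request(), which passes zeroed arguments down to rpc_clnt_submit() whenever no payload struct is supplied.
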
diff --git a/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch b/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch
new file mode 100644
index 0000000..9f5f3bf
--- /dev/null
+++ b/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch
@@ -0,0 +1,115 @@
+From 2449a1824c6f7b57889335caaeb09f4c5cb3efce Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Thu, 28 Mar 2019 14:59:00 +0530
+Subject: [PATCH 48/52] gfapi: Unblock epoll thread for upcall processing
+
+With commit#ad35193, we have made changes to offload
+processing of upcall notifications to a synctask so as not
+to block epoll threads. However, it seems the issue wasn't
+fully addressed.
+
+In "glfs_cbk_upcall_data" -> "synctask_new1" after creating synctask
+if there is no callback defined, the thread waits on synctask_join
+till the syncfn is finished. So that way even with those changes,
+epoll threads are blocked till the upcalls are processed.
+
+Hence the right fix now is to define a callback function
+("glfs_upcall_syncop_cbk") for the "glfs_cbk_upcall_syncop" synctask,
+so that epoll/notify threads are unblocked completely and upcall
+processing can happen in parallel in synctask threads.
+
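As a rough illustration of the fix (a sketch, not gfapi code): hand the heap-allocated arguments to an asynchronous worker and free them in a completion step, instead of blocking the notifying thread on a join. The pthread-based toy below stands in for synctask_new/synctask_join; all names are invented for the demo (compile with -lpthread).

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct upcall_args { int event_type; }; /* stand-in for the real args */

    static void *upcall_worker(void *opaque)
    {
        struct upcall_args *args = opaque;
        printf("processing upcall event %d\n", args->event_type);
        free(args); /* the completion step owns and frees the args,
                       like glfs_upcall_syncop_cbk does via GF_FREE */
        return NULL;
    }

    int main(void)
    {
        struct upcall_args *args = calloc(1, sizeof(*args));
        pthread_t worker;

        if (!args)
            return 1;
        args->event_type = 42;

        pthread_create(&worker, NULL, upcall_worker, args);
        pthread_detach(worker); /* the notifier returns at once: no join */

        pthread_exit(NULL);     /* keep the process alive for the worker */
    }
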
+Upstream references:
+mainline : https://review.gluster.org/22436
+release-6.0 : https://review.gluster.org/22459
+
+Change-Id: I4d8645e3588fab2c3ca534e0112773aaab68a5dd
+fixes: bz#1694565
+Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166586
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/glfs-fops.c | 42 ++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 34 insertions(+), 8 deletions(-)
+
+diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
+index 88cd32b..01ba60b 100644
+--- a/api/src/glfs-fops.c
++++ b/api/src/glfs-fops.c
+@@ -5714,6 +5714,16 @@ out:
+ }
+
+ static int
++glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque)
++{
++ struct upcall_syncop_args *args = opaque;
++
++ GF_FREE(args->upcall_data);
++ GF_FREE(args);
++ return 0;
++}
++
++static int
+ glfs_cbk_upcall_syncop(void *opaque)
+ {
+ struct upcall_syncop_args *args = opaque;
+@@ -5770,15 +5780,13 @@ out:
+ GLFS_FREE(up_arg);
+ }
+
+- return ret;
++ return 0;
+ }
+
+ static void
+ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+ {
+- struct upcall_syncop_args args = {
+- 0,
+- };
++ struct upcall_syncop_args *args = NULL;
+ int ret = -1;
+
+ if (!fs || !upcall_data)
+@@ -5789,16 +5797,34 @@ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+ goto out;
+ }
+
+- args.fs = fs;
+- args.upcall_data = upcall_data;
++ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args),
++ glfs_mt_upcall_entry_t);
++ if (!args) {
++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
++ "Upcall syncop args allocation failed.");
++ goto out;
++ }
++
++ /* Note: we are not taking any ref on fs here.
++ * Ideally applications have to unregister for upcall events
++ * or stop polling for upcall events before performing
++ * glfs_fini. And as for outstanding synctasks created, we wait
++ * for all syncenv threads to finish tasks before cleaning up the
++ * fs->ctx. Hence it seems safe to process these callback
++ * notification without taking any lock/ref.
++ */
++ args->fs = fs;
++ args->upcall_data = gf_memdup(upcall_data, sizeof(*upcall_data));
+
+- ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop, NULL, NULL,
+- &args);
++ ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop,
++ glfs_upcall_syncop_cbk, NULL, args);
+ /* should we retry incase of failure? */
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED,
+ "Synctak for Upcall event_type(%d) and gfid(%s) failed",
+ upcall_data->event_type, (char *)(upcall_data->gfid));
++ GF_FREE(args->upcall_data);
++ GF_FREE(args);
+ }
+
+ out:
+--
+1.8.3.1
+
diff --git a/0049-transport-socket-log-shutdown-msg-occasionally.patch b/0049-transport-socket-log-shutdown-msg-occasionally.patch
new file mode 100644
index 0000000..7cee050
--- /dev/null
+++ b/0049-transport-socket-log-shutdown-msg-occasionally.patch
@@ -0,0 +1,49 @@
+From e205516ef874d617e4756856098bf10e17b0ea3d Mon Sep 17 00:00:00 2001
+From: Raghavendra G <rgowdapp@redhat.com>
+Date: Fri, 22 Mar 2019 10:40:45 +0530
+Subject: [PATCH 49/52] transport/socket: log shutdown msg occasionally
+
+>Change-Id: If3fc0884e7e2f45de2d278b98693b7a473220a5f
+>Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
+>Fixes: bz#1691616
+
+BUG: 1691620
+Change-Id: If3fc0884e7e2f45de2d278b98693b7a473220a5f
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167088
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ rpc/rpc-transport/socket/src/socket.c | 4 ++--
+ rpc/rpc-transport/socket/src/socket.h | 1 +
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index 121d46b..f6de1d3 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -807,8 +807,8 @@ __socket_shutdown(rpc_transport_t *this)
+ gf_log(this->name, GF_LOG_DEBUG, "shutdown() returned %d. %s", ret,
+ strerror(errno));
+ } else {
+- gf_log(this->name, GF_LOG_INFO, "intentional socket shutdown(%d)",
+- priv->sock);
++ GF_LOG_OCCASIONALLY(priv->shutdown_log_ctr, this->name, GF_LOG_INFO,
++ "intentional socket shutdown(%d)", priv->sock);
+ }
+
+ return ret;
+diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h
+index 32339d3..e1ccae2 100644
+--- a/rpc/rpc-transport/socket/src/socket.h
++++ b/rpc/rpc-transport/socket/src/socket.h
+@@ -219,6 +219,7 @@ typedef struct {
+ int keepalivecnt;
+ int timeout;
+ int log_ctr;
++ int shutdown_log_ctr;
+ /* ssl_error_required is used only during the SSL connection setup
+ * phase.
+ * It holds the error code returned by SSL_get_error() and is used to
+--
+1.8.3.1
+
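For context on the change just above: GF_LOG_OCCASIONALLY gates a log line on a per-instance counter so that a hot path (here, repeated intentional socket shutdowns) cannot flood the log at INFO level. The stand-alone model below assumes an every-42nd gating; treat that constant as an assumption about the upstream macro rather than a quotation of it.

    #include <stdio.h>

    /* Counter-gated logging in the spirit of GF_LOG_OCCASIONALLY:
     * only every 42nd occurrence is emitted (42 is an assumption). */
    #define LOG_OCCASIONALLY(ctr, ...)      \
        do {                                \
            if (!((ctr)++ % 42)) {          \
                printf(__VA_ARGS__);        \
                printf("\n");               \
            }                               \
        } while (0)

    int main(void)
    {
        int shutdown_log_ctr = 0; /* one counter per socket, as in socket.h */
        int i;

        for (i = 0; i < 100; i++)
            LOG_OCCASIONALLY(shutdown_log_ctr,
                             "intentional socket shutdown(%d)", 7);
        /* prints 3 lines (i = 0, 42, 84) instead of 100 */
        return 0;
    }
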
diff --git a/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch b/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch
new file mode 100644
index 0000000..f7c0f65
--- /dev/null
+++ b/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch
@@ -0,0 +1,142 @@
+From 161a039f8088bf8ce7000d8175e3793219525179 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Thu, 28 Mar 2019 07:17:16 -0400
+Subject: [PATCH 50/52] geo-rep: Fix syncing multiple rename of symlink
+
+Problem:
+Geo-rep fails to sync the rename of a symlink if it is
+renamed multiple times and the creation and renames
+happened in quick succession.
+
+Worker crash at slave:
+Traceback (most recent call last):
+ File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", in worker
+ res = getattr(self.obj, rmeth)(*in_data[2:])
+ File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", in entry_ops
+ [ESTALE, EINVAL, EBUSY])
+ File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", in errno_wrap
+ return call(*arg)
+ File "/usr/libexec/glusterfs/python/syncdaemon/libcxattr.py", in lsetxattr
+ cls.raise_oserr()
+ File "/usr/libexec/glusterfs/python/syncdaemon/libcxattr.py", in raise_oserr
+ raise OSError(errn, os.strerror(errn))
+OSError: [Errno 12] Cannot allocate memory
+
+Geo-rep Behaviour:
+1. SYMLINK doesn't record target path in changelog.
+ So while syncing SYMLINK, readlink is done on
+ master to get target path.
+
+2. Geo-rep will create destination if source is not
+ present while syncing RENAME. Hence while syncing
+ RENAME of SYMLINK, target path is collected from
+ destination.
+
+Cause:
+If a symlink is created and renamed multiple times, the creation of
+the symlink is ignored, as it's no longer present on the master at
+that path. When the symlink is renamed multiple times on the master,
+while syncing the first RENAME of the SYMLINK, neither source nor
+destination is present, hence the target path is not known. In this
+case, while creating the destination directly at the slave, regular
+file attributes were encoded into the blob instead of a symlink,
+causing a failure in the gfid-access translator while decoding the
+blob.
+
+Solution:
+While syncing a RENAME of a SYMLINK, when the target is not known
+and neither src nor destination is present on the master,
+don't create the destination; ignore the rename. It's ok to ignore:
+if it's unlinked later, that's fine, and if it's renamed to something
+else, it will be synced then.
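
In code terms, the rule the resource.py hunk further below implements reduces to a single guard. The C sketch here is illustrative only, with invented names, under the assumptions stated in the message above.

    #include <stdio.h>

    /* Illustrative guard (invented names): only recreate the rename
     * destination as a symlink when the target path is known; otherwise
     * skip, since a later rename or unlink will reconcile the entry. */
    static const char *sync_rename_of_symlink(const char *link_target)
    {
        if (link_target != NULL)
            return "create symlink blob at destination";
        return "skip: target unknown, will sync on a later event";
    }

    int main(void)
    {
        printf("%s\n", sync_rename_of_symlink("sym_dir1"));
        printf("%s\n", sync_rename_of_symlink(NULL));
        return 0;
    }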
+
+Backport of:
+> Patch: https://review.gluster.org/22438
+> Change-Id: Ibdfa495513b7c05b5370ab0b89c69a6802338d87
+> BUG: 1693648
+> Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: Ibdfa495513b7c05b5370ab0b89c69a6802338d87
+fixes: bz#1670429
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167122
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/resource.py | 23 ++++++++++++++---------
+ tests/00-geo-rep/georep-basic-dr-rsync.t | 1 +
+ tests/geo-rep.rc | 12 ++++++++++++
+ 3 files changed, 27 insertions(+), 9 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index a2d0b16..c290d86 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -625,15 +625,20 @@ class Server(object):
+ # exist with different gfid.
+ if not matching_disk_gfid(gfid, entry):
+ if e['stat'] and not stat.S_ISDIR(e['stat']['mode']):
+- if stat.S_ISLNK(e['stat']['mode']) and \
+- e['link'] is not None:
+- st1 = lstat(en)
+- if isinstance(st1, int):
+- (pg, bname) = entry2pb(en)
+- blob = entry_pack_symlink(cls, gfid, bname,
+- e['link'], e['stat'])
+- elif not matching_disk_gfid(gfid, en):
+- collect_failure(e, EEXIST, uid, gid, True)
++ if stat.S_ISLNK(e['stat']['mode']):
++ # src is not present, so don't sync symlink as
++ # we don't know target. It's ok to ignore. If
++ # it's unliked, it's fine. If it's renamed to
++ # something else, it will be synced then.
++ if e['link'] is not None:
++ st1 = lstat(en)
++ if isinstance(st1, int):
++ (pg, bname) = entry2pb(en)
++ blob = entry_pack_symlink(cls, gfid, bname,
++ e['link'],
++ e['stat'])
++ elif not matching_disk_gfid(gfid, en):
++ collect_failure(e, EEXIST, uid, gid, True)
+ else:
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
+index 4a03930..8b64370 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
+@@ -110,6 +110,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+ #Check History Crawl.
+ TEST $GEOREP_CLI $master $slave stop
+ TEST create_data "history"
++TEST create_rename_symlink_case
+ TEST $GEOREP_CLI $master $slave start
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
+index 396b4c4..d723129 100644
+--- a/tests/geo-rep.rc
++++ b/tests/geo-rep.rc
+@@ -19,6 +19,18 @@ function check_common_secret_file()
+ echo $?
+ }
+
++function create_rename_symlink_case()
++{
++ mkdir ${mastermnt}/MUL_REN_SYMLINK
++ cd ${mastermnt}/MUL_REN_SYMLINK
++ mkdir sym_dir1
++ ln -s "sym_dir1" sym1
++ mv sym1 sym2
++ mv sym2 sym3
++ mv sym3 sym4
++ cd -
++}
++
+ function create_data()
+ {
+ prefix=$1
+--
+1.8.3.1
+
diff --git a/0051-spec-update-rpm-install-condition.patch b/0051-spec-update-rpm-install-condition.patch
new file mode 100644
index 0000000..8d5ce47
--- /dev/null
+++ b/0051-spec-update-rpm-install-condition.patch
@@ -0,0 +1,67 @@
+From 71f4d55770287288f39b31a0435916ac3d9f742b Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <sheggodu@redhat.com>
+Date: Fri, 5 Apr 2019 22:27:52 +0530
+Subject: [PATCH 51/52] spec: update rpm install condition
+
+Update code to allow rpm install without gluster process shutdown.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1493284
+Change-Id: I308e7e4629a2428927a6df34536e3cd645a54f8c
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167089
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Milind Changire <mchangir@redhat.com>
+---
+ glusterfs.spec.in | 34 ----------------------------------
+ 1 file changed, 34 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 7c7f7c0..0d57b49 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1629,40 +1629,6 @@ if not (ok == 0) then
+ end
+
+
+-%pretrans devel -p <lua>
+-if not posix.access("/bin/bash", "x") then
+- -- initial installation, no shell, no running glusterfsd
+- return 0
+-end
+-
+--- TODO: move this completely to a lua script
+--- For now, we write a temporary bash script and execute that.
+-
+-script = [[#!/bin/sh
+-pidof -c -o %PPID -x glusterfsd &>/dev/null
+-
+-if [ $? -eq 0 ]; then
+- pushd . > /dev/null 2>&1
+- for volume in /var/lib/glusterd/vols/*; do cd $volume;
+- vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+- volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+- if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+- exit 1;
+- fi
+- done
+-
+- popd > /dev/null 2>&1
+- exit 1;
+-fi
+-]]
+-
+-ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
+-end
+-
+-
+-
+ %pretrans fuse -p <lua>
+ if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+--
+1.8.3.1
+
diff --git a/0052-geo-rep-IPv6-support.patch b/0052-geo-rep-IPv6-support.patch
new file mode 100644
index 0000000..12c6e1b
--- /dev/null
+++ b/0052-geo-rep-IPv6-support.patch
@@ -0,0 +1,299 @@
+From d7bb933742f4d9135621590bf13713633c549af1 Mon Sep 17 00:00:00 2001
+From: Aravinda VK <avishwan@redhat.com>
+Date: Thu, 14 Mar 2019 20:06:54 +0530
+Subject: [PATCH 52/52] geo-rep: IPv6 support
+
+`address_family=inet6` needs to be added while mounting the master and
+slave volumes in the gverify script.
+
+A new option (`--inet6`) is introduced to the gluster CLI; it is used
+internally by geo-rep while calling `gluster volume info
+--remote-host=<ipv6>`.
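
The cli.c hunk below records the flag in CLI parser state before the RPC transport is set up. The self-contained C sketch here models that flow; strtail() is reduced to a plain prefix check and the surrounding structure is a toy, not the actual gluster CLI.

    #include <stdio.h>
    #include <string.h>

    struct cli_state { const char *address_family; };

    /* Simplified strtail(): return the tail of str after pattern if
     * str begins with pattern, else NULL (a model, not the real one). */
    static const char *strtail(const char *str, const char *pattern)
    {
        size_t n = strlen(pattern);
        return strncmp(str, pattern, n) == 0 ? str + n : NULL;
    }

    static int cli_opt_parse(const char *opt, struct cli_state *state)
    {
        if (strtail(opt, "inet6")) {
            state->address_family = "inet6";
            return 0;
        }
        return -1;
    }

    int main(void)
    {
        struct cli_state state = { 0 };

        /* assumes the caller has already stripped the leading "--" */
        if (cli_opt_parse("inet6", &state) == 0)
            printf("address-family = %s\n", state.address_family);
        return 0;
    }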
+
+Backport of https://review.gluster.org/22363
+
+Fixes: bz#1688231
+Change-Id: I1e0d42cae07158df043e64a2f991882d8c897837
+Signed-off-by: Aravinda VK <avishwan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167120
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli.c | 11 ++++++++++
+ cli/src/cli.h | 1 +
+ geo-replication/src/gverify.sh | 22 ++++++++++++++++----
+ geo-replication/syncdaemon/argsupgrade.py | 13 +++++++++++-
+ geo-replication/syncdaemon/gsyncd.py | 1 +
+ geo-replication/syncdaemon/subcmds.py | 9 +++++++--
+ xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 30 ++++++++++++++++++++++++++--
+ 7 files changed, 78 insertions(+), 9 deletions(-)
+
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index 08f117e..c33d152 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -433,6 +433,12 @@ cli_opt_parse(char *opt, struct cli_state *state)
+ return 0;
+ }
+
++ oarg = strtail(opt, "inet6");
++ if (oarg) {
++ state->address_family = "inet6";
++ return 0;
++ }
++
+ oarg = strtail(opt, "log-file=");
+ if (oarg) {
+ state->log_file = oarg;
+@@ -679,6 +685,11 @@ cli_rpc_init(struct cli_state *state)
+ this = THIS;
+ cli_rpc_prog = &cli_prog;
+
++ /* If address family specified in CLI */
++ if (state->address_family) {
++ addr_family = state->address_family;
++ }
++
+ /* Connect to glusterd using the specified method, giving preference
+ * to a unix socket connection. If nothing is specified, connect to
+ * the default glusterd socket.
+diff --git a/cli/src/cli.h b/cli/src/cli.h
+index 5df86d5..b79a0a2 100644
+--- a/cli/src/cli.h
++++ b/cli/src/cli.h
+@@ -136,6 +136,7 @@ struct cli_state {
+ gf_loglevel_t log_level;
+
+ char *glusterd_sock;
++ char *address_family;
+ };
+
+ struct cli_local {
+diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh
+index d048de0..7c88f9f 100755
+--- a/geo-replication/src/gverify.sh
++++ b/geo-replication/src/gverify.sh
+@@ -94,6 +94,7 @@ echo $cmd_line;
+ function master_stats()
+ {
+ MASTERVOL=$1;
++ local inet6=$2;
+ local d;
+ local i;
+ local disk_size;
+@@ -102,7 +103,12 @@ function master_stats()
+ local m_status;
+
+ d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null);
+- glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d;
++ if [ "$inet6" = "inet6" ]; then
++ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-id $MASTERVOL -l $master_log_file $d;
++ else
++ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d;
++ fi
++
+ i=$(get_inode_num $d);
+ if [[ "$i" -ne "1" ]]; then
+ echo 0:0;
+@@ -124,12 +130,18 @@ function slave_stats()
+ SLAVEUSER=$1;
+ SLAVEHOST=$2;
+ SLAVEVOL=$3;
++ local inet6=$4;
+ local cmd_line;
+ local ver;
+ local status;
+
+ d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null);
+- glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
++ if [ "$inet6" = "inet6" ]; then
++ glusterfs --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
++ else
++ glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
++ fi
++
+ i=$(get_inode_num $d);
+ if [[ "$i" -ne "1" ]]; then
+ echo 0:0;
+@@ -167,6 +179,8 @@ function main()
+ log_file=$6
+ > $log_file
+
++ inet6=$7
++
+ # Use FORCE_BLOCKER flag in the error message to differentiate
+ # between the errors which the force command should bypass
+
+@@ -204,8 +218,8 @@ function main()
+ fi;
+
+ ERRORS=0;
+- master_data=$(master_stats $1);
+- slave_data=$(slave_stats $2 $3 $4);
++ master_data=$(master_stats $1 ${inet6});
++ slave_data=$(slave_stats $2 $3 $4 ${inet6});
+ master_disk_size=$(echo $master_data | cut -f1 -d':');
+ slave_disk_size=$(echo $slave_data | cut -f1 -d':');
+ master_used_size=$(echo $master_data | cut -f2 -d':');
+diff --git a/geo-replication/syncdaemon/argsupgrade.py b/geo-replication/syncdaemon/argsupgrade.py
+index 4018143..7af4063 100644
+--- a/geo-replication/syncdaemon/argsupgrade.py
++++ b/geo-replication/syncdaemon/argsupgrade.py
+@@ -84,6 +84,10 @@ def upgrade():
+ # fail when it does stat to check the existence.
+ init_gsyncd_template_conf()
+
++ inet6 = False
++ if "--inet6" in sys.argv:
++ inet6 = True
++
+ if "--monitor" in sys.argv:
+ # python gsyncd.py --path=/bricks/b1
+ # --monitor -c gsyncd.conf
+@@ -147,8 +151,11 @@ def upgrade():
+
+ user, hname = remote_addr.split("@")
+
++ if not inet6:
++ hname = gethostbyname(hname)
++
+ print(("ssh://%s@%s:gluster://127.0.0.1:%s" % (
+- user, gethostbyname(hname), vol)))
++ user, hname, vol)))
+
+ sys.exit(0)
+ elif "--normalize-url" in sys.argv:
+@@ -346,3 +353,7 @@ def upgrade():
+
+ if pargs.reset_sync_time:
+ sys.argv.append("--reset-sync-time")
++
++ if inet6:
++ # Add `--inet6` as first argument
++ sys.argv = [sys.argv[0], "--inet6"] + sys.argv[1:]
+diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
+index 037f351..effe0ce 100644
+--- a/geo-replication/syncdaemon/gsyncd.py
++++ b/geo-replication/syncdaemon/gsyncd.py
+@@ -47,6 +47,7 @@ def main():
+ sys.exit(0)
+
+ parser = ArgumentParser()
++ parser.add_argument("--inet6", action="store_true")
+ sp = parser.add_subparsers(dest="subcmd")
+
+ # Monitor Status File update
+diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
+index 30050ec..4ece7e0 100644
+--- a/geo-replication/syncdaemon/subcmds.py
++++ b/geo-replication/syncdaemon/subcmds.py
+@@ -110,8 +110,13 @@ def subcmd_voluuidget(args):
+
+ ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError
+
+- po = Popen(['gluster', '--xml', '--remote-host=' + args.host,
+- 'volume', 'info', args.volname], bufsize=0,
++ cmd = ['gluster', '--xml', '--remote-host=' + args.host,
++ 'volume', 'info', args.volname]
++
++ if args.inet6:
++ cmd.append("--inet6")
++
++ po = Popen(cmd, bufsize=0,
+ stdin=None, stdout=PIPE, stderr=PIPE,
+ universal_newlines=True)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+index 49baa58..0f40bea 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+@@ -76,6 +76,19 @@ static char *gsync_reserved_opts[] = {"gluster-command",
+ static char *gsync_no_restart_opts[] = {"checkpoint", "log_rsync_performance",
+ "log-rsync-performance", NULL};
+
++void
++set_gsyncd_inet6_arg(runner_t *runner)
++{
++ xlator_t *this = NULL;
++ char *af;
++ int ret;
++
++ this = THIS;
++ ret = dict_get_str(this->options, "transport.address-family", &af);
++ if (ret == 0)
++ runner_argprintf(runner, "--%s", af);
++}
++
+ int
+ __glusterd_handle_sys_exec(rpcsvc_request_t *req)
+ {
+@@ -384,6 +397,7 @@ glusterd_urltransform_init(runner_t *runner, const char *transname)
+ {
+ runinit(runner);
+ runner_add_arg(runner, GSYNCD_PREFIX "/gsyncd");
++ set_gsyncd_inet6_arg(runner);
+ runner_argprintf(runner, "--%s-url", transname);
+ }
+
+@@ -725,6 +739,7 @@ glusterd_get_slave_voluuid(char *slave_host, char *slave_vol, char *vol_uuid)
+
+ runinit(&runner);
+ runner_add_arg(&runner, GSYNCD_PREFIX "/gsyncd");
++ set_gsyncd_inet6_arg(&runner);
+ runner_add_arg(&runner, "--slavevoluuid-get");
+ runner_argprintf(&runner, "%s::%s", slave_host, slave_vol);
+
+@@ -788,6 +803,7 @@ glusterd_gsync_get_config(char *master, char *slave, char *conf_path,
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", conf_path);
++ set_gsyncd_inet6_arg(&runner);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_args(&runner, slave, "--config-get-all", NULL);
+@@ -917,6 +933,7 @@ glusterd_gsync_get_status(char *master, char *slave, char *conf_path,
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", conf_path);
++ set_gsyncd_inet6_arg(&runner);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_args(&runner, slave, "--status-get", NULL);
+@@ -937,6 +954,7 @@ glusterd_gsync_get_param_file(char *prmfile, const char *param, char *master,
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", conf_path);
++ set_gsyncd_inet6_arg(&runner);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_args(&runner, slave, "--config-get", NULL);
+@@ -2811,6 +2829,7 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
+ char *slave_ip = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
++ char *af = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -2852,9 +2871,16 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
+ runner_argprintf(&runner, "%s", slave_vol);
+ runner_argprintf(&runner, "%d", ssh_port);
+ runner_argprintf(&runner, "%s", log_file_path);
+- gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s",
++ ret = dict_get_str(this->options, "transport.address-family", &af);
++ if (ret)
++ af = "-";
++
++ runner_argprintf(&runner, "%s", af);
++
++ gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s %s",
+ runner.argv[0], runner.argv[1], runner.argv[2], runner.argv[3],
+- runner.argv[4], runner.argv[5], runner.argv[6]);
++ runner.argv[4], runner.argv[5], runner.argv[6],
++ runner.argv[7]);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ synclock_unlock(&priv->big_lock);
+ ret = runner_run(&runner);
+--
+1.8.3.1
+
diff --git a/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch b/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch
new file mode 100644
index 0000000..337370d
--- /dev/null
+++ b/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch
@@ -0,0 +1,575 @@
+From 1fb89973551937f34f24b45e07072a6ce6c30ff9 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Mon, 16 Oct 2017 14:18:31 +0530
+Subject: [PATCH 053/124] Revert "packaging: (ganesha) remove glusterfs-ganesha
+ subpackage and related files)"
+
+This reverts commit 0cf2963f12a8b540a7042605d8c79f638fdf6cee.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Id6e7585021bd4dd78a59580cfa4838bdd4e539a0
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167102
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac | 3 +
+ extras/Makefile.am | 3 +-
+ extras/ganesha/Makefile.am | 2 +
+ extras/ganesha/config/Makefile.am | 4 +
+ extras/ganesha/config/ganesha-ha.conf.sample | 19 ++++
+ extras/ganesha/scripts/Makefile.am | 4 +
+ extras/ganesha/scripts/create-export-ganesha.sh | 91 +++++++++++++++
+ extras/ganesha/scripts/dbus-send.sh | 60 ++++++++++
+ extras/ganesha/scripts/generate-epoch.py | 48 ++++++++
+ extras/hook-scripts/start/post/Makefile.am | 2 +-
+ extras/hook-scripts/start/post/S31ganesha-start.sh | 122 +++++++++++++++++++++
+ glusterfs.spec.in | 44 +++++++-
+ 12 files changed, 396 insertions(+), 6 deletions(-)
+ create mode 100644 extras/ganesha/Makefile.am
+ create mode 100644 extras/ganesha/config/Makefile.am
+ create mode 100644 extras/ganesha/config/ganesha-ha.conf.sample
+ create mode 100644 extras/ganesha/scripts/Makefile.am
+ create mode 100755 extras/ganesha/scripts/create-export-ganesha.sh
+ create mode 100755 extras/ganesha/scripts/dbus-send.sh
+ create mode 100755 extras/ganesha/scripts/generate-epoch.py
+ create mode 100755 extras/hook-scripts/start/post/S31ganesha-start.sh
+
+diff --git a/configure.ac b/configure.ac
+index 0d06f5a..125ae29 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -196,6 +196,9 @@ AC_CONFIG_FILES([Makefile
+ extras/init.d/glustereventsd-Debian
+ extras/init.d/glustereventsd-Redhat
+ extras/init.d/glustereventsd-FreeBSD
++ extras/ganesha/Makefile
++ extras/ganesha/config/Makefile
++ extras/ganesha/scripts/Makefile
+ extras/systemd/Makefile
+ extras/systemd/glusterd.service
+ extras/systemd/glustereventsd.service
+diff --git a/extras/Makefile.am b/extras/Makefile.am
+index ff5ca9b..983f014 100644
+--- a/extras/Makefile.am
++++ b/extras/Makefile.am
+@@ -11,7 +11,8 @@ EditorModedir = $(docdir)
+ EditorMode_DATA = glusterfs-mode.el glusterfs.vim
+
+ SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \
+- $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python
++ $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \
++ ganesha
+
+ confdir = $(sysconfdir)/glusterfs
+ if WITH_SERVER
+diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am
+new file mode 100644
+index 0000000..542de68
+--- /dev/null
++++ b/extras/ganesha/Makefile.am
+@@ -0,0 +1,2 @@
++SUBDIRS = scripts config
++CLEANFILES =
+diff --git a/extras/ganesha/config/Makefile.am b/extras/ganesha/config/Makefile.am
+new file mode 100644
+index 0000000..c729273
+--- /dev/null
++++ b/extras/ganesha/config/Makefile.am
+@@ -0,0 +1,4 @@
++EXTRA_DIST= ganesha-ha.conf.sample
++
++confdir = $(sysconfdir)/ganesha
++conf_DATA = ganesha-ha.conf.sample
+diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample
+new file mode 100644
+index 0000000..c22892b
+--- /dev/null
++++ b/extras/ganesha/config/ganesha-ha.conf.sample
+@@ -0,0 +1,19 @@
++# Name of the HA cluster created.
++# must be unique within the subnet
++HA_NAME="ganesha-ha-360"
++#
++# N.B. you may use short names or long names; you may not use IP addrs.
++# Once you select one, stay with it as it will be mildly unpleasant to
++# clean up if you switch later on. Ensure that all names - short and/or
++# long - are in DNS or /etc/hosts on all machines in the cluster.
++#
++# The subset of nodes of the Gluster Trusted Pool that form the ganesha
++# HA cluster. Hostname is specified.
++HA_CLUSTER_NODES="server1,server2,..."
++#HA_CLUSTER_NODES="server1.lab.redhat.com,server2.lab.redhat.com,..."
++#
++# Virtual IPs for each of the nodes specified above.
++VIP_server1="10.0.2.1"
++VIP_server2="10.0.2.2"
++#VIP_server1_lab_redhat_com="10.0.2.1"
++#VIP_server2_lab_redhat_com="10.0.2.2"
+diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am
+new file mode 100644
+index 0000000..00a2c45
+--- /dev/null
++++ b/extras/ganesha/scripts/Makefile.am
+@@ -0,0 +1,4 @@
++EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh
++
++scriptsdir = $(libexecdir)/ganesha
++scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py
+diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh
+new file mode 100755
+index 0000000..1ffba42
+--- /dev/null
++++ b/extras/ganesha/scripts/create-export-ganesha.sh
+@@ -0,0 +1,91 @@
++#!/bin/bash
++
++#This script is called by glusterd when the user
++#tries to export a volume via NFS-Ganesha.
++#An export file specific to a volume
++#is created in GANESHA_DIR/exports.
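++#
++# Illustrative invocation (matches the positional arguments read below;
++# the directory shown is the shared-storage default and is an assumption):
++#   create-export-ganesha.sh /var/run/gluster/shared_storage/nfs-ganesha on myvol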
++
++# Try loading the config from any of the distro
++# specific configuration locations
++if [ -f /etc/sysconfig/ganesha ]
++ then
++ . /etc/sysconfig/ganesha
++fi
++if [ -f /etc/conf.d/ganesha ]
++ then
++ . /etc/conf.d/ganesha
++fi
++if [ -f /etc/default/ganesha ]
++ then
++ . /etc/default/ganesha
++fi
++
++GANESHA_DIR=${1%/}
++OPTION=$2
++VOL=$3
++CONF=$GANESHA_DIR"/ganesha.conf"
++declare -i EXPORT_ID
++
++function check_cmd_status()
++{
++ if [ "$1" != "0" ]
++ then
++ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
++ sed -i /$VOL.conf/d $CONF
++ exit 1
++ fi
++}
++
++
++if [ ! -d "$GANESHA_DIR/exports" ];
++ then
++ mkdir $GANESHA_DIR/exports
++ check_cmd_status `echo $?`
++fi
++
++function write_conf()
++{
++echo -e "# WARNING : Using Gluster CLI will overwrite manual
++# changes made to this file. To avoid it, edit the
++# file and run ganesha-ha.sh --refresh-config."
++
++echo "EXPORT{"
++echo " Export_Id = 2;"
++echo " Path = \"/$VOL\";"
++echo " FSAL {"
++echo " name = "GLUSTER";"
++echo " hostname=\"localhost\";"
++echo " volume=\"$VOL\";"
++echo " }"
++echo " Access_type = RW;"
++echo " Disable_ACL = true;"
++echo ' Squash="No_root_squash";'
++echo " Pseudo=\"/$VOL\";"
++echo ' Protocols = "3", "4" ;'
++echo ' Transports = "UDP","TCP";'
++echo ' SecType = "sys";'
++echo " }"
++}
++if [ "$OPTION" = "on" ];
++then
++ if ! (cat $CONF | grep $VOL.conf\"$ )
++ then
++ write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf
++ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF
++ count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l`
++ if [ "$count" = "1" ] ; then
++ EXPORT_ID=2
++ else
++ EXPORT_ID=`cat $GANESHA_DIR/.export_added`
++ check_cmd_status `echo $?`
++ EXPORT_ID=EXPORT_ID+1
++ sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \
++ $GANESHA_DIR/exports/export.$VOL.conf
++ check_cmd_status `echo $?`
++ fi
++ echo $EXPORT_ID > $GANESHA_DIR/.export_added
++ fi
++else
++ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
++ sed -i /$VOL.conf/d $CONF
++fi
+diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh
+new file mode 100755
+index 0000000..ec8d948
+--- /dev/null
++++ b/extras/ganesha/scripts/dbus-send.sh
+@@ -0,0 +1,60 @@
++#!/bin/bash
++
++# Try loading the config from any of the distro
++# specific configuration locations
++if [ -f /etc/sysconfig/ganesha ]
++ then
++ . /etc/sysconfig/ganesha
++fi
++if [ -f /etc/conf.d/ganesha ]
++ then
++ . /etc/conf.d/ganesha
++fi
++if [ -f /etc/default/ganesha ]
++ then
++ . /etc/default/ganesha
++fi
++
++GANESHA_DIR=${1%/}
++OPTION=$2
++VOL=$3
++CONF=$GANESHA_DIR"/ganesha.conf"
++
++function check_cmd_status()
++{
++ if [ "$1" != "0" ]
++ then
++ logger "dynamic export failed on node :${hostname -s}"
++ fi
++}
++
++#This function adds an export dynamically by sending a D-Bus AddExport call
++function dynamic_export_add()
++{
++ dbus-send --system \
++--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
++org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \
++string:"EXPORT(Path=/$VOL)"
++ check_cmd_status `echo $?`
++}
++
++#This function removes an export dynamically (using the export_id of the export)
++function dynamic_export_remove()
++{
++ removed_id=`cat $GANESHA_DIR/exports/export.$VOL.conf |\
++grep Export_Id | awk -F"[=,;]" '{print$2}'| tr -d '[[:space:]]'`
++ dbus-send --print-reply --system \
++--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
++org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id
++ check_cmd_status `echo $?`
++}
++
++if [ "$OPTION" = "on" ];
++then
++ dynamic_export_add $@
++fi
++
++if [ "$OPTION" = "off" ];
++then
++ dynamic_export_remove $@
++fi
+diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py
+new file mode 100755
+index 0000000..5db5e56
+--- /dev/null
++++ b/extras/ganesha/scripts/generate-epoch.py
+@@ -0,0 +1,48 @@
++#!/usr/bin/python
++#
++# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
++# This file is part of GlusterFS.
++#
++# This file is licensed to you under your choice of the GNU Lesser
++# General Public License, version 3 or any later version (LGPLv3 or
++# later), or the GNU General Public License, version 2 (GPLv2), in all
++# cases as published by the Free Software Foundation.
++#
++# Generates unique epoch value on each gluster node to be used by
++# nfs-ganesha service on that node.
++#
++# Configure 'EPOCH_EXEC' option to this script path in
++# '/etc/sysconfig/ganesha' file used by nfs-ganesha service.
++#
++# Construct the epoch as follows -
++# the upper 32 bits contain the now() time
++# the lower 32 bits contain part of the local glusterd node uuid
++
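++# Illustrative example (values are made up):
++#   time 0x5d000000 shifted left 32 -> 0x5d00000000000000
++#   uuid-derived bits               -> 0x00000000abcd0000
++#   epoch = OR of the two           -> 0x5d000000abcd0000
++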
++import time
++import binascii
++
++# Calculate the now() time into a 64-bit integer value
++def epoch_now():
++ epoch_time = int(time.mktime(time.localtime())) << 32
++ return epoch_time
++
++# Read the glusterd UUID and derive a 32-bit value from it
++def epoch_uuid():
++ file_name = '/var/lib/glusterd/glusterd.info'
++
++ for line in open(file_name):
++ if "UUID" in line:
++ glusterd_uuid = line.split('=')[1].strip()
++
++ uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-",""))
++
++        epoch_uuid = int(uuid_bin.encode('hex'), 16) & 0xFFFF0000
++ return epoch_uuid
++
++# Construct the epoch as follows -
++# the upper 32 bits contain the now() time
++# the lower 32 bits contain part of the local glusterd node uuid
++epoch = (epoch_now() | epoch_uuid())
++print str(epoch)
++
++exit(0)
+diff --git a/extras/hook-scripts/start/post/Makefile.am b/extras/hook-scripts/start/post/Makefile.am
+index e32546d..792019d 100644
+--- a/extras/hook-scripts/start/post/Makefile.am
++++ b/extras/hook-scripts/start/post/Makefile.am
+@@ -1,4 +1,4 @@
+-EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh
++EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S31ganesha-start.sh
+
+ hookdir = $(GLUSTERD_WORKDIR)/hooks/1/start/post/
+ if WITH_SERVER
+diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh
+new file mode 100755
+index 0000000..90ba6bc
+--- /dev/null
++++ b/extras/hook-scripts/start/post/S31ganesha-start.sh
+@@ -0,0 +1,122 @@
++#!/bin/bash
++PROGNAME="Sganesha-start"
++OPTSPEC="volname:,gd-workdir:"
++VOL=
++declare -i EXPORT_ID
++ganesha_key="ganesha.enable"
++GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha"
++CONF1="$GANESHA_DIR/ganesha.conf"
++GLUSTERD_WORKDIR=
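++
++# glusterd invokes start/post hook scripts with the long options parsed
++# below; an illustrative command line (the workdir value is an assumption):
++#   S31ganesha-start.sh --volname=myvol --gd-workdir=/var/lib/glusterd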
++
++function parse_args ()
++{
++        ARGS=$(getopt -l $OPTSPEC -o "o" --name $PROGNAME -- "$@")
++ eval set -- "$ARGS"
++
++ while true; do
++ case $1 in
++ --volname)
++ shift
++ VOL=$1
++ ;;
++ --gd-workdir)
++ shift
++ GLUSTERD_WORKDIR=$1
++ ;;
++ *)
++ shift
++ break
++ ;;
++ esac
++ shift
++ done
++}
++
++
++
++#This function generates a new export entry as export.volume_name.conf
++function write_conf()
++{
++echo -e "# WARNING : Using Gluster CLI will overwrite manual
++# changes made to this file. To avoid it, edit the
++# file, copy it over to all the NFS-Ganesha nodes
++# and run ganesha-ha.sh --refresh-config."
++
++echo "EXPORT{"
++echo " Export_Id = 2;"
++echo " Path = \"/$VOL\";"
++echo " FSAL {"
++echo " name = \"GLUSTER\";"
++echo " hostname=\"localhost\";"
++echo " volume=\"$VOL\";"
++echo " }"
++echo " Access_type = RW;"
++echo " Disable_ACL = true;"
++echo " Squash=\"No_root_squash\";"
++echo " Pseudo=\"/$VOL\";"
++echo " Protocols = \"3\", \"4\" ;"
++echo " Transports = \"UDP\",\"TCP\";"
++echo " SecType = \"sys\";"
++echo "}"
++}
++
++#It adds the export dynamically by sending dbus signals
++function export_add()
++{
++ dbus-send --print-reply --system --dest=org.ganesha.nfsd \
++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \
++string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)"
++
++}
++
++# based on src/scripts/ganeshactl/Ganesha/export_mgr.py
++function is_exported()
++{
++ local volume="${1}"
++
++ dbus-send --type=method_call --print-reply --system \
++ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
++ org.ganesha.nfsd.exportmgr.ShowExports \
++ | grep -w -q "/${volume}"
++
++ return $?
++}
++
++# Check the info file (contains the volume options) to see if Ganesha is
++# enabled for this volume.
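++# A matching line in the info file looks like (illustrative):
++#   ganesha.enable=on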
++function ganesha_enabled()
++{
++ local volume="${1}"
++ local info_file="${GLUSTERD_WORKDIR}/vols/${VOL}/info"
++ local enabled="off"
++
++ enabled=$(grep -w ${ganesha_key} ${info_file} | cut -d"=" -f2)
++
++ [ "${enabled}" == "on" ]
++
++ return $?
++}
++
++parse_args $@
++
++if ganesha_enabled ${VOL} && ! is_exported ${VOL}
++then
++ if [ ! -e ${GANESHA_DIR}/exports/export.${VOL}.conf ]
++ then
++ #Remove export entry from nfs-ganesha.conf
++ sed -i /$VOL.conf/d $CONF1
++ write_conf ${VOL} > ${GANESHA_DIR}/exports/export.${VOL}.conf
++ EXPORT_ID=`cat $GANESHA_DIR/.export_added`
++ EXPORT_ID=EXPORT_ID+1
++ echo $EXPORT_ID > $GANESHA_DIR/.export_added
++ sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \
++ $GANESHA_DIR/exports/export.$VOL.conf
++ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1
++ else
++ EXPORT_ID=$(grep ^[[:space:]]*Export_Id $GANESHA_DIR/exports/export.$VOL.conf |\
++ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
++ fi
++ export_add $VOL
++fi
++
++exit 0
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 0d57b49..dd7438c 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -299,7 +299,6 @@ Obsoletes: hekafs
+ Obsoletes: %{name}-common < %{version}-%{release}
+ Obsoletes: %{name}-core < %{version}-%{release}
+ Obsoletes: %{name}-ufo
+-Obsoletes: %{name}-ganesha
+ %if ( 0%{!?_with_gnfs:1} )
+ Obsoletes: %{name}-gnfs
+ %endif
+@@ -455,6 +454,30 @@ is in user space and easily manageable.
+ This package provides support to FUSE based clients and includes the
+ glusterfs(d) binary.
+
++%if ( 0%{!?_without_server:1} )
++%package ganesha
++Summary: NFS-Ganesha configuration
++Group: Applications/File
++
++Requires: %{name}-server%{?_isa} = %{version}-%{release}
++Requires: nfs-ganesha-gluster, pcs, dbus
++%if ( 0%{?rhel} && 0%{?rhel} == 6 )
++Requires: cman, pacemaker, corosync
++%endif
++
++%description ganesha
++GlusterFS is a distributed file-system capable of scaling to several
++petabytes. It aggregates various storage bricks over Infiniband RDMA
++or TCP/IP interconnect into one large parallel network file
++system. GlusterFS is one of the most sophisticated file systems in
++terms of features and extensibility. It borrows a powerful concept
++called Translators from GNU Hurd kernel. Much of the code in GlusterFS
++is in user space and easily manageable.
++
++This package provides the configuration and related files for using
++NFS-Ganesha as the NFS server with GlusterFS.
++%endif
++
+ %if ( 0%{!?_without_georeplication:1} )
+ %package geo-replication
+ Summary: GlusterFS Geo-replication
+@@ -1111,6 +1134,12 @@ exit 0
+ %endif
+ %endif
+
++%if ( 0%{?_without_server:1} )
++#exclude ganesha related files
++%exclude %{_sysconfdir}/ganesha/*
++%exclude %{_libexecdir}/ganesha/*
++%endif
++
+ %files api
+ %exclude %{_libdir}/*.so
+ # libgfapi files
+@@ -1273,6 +1302,12 @@ exit 0
+ %exclude %{_datadir}/glusterfs/tests/vagrant
+ %endif
+
++%if ( 0%{!?_without_server:1} )
++%files ganesha
++%{_sysconfdir}/ganesha/*
++%{_libexecdir}/ganesha/*
++%endif
++
+ %if ( 0%{!?_without_ocf:1} )
+ %files resource-agents
+ # /usr/lib is the standard for OCF, also on x86_64
+@@ -1396,6 +1431,7 @@ exit 0
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh
++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+@@ -1868,6 +1904,9 @@ fi
+ %endif
+
+ %changelog
++* Fri Apr 5 2019 Jiffin Tony Thottan <jthottan@redhat.com>
++- Adding ganesha bits back in gluster repository
++
+ * Wed Mar 6 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+ - remove unneeded ldconfig in scriptlets
+ - reported by Igor Gnatenko in Fedora
+@@ -1960,9 +1999,6 @@ fi
+ * Thu Feb 16 2017 Niels de Vos <ndevos@redhat.com>
+ - Obsolete and Provide python-gluster for upgrading from glusterfs < 3.10
+
+-* Tue Feb 7 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+-- remove ganesha (#1418417)
+-
+ * Wed Feb 1 2017 Poornima G <pgurusid@redhat.com>
+ - Install /var/lib/glusterd/groups/metadata-cache by default
+
+--
+1.8.3.1
+
diff --git a/0054-Revert-glusterd-storhaug-remove-ganesha.patch b/0054-Revert-glusterd-storhaug-remove-ganesha.patch
new file mode 100644
index 0000000..261856c
--- /dev/null
+++ b/0054-Revert-glusterd-storhaug-remove-ganesha.patch
@@ -0,0 +1,1912 @@
+From 1029c27982d2f91cb2d3c4fcc19aa5171111dfb9 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Mon, 16 Oct 2017 14:24:29 +0530
+Subject: [PATCH 054/124] Revert "glusterd: (storhaug) remove ganesha"
+
+This reverts commit 843e1b04b554ab887ec656ae7b468bb93ee4e2f7.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I06b5450344c33f26da3d94b6f67051d41dfbba17
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167103
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-global.c | 57 ++
+ cli/src/cli-cmd-parser.c | 122 ++-
+ cli/src/cli-cmd.c | 3 +-
+ cli/src/cli-rpc-ops.c | 82 ++
+ cli/src/cli.h | 4 +
+ xlators/mgmt/glusterd/src/Makefile.am | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-errno.h | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 915 +++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 79 ++
+ xlators/mgmt/glusterd/src/glusterd-messages.h | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 45 +-
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 196 +++++
+ xlators/mgmt/glusterd/src/glusterd-store.h | 2 +
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 37 +
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 +
+ xlators/mgmt/glusterd/src/glusterd.h | 22 +
+ 16 files changed, 1568 insertions(+), 11 deletions(-)
+ create mode 100644 xlators/mgmt/glusterd/src/glusterd-ganesha.c
+
+diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c
+index d0729ac..270b76f 100644
+--- a/cli/src/cli-cmd-global.c
++++ b/cli/src/cli-cmd-global.c
+@@ -36,6 +36,10 @@ int
+ cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
++int
++cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word,
++ const char **words, int wordcount);
++
+ struct cli_cmd global_cmds[] = {
+ {
+ "global help",
+@@ -48,6 +52,11 @@ struct cli_cmd global_cmds[] = {
+ cli_cmd_get_state_cbk,
+ "Get local state representation of mentioned daemon",
+ },
++ {
++ "nfs-ganesha {enable| disable} ",
++ cli_cmd_ganesha_cbk,
++ "Enable/disable NFS-Ganesha support",
++ },
+ {NULL, NULL, NULL}};
+
+ int
+@@ -89,6 +98,54 @@ out:
+ }
+
+ int
++cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word,
++ const char **words, int wordcount)
++
++{
++ int sent = 0;
++ int parse_error = 0;
++ int ret = -1;
++ rpc_clnt_procedure_t *proc = NULL;
++ call_frame_t *frame = NULL;
++ dict_t *options = NULL;
++ cli_local_t *local = NULL;
++ char *op_errstr = NULL;
++
++ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GANESHA];
++
++ frame = create_frame(THIS, THIS->ctx->pool);
++ if (!frame)
++ goto out;
++
++ ret = cli_cmd_ganesha_parse(state, words, wordcount, &options, &op_errstr);
++ if (ret) {
++ if (op_errstr) {
++ cli_err("%s", op_errstr);
++ GF_FREE(op_errstr);
++ } else
++ cli_usage_out(word->pattern);
++ parse_error = 1;
++ goto out;
++ }
++
++ CLI_LOCAL_INIT(local, words, frame, options);
++
++ if (proc->fn) {
++ ret = proc->fn(frame, THIS, options);
++ }
++
++out:
++ if (ret) {
++ cli_cmd_sent_status_get(&sent);
++ if ((sent == 0) && (parse_error == 0))
++ cli_out("Setting global option failed");
++ }
++
++ CLI_STACK_DESTROY(frame);
++ return ret;
++}
++
++int
+ cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+ {
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index d9ccba1..cd9c445 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -1694,7 +1694,7 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ }
+ }
+
+- if ((strcmp (key, "cluster.brick-multiplex") == 0)) {
++ if ((strcmp(key, "cluster.brick-multiplex") == 0)) {
+ question =
+ "Brick-multiplexing is supported only for "
+ "OCS converged or independent mode. Also it is "
+@@ -1703,11 +1703,12 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ "are running before this option is modified."
+ "Do you still want to continue?";
+
+- answer = cli_cmd_get_confirmation (state, question);
++ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+- gf_log ("cli", GF_LOG_ERROR, "Operation "
+- "cancelled, exiting");
+- *op_errstr = gf_strdup ("Aborted by user.");
++ gf_log("cli", GF_LOG_ERROR,
++ "Operation "
++ "cancelled, exiting");
++ *op_errstr = gf_strdup("Aborted by user.");
+ ret = -1;
+ goto out;
+ }
+@@ -5848,3 +5849,114 @@ out:
+
+ return ret;
+ }
++
++/* Parsing global option for NFS-Ganesha config
++ * gluster nfs-ganesha enable/disable */
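++/* On success, *options carries (as set by the code below):
++ *   key = "nfs-ganesha", value = "enable" or "disable",
++ *   globalname = "All", hold_global_locks = 1 */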
++
++int32_t
++cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
++ int wordcount, dict_t **options, char **op_errstr)
++{
++ dict_t *dict = NULL;
++ int ret = -1;
++ char *key = NULL;
++ char *value = NULL;
++ char *w = NULL;
++ char *opwords[] = {"enable", "disable", NULL};
++ const char *question = NULL;
++ gf_answer_t answer = GF_ANSWER_NO;
++
++ GF_ASSERT(words);
++ GF_ASSERT(options);
++
++ dict = dict_new();
++
++ if (!dict)
++ goto out;
++
++ if (wordcount != 2)
++ goto out;
++
++ key = (char *)words[0];
++ value = (char *)words[1];
++
++ if (!key || !value) {
++ cli_out("Usage : nfs-ganesha <enable/disable>");
++ ret = -1;
++ goto out;
++ }
++
++ ret = gf_strip_whitespace(value, strlen(value));
++ if (ret == -1)
++ goto out;
++
++ if (strcmp(key, "nfs-ganesha")) {
++ gf_asprintf(op_errstr,
++ "Global option: error: ' %s '"
++ "is not a valid global option.",
++ key);
++ ret = -1;
++ goto out;
++ }
++
++ w = str_getunamb(value, opwords);
++ if (!w) {
++ cli_out(
++ "Invalid global option \n"
++ "Usage : nfs-ganesha <enable/disable>");
++ ret = -1;
++ goto out;
++ }
++
++ question =
++ "Enabling NFS-Ganesha requires Gluster-NFS to be"
++ " disabled across the trusted pool. Do you "
++ "still want to continue?\n";
++
++ if (strcmp(value, "enable") == 0) {
++ answer = cli_cmd_get_confirmation(state, question);
++ if (GF_ANSWER_NO == answer) {
++ gf_log("cli", GF_LOG_ERROR,
++ "Global operation "
++ "cancelled, exiting");
++ ret = -1;
++ goto out;
++ }
++ }
++ cli_out("This will take a few minutes to complete. Please wait ..");
++
++ ret = dict_set_str(dict, "key", key);
++ if (ret) {
++ gf_log(THIS->name, GF_LOG_ERROR, "dict set on key failed");
++ goto out;
++ }
++
++ ret = dict_set_str(dict, "value", value);
++ if (ret) {
++ gf_log(THIS->name, GF_LOG_ERROR, "dict set on value failed");
++ goto out;
++ }
++
++ ret = dict_set_str(dict, "globalname", "All");
++ if (ret) {
++ gf_log(THIS->name, GF_LOG_ERROR,
++ "dict set on global"
++ " key failed.");
++ goto out;
++ }
++
++ ret = dict_set_int32(dict, "hold_global_locks", _gf_true);
++ if (ret) {
++ gf_log(THIS->name, GF_LOG_ERROR,
++ "dict set on global key "
++ "failed.");
++ goto out;
++ }
++
++ *options = dict;
++out:
++ if (ret)
++ dict_unref(dict);
++
++ return ret;
++}
+diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c
+index 2ee8b1b..8c06905 100644
+--- a/cli/src/cli-cmd.c
++++ b/cli/src/cli-cmd.c
+@@ -366,7 +366,8 @@ cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ unsigned timeout = 0;
+
+ if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) ||
+- (GLUSTER_CLI_HEAL_VOLUME == procnum))
++ (GLUSTER_CLI_HEAL_VOLUME == procnum) ||
++ (GLUSTER_CLI_GANESHA == procnum))
+ timeout = cli_ten_minutes_timeout;
+ else
+ timeout = cli_default_conn_timeout;
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 12e7fcc..736cd18 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -2207,6 +2207,62 @@ out:
+ return ret;
+ }
+
++int
++gf_cli_ganesha_cbk(struct rpc_req *req, struct iovec *iov, int count,
++ void *myframe)
++{
++ gf_cli_rsp rsp = {
++ 0,
++ };
++ int ret = -1;
++ dict_t *dict = NULL;
++
++ GF_ASSERT(myframe);
++
++ if (-1 == req->rpc_status) {
++ goto out;
++ }
++
++ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
++ if (ret < 0) {
++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
++ "Failed to decode xdr response");
++ goto out;
++ }
++
++ gf_log("cli", GF_LOG_DEBUG, "Received resp to ganesha");
++
++ dict = dict_new();
++
++ if (!dict) {
++ ret = -1;
++ goto out;
++ }
++
++ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
++ if (ret)
++ goto out;
++
++ if (rsp.op_ret) {
++ if (strcmp(rsp.op_errstr, ""))
++ cli_err("nfs-ganesha: failed: %s", rsp.op_errstr);
++ else
++ cli_err("nfs-ganesha: failed");
++ }
++
++ else {
++ cli_out("nfs-ganesha : success ");
++ }
++
++ ret = rsp.op_ret;
++
++out:
++ if (dict)
++ dict_unref(dict);
++ cli_cmd_broadcast_response(ret);
++ return ret;
++}
++
+ char *
+ is_server_debug_xlator(void *myframe)
+ {
+@@ -4880,6 +4936,31 @@ out:
+ }
+
+ int32_t
++gf_cli_ganesha(call_frame_t *frame, xlator_t *this, void *data)
++{
++ gf_cli_req req = {{
++ 0,
++ }};
++ int ret = 0;
++ dict_t *dict = NULL;
++
++ if (!frame || !this || !data) {
++ ret = -1;
++ goto out;
++ }
++
++ dict = data;
++
++ ret = cli_to_glusterd(&req, frame, gf_cli_ganesha_cbk,
++ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_GANESHA,
++ this, cli_rpc_prog, NULL);
++out:
++ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
++
++ return ret;
++}
++
++int32_t
+ gf_cli_set_volume(call_frame_t *frame, xlator_t *this, void *data)
+ {
+ gf_cli_req req = {{
+@@ -12214,6 +12295,7 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", gf_cli_sys_exec},
+ [GLUSTER_CLI_SNAP] = {"SNAP", gf_cli_snapshot},
+ [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume},
++ [GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha},
+ [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt},
+ [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot},
+ [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", gf_cli_attach_tier},
+diff --git a/cli/src/cli.h b/cli/src/cli.h
+index b79a0a2..37e4d9d 100644
+--- a/cli/src/cli.h
++++ b/cli/src/cli.h
+@@ -282,6 +282,10 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
+
+ int32_t
++cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
++ int wordcount, dict_t **options, char **op_errstr);
++
++int32_t
+ cli_cmd_get_state_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
+
+diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
+index c8dd8e3..5fe5156 100644
+--- a/xlators/mgmt/glusterd/src/Makefile.am
++++ b/xlators/mgmt/glusterd/src/Makefile.am
+@@ -10,7 +10,7 @@ glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL)
+ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
+ glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
+- glusterd-volgen.c glusterd-rebalance.c \
++ glusterd-volgen.c glusterd-rebalance.c glusterd-ganesha.c \
+ glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \
+ glusterd-replace-brick.c glusterd-log-ops.c glusterd-tier.c \
+ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \
+@@ -52,6 +52,8 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/mount -I$(CONTRIBDIR)/userspace-rcu \
+ -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \
+ -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" \
++ -DCONFDIR=\"$(localstatedir)/run/gluster/shared_storage/nfs-ganesha\" \
++ -DGANESHA_PREFIX=\"$(libexecdir)/ganesha\" \
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE)
+
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-errno.h b/xlators/mgmt/glusterd/src/glusterd-errno.h
+index 7e1575b..c74070e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-errno.h
++++ b/xlators/mgmt/glusterd/src/glusterd-errno.h
+@@ -27,7 +27,7 @@ enum glusterd_op_errno {
+ EG_ISSNAP = 30813, /* Volume is a snap volume */
+ EG_GEOREPRUN = 30814, /* Geo-Replication is running */
+ EG_NOTTHINP = 30815, /* Bricks are not thinly provisioned */
+- EG_NOGANESHA = 30816, /* obsolete ganesha is not enabled */
++ EG_NOGANESHA = 30816, /* Global ganesha is not enabled */
+ };
+
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+new file mode 100644
+index 0000000..fac16e6
+--- /dev/null
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -0,0 +1,915 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include <glusterfs/common-utils.h>
++#include "glusterd.h"
++#include "glusterd-op-sm.h"
++#include "glusterd-store.h"
++#include "glusterd-utils.h"
++#include "glusterd-nfs-svc.h"
++#include "glusterd-volgen.h"
++#include "glusterd-messages.h"
++#include <glusterfs/syscall.h>
++
++#include <ctype.h>
++
++int
++start_ganesha(char **op_errstr);
++
++typedef struct service_command {
++ char *binary;
++ char *service;
++ int (*action)(struct service_command *, char *);
++} service_command;
++
++/* parsing_ganesha_ha_conf will allocate the returned string
++ * to be freed (GF_FREE) by the caller
++ * return NULL if error or not found */
++static char *
++parsing_ganesha_ha_conf(const char *key)
++{
++#define MAX_LINE 1024
++ char scratch[MAX_LINE * 2] = {
++ 0,
++ };
++ char *value = NULL, *pointer = NULL, *end_pointer = NULL;
++ FILE *fp;
++
++ fp = fopen(GANESHA_HA_CONF, "r");
++ if (fp == NULL) {
++ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
++ "couldn't open the file %s", GANESHA_HA_CONF);
++ goto end_ret;
++ }
++ while ((pointer = fgets(scratch, MAX_LINE, fp)) != NULL) {
++ /* Read config file until we get matching "^[[:space:]]*key" */
++ if (*pointer == '#') {
++ continue;
++ }
++ while (isblank(*pointer)) {
++ pointer++;
++ }
++ if (strncmp(pointer, key, strlen(key))) {
++ continue;
++ }
++ pointer += strlen(key);
++ /* key found : if we fail to parse, we'll return an error
++ * rather than trying next one
++ * - supposition : conf file is bash compatible : no space
++ * around the '=' */
++ if (*pointer != '=') {
++ gf_msg(THIS->name, GF_LOG_ERROR, errno,
++ GD_MSG_GET_CONFIG_INFO_FAILED, "Parsing %s failed at key %s",
++ GANESHA_HA_CONF, key);
++ goto end_close;
++ }
++ pointer++; /* jump the '=' */
++
++ if (*pointer == '"' || *pointer == '\'') {
++            /* don't keep the quote */
++ pointer++;
++ }
++ end_pointer = pointer;
++ /* stop at the next closing quote or blank/newline */
++ do {
++ end_pointer++;
++ } while (!(*end_pointer == '\'' || *end_pointer == '"' ||
++ isspace(*end_pointer) || *end_pointer == '\0'));
++ *end_pointer = '\0';
++
++ /* got it. copy it and return */
++ value = gf_strdup(pointer);
++ break;
++ }
++
++end_close:
++ fclose(fp);
++end_ret:
++ return value;
++}
++
++static int
++sc_systemctl_action(struct service_command *sc, char *command)
++{
++ runner_t runner = {
++ 0,
++ };
++
++ runinit(&runner);
++ runner_add_args(&runner, sc->binary, command, sc->service, NULL);
++ return runner_run(&runner);
++}
++
++static int
++sc_service_action(struct service_command *sc, char *command)
++{
++ runner_t runner = {
++ 0,
++ };
++
++ runinit(&runner);
++ runner_add_args(&runner, sc->binary, sc->service, command, NULL);
++ return runner_run(&runner);
++}
++
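++/* Probe for an available service manager (systemctl, invoke-rc.d or
++ * service, in the order of sc_list below) and run the given action on
++ * the nfs-ganesha service with it. */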
++static int
++manage_service(char *action)
++{
++ struct stat stbuf = {
++ 0,
++ };
++ int i = 0;
++ int ret = 0;
++ struct service_command sc_list[] = {{.binary = "/usr/bin/systemctl",
++ .service = "nfs-ganesha",
++ .action = sc_systemctl_action},
++ {.binary = "/sbin/invoke-rc.d",
++ .service = "nfs-ganesha",
++ .action = sc_service_action},
++ {.binary = "/sbin/service",
++ .service = "nfs-ganesha",
++ .action = sc_service_action},
++ {.binary = NULL}};
++
++ while (sc_list[i].binary != NULL) {
++ ret = sys_stat(sc_list[i].binary, &stbuf);
++ if (ret == 0) {
++ gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary);
++ if (strcmp(sc_list[i].binary, "/usr/bin/systemctl") == 0)
++ ret = sc_systemctl_action(&sc_list[i], action);
++ else
++ ret = sc_service_action(&sc_list[i], action);
++
++ return ret;
++ }
++ i++;
++ }
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNRECOGNIZED_SVC_MNGR,
++ "Could not %s NFS-Ganesha.Service manager for distro"
++ " not recognized.",
++ action);
++ return ret;
++}
++
++/*
++ * Check if the cluster is a ganesha cluster or not *
++ */
++gf_boolean_t
++glusterd_is_ganesha_cluster()
++{
++ int ret = -1;
++ glusterd_conf_t *priv = NULL;
++ xlator_t *this = NULL;
++ gf_boolean_t ret_bool = _gf_false;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("ganesha", this, out);
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
++ _gf_false);
++ if (ret == _gf_true) {
++ ret_bool = _gf_true;
++ gf_msg_debug(this->name, 0, "nfs-ganesha is enabled for the cluster");
++ } else
++ gf_msg_debug(this->name, 0, "nfs-ganesha is disabled for the cluster");
++
++out:
++ return ret_bool;
++}
++
++/* Check if ganesha.enable is set to 'on', which indicates that
++ * a particular volume is exported via NFS-Ganesha */
++gf_boolean_t
++glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo)
++{
++ char *value = NULL;
++ gf_boolean_t is_exported = _gf_false;
++ int ret = 0;
++
++ ret = glusterd_volinfo_get(volinfo, "ganesha.enable", &value);
++ if ((ret == 0) && value) {
++ if (strcmp(value, "on") == 0) {
++ gf_msg_debug(THIS->name, 0,
++ "ganesha.enable set"
++ " to %s",
++ value);
++ is_exported = _gf_true;
++ }
++ }
++ return is_exported;
++}
++
++/* *
++ * The below function is called as part of commit phase for volume set option
++ * "ganesha.enable". If the value is "on", it creates export configuration file
++ * and then export the volume via dbus command. Incase of "off", the volume
++ * will be already unexported during stage phase, so it will remove the conf
++ * file from shared storage
++ */
++int
++glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict)
++{
++ int ret = 0;
++ char *volname = NULL;
++
++ GF_ASSERT(key);
++ GF_ASSERT(value);
++ GF_ASSERT(dict);
++
++ if ((strcmp(key, "ganesha.enable") == 0)) {
++ if ((strcmp(value, "on")) && (strcmp(value, "off"))) {
++ gf_asprintf(errstr,
++ "Invalid value"
++ " for volume set command. Use on/off only.");
++ ret = -1;
++ goto out;
++ }
++ if (strcmp(value, "on") == 0) {
++ ret = glusterd_handle_ganesha_op(dict, errstr, key, value);
++
++ } else if (is_origin_glusterd(dict)) {
++ ret = dict_get_str(dict, "volname", &volname);
++ if (ret) {
++ gf_msg("glusterd-ganesha", GF_LOG_ERROR, errno,
++ GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
++ goto out;
++ }
++ ret = manage_export_config(volname, "off", errstr);
++ }
++ }
++out:
++ if (ret) {
++ gf_msg("glusterd-ganesha", GF_LOG_ERROR, 0,
++ GD_MSG_NFS_GNS_OP_HANDLE_FAIL,
++ "Handling NFS-Ganesha"
++ " op failed.");
++ }
++ return ret;
++}
++
++int
++glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
++{
++ int ret = -1;
++ int value = -1;
++ gf_boolean_t option = _gf_false;
++ char *str = NULL;
++ glusterd_conf_t *priv = NULL;
++ xlator_t *this = NULL;
++
++ GF_ASSERT(dict);
++ this = THIS;
++ GF_ASSERT(this);
++ priv = this->private;
++ GF_ASSERT(priv);
++
++ value = dict_get_str_boolean(dict, "value", _gf_false);
++ if (value == -1) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "value not present.");
++ goto out;
++ }
++ /* This dict_get will fail if the user had never set the key before */
++    /* Ignoring the ret value and proceeding */
++ ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
++ if (ret == -1) {
++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_GET_FAILED,
++ "Global dict not present.");
++ ret = 0;
++ goto out;
++ }
++ /* Validity of the value is already checked */
++ ret = gf_string2boolean(str, &option);
++ /* Check if the feature is already enabled, fail in that case */
++ if (value == option) {
++ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", str);
++ ret = -1;
++ goto out;
++ }
++
++ if (value) {
++ ret = start_ganesha(op_errstr);
++ if (ret) {
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL,
++ "Could not start NFS-Ganesha");
++ }
++ } else {
++ ret = stop_ganesha(op_errstr);
++ if (ret)
++ gf_msg_debug(THIS->name, 0,
++ "Could not stop "
++ "NFS-Ganesha.");
++ }
++
++out:
++
++ if (ret) {
++ if (!(*op_errstr)) {
++ *op_errstr = gf_strdup("Error, Validation Failed");
++ gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s",
++ GLUSTERD_STORE_KEY_GANESHA_GLOBAL);
++ } else {
++ gf_msg_debug(this->name, 0, "Error, Cannot Validate option");
++ }
++ }
++ return ret;
++}
++
++int
++glusterd_op_set_ganesha(dict_t *dict, char **errstr)
++{
++ int ret = 0;
++ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
++ char *key = NULL;
++ char *value = NULL;
++ char *next_version = NULL;
++
++ this = THIS;
++ GF_ASSERT(this);
++ GF_ASSERT(dict);
++
++ priv = this->private;
++ GF_ASSERT(priv);
++
++ ret = dict_get_str(dict, "key", &key);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "Couldn't get key in global option set");
++ goto out;
++ }
++
++ ret = dict_get_str(dict, "value", &value);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "Couldn't get value in global option set");
++ goto out;
++ }
++
++ ret = glusterd_handle_ganesha_op(dict, errstr, key, value);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_SETUP_FAIL,
++ "Initial NFS-Ganesha set up failed");
++ ret = -1;
++ goto out;
++ }
++ ret = dict_set_dynstr_with_alloc(priv->opts,
++ GLUSTERD_STORE_KEY_GANESHA_GLOBAL, value);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
++ "Failed to set"
++ " nfs-ganesha in dict.");
++ goto out;
++ }
++ ret = glusterd_get_next_global_opt_version_str(priv->opts, &next_version);
++ if (ret) {
++ gf_msg_debug(THIS->name, 0,
++ "Could not fetch "
++ " global op version");
++ goto out;
++ }
++ ret = dict_set_str(priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
++ if (ret)
++ goto out;
++
++ ret = glusterd_store_options(this, priv->opts);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL,
++ "Failed to store options");
++ goto out;
++ }
++
++out:
++ gf_msg_debug(this->name, 0, "returning %d", ret);
++ return ret;
++}
++
++/* The following function parses GANESHA_HA_CONF.
++ * A sample file looks like this:
++ * HA_NAME="ganesha-ha-360"
++ * HA_VOL_NAME="ha-state"
++ * HA_CLUSTER_NODES="server1,server2"
++ * VIP_rhs_1="10.x.x.x"
++ * VIP_rhs_2="10.x.x.x." */
++
++/* Check if the localhost is listed as one of the nfs-ganesha nodes */
++gf_boolean_t
++check_host_list(void)
++{
++ glusterd_conf_t *priv = NULL;
++ char *hostname, *hostlist;
++ gf_boolean_t ret = _gf_false;
++ xlator_t *this = NULL;
++
++ this = THIS;
++ priv = THIS->private;
++ GF_ASSERT(priv);
++
++ hostlist = parsing_ganesha_ha_conf("HA_CLUSTER_NODES");
++ if (hostlist == NULL) {
++ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_CONFIG_INFO_FAILED,
++ "couldn't get HA_CLUSTER_NODES from file %s", GANESHA_HA_CONF);
++ return _gf_false;
++ }
++
++ /* Hostlist is a comma separated list now */
++ hostname = strtok(hostlist, ",");
++ while (hostname != NULL) {
++ ret = gf_is_local_addr(hostname);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NFS_GNS_HOST_FOUND,
++ "ganesha host found "
++ "Hostname is %s",
++ hostname);
++ break;
++ }
++ hostname = strtok(NULL, ",");
++ }
++
++ GF_FREE(hostlist);
++ return ret;
++}
++
++int
++manage_export_config(char *volname, char *value, char **op_errstr)
++{
++ runner_t runner = {
++ 0,
++ };
++ int ret = -1;
++
++ GF_ASSERT(volname);
++ runinit(&runner);
++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/create-export-ganesha.sh",
++ CONFDIR, value, volname, NULL);
++ ret = runner_run(&runner);
++
++ if (ret)
++ gf_asprintf(op_errstr,
++ "Failed to create"
++ " NFS-Ganesha export config file.");
++
++ return ret;
++}
++
++/* Exports and unexports a particular volume via NFS-Ganesha */
++int
++ganesha_manage_export(dict_t *dict, char *value, char **op_errstr)
++{
++ runner_t runner = {
++ 0,
++ };
++ int ret = -1;
++ glusterd_volinfo_t *volinfo = NULL;
++ dict_t *vol_opts = NULL;
++ char *volname = NULL;
++ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
++ gf_boolean_t option = _gf_false;
++
++ runinit(&runner);
++ this = THIS;
++ GF_ASSERT(this);
++ priv = this->private;
++
++ GF_ASSERT(value);
++ GF_ASSERT(dict);
++ GF_ASSERT(priv);
++
++ ret = dict_get_str(dict, "volname", &volname);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "Unable to get volume name");
++ goto out;
++ }
++ ret = gf_string2boolean(value, &option);
++ if (ret == -1) {
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
++ "invalid value.");
++ goto out;
++ }
++
++ ret = glusterd_volinfo_find(volname, &volinfo);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
++ FMTSTR_CHECK_VOL_EXISTS, volname);
++ goto out;
++ }
++
++ ret = glusterd_check_ganesha_export(volinfo);
++ if (ret && option) {
++ gf_asprintf(op_errstr,
++ "ganesha.enable "
++ "is already 'on'.");
++ ret = -1;
++ goto out;
++
++ } else if (!option && !ret) {
++ gf_asprintf(op_errstr,
++ "ganesha.enable "
++ "is already 'off'.");
++ ret = -1;
++ goto out;
++ }
++
++ /* Check if global option is enabled, proceed only then */
++ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
++ _gf_false);
++ if (ret == -1) {
++ gf_msg_debug(this->name, 0,
++ "Failed to get "
++ "global option dict.");
++ gf_asprintf(op_errstr,
++ "The option "
++ "nfs-ganesha should be "
++ "enabled before setting ganesha.enable.");
++ goto out;
++ }
++ if (!ret) {
++ gf_asprintf(op_errstr,
++ "The option "
++ "nfs-ganesha should be "
++ "enabled before setting ganesha.enable.");
++ ret = -1;
++ goto out;
++ }
++
++ /* *
++ * Create the export file from the node where ganesha.enable "on"
++ * is executed
++ * */
++ if (option) {
++ ret = manage_export_config(volname, "on", op_errstr);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
++ "Failed to create"
++ "export file for NFS-Ganesha\n");
++ goto out;
++ }
++ }
++
++ if (check_host_list()) {
++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
++ value, volname, NULL);
++ ret = runner_run(&runner);
++ if (ret) {
++ gf_asprintf(op_errstr,
++ "Dynamic export"
++ " addition/deletion failed."
++ " Please see log file for details");
++ goto out;
++ }
++ }
++
++ vol_opts = volinfo->dict;
++ ret = dict_set_dynstr_with_alloc(vol_opts, "features.cache-invalidation",
++ value);
++ if (ret)
++ gf_asprintf(op_errstr,
++ "Cache-invalidation could not"
++ " be set to %s.",
++ value);
++ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
++ if (ret)
++ gf_asprintf(op_errstr, "failed to store volinfo for %s",
++ volinfo->volname);
++
++out:
++ return ret;
++}
++
++int
++tear_down_cluster(gf_boolean_t run_teardown)
++{
++ int ret = 0;
++ runner_t runner = {
++ 0,
++ };
++ struct stat st = {
++ 0,
++ };
++ DIR *dir = NULL;
++ struct dirent *entry = NULL;
++ struct dirent scratch[2] = {
++ {
++ 0,
++ },
++ };
++ char path[PATH_MAX] = {
++ 0,
++ };
++
++ if (run_teardown) {
++ runinit(&runner);
++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh",
++ "teardown", CONFDIR, NULL);
++ ret = runner_run(&runner);
++ /* *
++         * Remove all the entries in CONFDIR except ganesha.conf and
++ * ganesha-ha.conf
++ */
++ dir = sys_opendir(CONFDIR);
++ if (!dir) {
++ gf_msg_debug(THIS->name, 0,
++ "Failed to open directory %s. "
++ "Reason : %s",
++ CONFDIR, strerror(errno));
++ ret = 0;
++ goto out;
++ }
++
++ GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
++ while (entry) {
++ snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name);
++ ret = sys_lstat(path, &st);
++ if (ret == -1) {
++ gf_msg_debug(THIS->name, 0,
++ "Failed to stat entry %s :"
++ " %s",
++ path, strerror(errno));
++ goto out;
++ }
++
++ if (strcmp(entry->d_name, "ganesha.conf") == 0 ||
++ strcmp(entry->d_name, "ganesha-ha.conf") == 0)
++ gf_msg_debug(THIS->name, 0,
++ " %s is not required"
++ " to remove",
++ path);
++ else if (S_ISDIR(st.st_mode))
++ ret = recursive_rmdir(path);
++ else
++ ret = sys_unlink(path);
++
++ if (ret) {
++ gf_msg_debug(THIS->name, 0,
++ " Failed to remove %s. "
++ "Reason : %s",
++ path, strerror(errno));
++ }
++
++ gf_msg_debug(THIS->name, 0, "%s %s",
++ ret ? "Failed to remove" : "Removed", entry->d_name);
++ GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
++ }
++
++ ret = sys_closedir(dir);
++ if (ret) {
++ gf_msg_debug(THIS->name, 0,
++ "Failed to close dir %s. Reason :"
++ " %s",
++ CONFDIR, strerror(errno));
++ }
++ }
++
++out:
++ return ret;
++}
++
++int
++setup_cluster(gf_boolean_t run_setup)
++{
++ int ret = 0;
++ runner_t runner = {
++ 0,
++ };
++
++ if (run_setup) {
++ runinit(&runner);
++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "setup",
++ CONFDIR, NULL);
++ ret = runner_run(&runner);
++ }
++ return ret;
++}
++
++static int
++teardown(gf_boolean_t run_teardown, char **op_errstr)
++{
++ runner_t runner = {
++ 0,
++ };
++ int ret = 1;
++ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_conf_t *priv = NULL;
++ dict_t *vol_opts = NULL;
++
++ priv = THIS->private;
++
++ ret = tear_down_cluster(run_teardown);
++ if (ret == -1) {
++ gf_asprintf(op_errstr,
++ "Cleanup of NFS-Ganesha"
++ " HA config failed.");
++ goto out;
++ }
++
++ runinit(&runner);
++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "cleanup",
++ CONFDIR, NULL);
++ ret = runner_run(&runner);
++ if (ret)
++ gf_msg_debug(THIS->name, 0,
++ "Could not clean up"
++ " NFS-Ganesha related config");
++
++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
++ {
++ vol_opts = volinfo->dict;
++ /* All the volumes exported via NFS-Ganesha will be
++ unexported, hence setting the appropriate keys */
++ ret = dict_set_str(vol_opts, "features.cache-invalidation", "off");
++ if (ret)
++ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
++ "Could not set features.cache-invalidation "
++ "to off for %s",
++ volinfo->volname);
++
++ ret = dict_set_str(vol_opts, "ganesha.enable", "off");
++ if (ret)
++ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
++ "Could not set ganesha.enable to off for %s",
++ volinfo->volname);
++
++ ret = glusterd_store_volinfo(volinfo,
++ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
++ if (ret)
++ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
++ "failed to store volinfo for %s", volinfo->volname);
++ }
++out:
++ return ret;
++}
++
++int
++stop_ganesha(char **op_errstr)
++{
++ int ret = 0;
++ runner_t runner = {
++ 0,
++ };
++
++ runinit(&runner);
++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh",
++ "--setup-ganesha-conf-files", CONFDIR, "no", NULL);
++ ret = runner_run(&runner);
++ if (ret) {
++ gf_asprintf(op_errstr,
++ "removal of symlink ganesha.conf "
++ "in /etc/ganesha failed");
++ }
++
++ if (check_host_list()) {
++ ret = manage_service("stop");
++ if (ret)
++ gf_asprintf(op_errstr,
++ "NFS-Ganesha service could not"
++ "be stopped.");
++ }
++ return ret;
++}
++
++int
++start_ganesha(char **op_errstr)
++{
++ int ret = -1;
++ dict_t *vol_opts = NULL;
++ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_conf_t *priv = NULL;
++ runner_t runner = {
++ 0,
++ };
++
++ priv = THIS->private;
++ GF_ASSERT(priv);
++
++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
++ {
++ vol_opts = volinfo->dict;
++ /* Gluster-nfs has to be disabled across the trusted pool */
++ /* before attempting to start nfs-ganesha */
++ ret = dict_set_str(vol_opts, NFS_DISABLE_MAP_KEY, "on");
++ if (ret)
++ goto out;
++
++ ret = glusterd_store_volinfo(volinfo,
++ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
++ if (ret) {
++ *op_errstr = gf_strdup(
++ "Failed to store the "
++ "Volume information");
++ goto out;
++ }
++ }
++
++ /* If the nfs svc is not initialized it means that the service is not
++ * running, hence we can skip the process of stopping gluster-nfs
++ * service
++ */
++ if (priv->nfs_svc.inited) {
++ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
++ if (ret) {
++ ret = -1;
++ gf_asprintf(op_errstr,
++ "Gluster-NFS service could"
++ "not be stopped, exiting.");
++ goto out;
++ }
++ }
++
++ if (check_host_list()) {
++ runinit(&runner);
++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh",
++ "--setup-ganesha-conf-files", CONFDIR, "yes", NULL);
++ ret = runner_run(&runner);
++ if (ret) {
++ gf_asprintf(op_errstr,
++ "creation of symlink ganesha.conf "
++ "in /etc/ganesha failed");
++ goto out;
++ }
++ ret = manage_service("start");
++ if (ret)
++ gf_asprintf(op_errstr,
++ "NFS-Ganesha failed to start."
++ "Please see log file for details");
++ }
++
++out:
++ return ret;
++}
++
++static int
++pre_setup(gf_boolean_t run_setup, char **op_errstr)
++{
++ int ret = 0;
++
++ ret = check_host_list();
++
++ if (ret) {
++ ret = setup_cluster(run_setup);
++ if (ret == -1)
++ gf_asprintf(op_errstr,
++ "Failed to set up HA "
++ "config for NFS-Ganesha. "
++ "Please check the log file for details");
++ }
++
++ return ret;
++}
++
++int
++glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
++ char *value)
++{
++ int32_t ret = -1;
++ gf_boolean_t option = _gf_false;
++
++ GF_ASSERT(dict);
++ GF_ASSERT(op_errstr);
++ GF_ASSERT(key);
++ GF_ASSERT(value);
++
++ if (strcmp(key, "ganesha.enable") == 0) {
++ ret = ganesha_manage_export(dict, value, op_errstr);
++ if (ret < 0)
++ goto out;
++ }
++
++ /* It is possible that the key might not be set */
++ ret = gf_string2boolean(value, &option);
++ if (ret == -1) {
++ gf_asprintf(op_errstr, "Invalid value in key-value pair.");
++ goto out;
++ }
++
++ if (strcmp(key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) == 0) {
++ /* *
++ * The set up/teardown of pcs cluster should be performed only
++         * once. This will be done on the node on which the cli command
++         * 'gluster nfs-ganesha <enable/disable>' was executed, so that
++         * node should be part of the ganesha HA cluster.
++ */
++ if (option) {
++ ret = pre_setup(is_origin_glusterd(dict), op_errstr);
++ if (ret < 0)
++ goto out;
++ } else {
++ ret = teardown(is_origin_glusterd(dict), op_errstr);
++ if (ret < 0)
++ goto out;
++ }
++ }
++
++out:
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index de44af7..528993c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -1911,6 +1911,83 @@ glusterd_op_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ return ret;
+ }
+
++int
++__glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
++{
++ int32_t ret = -1;
++ gf_cli_req cli_req = {{
++ 0,
++ }};
++ dict_t *dict = NULL;
++ glusterd_op_t cli_op = GD_OP_GANESHA;
++ char *op_errstr = NULL;
++ char err_str[2048] = {
++ 0,
++ };
++ xlator_t *this = NULL;
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ GF_ASSERT(req);
++
++ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
++ if (ret < 0) {
++ snprintf(err_str, sizeof(err_str),
++ "Failed to decode "
++ "request received from cli");
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
++ err_str);
++ req->rpc_err = GARBAGE_ARGS;
++ goto out;
++ }
++
++ if (cli_req.dict.dict_len) {
++ /* Unserialize the dictionary */
++ dict = dict_new();
++ if (!dict) {
++ ret = -1;
++ goto out;
++ }
++
++ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
++ &dict);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
++ "failed to "
++ "unserialize req-buffer to dictionary");
++ snprintf(err_str, sizeof(err_str),
++ "Unable to decode "
++ "the command");
++ goto out;
++ } else {
++ dict->extra_stdfree = cli_req.dict.dict_val;
++ }
++ }
++
++ gf_msg_trace(this->name, 0, "Received global option request");
++
++ ret = glusterd_op_begin_synctask(req, GD_OP_GANESHA, dict);
++out:
++ if (ret) {
++ if (err_str[0] == '\0')
++ snprintf(err_str, sizeof(err_str), "Operation failed");
++ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
++ }
++ if (op_errstr)
++ GF_FREE(op_errstr);
++ if (dict)
++ dict_unref(dict);
++
++ return ret;
++}
++
++int
++glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
++{
++ return glusterd_big_locked_handler(req, __glusterd_handle_ganesha_cmd);
++}
++
+ static int
+ __glusterd_handle_reset_volume(rpcsvc_request_t *req)
+ {
+@@ -6644,6 +6721,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME",
+ GLUSTER_CLI_BARRIER_VOLUME,
+ glusterd_handle_barrier, NULL, 0, DRC_NA},
++ [GLUSTER_CLI_GANESHA] = {"GANESHA", GLUSTER_CLI_GANESHA,
++ glusterd_handle_ganesha_cmd, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", GLUSTER_CLI_GET_VOL_OPT,
+ glusterd_handle_get_vol_opt, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_BITROT] = {"BITROT", GLUSTER_CLI_BITROT,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
+index 1a4bd54..9558480 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
+@@ -297,7 +297,7 @@ GLFS_MSGID(
+ GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL,
+ GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE,
+ GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL,
+- GD_MSG_MANAGER_FUNCTION_FAILED,
++ GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED,
+ GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL);
+
+ #endif /* !_GLUSTERD_MESSAGES_H_ */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 12d857a..a630c48 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -1176,6 +1176,13 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
+ if (ret)
+ goto out;
+
++ if ((strcmp(key, "ganesha.enable") == 0) &&
++ (strcmp(value, "off") == 0)) {
++ ret = ganesha_manage_export(dict, "off", op_errstr);
++ if (ret)
++ goto out;
++ }
++
+ ret = glusterd_check_quota_cmd(key, value, errstr, sizeof(errstr));
+ if (ret)
+ goto out;
+@@ -1677,6 +1684,20 @@ glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr)
+ goto out;
+ }
+
++ /* *
++     * If the key ganesha.enable is set, the volume should be unexported from
++     * the ganesha server. It is a volume-level option, so perform this only
++     * when the volume name is not "all" (in other words, if volinfo != NULL)
++ */
++ if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) {
++ if (glusterd_check_ganesha_export(volinfo)) {
++ ret = ganesha_manage_export(dict, "off", op_errstr);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
++ "Could not reset ganesha.enable key");
++ }
++ }
++
+ if (strcmp(key, "all")) {
+ exists = glusterd_check_option_exists(key, &key_fixed);
+ if (exists == -1) {
+@@ -2393,6 +2414,15 @@ glusterd_op_reset_volume(dict_t *dict, char **op_rspstr)
+ }
+ }
+
++ if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) {
++ if (glusterd_check_ganesha_export(volinfo)) {
++ ret = manage_export_config(volname, "off", op_rspstr);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
++ "Could not reset ganesha.enable key");
++ }
++ }
++
+ out:
+ GF_FREE(key_fixed);
+ if (quorum_action)
+@@ -2964,6 +2994,10 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ }
+ }
+
++ ret = glusterd_check_ganesha_cmd(key, value, errstr, dict);
++ if (ret == -1)
++ goto out;
++
+ if (!is_key_glusterd_hooks_friendly(key)) {
+ ret = glusterd_check_option_exists(key, &key_fixed);
+ GF_ASSERT(ret);
+@@ -4494,7 +4528,8 @@ glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx)
+
+ case GD_OP_SYNC_VOLUME:
+ case GD_OP_COPY_FILE:
+- case GD_OP_SYS_EXEC: {
++ case GD_OP_SYS_EXEC:
++ case GD_OP_GANESHA: {
+ dict_copy(dict, req_dict);
+ } break;
+
+@@ -5944,6 +5979,10 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ ret = glusterd_op_stage_set_volume(dict, op_errstr);
+ break;
+
++ case GD_OP_GANESHA:
++ ret = glusterd_op_stage_set_ganesha(dict, op_errstr);
++ break;
++
+ case GD_OP_RESET_VOLUME:
+ ret = glusterd_op_stage_reset_volume(dict, op_errstr);
+ break;
+@@ -6074,7 +6113,9 @@ glusterd_op_commit_perform(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_op_set_volume(dict, op_errstr);
+ break;
+-
++ case GD_OP_GANESHA:
++ ret = glusterd_op_set_ganesha(dict, op_errstr);
++ break;
+ case GD_OP_RESET_VOLUME:
+ ret = glusterd_op_reset_volume(dict, op_errstr);
+ break;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+index 2958443..041946d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+@@ -3788,6 +3788,148 @@ out:
+ return ret;
+ }
+
++/* *
++ * There are two possibilities here: the destination is either a
++ * snapshot or a clone. For a snapshot, the nfs_ganesha export file is
++ * copied to the snapdir. For a clone, a new export file is created for
++ * the clone in the GANESHA_EXPORT_DIRECTORY, replacing occurrences of
++ * volname with clonename.
++ */
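++/* *
++ * Worked example (illustrative only): cloning volume "vol1" as "clone1"
++ * turns GANESHA_EXPORT_DIRECTORY/export.vol1.conf into
++ * export.clone1.conf, with every occurrence of "vol1" in the file body
++ * rewritten to "clone1".
++ */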
++int
++glusterd_copy_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
++ glusterd_volinfo_t *dest_vol)
++{
++ int32_t ret = -1;
++ char snap_dir[PATH_MAX] = {
++ 0,
++ };
++ char src_path[PATH_MAX] = {
++ 0,
++ };
++ char dest_path[PATH_MAX] = {
++ 0,
++ };
++ char buffer[BUFSIZ] = {
++ 0,
++ };
++ char *find_ptr = NULL;
++ char *buff_ptr = NULL;
++ char *tmp_ptr = NULL;
++ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
++ struct stat stbuf = {
++ 0,
++ };
++ FILE *src = NULL;
++ FILE *dest = NULL;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("snapshot", this, out);
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
++ GF_VALIDATE_OR_GOTO(this->name, dest_vol, out);
++
++ if (glusterd_check_ganesha_export(src_vol) == _gf_false) {
++ gf_msg_debug(this->name, 0,
++ "%s is not exported via "
++ "NFS-Ganesha. Skipping copy of export conf.",
++ src_vol->volname);
++ ret = 0;
++ goto out;
++ }
++
++ if (src_vol->is_snap_volume) {
++ GLUSTERD_GET_SNAP_DIR(snap_dir, src_vol->snapshot, priv);
++ ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf", snap_dir,
++ src_vol->snapshot->snapname);
++ } else {
++ ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf",
++ GANESHA_EXPORT_DIRECTORY, src_vol->volname);
++ }
++ if (ret < 0 || ret >= PATH_MAX)
++ goto out;
++
++ ret = sys_lstat(src_path, &stbuf);
++ if (ret) {
++ /*
++ * This code path is hit only when the src_vol is being
++ * exported via NFS-Ganesha. So if the conf file is not
++ * available, we fail the snapshot operation.
++ */
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
++ "Stat on %s failed with %s", src_path, strerror(errno));
++ goto out;
++ }
++
++ if (dest_vol->is_snap_volume) {
++ memset(snap_dir, 0, PATH_MAX);
++ GLUSTERD_GET_SNAP_DIR(snap_dir, dest_vol->snapshot, priv);
++ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
++ snap_dir, dest_vol->snapshot->snapname);
++ if (ret < 0)
++ goto out;
++
++ ret = glusterd_copy_file(src_path, dest_path);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++ "Failed to copy %s in %s", src_path, dest_path);
++ goto out;
++ }
++
++ } else {
++ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
++ GANESHA_EXPORT_DIRECTORY, dest_vol->volname);
++ if (ret < 0)
++ goto out;
++
++ src = fopen(src_path, "r");
++ dest = fopen(dest_path, "w");
++
++ if (!src || !dest) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
++ "Failed to open %s", dest ? src_path : dest_path);
++ ret = -1;
++ goto out;
++ }
++
++ /* *
++ * If the source volume is a snapshot, the export conf file
++ * contains the original volname.
++ */
++ if (src_vol->is_snap_volume)
++ find_ptr = gf_strdup(src_vol->parent_volname);
++ else
++ find_ptr = gf_strdup(src_vol->volname);
++
++ if (!find_ptr)
++ goto out;
++
++ /* Replacing volname with clonename */
++ while (fgets(buffer, BUFSIZ, src)) {
++ buff_ptr = buffer;
++ while ((tmp_ptr = strstr(buff_ptr, find_ptr))) {
++ while (buff_ptr < tmp_ptr)
++ fputc((int)*buff_ptr++, dest);
++ fputs(dest_vol->volname, dest);
++ buff_ptr += strlen(find_ptr);
++ }
++ fputs(buff_ptr, dest);
++ memset(buffer, 0, BUFSIZ);
++ }
++ }
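++ /* *
++ * Sketch of the loop above: with find_ptr = "vol1" and
++ * dest_vol->volname = "clone1", an input line such as
++ * Path = "/vol1";
++ * is written out as
++ * Path = "/clone1";
++ */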
++out:
++ if (src)
++ fclose(src);
++ if (dest)
++ fclose(dest);
++ if (find_ptr)
++ GF_FREE(find_ptr);
++
++ return ret;
++}
++
+ int32_t
+ glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol)
+ {
+@@ -3876,6 +4018,60 @@ out:
+ return ret;
+ }
+
++int
++glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
++ glusterd_snap_t *snap)
++{
++ int32_t ret = -1;
++ char snap_dir[PATH_MAX] = "";
++ char src_path[PATH_MAX] = "";
++ char dest_path[PATH_MAX] = "";
++ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
++ struct stat stbuf = {
++ 0,
++ };
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("snapshot", this, out);
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
++ GF_VALIDATE_OR_GOTO(this->name, snap, out);
++
++ GLUSTERD_GET_SNAP_DIR(snap_dir, snap, priv);
++
++ ret = snprintf(src_path, sizeof(src_path), "%s/export.%s.conf", snap_dir,
++ snap->snapname);
++ if (ret < 0)
++ goto out;
++
++ ret = sys_lstat(src_path, &stbuf);
++ if (ret) {
++ if (errno == ENOENT) {
++ ret = 0;
++ gf_msg_debug(this->name, 0, "%s not found", src_path);
++ } else
++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
++ "Stat on %s failed with %s", src_path, strerror(errno));
++ goto out;
++ }
++
++ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
++ GANESHA_EXPORT_DIRECTORY, src_vol->volname);
++ if (ret < 0)
++ goto out;
++
++ ret = glusterd_copy_file(src_path, dest_path);
++ if (ret)
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++ "Failed to copy %s in %s", src_path, dest_path);
++
++out:
++ return ret;
++}
++
+ /* Snapd functions */
+ int
+ glusterd_is_snapd_enabled(glusterd_volinfo_t *volinfo)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
+index e60be6e..41d0001 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.h
++++ b/xlators/mgmt/glusterd/src/glusterd-store.h
+@@ -118,6 +118,8 @@ typedef enum glusterd_store_ver_ac_ {
+ #define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED "migration-skipped"
+ #define GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME "migration-run-time"
+
++#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha"
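++/* Cluster-wide key read back in glusterd_op_start_volume(); its value is
++ * parsed with gf_string2boolean(), so boolean-style strings ("on",
++ * "enable", ...) are expected (see the volume-ops hunk below). */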
++
+ int32_t
+ glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_ver_ac_t ac);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 86ef470..a0417ca 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -1823,6 +1823,18 @@ glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr)
+ goto out;
+ }
+
++ ret = glusterd_check_ganesha_export(volinfo);
++ if (ret) {
++ ret = ganesha_manage_export(dict, "off", op_errstr);
++ if (ret) {
++ gf_msg(THIS->name, GF_LOG_WARNING, 0,
++ GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL,
++ "Could not "
++ "unexport volume via NFS-Ganesha");
++ ret = 0;
++ }
++ }
++
+ if (glusterd_is_defrag_on(volinfo)) {
+ snprintf(msg, sizeof(msg),
+ "rebalance session is "
+@@ -2674,6 +2686,8 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr)
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_svc_t *svc = NULL;
++ char *str = NULL;
++ gf_boolean_t option = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -2731,6 +2745,29 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr)
+ }
+ }
+
++ ret = dict_get_str(conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
++ if (ret != 0) {
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
++ "Global dict not present.");
++ ret = 0;
++
++ } else {
++ ret = gf_string2boolean(str, &option);
++ /* Check if the feature is enabled and set nfs-disable to true */
++ if (option) {
++ gf_msg_debug(this->name, 0, "NFS-Ganesha is enabled");
++ /* Gluster-nfs should not start when NFS-Ganesha is enabled*/
++ ret = dict_set_str(volinfo->dict, NFS_DISABLE_MAP_KEY, "on");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
++ "Failed to set nfs.disable for"
++ "volume %s",
++ volname);
++ goto out;
++ }
++ }
++ }
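++ /* Net effect (sketch): with the cluster-wide nfs-ganesha option on,
++ * every volume started afterwards gets nfs.disable forced to "on",
++ * so Gluster/NFS is never run alongside NFS-Ganesha. */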
++
+ ret = glusterd_start_volume(volinfo, flags, _gf_true);
+ if (ret)
+ goto out;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index d1244e4..13f423a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2597,6 +2597,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "features/upcall",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
++ {
++ .key = "ganesha.enable",
++ .voltype = "features/ganesha",
++ .value = "off",
++ .option = "ganesha.enable",
++ .op_version = GD_OP_VERSION_3_7_0,
++ },
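++ /* Illustrative usage: "gluster volume set <VOLNAME> ganesha.enable on"
++ exports the volume via NFS-Ganesha; the option defaults to "off". */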
+ /* Lease translator options */
+ {
+ .key = "features.leases",
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 67867f8..5135181 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -61,6 +61,9 @@
+ #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging"
+ #define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level"
+
++#define GANESHA_HA_CONF CONFDIR "/ganesha-ha.conf"
++#define GANESHA_EXPORT_DIRECTORY CONFDIR "/exports"
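++/* With a typical CONFDIR of /etc/ganesha (the value is build-dependent),
++ * these expand to /etc/ganesha/ganesha-ha.conf and /etc/ganesha/exports. */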
++
+ #define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256
+ #define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90
+ #define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100
+@@ -1356,6 +1359,25 @@ glusterd_op_stop_volume(dict_t *dict);
+ int
+ glusterd_op_delete_volume(dict_t *dict);
+ int
++glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
++ char *value);
++int
++glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict);
++int
++glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr);
++int
++glusterd_op_set_ganesha(dict_t *dict, char **errstr);
++int
++ganesha_manage_export(dict_t *dict, char *value, char **op_errstr);
++gf_boolean_t
++glusterd_is_ganesha_cluster();
++gf_boolean_t
++glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo);
++int
++stop_ganesha(char **op_errstr);
++int
++tear_down_cluster(gf_boolean_t run_teardown);
++int
+ manage_export_config(char *volname, char *value, char **op_errstr);
+
+ int
+--
+1.8.3.1
+
diff --git a/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch b/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch
new file mode 100644
index 0000000..b2d6052
--- /dev/null
+++ b/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch
@@ -0,0 +1,1897 @@
+From 267135e74d94d8a1e56a263b37ca4689020b9e53 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Mon, 16 Oct 2017 16:58:28 +0530
+Subject: [PATCH 055/124] Revert "storhaug HA: first step, remove resource
+ agents and setup script"
+
+This reverts commit c822e354e16646adf18bbc5123798663faa543b2.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Idd50fe1a5be5a3258d560518d810f9ec4c57621a
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167104
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac | 1 +
+ extras/ganesha/Makefile.am | 2 +-
+ extras/ganesha/ocf/Makefile.am | 11 +
+ extras/ganesha/ocf/ganesha_grace | 221 +++++++
+ extras/ganesha/ocf/ganesha_mon | 234 +++++++
+ extras/ganesha/ocf/ganesha_nfsd | 167 +++++
+ extras/ganesha/scripts/Makefile.am | 6 +-
+ extras/ganesha/scripts/ganesha-ha.sh | 1125 ++++++++++++++++++++++++++++++++++
+ glusterfs.spec.in | 8 +-
+ 9 files changed, 1771 insertions(+), 4 deletions(-)
+ create mode 100644 extras/ganesha/ocf/Makefile.am
+ create mode 100644 extras/ganesha/ocf/ganesha_grace
+ create mode 100644 extras/ganesha/ocf/ganesha_mon
+ create mode 100644 extras/ganesha/ocf/ganesha_nfsd
+ create mode 100644 extras/ganesha/scripts/ganesha-ha.sh
+
+diff --git a/configure.ac b/configure.ac
+index 125ae29..baa811a 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -199,6 +199,7 @@ AC_CONFIG_FILES([Makefile
+ extras/ganesha/Makefile
+ extras/ganesha/config/Makefile
+ extras/ganesha/scripts/Makefile
++ extras/ganesha/ocf/Makefile
+ extras/systemd/Makefile
+ extras/systemd/glusterd.service
+ extras/systemd/glustereventsd.service
+diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am
+index 542de68..9eaa401 100644
+--- a/extras/ganesha/Makefile.am
++++ b/extras/ganesha/Makefile.am
+@@ -1,2 +1,2 @@
+-SUBDIRS = scripts config
++SUBDIRS = scripts config ocf
+ CLEANFILES =
+diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am
+new file mode 100644
+index 0000000..990a609
+--- /dev/null
++++ b/extras/ganesha/ocf/Makefile.am
+@@ -0,0 +1,11 @@
++EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd
++
++# The root of the OCF resource agent hierarchy
++# Per the OCF standard, it's always "lib",
++# not "lib64" (even on 64-bit platforms).
++ocfdir = $(prefix)/lib/ocf
++
++# The provider directory
++radir = $(ocfdir)/resource.d/heartbeat
++
++ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd
+diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
+new file mode 100644
+index 0000000..825f716
+--- /dev/null
++++ b/extras/ganesha/ocf/ganesha_grace
+@@ -0,0 +1,221 @@
++#!/bin/bash
++#
++# Copyright (c) 2014 Anand Subramanian anands@redhat.com
++# Copyright (c) 2015 Red Hat Inc.
++# All Rights Reserved.
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of version 2 of the GNU General Public License as
++# published by the Free Software Foundation.
++#
++# This program is distributed in the hope that it would be useful, but
++# WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
++#
++# Further, this software is distributed without any warranty that it is
++# free of the rightful claim of any third person regarding infringement
++# or the like. Any license provided herein, whether implied or
++# otherwise, applies only to this software file. Patent licenses, if
++# any, provided herein do not apply to combinations of this program with
++# other software, or any other product whatsoever.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write the Free Software Foundation,
++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
++#
++#
++
++# Initialization:
++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
++
++if [ -n "$OCF_DEBUG_LIBRARY" ]; then
++ . $OCF_DEBUG_LIBRARY
++else
++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
++fi
++
++OCF_RESKEY_grace_active_default="grace-active"
++: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}}
++
++ganesha_meta_data() {
++ cat <<END
++<?xml version="1.0"?>
++<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
++<resource-agent name="ganesha_grace">
++<version>1.0</version>
++
++<longdesc lang="en">
++This Linux-specific resource agent acts as a dummy
++resource agent for nfs-ganesha.
++</longdesc>
++
++<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
++
++<parameters>
++<parameter name="grace_active">
++<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc>
++<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc>
++<content type="string" default="grace-active" />
++</parameter>
++</parameters>
++
++<actions>
++<action name="start" timeout="40s" />
++<action name="stop" timeout="40s" />
++<action name="status" timeout="20s" interval="60s" />
++<action name="monitor" depth="0" timeout="10s" interval="5s" />
++<action name="notify" timeout="10s" />
++<action name="meta-data" timeout="20s" />
++</actions>
++</resource-agent>
++END
++
++return ${OCF_SUCCESS}
++}
++
++ganesha_grace_usage() {
++ echo "ganesha.nfsd USAGE"
++}
++
++# Make sure meta-data and usage always succeed
++case $__OCF_ACTION in
++ meta-data) ganesha_meta_data
++ exit ${OCF_SUCCESS}
++ ;;
++ usage|help) ganesha_grace_usage
++ exit ${OCF_SUCCESS}
++ ;;
++ *)
++ ;;
++esac
++
++ganesha_grace_start()
++{
++ local rc=${OCF_ERR_GENERIC}
++ local host=$(hostname -s)
++
++ ocf_log debug "ganesha_grace_start()"
++ # give ganesha_mon RA a chance to set the crm_attr first
++ # I mislike the sleep, but it's not clear that looping
++ # with a small sleep is necessarily better
++ # start has a 40sec timeout, so a 5sec sleep here is okay
++ sleep 5
++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
++ if [ $? -ne 0 ]; then
++ host=$(hostname)
++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null )
++ if [ $? -ne 0 ]; then
++ ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
++ fi
++ fi
++
++ # Three possibilities:
++ # 1. There is no attribute at all and attr_updater returns
++ # a zero length string. This happens when
++ # ganesha_mon::monitor hasn't run at least once to set
++ # the attribute. The assumption here is that the system
++ # is coming up. We pretend, for now, that the node is
++ # healthy, to allow the system to continue coming up.
++ # It will cure itself in a few seconds
++ # 2. There is an attribute, and it has the value "1"; this
++ # node is healthy.
++ # 3. There is an attribute, but it has no value or the value
++ # "0"; this node is not healthy.
++
++ # case 1
++ if [[ -z "${attr}" ]]; then
++ return ${OCF_SUCCESS}
++ fi
++
++ # case 2
++ if [[ "${attr}" = *"value=1" ]]; then
++ return ${OCF_SUCCESS}
++ fi
++
++ # case 3
++ return ${OCF_NOT_RUNNING}
++}
++
++ganesha_grace_stop()
++{
++
++ ocf_log debug "ganesha_grace_stop()"
++ return ${OCF_SUCCESS}
++}
++
++ganesha_grace_notify()
++{
++ # since this is a clone RA we should only ever see pre-start
++ # or post-stop
++ mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
++ case "${mode}" in
++ pre-start | post-stop)
++ dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname}
++ if [ $? -ne 0 ]; then
++ ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed"
++ fi
++ ;;
++ esac
++
++ return ${OCF_SUCCESS}
++}
++
++ganesha_grace_monitor()
++{
++ local host=$(hostname -s)
++
++ ocf_log debug "monitor"
++
++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
++ if [ $? -ne 0 ]; then
++ host=$(hostname)
++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
++ if [ $? -ne 0 ]; then
++ ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
++ fi
++ fi
++
++ # if there is no attribute (yet), it may be because this RA
++ # started before ganesha_mon (nfs-mon) has had a chance to
++ # create it, in which case we'll pretend everything is okay
++ # this time around
++ if [[ -z "${attr}" ]]; then
++ return ${OCF_SUCCESS}
++ fi
++
++ if [[ "${attr}" = *"value=1" ]]; then
++ return ${OCF_SUCCESS}
++ fi
++
++ return ${OCF_NOT_RUNNING}
++}
++
++ganesha_grace_validate()
++{
++ return ${OCF_SUCCESS}
++}
++
++ganesha_grace_validate
++
++# Translate each action into the appropriate function call
++case $__OCF_ACTION in
++start) ganesha_grace_start
++ ;;
++stop) ganesha_grace_stop
++ ;;
++status|monitor) ganesha_grace_monitor
++ ;;
++notify) ganesha_grace_notify
++ ;;
++*) ganesha_grace_usage
++ exit ${OCF_ERR_UNIMPLEMENTED}
++ ;;
++esac
++
++rc=$?
++
++# The resource agent may optionally log a debug message
++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
++exit $rc
+diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
+new file mode 100644
+index 0000000..2b4a9d6
+--- /dev/null
++++ b/extras/ganesha/ocf/ganesha_mon
+@@ -0,0 +1,234 @@
++#!/bin/bash
++#
++# Copyright (c) 2014 Anand Subramanian anands@redhat.com
++# Copyright (c) 2015 Red Hat Inc.
++# All Rights Reserved.
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of version 2 of the GNU General Public License as
++# published by the Free Software Foundation.
++#
++# This program is distributed in the hope that it would be useful, but
++# WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
++#
++# Further, this software is distributed without any warranty that it is
++# free of the rightful claim of any third person regarding infringement
++# or the like. Any license provided herein, whether implied or
++# otherwise, applies only to this software file. Patent licenses, if
++# any, provided herein do not apply to combinations of this program with
++# other software, or any other product whatsoever.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write the Free Software Foundation,
++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
++#
++#
++
++# Initialization:
++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
++
++if [ -n "${OCF_DEBUG_LIBRARY}" ]; then
++ . ${OCF_DEBUG_LIBRARY}
++else
++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
++fi
++
++# Defaults
++OCF_RESKEY_ganesha_active_default="ganesha-active"
++OCF_RESKEY_grace_active_default="grace-active"
++OCF_RESKEY_grace_delay_default="5"
++
++: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}}
++: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}}
++: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}}
++
++ganesha_meta_data() {
++ cat <<END
++<?xml version="1.0"?>
++<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
++<resource-agent name="ganesha_mon">
++<version>1.0</version>
++
++<longdesc lang="en">
++This Linux-specific resource agent acts as a dummy
++resource agent for nfs-ganesha.
++</longdesc>
++
++<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
++
++<parameters>
++<parameter name="ganesha_active">
++<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc>
++<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc>
++<content type="string" default="ganesha-active" />
++</parameter>
++<parameter name="grace_active">
++<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc>
++<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc>
++<content type="string" default="grace-active" />
++</parameter>
++<parameter name="grace_delay">
++<longdesc lang="en">
++NFS-Ganesha grace delay.
++When changing this, adjust the ganesha_grace RA's monitor interval to match.
++</longdesc>
++<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc>
++<content type="string" default="5" />
++</parameter>
++</parameters>
++
++<actions>
++<action name="start" timeout="40s" />
++<action name="stop" timeout="40s" />
++<action name="status" timeout="20s" interval="60s" />
++<action name="monitor" depth="0" timeout="10s" interval="10s" />
++<action name="meta-data" timeout="20s" />
++</actions>
++</resource-agent>
++END
++
++return ${OCF_SUCCESS}
++}
++
++ganesha_mon_usage() {
++ echo "ganesha.nfsd USAGE"
++}
++
++# Make sure meta-data and usage always succeed
++case ${__OCF_ACTION} in
++ meta-data) ganesha_meta_data
++ exit ${OCF_SUCCESS}
++ ;;
++ usage|help) ganesha_mon_usage
++ exit ${OCF_SUCCESS}
++ ;;
++ *)
++ ;;
++esac
++
++ganesha_mon_start()
++{
++ ocf_log debug "ganesha_mon_start"
++ ganesha_mon_monitor
++ return $OCF_SUCCESS
++}
++
++ganesha_mon_stop()
++{
++ ocf_log debug "ganesha_mon_stop"
++ return $OCF_SUCCESS
++}
++
++ganesha_mon_monitor()
++{
++ local host=$(hostname -s)
++ local pid_file="/var/run/ganesha.pid"
++ local rhel6_pid_file="/var/run/ganesha.nfsd.pid"
++ local proc_pid="/proc/"
++
++ # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid
++ # RHEL7 systemd does not. Would be nice if all distros used the
++ # same pid file.
++ if [ -e ${rhel6_pid_file} ]; then
++ pid_file=${rhel6_pid_file}
++ fi
++ if [ -e ${pid_file} ]; then
++ proc_pid="${proc_pid}$(cat ${pid_file})"
++ fi
++
++ if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then
++
++ attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1
++ if [ $? -ne 0 ]; then
++ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed"
++ fi
++
++ # ganesha_grace (nfs-grace) RA follows grace-active attr
++ # w/ constraint location
++ attrd_updater -n ${OCF_RESKEY_grace_active} -v 1
++ if [ $? -ne 0 ]; then
++ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed"
++ fi
++
++ # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace)
++ # track grace-active crm_attr (attr != crm_attr)
++ # we can't just use the attr as there's no way to query
++ # its value in RHEL6 pacemaker
++
++ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
++ if [ $? -ne 0 ]; then
++ host=$(hostname)
++ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
++ if [ $? -ne 0 ]; then
++ ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed"
++ fi
++ fi
++
++ return ${OCF_SUCCESS}
++ fi
++
++ # VIP fail-over is triggered by clearing the
++ # ganesha-active node attribute on this node.
++ #
++ # Meanwhile the ganesha_grace notify() runs when its
++ # nfs-grace resource is disabled on a node; which
++ # is triggered by clearing the grace-active attribute
++ # on this node.
++ #
++ # We need to allow time for it to run and put
++ # the remaining ganesha.nfsds into grace before
++ # initiating the VIP fail-over.
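++ #
++ # Failure sequence sketch (matching the steps below): delete
++ # grace-active (peers enter NFS GRACE), sleep grace_delay seconds,
++ # then delete ganesha-active so the location constraint moves the VIP.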
++
++ attrd_updater -D -n ${OCF_RESKEY_grace_active}
++ if [ $? -ne 0 ]; then
++ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed"
++ fi
++
++ host=$(hostname -s)
++ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
++ if [ $? -ne 0 ]; then
++ host=$(hostname)
++ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
++ if [ $? -ne 0 ]; then
++ ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed"
++ fi
++ fi
++
++ sleep ${OCF_RESKEY_grace_delay}
++
++ attrd_updater -D -n ${OCF_RESKEY_ganesha_active}
++ if [ $? -ne 0 ]; then
++ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed"
++ fi
++
++ return ${OCF_SUCCESS}
++}
++
++ganesha_mon_validate()
++{
++ return ${OCF_SUCCESS}
++}
++
++ganesha_mon_validate
++
++# Translate each action into the appropriate function call
++case ${__OCF_ACTION} in
++start) ganesha_mon_start
++ ;;
++stop) ganesha_mon_stop
++ ;;
++status|monitor) ganesha_mon_monitor
++ ;;
++*) ganesha_mon_usage
++ exit ${OCF_ERR_UNIMPLEMENTED}
++ ;;
++esac
++
++rc=$?
++
++# The resource agent may optionally log a debug message
++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
++exit $rc
+diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd
+new file mode 100644
+index 0000000..93fc8be
+--- /dev/null
++++ b/extras/ganesha/ocf/ganesha_nfsd
+@@ -0,0 +1,167 @@
++#!/bin/bash
++#
++# Copyright (c) 2014 Anand Subramanian anands@redhat.com
++# Copyright (c) 2015 Red Hat Inc.
++# All Rights Reserved.
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of version 2 of the GNU General Public License as
++# published by the Free Software Foundation.
++#
++# This program is distributed in the hope that it would be useful, but
++# WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
++#
++# Further, this software is distributed without any warranty that it is
++# free of the rightful claim of any third person regarding infringement
++# or the like. Any license provided herein, whether implied or
++# otherwise, applies only to this software file. Patent licenses, if
++# any, provided herein do not apply to combinations of this program with
++# other software, or any other product whatsoever.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write the Free Software Foundation,
++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
++#
++#
++
++# Initialization:
++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
++
++if [ -n "${OCF_DEBUG_LIBRARY}" ]; then
++ . ${OCF_DEBUG_LIBRARY}
++else
++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
++fi
++
++OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage"
++: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}}
++
++ganesha_meta_data() {
++ cat <<END
++<?xml version="1.0"?>
++<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
++<resource-agent name="ganesha_nfsd">
++<version>1.0</version>
++
++<longdesc lang="en">
++This Linux-specific resource agent acts as a dummy
++resource agent for nfs-ganesha.
++</longdesc>
++
++<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
++
++<parameters>
++<parameter name="ha_vol_mnt">
++<longdesc lang="en">HA State Volume Mount Point</longdesc>
++<shortdesc lang="en">HA_State Volume Mount Point</shortdesc>
++<content type="string" default="" />
++</parameter>
++</parameters>
++
++<actions>
++<action name="start" timeout="5s" />
++<action name="stop" timeout="5s" />
++<action name="status" depth="0" timeout="5s" interval="0" />
++<action name="monitor" depth="0" timeout="5s" interval="0" />
++<action name="meta-data" timeout="20s" />
++</actions>
++</resource-agent>
++END
++
++return ${OCF_SUCCESS}
++}
++
++ganesha_nfsd_usage() {
++ echo "ganesha.nfsd USAGE"
++}
++
++# Make sure meta-data and usage always succeed
++case $__OCF_ACTION in
++ meta-data) ganesha_meta_data
++ exit ${OCF_SUCCESS}
++ ;;
++ usage|help) ganesha_nfsd_usage
++ exit ${OCF_SUCCESS}
++ ;;
++ *)
++ ;;
++esac
++
++ganesha_nfsd_start()
++{
++ local long_host=$(hostname)
++
++ if [[ -d /var/lib/nfs ]]; then
++ mv /var/lib/nfs /var/lib/nfs.backup
++ if [ $? -ne 0 ]; then
++ ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed"
++ fi
++ ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs
++ if [ $? -ne 0 ]; then
++ ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed"
++ fi
++ fi
++
++ return ${OCF_SUCCESS}
++}
++
++ganesha_nfsd_stop()
++{
++
++ if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then
++ rm -f /var/lib/nfs
++ if [ $? -ne 0 ]; then
++ ocf_log notice "rm -f /var/lib/nfs failed"
++ fi
++ mv /var/lib/nfs.backup /var/lib/nfs
++ if [ $? -ne 0 ]; then
++ ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed"
++ fi
++ fi
++
++ return ${OCF_SUCCESS}
++}
++
++ganesha_nfsd_monitor()
++{
++ # pacemaker checks to see if the RA is already running before starting
++ # it. If we return success, it is presumed to be running already and
++ # does not need to be started, i.e. pacemaker will not invoke the
++ # start action. Return something other than success to make pacemaker
++ # invoke the start action.
++ if [[ -L /var/lib/nfs ]]; then
++ return ${OCF_SUCCESS}
++ fi
++ return ${OCF_NOT_RUNNING}
++}
++
++ganesha_nfsd_validate()
++{
++ return ${OCF_SUCCESS}
++}
++
++ganesha_nfsd_validate
++
++# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION"
++
++# Translate each action into the appropriate function call
++case $__OCF_ACTION in
++start) ganesha_nfsd_start
++ ;;
++stop) ganesha_nfsd_stop
++ ;;
++status|monitor) ganesha_nfsd_monitor
++ ;;
++*) ganesha_nfsd_usage
++ exit ${OCF_ERR_UNIMPLEMENTED}
++ ;;
++esac
++
++rc=$?
++
++# The resource agent may optionally log a debug message
++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
++exit $rc
+diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am
+index 00a2c45..7e345fd 100644
+--- a/extras/ganesha/scripts/Makefile.am
++++ b/extras/ganesha/scripts/Makefile.am
+@@ -1,4 +1,6 @@
+-EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh
++EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \
++ ganesha-ha.sh
+
+ scriptsdir = $(libexecdir)/ganesha
+-scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py
++scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \
++ ganesha-ha.sh
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+new file mode 100644
+index 0000000..6b011be
+--- /dev/null
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -0,0 +1,1125 @@
++#!/bin/bash
++
++# Copyright 2015-2016 Red Hat Inc. All Rights Reserved
++#
++# Pacemaker+Corosync High Availability for NFS-Ganesha
++#
++# setup, teardown, add, delete, refresh-config, and status
++#
++# Each participating node in the cluster is assigned a virtual IP (VIP)
++# which fails over to another node when its associated ganesha.nfsd dies
++# for any reason. After the VIP is moved to another node, all the
++# ganesha.nfsds are sent a signal using DBUS to put them into NFS GRACE.
++#
++# There are six resource agent types used: ganesha_mon, ganesha_grace,
++# ganesha_nfsd, IPaddr, portblock, and Dummy. ganesha_mon is used to monitor the
++# ganesha.nfsd. ganesha_grace is used to send the DBUS signal to put
++# the remaining ganesha.nfsds into grace. ganesha_nfsd is used to start
++# and stop the ganesha.nfsd during setup and teardown. IPaddr manages
++# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to
++# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to
++# the new host.
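++#
++# Example invocations (mirroring usage() below):
++# ganesha-ha.sh --status /var/run/gluster/shared_storage/nfs-ganesha
++# ganesha-ha.sh --refresh-config /var/run/gluster/shared_storage/nfs-ganesha <volume>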
++
++HA_NUM_SERVERS=0
++HA_SERVERS=""
++HA_VOL_NAME="gluster_shared_storage"
++HA_VOL_MNT="/var/run/gluster/shared_storage"
++HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
++SERVICE_MAN="DISTRO_NOT_FOUND"
++
++RHEL6_PCS_CNAME_OPTION="--name"
++SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
++
++# UNBLOCK RA uses shared_storage which may become unavailable
++# during any node's reboot. Hence increase the timeout value.
++PORTBLOCK_UNBLOCK_TIMEOUT="60s"
++
++# Try loading the config from any of the distro
++# specific configuration locations
++if [ -f /etc/sysconfig/ganesha ]
++ then
++ . /etc/sysconfig/ganesha
++fi
++if [ -f /etc/conf.d/ganesha ]
++ then
++ . /etc/conf.d/ganesha
++fi
++if [ -f /etc/default/ganesha ]
++ then
++ . /etc/default/ganesha
++fi
++
++GANESHA_CONF=
++
++function find_rhel7_conf
++{
++ while [[ $# > 0 ]]
++ do
++ key="$1"
++ case $key in
++ -f)
++ CONFFILE="$2"
++ break;
++ ;;
++ *)
++ ;;
++ esac
++ shift
++ done
++}
++
++if [ -z $CONFFILE ]
++ then
++ find_rhel7_conf $OPTIONS
++
++fi
++
++GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf}
++
++usage() {
++
++ echo "Usage : add|delete|refresh-config|status"
++ echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \
++<NODE-HOSTNAME> <NODE-VIP>"
++ echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \
++<NODE-HOSTNAME>"
++ echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \
++<volume>"
++ echo "Status : ganesha-ha.sh --status <HA_CONFDIR>"
++}
++
++determine_service_manager () {
++
++ if [ -e "/usr/bin/systemctl" ];
++ then
++ SERVICE_MAN="/usr/bin/systemctl"
++ elif [ -e "/sbin/invoke-rc.d" ];
++ then
++ SERVICE_MAN="/sbin/invoke-rc.d"
++ elif [ -e "/sbin/service" ];
++ then
++ SERVICE_MAN="/sbin/service"
++ fi
++ if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ]
++ then
++ echo "Service manager not recognized, exiting"
++ exit 1
++ fi
++}
++
++manage_service ()
++{
++ local action=${1}
++ local new_node=${2}
++ local option=
++
++ if [ "$action" == "start" ]; then
++ option="yes"
++ else
++ option="no"
++ fi
++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
++${SECRET_PEM} root@${new_node} "/usr/libexec/ganesha/ganesha-ha.sh --setup-ganesha-conf-files $HA_CONFDIR $option"
++
++ if [ "$SERVICE_MAN" == "/usr/bin/systemctl" ]
++ then
++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
++${SECRET_PEM} root@${new_node} "$SERVICE_MAN ${action} nfs-ganesha"
++ else
++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
++${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}"
++ fi
++}
++
++
++check_cluster_exists()
++{
++ local name=${1}
++ local cluster_name=""
++
++ if [ -e /var/run/corosync.pid ]; then
++ cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3)
++ if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then
++ logger "$name already exists, exiting"
++ exit 0
++ fi
++ fi
++}
++
++
++determine_servers()
++{
++ local cmd=${1}
++ local num_servers=0
++ local tmp_ifs=${IFS}
++ local ha_servers=""
++
++ if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then
++ ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
++ IFS=$' '
++ for server in ${ha_servers} ; do
++ num_servers=$(expr ${num_servers} + 1)
++ done
++ IFS=${tmp_ifs}
++ HA_NUM_SERVERS=${num_servers}
++ HA_SERVERS="${ha_servers}"
++ else
++ IFS=$','
++ for server in ${HA_CLUSTER_NODES} ; do
++ num_servers=$(expr ${num_servers} + 1)
++ done
++ IFS=${tmp_ifs}
++ HA_NUM_SERVERS=${num_servers}
++ HA_SERVERS="${HA_CLUSTER_NODES//,/ }"
++ fi
++}
++
++
++setup_cluster()
++{
++ local name=${1}
++ local num_servers=${2}
++ local servers=${3}
++ local unclean=""
++ local quorum_policy="stop"
++
++ logger "setting up cluster ${name} with the following ${servers}"
++
++ pcs cluster auth ${servers}
++ # pcs cluster setup --name ${name} ${servers}
++ pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers}
++ if [ $? -ne 0 ]; then
++ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
++ exit 1;
++ fi
++ pcs cluster start --all
++ if [ $? -ne 0 ]; then
++ logger "pcs cluster start failed"
++ exit 1;
++ fi
++
++ sleep 1
++ # wait for the cluster to elect a DC before querying or writing
++ # to the CIB. BZ 1334092
++ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
++ while [ $? -ne 0 ]; do
++ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
++ done
++
++ unclean=$(pcs status | grep -u "UNCLEAN")
++ while [[ "${unclean}X" = "UNCLEANX" ]]; do
++ sleep 1
++ unclean=$(pcs status | grep -u "UNCLEAN")
++ done
++ sleep 1
++
++ if [ ${num_servers} -lt 3 ]; then
++ quorum_policy="ignore"
++ fi
++ pcs property set no-quorum-policy=${quorum_policy}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
++ fi
++
++ pcs property set stonith-enabled=false
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs property set stonith-enabled=false failed"
++ fi
++}
++
++
++setup_finalize_ha()
++{
++ local cibfile=${1}
++ local stopped=""
++
++ stopped=$(pcs status | grep -u "Stopped")
++ while [[ "${stopped}X" = "StoppedX" ]]; do
++ sleep 1
++ stopped=$(pcs status | grep -u "Stopped")
++ done
++}
++
++
++refresh_config ()
++{
++ local short_host=$(hostname -s)
++ local VOL=${1}
++ local HA_CONFDIR=${2}
++ local short_host=$(hostname -s)
++
++ local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\
++ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
++
++
++ if [ -e ${SECRET_PEM} ]; then
++ while [[ ${3} ]]; do
++ current_host=`echo ${3} | cut -d "." -f 1`
++ if [ ${short_host} != ${current_host} ]; then
++ output=$(ssh -oPasswordAuthentication=no \
++-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \
++"dbus-send --print-reply --system --dest=org.ganesha.nfsd \
++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
++string:$HA_CONFDIR/exports/export.$VOL.conf \
++string:\"EXPORT(Export_Id=$export_id)\" 2>&1")
++ ret=$?
++ logger <<< "${output}"
++ if [ ${ret} -ne 0 ]; then
++ echo "Error: refresh-config failed on ${current_host}."
++ exit 1
++ else
++ echo "Refresh-config completed on ${current_host}."
++ fi
++
++ fi
++ shift
++ done
++ else
++ echo "Error: refresh-config failed. Passwordless ssh is not enabled."
++ exit 1
++ fi
++
++ # Run the same command on the localhost,
++ output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \
++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
++string:$HA_CONFDIR/exports/export.$VOL.conf \
++string:"EXPORT(Export_Id=$export_id)" 2>&1)
++ ret=$?
++ logger <<< "${output}"
++ if [ ${ret} -ne 0 ] ; then
++ echo "Error: refresh-config failed on localhost."
++ exit 1
++ else
++ echo "Success: refresh-config completed."
++ fi
++}
++
++
++teardown_cluster()
++{
++ local name=${1}
++
++ for server in ${HA_SERVERS} ; do
++ if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then
++ logger "info: ${server} is not in config, removing"
++
++ pcs cluster stop ${server} --force
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster stop ${server} failed"
++ fi
++
++ pcs cluster node remove ${server}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster node remove ${server} failed"
++ fi
++ fi
++ done
++
++ # BZ 1193433 - pcs doesn't reload cluster.conf after modification;
++ # after teardown completes, a subsequent setup will appear to have
++ # 'remembered' the deleted node. You can work around this by
++ # issuing another `pcs cluster node remove $node`,
++ # `crm_node -f -R $server`, or
++ # `cibadmin --delete --xml-text '<node id="$server"
++ # uname="$server"/>'
++
++ pcs cluster stop --all
++ if [ $? -ne 0 ]; then
++ logger "warning pcs cluster stop --all failed"
++ fi
++
++ pcs cluster destroy
++ if [ $? -ne 0 ]; then
++ logger "error pcs cluster destroy failed"
++ exit 1
++ fi
++}
++
++
++cleanup_ganesha_config ()
++{
++ rm -f /etc/corosync/corosync.conf
++ rm -rf /etc/cluster/cluster.conf*
++ rm -rf /var/lib/pacemaker/cib/*
++}
++
++do_create_virt_ip_constraints()
++{
++ local cibfile=${1}; shift
++ local primary=${1}; shift
++ local weight="1000"
++
++ # first a constraint location rule that says the VIP must be where
++ # there's a ganesha.nfsd running
++ pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed"
++ fi
++
++ # then a set of constraint location prefers to set the preferred order
++ # for where a VIP should move
++ while [[ ${1} ]]; do
++ pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed"
++ fi
++ weight=$(expr ${weight} + 1000)
++ shift
++ done
++ # and finally set the highest preference for the VIP to its home node.
++ # The default weight when created is/was 100.
++ # On Fedora the setting appears to be additive, so to get the
++ # desired value we would adjust the weight:
++ # weight=$(expr ${weight} - 100)
++ pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed"
++ fi
++}
++
++
++wrap_create_virt_ip_constraints()
++{
++ local cibfile=${1}; shift
++ local primary=${1}; shift
++ local head=""
++ local tail=""
++
++ # build a list of peers, e.g. for a four node cluster, for node1,
++ # the result is "node2 node3 node4"; for node2, "node3 node4 node1"
++ # and so on.
++ while [[ ${1} ]]; do
++ if [ "${1}" = "${primary}" ]; then
++ shift
++ while [[ ${1} ]]; do
++ tail=${tail}" "${1}
++ shift
++ done
++ else
++ head=${head}" "${1}
++ fi
++ shift
++ done
++ do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head}
++}
++
++
++create_virt_ip_constraints()
++{
++ local cibfile=${1}; shift
++
++ while [[ ${1} ]]; do
++ wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS}
++ shift
++ done
++}
++
++
++setup_create_resources()
++{
++ local cibfile=$(mktemp -u)
++
++ # fixup /var/lib/nfs
++ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone"
++ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
++ fi
++
++ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed"
++ fi
++
++ # see the comment in the /usr/lib/ocf/resource.d/heartbeat/ganesha_grace
++ # start method. Allow time for ganesha_mon to start and set the
++ # ganesha-active crm_attribute
++ sleep 5
++
++ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed"
++ fi
++
++ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1"
++ fi
++
++ pcs cluster cib ${cibfile}
++
++ while [[ ${1} ]]; do
++
++ # this is variable indirection
++ # from an nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"'
++ # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...)
++ # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1')
++ # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"')
++ # after the `eval ${clean_nvs}` there is a variable VIP_host_1
++ # with the value '10_7_6_5', and the following \$$ magic to
++ # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us
++ # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s
++ # to give us ipaddr="10.7.6.5". whew!
++ name="VIP_${1}"
++ clean_name=${name//[-.]/_}
++ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
++ clean_nvs=${nvs//[-.]/_}
++ eval ${clean_nvs}
++ eval tmp_ipaddr=\$${clean_name}
++ ipaddr=${tmp_ipaddr//_/.}
++
++ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
++ portno=2049 action=block ip=${ipaddr} --group ${1}-group
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${1}-nfs_block failed"
++ fi
++ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
++ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
++ cidr_netmask=32 op monitor interval=15s failed"
++ fi
++
++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
++ fi
++
++ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
++ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
++ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
++ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${1}-nfs_unblock failed"
++ fi
++
++
++ shift
++ done
++
++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
++
++ pcs cluster cib-push ${cibfile}
++ if [ $? -ne 0 ]; then
++ logger "warning pcs cluster cib-push ${cibfile} failed"
++ fi
++ rm -f ${cibfile}
++}
++
++
++teardown_resources()
++{
++ # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2)
++
++ # restore /var/lib/nfs
++ logger "notice: pcs resource delete nfs_setup-clone"
++ pcs resource delete nfs_setup-clone
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs resource delete nfs_setup-clone failed"
++ fi
++
++ # delete -clone resource agents
++ # in particular delete the ganesha monitor so we don't try to
++ # trigger anything when we shut down ganesha next.
++ pcs resource delete nfs-mon-clone
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs resource delete nfs-mon-clone failed"
++ fi
++
++ pcs resource delete nfs-grace-clone
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs resource delete nfs-grace-clone failed"
++ fi
++
++ while [[ ${1} ]]; do
++ pcs resource delete ${1}-group
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs resource delete ${1}-group failed"
++ fi
++ shift
++ done
++
++}
++
++
++recreate_resources()
++{
++ local cibfile=${1}; shift
++
++ while [[ ${1} ]]; do
++ # this is variable indirection
++ # see the comment on the same pattern a few lines up
++ name="VIP_${1}"
++ clean_name=${name//[-.]/_}
++ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
++ clean_nvs=${nvs//[-.]/_}
++ eval ${clean_nvs}
++ eval tmp_ipaddr=\$${clean_name}
++ ipaddr=${tmp_ipaddr//_/.}
++
++ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
++ portno=2049 action=block ip=${ipaddr} --group ${1}-group
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${1}-nfs_block failed"
++ fi
++ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
++ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
++ cidr_netmask=32 op monitor interval=15s failed"
++ fi
++
++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
++ fi
++
++ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
++ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
++ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
++ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${1}-nfs_unblock failed"
++ fi
++
++ shift
++ done
++}
++
++
++addnode_recreate_resources()
++{
++ local cibfile=${1}; shift
++ local add_node=${1}; shift
++ local add_vip=${1}; shift
++
++ recreate_resources ${cibfile} ${HA_SERVERS}
++
++ pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \
++ protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${add_node}-nfs_block failed"
++ fi
++ pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \
++ --after ${add_node}-nfs_block
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
++ fi
++
++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed"
++ fi
++ pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \
++ protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \
++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \
++ ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \
++ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \
++ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
++ if [ $? -ne 0 ]; then
++ logger "warning pcs resource create ${add_node}-nfs_unblock failed"
++ fi
++}
++
++
++clear_resources()
++{
++ local cibfile=${1}; shift
++
++ while [[ ${1} ]]; do
++ pcs -f ${cibfile} resource delete ${1}-group
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs -f ${cibfile} resource delete ${1}-group"
++ fi
++
++ shift
++ done
++}
++
++
++addnode_create_resources()
++{
++ local add_node=${1}; shift
++ local add_vip=${1}; shift
++ local cibfile=$(mktemp -u)
++
++ # start HA on the new node
++ pcs cluster start ${add_node}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster start ${add_node} failed"
++ fi
++
++ pcs cluster cib ${cibfile}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster cib ${cibfile} failed"
++ fi
++
++ # delete all the -cluster_ip-1 resources, clearing
++ # their constraints, then create them again so we can
++ # recompute their constraints
++ clear_resources ${cibfile} ${HA_SERVERS}
++ addnode_recreate_resources ${cibfile} ${add_node} ${add_vip}
++
++ HA_SERVERS="${HA_SERVERS} ${add_node}"
++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
++
++ pcs cluster cib-push ${cibfile}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster cib-push ${cibfile} failed"
++ fi
++ rm -f ${cibfile}
++}
++
++
++deletenode_delete_resources()
++{
++ local node=${1}; shift
++ local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//)
++ local cibfile=$(mktemp -u)
++
++ pcs cluster cib ${cibfile}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster cib ${cibfile} failed"
++ fi
++
++ # delete all the -cluster_ip-1 and -trigger_ip-1 resources,
++ # clearing their constraints, then create them again so we can
++ # recompute their constraints
++ clear_resources ${cibfile} ${HA_SERVERS}
++ recreate_resources ${cibfile} ${ha_servers}
++ HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /")
++
++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
++
++ pcs cluster cib-push ${cibfile}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster cib-push ${cibfile} failed"
++ fi
++ rm -f ${cibfile}
++
++}
++
++
++deletenode_update_haconfig()
++{
++ local name="VIP_${1}"
++ local clean_name=${name//[-.]/_}
++
++ ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/")
++ sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf
++}
++
++
++setup_state_volume()
++{
++ local mnt=${HA_VOL_MNT}
++ local longname=""
++ local shortname=""
++ local dname=""
++ local dirname=""
++
++ longname=$(hostname)
++ dname=${longname#$(hostname -s)}
++
++ while [[ ${1} ]]; do
++
++ if [[ ${1} == *${dname} ]]; then
++ dirname=${1}
++ else
++ dirname=${1}${dname}
++ fi
++
++ if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then
++ mkdir ${mnt}/nfs-ganesha/tickle_dir
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
++ fi
++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
++ fi
++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
++ fi
++ for server in ${HA_SERVERS} ; do
++ if [ ${server} != ${dirname} ]; then
++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
++ fi
++ done
++ shift
++ done
++
++}
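++
++# Per-node state tree created above (sketch):
++# ${HA_VOL_MNT}/nfs-ganesha/<node>/nfs/{state,ganesha/{v4recov,v4old},statd/{sm,sm.bak,state}}
++# plus symlinks into every other node's ganesha/ and statd/ directories.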
++
++
++addnode_state_volume()
++{
++ local newnode=${1}; shift
++ local mnt=${HA_VOL_MNT}
++ local longname=""
++ local dname=""
++ local dirname=""
++
++ longname=$(hostname)
++ dname=${longname#$(hostname -s)}
++
++ if [[ ${newnode} == *${dname} ]]; then
++ dirname=${newnode}
++ else
++ dirname=${newnode}${dname}
++ fi
++
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
++ fi
++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
++ fi
++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
++ fi
++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
++ fi
++
++ for server in ${HA_SERVERS} ; do
++
++ if [[ ${server} != ${dirname} ]]; then
++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
++
++ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
++ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
++ fi
++ done
++
++}
++
++
++delnode_state_volume()
++{
++ local delnode=${1}; shift
++ local mnt=${HA_VOL_MNT}
++ local longname=""
++ local dname=""
++ local dirname=""
++
++ longname=$(hostname)
++ dname=${longname#$(hostname -s)}
++
++ if [[ ${delnode} == *${dname} ]]; then
++ dirname=${delnode}
++ else
++ dirname=${delnode}${dname}
++ fi
++
++ rm -rf ${mnt}/nfs-ganesha/${dirname}
++
++ for server in ${HA_SERVERS} ; do
++ if [[ "${server}" != "${dirname}" ]]; then
++ rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
++ rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
++ fi
++ done
++}
++
++
++status()
++{
++ local scratch=$(mktemp)
++ local regex_str="^${1}-cluster_ip-1"
++ local healthy=0
++ local index=1
++ local nodes
++
++ # change tabs to spaces, strip leading spaces
++ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch}
++
++ nodes[0]=${1}; shift
++
++ # make a regex of the configured nodes
++ # and initialize the nodes array for later
++ while [[ ${1} ]]; do
++
++ regex_str="${regex_str}|^${1}-cluster_ip-1"
++ nodes[${index}]=${1}
++ ((index++))
++ shift
++ done
++
++ # print the nodes that are expected to be online
++ grep -E "^Online:" ${scratch}
++
++ echo
++
++ # print the VIPs and which node they are on
++ grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4
++
++ echo
++
++ # check if the VIP and port block/unblock RAs are on the expected nodes
++ for n in ${nodes[*]}; do
++
++ grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ result=$?
++ ((healthy+=${result}))
++ grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ result=$?
++ ((healthy+=${result}))
++ grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ result=$?
++ ((healthy+=${result}))
++ done
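++
++ # each grep above returns 0 on a hit, so healthy stays 0 only when
++ # every RA is Started on its own node; any miss leaves a non-zero sum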
++
++ grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch}
++ result=$?
++
++ if [ ${result} -eq 0 ]; then
++ echo "Cluster HA Status: BAD"
++ elif [ ${healthy} -eq 0 ]; then
++ echo "Cluster HA Status: HEALTHY"
++ else
++ echo "Cluster HA Status: FAILOVER"
++ fi
++
++ rm -f ${scratch}
++}
++
++create_ganesha_conf_file()
++{
++ if [ $1 == "yes" ];
++ then
++ if [ -e $GANESHA_CONF ];
++ then
++ rm -rf $GANESHA_CONF
++ fi
++ # The symlink /etc/ganesha/ganesha.conf needs to be
++ # created using the ganesha conf file kept in the
++ # shared storage. Every node will have only this
++ # link while the actual file is stored in the shared
++ # storage, so that editing ganesha.conf stays easy
++ # as well as consistent across all nodes.
++
++ ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF
++ else
++ # Restoring previous file
++ rm -rf $GANESHA_CONF
++ cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF
++ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF
++ fi
++}
++
++set_quorum_policy()
++{
++ local quorum_policy="stop"
++ local num_servers=${1}
++
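++ # Pacemaker quorum needs a majority of nodes; with fewer than three
++ # nodes losing one would otherwise stop all resources, hence "ignore"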
++ if [ ${num_servers} -lt 3 ]; then
++ quorum_policy="ignore"
++ fi
++ pcs property set no-quorum-policy=${quorum_policy}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
++ fi
++}
++
++main()
++{
++
++ local cmd=${1}; shift
++ if [[ ${cmd} == *help ]]; then
++ usage
++ exit 0
++ fi
++ HA_CONFDIR=${1%/}; shift
++ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
++ local node=""
++ local vip=""
++
++ # ignore any comment lines
++ cfgline=$(grep ^HA_NAME= ${ha_conf})
++ eval $(echo ${cfgline} | grep -F HA_NAME=)
++ cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf})
++ eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=)
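++ # e.g. a minimal (illustrative) ganesha-ha.conf:
++ #   HA_NAME="ganesha-ha-360"
++ #   HA_CLUSTER_NODES="node1.example.com,node2.example.com"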
++
++ case "${cmd}" in
++
++ setup | --setup)
++ logger "setting up ${HA_NAME}"
++
++ check_cluster_exists ${HA_NAME}
++
++ determine_servers "setup"
++
++ if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then
++
++ setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
++
++ setup_create_resources ${HA_SERVERS}
++
++ setup_finalize_ha
++
++ setup_state_volume ${HA_SERVERS}
++
++ else
++
++ logger "insufficient servers for HA, aborting"
++ fi
++ ;;
++
++ teardown | --teardown)
++ logger "tearing down ${HA_NAME}"
++
++ determine_servers "teardown"
++
++ teardown_resources ${HA_SERVERS}
++
++ teardown_cluster ${HA_NAME}
++
++ cleanup_ganesha_config ${HA_CONFDIR}
++ ;;
++
++ cleanup | --cleanup)
++ cleanup_ganesha_config ${HA_CONFDIR}
++ ;;
++
++ add | --add)
++ node=${1}; shift
++ vip=${1}; shift
++
++ logger "adding ${node} with ${vip} to ${HA_NAME}"
++
++ determine_service_manager
++
++ manage_service "start" ${node}
++
++ determine_servers "add"
++
++ pcs cluster node add ${node}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster node add ${node} failed"
++ fi
++
++ addnode_create_resources ${node} ${vip}
++ # Subsequent add-node recreates resources for all the nodes
++ # that already exist in the cluster. The nodes are picked up
++ # from the entries in the ganesha-ha.conf file. Adding the
++ # newly added node to the file so that the resources specific
++ # to this node are correctly recreated in the future.
++ clean_node=${node//[-.]/_}
++ echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf
++
++ NEW_NODES="$HA_CLUSTER_NODES,${node}"
++
++ sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \
++$HA_CONFDIR/ganesha-ha.conf
++
++ addnode_state_volume ${node}
++
++ # addnode_create_resources() already appended ${node} to
++ # HA_SERVERS, so only need to increment HA_NUM_SERVERS
++ # and set quorum policy
++ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1)
++ set_quorum_policy ${HA_NUM_SERVERS}
++ ;;
++
++ delete | --delete)
++ node=${1}; shift
++
++ logger "deleting ${node} from ${HA_NAME}"
++
++ determine_servers "delete"
++
++ deletenode_delete_resources ${node}
++
++ pcs cluster node remove ${node}
++ if [ $? -ne 0 ]; then
++ logger "warning: pcs cluster node remove ${node} failed"
++ fi
++
++ deletenode_update_haconfig ${node}
++
++ delnode_state_volume ${node}
++
++ determine_service_manager
++
++ manage_service "stop" ${node}
++
++ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1)
++ set_quorum_policy ${HA_NUM_SERVERS}
++ ;;
++
++ status | --status)
++ determine_servers "status"
++
++ status ${HA_SERVERS}
++ ;;
++
++ refresh-config | --refresh-config)
++ VOL=${1}
++
++ determine_servers "refresh-config"
++
++ refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS}
++ ;;
++
++ setup-ganesha-conf-files | --setup-ganesha-conf-files)
++
++ create_ganesha_conf_file ${1}
++ ;;
++
++ *)
++ # setup and teardown are not intended to be used by a
++ # casual user
++ usage
++ logger "Usage: ganesha-ha.sh add|delete|status"
++ ;;
++
++ esac
++}
++
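++# Illustrative invocations (conf dir and hostnames are hypothetical):
++#   ganesha-ha.sh --status /run/gluster/shared_storage/nfs-ganesha
++#   ganesha-ha.sh --add    /run/gluster/shared_storage/nfs-ganesha node3.example.com 10.0.0.13
++#   ganesha-ha.sh --delete /run/gluster/shared_storage/nfs-ganesha node3.example.com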
++main $*
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index dd7438c..d748ebc 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -460,7 +460,8 @@ Summary: NFS-Ganesha configuration
+ Group: Applications/File
+
+ Requires: %{name}-server%{?_isa} = %{version}-%{release}
+-Requires: nfs-ganesha-gluster, pcs, dbus
++Requires: nfs-ganesha-gluster >= 2.4.1
++Requires: pcs, dbus
+ %if ( 0%{?rhel} && 0%{?rhel} == 6 )
+ Requires: cman, pacemaker, corosync
+ %endif
+@@ -1138,6 +1139,7 @@ exit 0
+ #exclude ganesha related files
+ %exclude %{_sysconfdir}/ganesha/*
+ %exclude %{_libexecdir}/ganesha/*
++%exclude %{_prefix}/lib/ocf/resource.d/heartbeat/*
+ %endif
+
+ %files api
+@@ -1306,6 +1308,7 @@ exit 0
+ %files ganesha
+ %{_sysconfdir}/ganesha/*
+ %{_libexecdir}/ganesha/*
++%{_prefix}/lib/ocf/resource.d/heartbeat/*
+ %endif
+
+ %if ( 0%{!?_without_ocf:1} )
+@@ -1904,6 +1907,9 @@ fi
+ %endif
+
+ %changelog
++* Sat Apr 6 2019 Jiffin Tony Thottan <jthottan@redhat.com>
++- Adding ganesha ha resources back in gluster repository
++
+ * Fri Apr 5 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+ - Adding ganesha bits back in gluster repository
+
+--
+1.8.3.1
+
diff --git a/0056-common-ha-fixes-for-Debian-based-systems.patch b/0056-common-ha-fixes-for-Debian-based-systems.patch
new file mode 100644
index 0000000..fef23b1
--- /dev/null
+++ b/0056-common-ha-fixes-for-Debian-based-systems.patch
@@ -0,0 +1,229 @@
+From 2c1a83920b959a1ec170243d1eec71b1e2c074b0 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Fri, 7 Apr 2017 09:09:29 -0400
+Subject: [PATCH 056/124] common-ha: fixes for Debian-based systems
+
+1) Debian-based systems don't have /usr/libexec/... and there is
+a hard-coded invocation of /usr/libexec/ganesha/ganesha-ha.sh within
+ganesha-ha.sh itself.
+Fix: save $0 and use it instead for further invocations of self.
+
+2) default shell is /bin/dash (not /bin/bash). Various runner_run()
+invocations for ganesha used what amounts to
+ exec("sh /usr/$libexec/ganesha/ganesha-ha.sh ...");
+which executes the script using the default shell, but there are
+some bash-specific idioms that don't work if the shell is dash.
+Fix: change to exec("/usr/$libexec/ganesha/ganesha-ha.sh ..."); so that
+the shebang forces the use of /bin/bash
+
+3) Fedora and RHEL7 have merged /bin/ and /usr/bin, /bin is a symlink
+to /usr/bin. Debian-based systems are not merged, and systemd systems
+have /bin/systemctl. The logic to find .../bin/systemctl is backwards.
+If the logic looks for /usr/bin/systemctl it will not find it on
+Debian-based systems; if it looks for /bin/systemctl it will find it
+on Fedora and RHEL by virtue of the symlink. (RHEL6 and others will
+find their respective init regardless.)
+Fix: change the logic to look for /bin/systemctl instead.
+
+4) The logic for deciding to run systemctl (or not) is a bit silly.
+Fix: simply invoke the found method via the function pointer in the
+table.
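+
+A minimal sketch of the resulting lookup order (the three binaries are
+the ones listed in the patch below; variable name is illustrative):
+
+    for mgr in /bin/systemctl /sbin/invoke-rc.d /sbin/service; do
+        [ -x ${mgr} ] && SERVICE_MAN=${mgr} && break
+    done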
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I33681b296a73aebb078bda6ac0d3a1d3b9770a21
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://review.gluster.org/17013
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Niels de Vos <ndevos@redhat.com>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: jiffin tony Thottan <jthottan@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167141
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 21 +++++++++---------
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 32 +++++++++++-----------------
+ 2 files changed, 23 insertions(+), 30 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 6b011be..4b93f95 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -20,6 +20,7 @@
+ # ensure that the NFS GRACE DBUS signal is sent after the VIP moves to
+ # the new host.
+
++GANESHA_HA_SH=$(realpath $0)
+ HA_NUM_SERVERS=0
+ HA_SERVERS=""
+ HA_VOL_NAME="gluster_shared_storage"
+@@ -68,9 +69,9 @@ function find_rhel7_conf
+ done
+ }
+
+-if [ -z $CONFFILE ]
++if [ -z ${CONFFILE} ]
+ then
+- find_rhel7_conf $OPTIONS
++ find_rhel7_conf ${OPTIONS}
+
+ fi
+
+@@ -90,9 +91,9 @@ usage() {
+
+ determine_service_manager () {
+
+- if [ -e "/usr/bin/systemctl" ];
++ if [ -e "/bin/systemctl" ];
+ then
+- SERVICE_MAN="/usr/bin/systemctl"
++ SERVICE_MAN="/bin/systemctl"
+ elif [ -e "/sbin/invoke-rc.d" ];
+ then
+ SERVICE_MAN="/sbin/invoke-rc.d"
+@@ -100,7 +101,7 @@ determine_service_manager () {
+ then
+ SERVICE_MAN="/sbin/service"
+ fi
+- if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ]
++ if [ "${SERVICE_MAN}" == "DISTRO_NOT_FOUND" ]
+ then
+ echo "Service manager not recognized, exiting"
+ exit 1
+@@ -113,21 +114,21 @@ manage_service ()
+ local new_node=${2}
+ local option=
+
+- if [ "$action" == "start" ]; then
++ if [ "${action}" == "start" ]; then
+ option="yes"
+ else
+ option="no"
+ fi
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+-${SECRET_PEM} root@${new_node} "/usr/libexec/ganesha/ganesha-ha.sh --setup-ganesha-conf-files $HA_CONFDIR $option"
++${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option"
+
+- if [ "$SERVICE_MAN" == "/usr/bin/systemctl" ]
++ if [ "${SERVICE_MAN}" == "/bin/systemctl" ]
+ then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+-${SECRET_PEM} root@${new_node} "$SERVICE_MAN ${action} nfs-ganesha"
++${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha"
+ else
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+-${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}"
++${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}"
+ fi
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index fac16e6..81f794d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -122,12 +122,9 @@ sc_service_action(struct service_command *sc, char *command)
+ static int
+ manage_service(char *action)
+ {
+- struct stat stbuf = {
+- 0,
+- };
+ int i = 0;
+ int ret = 0;
+- struct service_command sc_list[] = {{.binary = "/usr/bin/systemctl",
++ struct service_command sc_list[] = {{.binary = "/bin/systemctl",
+ .service = "nfs-ganesha",
+ .action = sc_systemctl_action},
+ {.binary = "/sbin/invoke-rc.d",
+@@ -139,15 +136,10 @@ manage_service(char *action)
+ {.binary = NULL}};
+
+ while (sc_list[i].binary != NULL) {
+- ret = sys_stat(sc_list[i].binary, &stbuf);
++ ret = sys_access(sc_list[i].binary, X_OK);
+ if (ret == 0) {
+ gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary);
+- if (strcmp(sc_list[i].binary, "/usr/bin/systemctl") == 0)
+- ret = sc_systemctl_action(&sc_list[i], action);
+- else
+- ret = sc_service_action(&sc_list[i], action);
+-
+- return ret;
++ return sc_list[i].action(&sc_list[i], action);
+ }
+ i++;
+ }
+@@ -449,7 +441,7 @@ manage_export_config(char *volname, char *value, char **op_errstr)
+
+ GF_ASSERT(volname);
+ runinit(&runner);
+- runner_add_args(&runner, "sh", GANESHA_PREFIX "/create-export-ganesha.sh",
++ runner_add_args(&runner, GANESHA_PREFIX "/create-export-ganesha.sh",
+ CONFDIR, value, volname, NULL);
+ ret = runner_run(&runner);
+
+@@ -558,8 +550,8 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr)
+ }
+
+ if (check_host_list()) {
+- runner_add_args(&runner, "sh", GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
+- value, volname, NULL);
++ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, value,
++ volname, NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_asprintf(op_errstr,
+@@ -610,8 +602,8 @@ tear_down_cluster(gf_boolean_t run_teardown)
+
+ if (run_teardown) {
+ runinit(&runner);
+- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh",
+- "teardown", CONFDIR, NULL);
++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "teardown",
++ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ /* *
+ * Remove all the entries in CONFDIR expect ganesha.conf and
+@@ -685,7 +677,7 @@ setup_cluster(gf_boolean_t run_setup)
+
+ if (run_setup) {
+ runinit(&runner);
+- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "setup",
++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "setup",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ }
+@@ -714,7 +706,7 @@ teardown(gf_boolean_t run_teardown, char **op_errstr)
+ }
+
+ runinit(&runner);
+- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "cleanup",
++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "cleanup",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ if (ret)
+@@ -759,7 +751,7 @@ stop_ganesha(char **op_errstr)
+ };
+
+ runinit(&runner);
+- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh",
++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+ "--setup-ganesha-conf-files", CONFDIR, "no", NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+@@ -828,7 +820,7 @@ start_ganesha(char **op_errstr)
+
+ if (check_host_list()) {
+ runinit(&runner);
+- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh",
++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+ "--setup-ganesha-conf-files", CONFDIR, "yes", NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+--
+1.8.3.1
+
diff --git a/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch b/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch
new file mode 100644
index 0000000..996e2d0
--- /dev/null
+++ b/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch
@@ -0,0 +1,40 @@
+From 16d298584c70138fd639281bc900838d7938aec9 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 22 Feb 2017 14:37:04 +0530
+Subject: [PATCH 057/124] ganesha/scripts : Remove export entries from
+ ganesha.conf during cleanup
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I288f7c9ced23d258a7ce1242d8efe03a4bf6f746
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/16708
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167142
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 4b93f95..7ba80b5 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -342,6 +342,7 @@ cleanup_ganesha_config ()
+ rm -f /etc/corosync/corosync.conf
+ rm -rf /etc/cluster/cluster.conf*
+ rm -rf /var/lib/pacemaker/cib/*
++ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf
+ }
+
+ do_create_virt_ip_constraints()
+--
+1.8.3.1
+
diff --git a/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch b/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch
new file mode 100644
index 0000000..251e78d
--- /dev/null
+++ b/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch
@@ -0,0 +1,62 @@
+From 172f32058b1a7d2e42f373490853aef5dd72f02f Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 22 Feb 2017 14:20:41 +0530
+Subject: [PATCH 058/124] glusterd/ganesha : During volume delete remove the
+ ganesha export configuration file
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I0363e7f4d7cefd3f1b3c4f91e495767ec52e230e
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/16707
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167143
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 9 +++++++++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index 81f794d..6d72fda 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -445,7 +445,7 @@ manage_export_config(char *volname, char *value, char **op_errstr)
+ CONFDIR, value, volname, NULL);
+ ret = runner_run(&runner);
+
+- if (ret)
++ if (ret && !(*op_errstr))
+ gf_asprintf(op_errstr,
+ "Failed to create"
+ " NFS-Ganesha export config file.");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index a0417ca..81c668c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -2936,6 +2936,15 @@ glusterd_op_delete_volume(dict_t *dict)
+ goto out;
+ }
+
++ if (glusterd_check_ganesha_export(volinfo)) {
++ ret = manage_export_config(volname, "off", NULL);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
++ "Could not delete ganesha export conf file "
++ "for %s",
++ volname);
++ }
++
+ ret = glusterd_delete_volume(volinfo);
+ out:
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+--
+1.8.3.1
+
diff --git a/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch b/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch
new file mode 100644
index 0000000..e41a178
--- /dev/null
+++ b/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch
@@ -0,0 +1,132 @@
+From 8b501d9dfbeecb3ffdc3cd11b7c74aa929356ed6 Mon Sep 17 00:00:00 2001
+From: jiffin tony thottan <jthottan@redhat.com>
+Date: Mon, 7 Dec 2015 14:38:54 +0530
+Subject: [PATCH 059/124] glusterd/ganesha : throw proper error for "gluster
+ nfs-ganesha disable"
+
+For the first time, or if "gluster nfs-ganesha enable" fails, the global
+option "nfs-ganesha" won't be stored in glusterd's dictionary. In both
+cases "gluster nfs-ganesha disable" throws the following error:
+"nfs-ganesha: failed: nfs-ganesha is already (null)d."
+
+Also this patch provides the missing prompt for nfs-ganesha disable in 3.10
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I7c9fd6dabedc0cfb14c5190b3554bc63a6bc0340
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/16791
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167144
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 33 +++++++++++++++++-----------
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 22 +++++--------------
+ 2 files changed, 26 insertions(+), 29 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index cd9c445..f85958b 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -5908,20 +5908,27 @@ cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
+ goto out;
+ }
+
+- question =
+- "Enabling NFS-Ganesha requires Gluster-NFS to be"
+- " disabled across the trusted pool. Do you "
+- "still want to continue?\n";
+-
+ if (strcmp(value, "enable") == 0) {
+- answer = cli_cmd_get_confirmation(state, question);
+- if (GF_ANSWER_NO == answer) {
+- gf_log("cli", GF_LOG_ERROR,
+- "Global operation "
+- "cancelled, exiting");
+- ret = -1;
+- goto out;
+- }
++ question =
++ "Enabling NFS-Ganesha requires Gluster-NFS to be "
++ "disabled across the trusted pool. Do you "
++ "still want to continue?\n";
++ } else if (strcmp(value, "disable") == 0) {
++ question =
++ "Disabling NFS-Ganesha will tear down the entire "
++ "ganesha cluster across the trusted pool. Do you "
++ "still want to continue?\n";
++ } else {
++ ret = -1;
++ goto out;
++ }
++ answer = cli_cmd_get_confirmation(state, question);
++ if (GF_ANSWER_NO == answer) {
++ gf_log("cli", GF_LOG_ERROR,
++ "Global operation "
++ "cancelled, exiting");
++ ret = -1;
++ goto out;
+ }
+ cli_out("This will take a few minutes to complete. Please wait ..");
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index 6d72fda..1d17a33 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -252,8 +252,7 @@ int
+ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+ {
+ int ret = -1;
+- int value = -1;
+- gf_boolean_t option = _gf_false;
++ char *value = NULL;
+ char *str = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+@@ -264,8 +263,8 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+ priv = this->private;
+ GF_ASSERT(priv);
+
+- value = dict_get_str_boolean(dict, "value", _gf_false);
+- if (value == -1) {
++ ret = dict_get_str(dict, "value", &value);
++ if (value == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "value not present.");
+ goto out;
+@@ -273,22 +272,13 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+ /* This dict_get will fail if the user had never set the key before */
+ /*Ignoring the ret value and proceeding */
+ ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
+- if (ret == -1) {
+- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_GET_FAILED,
+- "Global dict not present.");
+- ret = 0;
+- goto out;
+- }
+- /* Validity of the value is already checked */
+- ret = gf_string2boolean(str, &option);
+- /* Check if the feature is already enabled, fail in that case */
+- if (value == option) {
+- gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", str);
++ if (str ? strcmp(value, str) == 0 : strcmp(value, "disable") == 0) {
++ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", value);
+ ret = -1;
+ goto out;
+ }
+
+- if (value) {
++ if (strcmp(value, "enable")) {
+ ret = start_ganesha(op_errstr);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL,
+--
+1.8.3.1
+
diff --git a/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch b/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch
new file mode 100644
index 0000000..39202ca
--- /dev/null
+++ b/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch
@@ -0,0 +1,61 @@
+From 93635333d17a03078a6bf72771445e1bd9ebdc15 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Thu, 2 Mar 2017 12:22:30 +0530
+Subject: [PATCH 060/124] ganesha/scripts : Stop ganesha process on all nodes
+ if cluster setup fails
+
+During the staging phase of the volume option "nfs-ganesha", the symlink
+"ganesha.conf" is created and the ganesha process is started. The cluster
+setup happens during the commit phase of that option. So if the cluster
+setup fails, the ganesha process is left running on all cluster nodes.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ib2cb85364b7ef5b702acb4826ffdf8e6f31a2acd
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/16823
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Tested-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Reviewed-by: soumya k <skoduri@redhat.com>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167145
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 7ba80b5..db3f921 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -175,6 +175,13 @@ determine_servers()
+ fi
+ }
+
++stop_ganesha_all()
++{
++ local serverlist=${1}
++ for node in ${serverlist} ; do
++ manage_service "stop" ${node}
++ done
++}
+
+ setup_cluster()
+ {
+@@ -191,6 +198,8 @@ setup_cluster()
+ pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers}
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
++ # setup failed; stop all ganesha processes and clean up symlinks in the cluster
++ stop_ganesha_all ${servers}
+ exit 1;
+ fi
+ pcs cluster start --all
+--
+1.8.3.1
+
diff --git a/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch b/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch
new file mode 100644
index 0000000..610c471
--- /dev/null
+++ b/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch
@@ -0,0 +1,106 @@
+From a766878e11a984680ed29f13aae713d464ec985e Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 19 Apr 2017 16:12:10 +0530
+Subject: [PATCH 061/124] ganesha : allow refresh-config and volume
+ export/unexport in failover state
+
+If ganesha is not running on one of the nodes in the HA cluster, then all
+dbus commands sent to that ganesha server will fail. This results in both
+refresh-config and volume export/unexport failures. This change handles
+those scenarios gracefully.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I3f1b7b7ca98e54c273c266e56357d8e24dd1b14b
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/17081
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167146
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 6 ++----
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 24 +++++++++++++++---------
+ xlators/mgmt/glusterd/src/glusterd-messages.h | 2 +-
+ 3 files changed, 18 insertions(+), 14 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index db3f921..f040ef6 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -275,8 +275,7 @@ string:\"EXPORT(Export_Id=$export_id)\" 2>&1")
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ]; then
+- echo "Error: refresh-config failed on ${current_host}."
+- exit 1
++ echo "Refresh-config failed on ${current_host}"
+ else
+ echo "Refresh-config completed on ${current_host}."
+ fi
+@@ -297,8 +296,7 @@ string:"EXPORT(Export_Id=$export_id)" 2>&1)
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ] ; then
+- echo "Error: refresh-config failed on localhost."
+- exit 1
++ echo "Refresh-config failed on localhost."
+ else
+ echo "Success: refresh-config completed."
+ fi
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index 1d17a33..ee8b588 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -540,15 +540,21 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr)
+ }
+
+ if (check_host_list()) {
+- runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, value,
+- volname, NULL);
+- ret = runner_run(&runner);
+- if (ret) {
+- gf_asprintf(op_errstr,
+- "Dynamic export"
+- " addition/deletion failed."
+- " Please see log file for details");
+- goto out;
++ /* Check whether ganesha is running on this node */
++ if (manage_service("status")) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GANESHA_NOT_RUNNING,
++ "Export failed, NFS-Ganesha is not running");
++ } else {
++ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
++ value, volname, NULL);
++ ret = runner_run(&runner);
++ if (ret) {
++ gf_asprintf(op_errstr,
++ "Dynamic export"
++ " addition/deletion failed."
++ " Please see log file for details");
++ goto out;
++ }
+ }
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
+index 9558480..c7b3ca8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
+@@ -298,6 +298,6 @@ GLFS_MSGID(
+ GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE,
+ GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL,
+ GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED,
+- GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL);
++ GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL);
+
+ #endif /* !_GLUSTERD_MESSAGES_H_ */
+--
+1.8.3.1
+
diff --git a/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch b/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch
new file mode 100644
index 0000000..71b4416
--- /dev/null
+++ b/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch
@@ -0,0 +1,59 @@
+From eb784a40a4f72e347945e0d66ac1a28389bb076c Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Fri, 28 Apr 2017 17:27:46 +0530
+Subject: [PATCH 062/124] glusterd/ganesha : perform removal of ganesha.conf on
+ nodes only in ganesha cluster
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I864ecd9391adf80fb1fa6ad2f9891a9ce77135e7
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/17138
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167147
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index ee8b588..b743216 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -746,17 +746,16 @@ stop_ganesha(char **op_errstr)
+ 0,
+ };
+
+- runinit(&runner);
+- runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+- "--setup-ganesha-conf-files", CONFDIR, "no", NULL);
+- ret = runner_run(&runner);
+- if (ret) {
+- gf_asprintf(op_errstr,
+- "removal of symlink ganesha.conf "
+- "in /etc/ganesha failed");
+- }
+-
+ if (check_host_list()) {
++ runinit(&runner);
++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
++ "--setup-ganesha-conf-files", CONFDIR, "no", NULL);
++ ret = runner_run(&runner);
++ if (ret) {
++ gf_asprintf(op_errstr,
++ "removal of symlink ganesha.conf "
++ "in /etc/ganesha failed");
++ }
+ ret = manage_service("stop");
+ if (ret)
+ gf_asprintf(op_errstr,
+--
+1.8.3.1
+
diff --git a/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch b/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch
new file mode 100644
index 0000000..7bbd920
--- /dev/null
+++ b/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch
@@ -0,0 +1,144 @@
+From e5450c639915f4c29ae2ad480e4128b5845254cc Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Tue, 25 Apr 2017 16:36:40 +0530
+Subject: [PATCH 063/124] glusterd/ganesha : update cache invalidation properly
+ during volume stop
+
+As per the current code, during volume stop for a ganesha-enabled volume
+the feature.cache-invalidation option was turned "off" in
+ganesha_manage_export(), and it was never turned back "on" when the volume
+was started. It is not desirable to modify volume options during stop;
+this patch fixes the above-mentioned issue.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Iea9c62e5cda4f54805b41ea6055cf0c3652a634c
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/17111
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167148
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 33 ++++++++++++++-----------
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 +--
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd.h | 3 ++-
+ 4 files changed, 23 insertions(+), 19 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index b743216..1c2ba7a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -445,7 +445,8 @@ manage_export_config(char *volname, char *value, char **op_errstr)
+
+ /* Exports and unexports a particular volume via NFS-Ganesha */
+ int
+-ganesha_manage_export(dict_t *dict, char *value, char **op_errstr)
++ganesha_manage_export(dict_t *dict, char *value,
++ gf_boolean_t update_cache_invalidation, char **op_errstr)
+ {
+ runner_t runner = {
+ 0,
+@@ -558,19 +559,21 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr)
+ }
+ }
+
+- vol_opts = volinfo->dict;
+- ret = dict_set_dynstr_with_alloc(vol_opts, "features.cache-invalidation",
+- value);
+- if (ret)
+- gf_asprintf(op_errstr,
+- "Cache-invalidation could not"
+- " be set to %s.",
+- value);
+- ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+- if (ret)
+- gf_asprintf(op_errstr, "failed to store volinfo for %s",
+- volinfo->volname);
+-
++ if (update_cache_invalidation) {
++ vol_opts = volinfo->dict;
++ ret = dict_set_dynstr_with_alloc(vol_opts,
++ "features.cache-invalidation", value);
++ if (ret)
++ gf_asprintf(op_errstr,
++ "Cache-invalidation could not"
++ " be set to %s.",
++ value);
++ ret = glusterd_store_volinfo(volinfo,
++ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
++ if (ret)
++ gf_asprintf(op_errstr, "failed to store volinfo for %s",
++ volinfo->volname);
++ }
+ out:
+ return ret;
+ }
+@@ -867,7 +870,7 @@ glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
+ GF_ASSERT(value);
+
+ if (strcmp(key, "ganesha.enable") == 0) {
+- ret = ganesha_manage_export(dict, value, op_errstr);
++ ret = ganesha_manage_export(dict, value, _gf_true, op_errstr);
+ if (ret < 0)
+ goto out;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index a630c48..52809a8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -1178,7 +1178,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
+
+ if ((strcmp(key, "ganesha.enable") == 0) &&
+ (strcmp(value, "off") == 0)) {
+- ret = ganesha_manage_export(dict, "off", op_errstr);
++ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr);
+ if (ret)
+ goto out;
+ }
+@@ -1691,7 +1691,7 @@ glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr)
+ */
+ if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) {
+ if (glusterd_check_ganesha_export(volinfo)) {
+- ret = ganesha_manage_export(dict, "off", op_errstr);
++ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
+ "Could not reset ganesha.enable key");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 81c668c..de4eccb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -1825,7 +1825,7 @@ glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr)
+
+ ret = glusterd_check_ganesha_export(volinfo);
+ if (ret) {
+- ret = ganesha_manage_export(dict, "off", op_errstr);
++ ret = ganesha_manage_export(dict, "off", _gf_false, op_errstr);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0,
+ GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL,
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 5135181..e858ce4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -1368,7 +1368,8 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr);
+ int
+ glusterd_op_set_ganesha(dict_t *dict, char **errstr);
+ int
+-ganesha_manage_export(dict_t *dict, char *value, char **op_errstr);
++ganesha_manage_export(dict_t *dict, char *value,
++ gf_boolean_t update_cache_invalidation, char **op_errstr);
+ gf_boolean_t
+ glusterd_is_ganesha_cluster();
+ gf_boolean_t
+--
+1.8.3.1
+
diff --git a/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch b/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch
new file mode 100644
index 0000000..042e1c0
--- /dev/null
+++ b/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch
@@ -0,0 +1,52 @@
+From 37bf4daca164cfcb260760ee2fd25d66f920dc7f Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 22 Feb 2017 18:26:30 +0530
+Subject: [PATCH 064/124] glusterd/ganesha : return proper value in pre_setup()
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I6f7ce82488904c7d418ee078162f26f1ec81e9d9
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/16733
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
+Tested-by: Raghavendra Talur <rtalur@redhat.com>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167149
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index 1c2ba7a..d9fdfc6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -843,16 +843,15 @@ pre_setup(gf_boolean_t run_setup, char **op_errstr)
+ {
+ int ret = 0;
+
+- ret = check_host_list();
+-
+- if (ret) {
++ if (check_host_list()) {
+ ret = setup_cluster(run_setup);
+ if (ret == -1)
+ gf_asprintf(op_errstr,
+ "Failed to set up HA "
+ "config for NFS-Ganesha. "
+ "Please check the log file for details");
+- }
++ } else
++ ret = -1;
+
+ return ret;
+ }
+--
+1.8.3.1
+
diff --git a/0065-ganesha-scripts-remove-dependency-over-export-config.patch b/0065-ganesha-scripts-remove-dependency-over-export-config.patch
new file mode 100644
index 0000000..4c99ef4
--- /dev/null
+++ b/0065-ganesha-scripts-remove-dependency-over-export-config.patch
@@ -0,0 +1,58 @@
+From 7a47c004b907ed5469b78d559cae6d151e4d626b Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Thu, 23 Feb 2017 16:21:52 +0530
+Subject: [PATCH 065/124] ganesha/scripts : remove dependency over export
+ configuration file for unexport
+
+Currently unexport is performed by reading the export id from the volume
+configuration file, so unexport depends on that file. This patch performs
+the unexport with the help of the dbus command ShowExports, and it will
+only unexport the share that was added via the cli.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I6f3c9b2bb48f0328b18e9cc0e4b9356174afd596
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/16771
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167150
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/dbus-send.sh | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh
+index ec8d948..9d613a0 100755
+--- a/extras/ganesha/scripts/dbus-send.sh
++++ b/extras/ganesha/scripts/dbus-send.sh
+@@ -41,8 +41,18 @@ string:"EXPORT(Path=/$VOL)"
+ #This function removes an export dynamically(uses the export_id of the export)
+ function dynamic_export_remove()
+ {
+- removed_id=`cat $GANESHA_DIR/exports/export.$VOL.conf |\
+-grep Export_Id | awk -F"[=,;]" '{print$2}'| tr -d '[[:space:]]'`
++ # The bash below fetches all the exports from the ShowExports command,
++ # searches for the export entry based on its path and then gets its
++ # export id. There are two possibilities for the path: either the entire
++ # volume or a subdir is exported. It handles both cases, but it removes
++ # only the first entry from the list, on the assumption that the entry
++ # exported via the cli has the lowest export id value.
++ removed_id=$(dbus-send --type=method_call --print-reply --system \
++ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
++ org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \
++ "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \
++ | head -1)
++
+ dbus-send --print-reply --system \
+ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+ org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id
+--
+1.8.3.1
+
diff --git a/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch b/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch
new file mode 100644
index 0000000..187b97c
--- /dev/null
+++ b/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch
@@ -0,0 +1,41 @@
+From d91eadbbb3e2d02e7297214da394b0e232544386 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Tue, 2 May 2017 14:06:00 +0530
+Subject: [PATCH 066/124] glusterd/ganesha : add proper NULL check in
+ manage_export_config
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I872b2b6b027f04e61f60ad85588f50e1ef2f988c
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/17150
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167151
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index d9fdfc6..fe0bffc 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -435,7 +435,7 @@ manage_export_config(char *volname, char *value, char **op_errstr)
+ CONFDIR, value, volname, NULL);
+ ret = runner_run(&runner);
+
+- if (ret && !(*op_errstr))
++ if (ret && op_errstr)
+ gf_asprintf(op_errstr,
+ "Failed to create"
+ " NFS-Ganesha export config file.");
+--
+1.8.3.1
+
diff --git a/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch b/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch
new file mode 100644
index 0000000..233725e
--- /dev/null
+++ b/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch
@@ -0,0 +1,41 @@
+From 1e5c6bb28894a57e5ca5ed7b4b3b5e05efecf7cd Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 3 May 2017 12:47:14 +0530
+Subject: [PATCH 067/124] ganesha : minor improvments for commit e91cdf4
+ (17081)
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I3af13e081c5e46cc6f2c132e7a5106ac3355c850
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://review.gluster.org/17152
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167152
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index f040ef6..cedc3fa 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -275,7 +275,7 @@ string:\"EXPORT(Export_Id=$export_id)\" 2>&1")
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ]; then
+- echo "Refresh-config failed on ${current_host}"
++ echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}"
+ else
+ echo "Refresh-config completed on ${current_host}."
+ fi
+--
+1.8.3.1
+
diff --git a/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch b/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch
new file mode 100644
index 0000000..3658ec1
--- /dev/null
+++ b/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch
@@ -0,0 +1,58 @@
+From aabc623f99d22a2a9e1d52f3ca7de1dc5b49946d Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Tue, 13 Jun 2017 07:36:50 -0400
+Subject: [PATCH 068/124] common-ha: surviving ganesha.nfsd not put in grace on
+ fail-over
+
+Behavior change is seen in new HA in RHEL 7.4 Beta. Up to now clone
+RAs have been created with "pcs resource create ... meta notify=true".
+Their notify method is invoked with pre-start or post-stop when one of
+the clone RAs is started or stopped.
+
+In 7.4 Beta the notify method we observe that the notify method is not
+invoked when one of the clones is stopped (or started).
+
+Ken Gaillot, one of the pacemaker devs, wrote:
+ With the above command, pcs puts the notify=true meta-attribute
+ on the primitive instead of the clone. Looking at the pcs help,
+ that seems expected (--clone notify=true would put it on the clone,
+ meta notify=true puts it on the primitive). If you drop the "meta"
+ above, I think it will work again.
+
+And indeed his suggested fix does work on both RHEL 7.4 Beta and RHEL
+7.3 and presumably Fedora.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Idbb539f1366df6d39f77431c357dff4e53a2df6d
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://review.gluster.org/17534
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167153
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index cedc3fa..537c965 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -445,7 +445,7 @@ setup_create_resources()
+ # ganesha-active crm_attribute
+ sleep 5
+
+- pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true
++ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed"
+ fi
+--
+1.8.3.1
+
diff --git a/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch b/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
new file mode 100644
index 0000000..16aea73
--- /dev/null
+++ b/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
@@ -0,0 +1,96 @@
+From 916a79ea78db264ceedd4ebdba794e488b82eceb Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 21 Jun 2017 10:01:20 -0400
+Subject: [PATCH 069/124] common-ha: enable and disable selinux
+ ganesha_use_fusefs
+
+Starting in Fedora 26 and RHEL 7.4 there are new targeted policies
+in selinux which include a tuneable to allow ganesha.nfsd to access
+the gluster (FUSE) shared_storage volume where ganesha maintains its
+state.
+
+N.B. rpm doesn't have a way to distinguish between RHEL 7.3 or 7.4
+so it can't be enabled for RHEL at this time. /usr/sbin/semanage is
+in policycoreutils-python in RHEL (versus policycoreutils-python-utils
+in Fedora.) Once RHEL 7.4 GAs we may also wish to specify the version
+for RHEL 7 explicitly, i.e.
+ Requires: selinux-policy >= 3.13.1-160.
+But beware, the corresponding version in Fedora 26 seems to be
+selinux-policy-3.13.1.258 or so. (Maybe earlier versions, but that's
+what's currently in the F26 beta.)
+
+release-3.10 is the upstream master branch for glusterfs-ganesha. For
+release-3.11 and later storhaug needs a similar change, which is
+tracked by https://github.com/linux-ha-storage/storhaug/issues/11
+
+Maybe at some point we would want to consider migrating the targeted
+policies for glusterfs (and nfs-ganesha) from selinux-policy to a
+glusterfs-selinux (and nfs-ganesha-selinux) subpackage?
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I04a5443edd00636cbded59a2baddfa98095bf7ac
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://review.gluster.org/17597
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Niels de Vos <ndevos@redhat.com>
+Reviewed-by: jiffin tony Thottan <jthottan@redhat.com>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167154
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index d748ebc..b01c94f 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -466,6 +466,11 @@ Requires: pcs, dbus
+ Requires: cman, pacemaker, corosync
+ %endif
+
++%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++Requires(post): policycoreutils-python-utils
++Requires(postun): policycoreutils-python-utils
++%endif
++
+ %description ganesha
+ GlusterFS is a distributed file-system capable of scaling to several
+ petabytes. It aggregates various storage bricks over Infiniband RDMA
+@@ -923,6 +928,14 @@ exit 0
+ %systemd_post glustereventsd
+ %endif
+
++%if ( 0%{!?_without_server:1} )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%post ganesha
++semanage boolean -m ganesha_use_fusefs --on
++exit 0
++%endif
++%endif
++
+ %if ( 0%{!?_without_georeplication:1} )
+ %post geo-replication
+ if [ $1 -ge 1 ]; then
+@@ -1055,6 +1068,14 @@ fi
+ exit 0
+ %endif
+
++%if ( 0%{!?_without_server:1} )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%postun ganesha
++semanage boolean -m ganesha_use_fusefs --off
++exit 0
++%endif
++%endif
++
+ ##-----------------------------------------------------------------------------
+ ## All %%files should be placed here and keep them grouped
+ ##
+--
+1.8.3.1
+
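The two scriptlets in this patch reduce to toggling one SELinux boolean at install and removal time. As a minimal sketch, assuming a Fedora 26+ host with policycoreutils-python-utils installed, the same commands can be exercised by hand:

    #!/bin/sh
    # Show the current value of the boolean (runtime and persistent).
    semanage boolean -l | grep ganesha_use_fusefs

    # Persistently enable it, as the %post ganesha scriptlet does.
    semanage boolean -m ganesha_use_fusefs --on

    # Persistently disable it again, as %postun does on removal.
    semanage boolean -m ganesha_use_fusefs --off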
diff --git a/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch b/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch
new file mode 100644
index 0000000..6715f1f
--- /dev/null
+++ b/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch
@@ -0,0 +1,76 @@
+From f410cd9f9b9455373a9612423558d8d0f83cd0fc Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 12 Jul 2017 07:43:51 -0400
+Subject: [PATCH 070/124] packaging: glusterfs-ganesha update sometimes fails
+ semanage
+
+Depending on how dnf orders updates, the updated version of
+selinux-policy-targeted with ganesha_use_fusefs may not be installed
+before the glusterfs-ganesha update executes its %post scriptlet
+containing the `semanage ganesha_use_fusefs ...` command. In such
+situations the semanage command (silently) fails.
+
+Use a %trigger (and %triggerun) to run the scriptlet (again) after
+selinux-policy-targeted with ganesha_use_fusefs has been installed
+or updated.
+
+Note: the %triggerun is probably unnecessary, but it doesn't hurt.
+
+The release-3.10 branch is the "upstream master" for the glusterfs-
+ganesha subpackage.
+
+Note: to be merged after https://review.gluster.org/17806
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I1ad06d79fa1711e4abf038baf9f0a5b7bb665934
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://review.gluster.org/17756
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Niels de Vos <ndevos@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167155
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index b01c94f..1d99a3d 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1077,6 +1077,28 @@ exit 0
+ %endif
+
+ ##-----------------------------------------------------------------------------
++## All %%trigger should be placed here and keep them sorted
++##
++%if ( 0%{!?_without_server:1} )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%trigger ganesha -- selinux-policy-targeted
++semanage boolean -m ganesha_use_fusefs --on
++exit 0
++%endif
++%endif
++
++##-----------------------------------------------------------------------------
++## All %%triggerun should be placed here and keep them sorted
++##
++%if ( 0%{!?_without_server:1} )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%triggerun ganesha -- selinux-policy-targeted
++semanage boolean -m ganesha_use_fusefs --off
++exit 0
++%endif
++%endif
++
++##-----------------------------------------------------------------------------
+ ## All %%files should be placed here and keep them grouped
+ ##
+ %files
+--
+1.8.3.1
+
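One way to sanity-check that the trigger made it into the built package is to query the RPM's embedded scriptlets; a sketch, with the subpackage name assumed to be glusterfs-ganesha:

    #!/bin/sh
    # List the %post/%postun and %trigger/%triggerun scriptlets that
    # rpmbuild embedded in the package.
    rpm -q --scripts glusterfs-ganesha
    rpm -q --triggers glusterfs-ganesha

    # After updating selinux-policy-targeted, confirm the trigger
    # re-applied the boolean.
    getsebool ganesha_use_fusefs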
diff --git a/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch b/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch
new file mode 100644
index 0000000..ad14a89
--- /dev/null
+++ b/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch
@@ -0,0 +1,66 @@
+From 662c94f3b3173bf78465644e2e42e03efd9ea493 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Mon, 17 Jul 2017 11:07:40 -0400
+Subject: [PATCH 071/124] common-ha: enable and disable selinux
+ gluster_use_execmem
+
+Starting in Fedora 26 and RHEL 7.4 there are new targeted policies in
+selinux which include a tuneable to allow glusterd->ganesha-ha.sh->pcs
+to access the pcs config, i.e. gluster-use-execmem.
+
+Note: rpm doesn't have a way to distinguish between RHEL 7.3 and 7.4,
+or between 3.13.1-X and 3.13.1-Y, so it can't be enabled for RHEL at
+this time.
+
+/usr/sbin/semanage is in policycoreutils-python in RHEL (versus
+policycoreutils-python-utils in Fedora.)
+
+Requires selinux-policy >= 3.13.1-160 in RHEL7. The corresponding
+version in Fedora 26 seems to be selinux-policy-3.13.1-259 or so. (Maybe
+earlier versions, but that's what was in F26 when I checked.)
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ic474b3f7739ff5be1e99d94d00b55caae4ceb5a0
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://review.gluster.org/17806
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167156
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 537c965..f4400af 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -984,6 +984,9 @@ main()
+ usage
+ exit 0
+ fi
++
++ semanage boolean -m gluster_use_execmem --on
++
+ HA_CONFDIR=${1%/}; shift
+ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+ local node=""
+@@ -1129,6 +1132,9 @@ $HA_CONFDIR/ganesha-ha.conf
+ ;;
+
+ esac
++
++ semanage boolean -m gluster_use_execmem --off
++
+ }
+
+ main $*
+--
+1.8.3.1
+
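The pattern above, turning a boolean on for the duration of a privileged section and off again afterwards, can be sketched as a standalone helper; the boolean name comes from the patch, while the wrapped pcs command and the save/restore of the prior value are illustrative additions:

    #!/bin/bash
    # Remember the current value so a pre-enabled boolean is not
    # switched off behind the administrator's back.
    old=$(getsebool gluster_use_execmem | awk '{print $3}')

    semanage boolean -m gluster_use_execmem --on

    pcs status    # stand-in for the pcs-driven HA setup/teardown work

    if [ "${old}" = "off" ]; then
        semanage boolean -m gluster_use_execmem --off
    fi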
diff --git a/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch b/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch
new file mode 100644
index 0000000..4bf730b
--- /dev/null
+++ b/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch
@@ -0,0 +1,60 @@
+From c147bbec10fc72b85301ab6a7580f15713b8a974 Mon Sep 17 00:00:00 2001
+From: Ambarish <asoman@redhat.com>
+Date: Tue, 12 Sep 2017 18:34:29 +0530
+Subject: [PATCH 072/124] ganesha-ha: don't set SELinux booleans if SELinux is
+ disabled
+
+The semanage commands inside the ganesha-ha.sh script will fail if SELinux
+is disabled. This patch introduces a check for whether SELinux is enabled,
+and subsequently runs semanage commands only on SELinux-enabled systems.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ibee61cbb1d51a73e6c326b49bac5c7ce06feb310
+Signed-off-by: Ambarish <asoman@redhat.com>
+Reviewed-on: https://review.gluster.org/18264
+Reviewed-by: Niels de Vos <ndevos@redhat.com>
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Reviewed-by: jiffin tony Thottan <jthottan@redhat.com>
+Reviewed-by: Daniel Gryniewicz <dang@redhat.com>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167157
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index f4400af..e1d3ea0 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -985,7 +985,9 @@ main()
+ exit 0
+ fi
+
+- semanage boolean -m gluster_use_execmem --on
++ if (selinuxenabled) ;then
++ semanage boolean -m gluster_use_execmem --on
++ fi
+
+ HA_CONFDIR=${1%/}; shift
+ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+@@ -1133,8 +1135,9 @@ $HA_CONFDIR/ganesha-ha.conf
+
+ esac
+
+- semanage boolean -m gluster_use_execmem --off
+-
++ if (selinuxenabled) ;then
++ semanage boolean -m gluster_use_execmem --off
++ fi
+ }
+
+ main $*
+--
+1.8.3.1
+
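selinuxenabled exits 0 when SELinux is enforcing or permissive and non-zero when it is disabled, which is what makes it usable as a bare shell guard; a minimal sketch of the pattern the hunk adds:

    #!/bin/sh
    # semanage fails outright on SELinux-disabled systems, so only
    # touch booleans when SELinux is actually on.
    if selinuxenabled; then
        semanage boolean -m gluster_use_execmem --on
    fi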
diff --git a/0073-build-remove-ganesha-dependency-on-selinux-policy.patch b/0073-build-remove-ganesha-dependency-on-selinux-policy.patch
new file mode 100644
index 0000000..4d16042
--- /dev/null
+++ b/0073-build-remove-ganesha-dependency-on-selinux-policy.patch
@@ -0,0 +1,45 @@
+From 52279c877264f41b522f747a986b937e6f054e2a Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Fri, 23 Jun 2017 20:43:16 +0530
+Subject: [PATCH 073/124] build: remove ganesha dependency on selinux-policy
+
+Problem:
+Puddle creation fails with unresolved dependencies
+ unresolved deps:
+*** selinux-policy >= 0:3.13.1-160
+
+Solution:
+We know a priori that the version in RHEL 7.4 is already the desired
+version. So removing this explicit dependency *should* not be a gluster
+test blocker.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Id53ac0e41adc14704932787ee0dd3143e6615aaf
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/109945
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167158
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 1d99a3d..e55e255 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -467,6 +467,7 @@ Requires: cman, pacemaker, corosync
+ %endif
+
+ %if ( 0%{?fedora} && 0%{?fedora} > 25 )
++Requires: selinux-policy >= 3.13.1-160
+ Requires(post): policycoreutils-python-utils
+ Requires(postun): policycoreutils-python-utils
+ %endif
+--
+1.8.3.1
+
diff --git a/0074-common-ha-enable-pacemaker-at-end-of-setup.patch b/0074-common-ha-enable-pacemaker-at-end-of-setup.patch
new file mode 100644
index 0000000..6366f0c
--- /dev/null
+++ b/0074-common-ha-enable-pacemaker-at-end-of-setup.patch
@@ -0,0 +1,67 @@
+From bfbda24746bf11573b485baf534a5cf1373c6c89 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 7 Jun 2017 08:15:48 -0400
+Subject: [PATCH 074/124] common-ha: enable pacemaker at end of setup
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I3ccd59b67ed364bfc5d27e88321ab5b9f8d471fd
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/108431
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167159
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index e1d3ea0..d7dfb87 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -787,6 +787,22 @@ setup_state_volume()
+ }
+
+
++enable_pacemaker()
++{
++ while [[ ${1} ]]; do
++ if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then
++${SECRET_PEM} root@${1} ${SERVICE_MAN} enable pacemaker"
++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
++${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker"
++ else
++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
++${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable"
++ fi
++ shift
++ done
++}
++
++
+ addnode_state_volume()
+ {
+ local newnode=${1}; shift
+@@ -1011,6 +1027,8 @@ main()
+
+ if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then
+
++ determine_service_manager
++
+ setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
+
+ setup_create_resources ${HA_SERVERS}
+@@ -1019,6 +1037,8 @@ main()
+
+ setup_state_volume ${HA_SERVERS}
+
++ enable_pacemaker ${HA_SERVERS}
++
+ else
+
+ logger "insufficient servers for HA, aborting"
+--
+1.8.3.1
+
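Note that the first line inside the systemctl branch of enable_pacemaker() above is an accidental duplicate that the next patch in this series removes. Distilled to its intent, the loop enables a service on each node over passwordless ssh; a sketch, with the key path taken from ganesha-ha.sh conventions and the hostnames as placeholders:

    #!/bin/bash
    # Enable pacemaker on every host given as an argument, using the
    # cluster's ssh identity; assumes systemd-based nodes.
    SECRET_PEM=/var/lib/glusterd/nfs/secret.pem

    enable_pacemaker()
    {
        while [[ ${1} ]]; do
            ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no \
                -i ${SECRET_PEM} root@${1} "systemctl enable pacemaker"
            shift
        done
    }

    enable_pacemaker node1.example.com node2.example.com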
diff --git a/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch b/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch
new file mode 100644
index 0000000..b524cfe
--- /dev/null
+++ b/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch
@@ -0,0 +1,43 @@
+From 0a124b59c662c8f85fe6d184b839cbfe29d5e8ab Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Wed, 14 Jun 2017 15:20:22 +0530
+Subject: [PATCH 075/124] common-ha: Fix an incorrect syntax during setup
+
+There was an invalid line introduced as part of
+https://code.engineering.redhat.com/gerrit/#/c/108431/
+
+Detected by rpmdiff -
+ https://errata.devel.redhat.com/rpmdiff/show/175336?result_id=4796901
+
+This change is to fix the same.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I55cdd7d866cb175fb620dbbd2d02c36eab291a74
+Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/109017
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+Tested-by: Kaleb Keithley <kkeithle@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167160
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index d7dfb87..8302c5e 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -791,7 +791,6 @@ enable_pacemaker()
+ {
+ while [[ ${1} ]]; do
+ if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then
+-${SECRET_PEM} root@${1} ${SERVICE_MAN} enable pacemaker"
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+ ${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker"
+ else
+--
+1.8.3.1
+
diff --git a/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch b/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch
new file mode 100644
index 0000000..4147a46
--- /dev/null
+++ b/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch
@@ -0,0 +1,44 @@
+From a917a989232d2c72752f8a2cf27bad90b5acb83d Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Tue, 27 Feb 2018 15:35:30 +0530
+Subject: [PATCH 076/124] glusterd/ganesha : change voltype for ganesha.enable
+ in volume option table
+
+The voltype defined for ganesha.enable is features/ganesha. But the ganesha
+xlator was removed from the client stack long ago; it is now defined as part
+of glusterd. Reflect the same in the volume option table.
+
+Label: DOWNSTREAM ONLY
+
+Upstream reference :
+>patch link https://review.gluster.org/19639
+>Change-Id: Ifedd7493020b77bd54edfdbdd9c799d93b24d0aa
+>BUG: 1486542
+>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+
+Change-Id: Ifedd7493020b77bd54edfdbdd9c799d93b24d0aa
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167161
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 13f423a..c8f6e67 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2599,7 +2599,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ },
+ {
+ .key = "ganesha.enable",
+- .voltype = "features/ganesha",
++ .voltype = "mgmt/ganesha",
+ .value = "off",
+ .option = "ganesha.enable",
+ .op_version = GD_OP_VERSION_3_7_0,
+--
+1.8.3.1
+
diff --git a/0077-glusterd-ganesha-create-remove-export-file-only-from.patch b/0077-glusterd-ganesha-create-remove-export-file-only-from.patch
new file mode 100644
index 0000000..139a8dd
--- /dev/null
+++ b/0077-glusterd-ganesha-create-remove-export-file-only-from.patch
@@ -0,0 +1,73 @@
+From 1e619b95e3f03e226fef135bfaeeca9b069eb978 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 14 Mar 2018 12:01:30 +0530
+Subject: [PATCH 077/124] glusterd/ganesha : create/remove export file only
+ from the node which performs ganesha.enable
+
+As part of "volume set ganesha.enable on", the ganesha export configuration file is created/removed
+using "create-export-ganesha.sh". This is performed from the nodes which are part of the ganesha cluster.
+But that is not needed, since the file is saved in shared storage and consumed by the nodes in the ganesha cluster.
+
+Label: DOWNSTREAM ONLY
+
+Upstream Reference :
+>patch link : https://review.gluster.org/#/c/19716/
+>Change-Id: I2583899972b47d451a90711940293004a3af4690
+>BUG: 1555195
+>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+
+Change-Id: I2583899972b47d451a90711940293004a3af4690
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167162
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 3 ++-
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +-
+ 3 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index fe0bffc..ff36476 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -530,7 +530,7 @@ ganesha_manage_export(dict_t *dict, char *value,
+ * Create the export file from the node where ganesha.enable "on"
+ * is executed
+ * */
+- if (option) {
++ if (option && is_origin_glusterd(dict)) {
+ ret = manage_export_config(volname, "on", op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 52809a8..10e2d48 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -2415,7 +2415,8 @@ glusterd_op_reset_volume(dict_t *dict, char **op_rspstr)
+ }
+
+ if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) {
+- if (glusterd_check_ganesha_export(volinfo)) {
++ if (glusterd_check_ganesha_export(volinfo) &&
++ is_origin_glusterd(dict)) {
+ ret = manage_export_config(volname, "off", op_rspstr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index de4eccb..1ea8ba6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -2936,7 +2936,7 @@ glusterd_op_delete_volume(dict_t *dict)
+ goto out;
+ }
+
+- if (glusterd_check_ganesha_export(volinfo)) {
++ if (glusterd_check_ganesha_export(volinfo) && is_origin_glusterd(dict)) {
+ ret = manage_export_config(volname, "off", NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+--
+1.8.3.1
+
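The export file only needs to be written once because it lives on the shared storage volume that every cluster node mounts; the sketch below illustrates that flow, with the mount path and the EXPORT stanza being illustrative assumptions rather than the exact output of create-export-ganesha.sh:

    #!/bin/bash
    # On the originating node, generate the export config on shared
    # storage; every other node reads the very same file.
    VOL=myvol
    CONF=/var/run/gluster/shared_storage/nfs-ganesha/exports/export.${VOL}.conf

    printf '%s\n' \
        "EXPORT {" \
        "    Export_Id = 2;" \
        "    Path = \"/${VOL}\";" \
        "    Pseudo = \"/${VOL}\";" \
        "    FSAL { Name = GLUSTER; volume = \"${VOL}\"; }" \
        "}" > "${CONF}"

    # Any node in the ganesha cluster can now consume it:
    cat "${CONF}"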
diff --git a/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch b/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch
new file mode 100644
index 0000000..fe29fc7
--- /dev/null
+++ b/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch
@@ -0,0 +1,40 @@
+From 5daff948884b1b68ffcbc6ceea3c7affdb9700f4 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 4 Apr 2018 09:29:43 +0530
+Subject: [PATCH 078/124] common-ha/scripts : pass the list of servers properly
+ to stop_ganesha_all()
+
+Label: DOWNSTREAM ONLY
+
+Upstream Reference :
+>Change-Id: I6d92623cd9fb450d7a27f5acc61eca0b3cbc9b08
+>BUG: 1563500
+>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+>Patch link : https://review.gluster.org/#/c/19816/
+
+Change-Id: I6d92623cd9fb450d7a27f5acc61eca0b3cbc9b08
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167163
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 8302c5e..4e5799f 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -199,7 +199,7 @@ setup_cluster()
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
+ #set up failed stop all ganesha process and clean up symlinks in cluster
+- stop_ganesha_all ${servers}
++ stop_ganesha_all "${servers}"
+ exit 1;
+ fi
+ pcs cluster start --all
+--
+1.8.3.1
+
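The quoting matters because stop_ganesha_all() expects the whole server list as its single first argument; unquoted, the shell word-splits the list into several arguments. A self-contained demonstration:

    #!/bin/bash
    # Show how quoting changes what the callee sees in ${1}.
    stop_ganesha_all()
    {
        echo "got ${#} argument(s); \${1} is: ${1}"
    }

    servers="node1 node2 node3"

    stop_ganesha_all ${servers}     # got 3 argument(s); ${1} is: node1
    stop_ganesha_all "${servers}"   # got 1 argument(s); ${1} is: node1 node2 node3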
diff --git a/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch b/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch
new file mode 100644
index 0000000..982a531
--- /dev/null
+++ b/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch
@@ -0,0 +1,93 @@
+From 7e71723a46237f13a570961054b361dc1b34ab25 Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Thu, 19 Jan 2017 15:01:12 +0530
+Subject: [PATCH 079/124] common-ha: All statd related files need to be owned
+ by rpcuser
+
+Statd service is started as rpcuser by default. Hence the
+files/directories needed by it under '/var/lib/nfs' should be
+owned by the same user.
+
+Note: This change is not in mainline as the cluster-bits
+are being moved to the storhaug project -
+http://review.gluster.org/#/c/16349/
+http://review.gluster.org/#/c/16333/
+
+Label: DOWNSTREAM ONLY
+
+Upstream Reference :
+> Change-Id: I89fd06aa9700c5ce60026ac825da7c154d9f48fd
+> BUG: 1414665
+> Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+> Reviewed-on: http://review.gluster.org/16433
+> Reviewed-by: jiffin tony Thottan <jthottan@redhat.com>
+> Smoke: Gluster Build System <jenkins@build.gluster.org>
+> Tested-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+
+Change-Id: I89fd06aa9700c5ce60026ac825da7c154d9f48fd
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167164
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 4e5799f..4a98f32 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -756,9 +756,11 @@ setup_state_volume()
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+@@ -768,9 +770,11 @@ setup_state_volume()
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+@@ -830,9 +834,11 @@ addnode_state_volume()
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+@@ -842,9 +848,11 @@ addnode_state_volume()
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+--
+1.8.3.1
+
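Every statd path touched above follows the same create-then-chown pattern; condensed into a loop, with the shared-storage mount point and node directory as assumed examples:

    #!/bin/bash
    # statd runs as rpcuser, so its state under the replicated
    # /var/lib/nfs tree must be owned rpcuser:rpcuser.
    mnt=/var/run/gluster/shared_storage    # assumed mount point
    dirname=node1.example.com              # assumed per-node dir

    for d in nfs/statd nfs/statd/sm nfs/statd/sm.bak; do
        if [ ! -d ${mnt}/nfs-ganesha/${dirname}/${d} ]; then
            mkdir -p ${mnt}/nfs-ganesha/${dirname}/${d}
            chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/${d}
        fi
    done

    if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
        touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
        chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
    fi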
diff --git a/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch b/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch
new file mode 100644
index 0000000..acd1d4a
--- /dev/null
+++ b/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch
@@ -0,0 +1,62 @@
+From c5c6720c5186741a3b01a5ba2b34633fc1a00fc5 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Mon, 30 Apr 2018 12:35:01 +0530
+Subject: [PATCH 080/124] glusterd/ganesha : Skip non-ganesha nodes properly
+ for ganesha HA set up
+
+Label: DOWNSTREAM ONLY
+
+Upstream reference:
+>Patch link https://review.gluster.org/#/c/19949/
+>Change-Id: Iff7bc3ead43e97847219c5a5cc8b967bf0967903
+>BUG: 1573078
+>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+
+Change-Id: Iff7bc3ead43e97847219c5a5cc8b967bf0967903
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167165
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 23 +++++++++++++----------
+ 1 file changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index ff36476..d882105 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -842,17 +842,20 @@ static int
+ pre_setup(gf_boolean_t run_setup, char **op_errstr)
+ {
+ int ret = 0;
+-
+- if (check_host_list()) {
+- ret = setup_cluster(run_setup);
+- if (ret == -1)
++ if (run_setup) {
++ if (!check_host_list()) {
+ gf_asprintf(op_errstr,
+- "Failed to set up HA "
+- "config for NFS-Ganesha. "
+- "Please check the log file for details");
+- } else
+- ret = -1;
+-
++ "Running nfs-ganesha setup command "
++ "from node which is not part of ganesha cluster");
++ return -1;
++ }
++ }
++ ret = setup_cluster(run_setup);
++ if (ret == -1)
++ gf_asprintf(op_errstr,
++ "Failed to set up HA "
++ "config for NFS-Ganesha. "
++ "Please check the log file for details");
+ return ret;
+ }
+
+--
+1.8.3.1
+
diff --git a/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch b/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch
new file mode 100644
index 0000000..0a4110f
--- /dev/null
+++ b/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch
@@ -0,0 +1,50 @@
+From 3cb9ed7e20f59eec036908eed4cfdbc61e990ee2 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Tue, 11 Dec 2018 10:09:42 -0500
+Subject: [PATCH 081/124] ganesha-ha: ensure pacemaker is enabled after setup
+
+There appears to be a race between `pcs cluster setup ...` early
+in the setup and the `systemctl enable pacemaker` at the end. The
+`pcs cluster setup ...` disables pacemaker and corosync. (Now, in
+pacemaker-1.1.18. Was it always the case?)
+
+I am not able to reproduce this on my devel system. I speculate that
+on a busy system the `pcs cluster setup ...` disable may, under
+the right conditions, not run until after the setup script enables
+it. It must require the right alignment of the Sun, Moon, and all
+the planets.
+
+Regardless, we'll use the --enable option to `pcs cluster setup ...`
+to ensure that the cluster (re)starts pacemaker.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I771ff62c37426438b80e61651a8b4ecaf2d549c3
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167166
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 4a98f32..32af1ca 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -195,9 +195,9 @@ setup_cluster()
+
+ pcs cluster auth ${servers}
+ # pcs cluster setup --name ${name} ${servers}
+- pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers}
++ pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers}
+ if [ $? -ne 0 ]; then
+- logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
++ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} failed"
+ #set up failed stop all ganesha process and clean up symlinks in cluster
+ stop_ganesha_all "${servers}"
+ exit 1;
+--
+1.8.3.1
+
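With --enable, `pcs cluster setup` itself marks pacemaker and corosync to start at boot on all nodes, so there is no separate enable step to race against. A sketch of the resulting RHEL 7 invocation, with cluster and host names as placeholders:

    #!/bin/bash
    servers="node1 node2 node3"
    name=ganesha-ha-cluster

    pcs cluster auth ${servers}
    # --enable replaces a later, racy "systemctl enable pacemaker".
    pcs cluster setup --name ${name} --enable --transport udpu ${servers}
    if [ $? -ne 0 ]; then
        logger "pcs cluster setup ${name} failed"
        exit 1
    fi
    pcs cluster start --all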
diff --git a/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch b/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch
new file mode 100644
index 0000000..6df51eb
--- /dev/null
+++ b/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch
@@ -0,0 +1,59 @@
+From 6d6841a996a52488e8a18606f386bba0a12b4231 Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Fri, 18 Nov 2016 12:47:06 +0530
+Subject: [PATCH 082/124] build: Add dependency on netstat for
+ glusterfs-ganesha pkg
+
+The portblock resource-agent needs the netstat command, but this dependency
+should ideally have been added to the resource-agents package. However, the
+fixes (bug1395594, bug1395596) are only going to be available
+in the future RHEL 6.9 and RHEL 7.4 releases. Hence, as an interim
+workaround, we agreed to add this dependency to the glusterfs-ganesha package.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I6ac1003103755d7534dd079c821bbaacd8dd94b8
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167167
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index e55e255..bc27058 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -466,6 +466,12 @@ Requires: pcs, dbus
+ Requires: cman, pacemaker, corosync
+ %endif
+
++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 5 )
++# we need portblock resource-agent in 3.9.5 and later.
++Requires: resource-agents >= 3.9.5
++Requires: net-tools
++%endif
++
+ %if ( 0%{?fedora} && 0%{?fedora} > 25 )
+ Requires: selinux-policy >= 3.13.1-160
+ Requires(post): policycoreutils-python-utils
+@@ -1951,6 +1957,14 @@ fi
+ %endif
+
+ %changelog
++* Sun Apr 7 2019 Soumya Koduri <skoduri@redhat.com>
++- As an interim fix add dependency on netstat(/net-tools) for glusterfs-ganesha package (#1395574)
++
++* Sun Apr 7 2019 Soumya Koduri <skoduri@redhat.com>
++- Add dependency on portblock resource agent for ganesha package (#1278336)
++- Fix incorrect Requires for portblock resource agent (#1278336)
++- Update version checks for portblock resource agent on RHEL (#1278336)
++
+ * Sat Apr 6 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+ - Adding ganesha ha resources back in gluster repository
+
+--
+1.8.3.1
+
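Since the real dependency is on the netstat binary that the portblock agent invokes, a runtime check makes the intent concrete; a sketch, assuming a yum-based host:

    #!/bin/sh
    # portblock consults netstat for connections on the blocked port;
    # make sure the binary is present before relying on the agent.
    if ! command -v netstat >/dev/null 2>&1; then
        echo "netstat missing; installing net-tools" >&2
        yum install -y net-tools
    fi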
diff --git a/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch b/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
new file mode 100644
index 0000000..2d12285
--- /dev/null
+++ b/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
@@ -0,0 +1,82 @@
+From a80743a3053798521ae4dd830adcde8bc7da11b6 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Tue, 20 Feb 2018 11:50:33 +0530
+Subject: [PATCH 083/124] common-ha: enable and disable selinux
+ ganesha_use_fusefs
+
+Adding missing changes in a downstream backport (https://code.engineering.redhat.com/gerrit/#/c/109845/)
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I59fd2fc2228ded9547c2d1e08c22f7a10c35f86f
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167244
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index bc27058..2149f86 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -472,11 +472,16 @@ Requires: resource-agents >= 3.9.5
+ Requires: net-tools
+ %endif
+
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
++%if ( 0%{?rhel} )
+ Requires: selinux-policy >= 3.13.1-160
++Requires(post): policycoreutils-python
++Requires(postun): policycoreutils-python
++%else
+ Requires(post): policycoreutils-python-utils
+ Requires(postun): policycoreutils-python-utils
+ %endif
++%endif
+
+ %description ganesha
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -936,7 +941,7 @@ exit 0
+ %endif
+
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %post ganesha
+ semanage boolean -m ganesha_use_fusefs --on
+ exit 0
+@@ -1076,7 +1081,7 @@ exit 0
+ %endif
+
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %postun ganesha
+ semanage boolean -m ganesha_use_fusefs --off
+ exit 0
+@@ -1087,7 +1092,7 @@ exit 0
+ ## All %%trigger should be placed here and keep them sorted
+ ##
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %trigger ganesha -- selinux-policy-targeted
+ semanage boolean -m ganesha_use_fusefs --on
+ exit 0
+@@ -1098,7 +1103,7 @@ exit 0
+ ## All %%triggerun should be placed here and keep them sorted
+ ##
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %triggerun ganesha -- selinux-policy-targeted
+ semanage boolean -m ganesha_use_fusefs --off
+ exit 0
+--
+1.8.3.1
+
diff --git a/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch b/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch
new file mode 100644
index 0000000..04f8013
--- /dev/null
+++ b/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch
@@ -0,0 +1,37 @@
+From d7bee4a4ad0878003e19711e20994c42c4d2bd9e Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 9 Apr 2019 16:15:09 +0530
+Subject: [PATCH 084/124] glusterd: Fix duplicate client_op_version in info
+ file
+
+This must have been introduced while applying downstream-only patches on
+the RHGS 3.5.0 branch.
+
+Change-Id: I231249cca2a7bce29ef53cf95f9d2377b8203283
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167341
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index fb52957..351bd9e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1022,11 +1022,6 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+ goto out;
+ }
+
+- snprintf(buf, sizeof(buf), "%d", volinfo->client_op_version);
+- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+- buf);
+- if (ret)
+- goto out;
+ if (volinfo->caps) {
+ snprintf(buf, sizeof(buf), "%d", volinfo->caps);
+ ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CAPS, buf);
+--
+1.8.3.1
+
diff --git a/0085-Revert-all-remove-code-which-is-not-being-considered.patch b/0085-Revert-all-remove-code-which-is-not-being-considered.patch
new file mode 100644
index 0000000..6addaff
--- /dev/null
+++ b/0085-Revert-all-remove-code-which-is-not-being-considered.patch
@@ -0,0 +1,8976 @@
+From 379b9f7247a4daac9545e3dec79d3c2660111d8d Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Mon, 8 Apr 2019 11:32:09 +0530
+Subject: [PATCH 085/124] Revert "all: remove code which is not being
+ considered in build"
+
+This reverts most of commit 8293d21280fd6ddfc9bb54068cf87794fc6be207.
+It adds back the changes for tier and CTR, with the necessary changes for building them.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I8f7978618f2a6a949b09dbcfd25722494cb8f1cd
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166245
+Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ Makefile.am | 8 +-
+ configure.ac | 34 +
+ glusterfs.spec.in | 19 +
+ libglusterfs/Makefile.am | 4 +-
+ libglusterfs/src/glusterfs/mem-types.h | 1 +
+ xlators/cluster/dht/src/Makefile.am | 14 +-
+ xlators/cluster/dht/src/dht-rebalance.c | 12 +
+ xlators/cluster/dht/src/tier-common.c | 1199 ++++++++
+ xlators/cluster/dht/src/tier-common.h | 55 +
+ xlators/cluster/dht/src/tier.c | 3105 ++++++++++++++++++++
+ xlators/cluster/dht/src/tier.h | 110 +
+ xlators/features/Makefile.am | 2 +-
+ xlators/features/changetimerecorder/Makefile.am | 3 +
+ .../features/changetimerecorder/src/Makefile.am | 26 +
+ .../changetimerecorder/src/changetimerecorder.c | 2371 +++++++++++++++
+ .../changetimerecorder/src/changetimerecorder.h | 21 +
+ .../features/changetimerecorder/src/ctr-helper.c | 293 ++
+ .../features/changetimerecorder/src/ctr-helper.h | 854 ++++++
+ .../features/changetimerecorder/src/ctr-messages.h | 61 +
+ .../changetimerecorder/src/ctr-xlator-ctx.c | 362 +++
+ .../changetimerecorder/src/ctr-xlator-ctx.h | 68 +
+ .../changetimerecorder/src/ctr_mem_types.h | 22 +
+ 22 files changed, 8637 insertions(+), 7 deletions(-)
+ create mode 100644 xlators/cluster/dht/src/tier-common.c
+ create mode 100644 xlators/cluster/dht/src/tier-common.h
+ create mode 100644 xlators/cluster/dht/src/tier.c
+ create mode 100644 xlators/cluster/dht/src/tier.h
+ create mode 100644 xlators/features/changetimerecorder/Makefile.am
+ create mode 100644 xlators/features/changetimerecorder/src/Makefile.am
+ create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.c
+ create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.h
+ create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.c
+ create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.h
+ create mode 100644 xlators/features/changetimerecorder/src/ctr-messages.h
+ create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
+ create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
+ create mode 100644 xlators/features/changetimerecorder/src/ctr_mem_types.h
+
+diff --git a/Makefile.am b/Makefile.am
+index e0c795f..613382f 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -3,7 +3,7 @@ SOURCES = site.h
+ EXTRA_DIST = autogen.sh \
+ COPYING-GPLV2 COPYING-LGPLV3 COMMITMENT \
+ INSTALL README.md AUTHORS THANKS NEWS \
+- glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \
++ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in libgfdb.pc.in \
+ run-tests.sh \
+ build-aux/pkg-version \
+ contrib/umountd \
+@@ -15,8 +15,12 @@ SUBDIRS = $(ARGP_STANDALONE_DIR) rpc/xdr/gen libglusterfs rpc api xlators \
+
+ pkgconfigdir = @pkgconfigdir@
+ pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
++if USE_GFDB
++pkgconfig_DATA += libgfdb.pc
++endif
+
+-CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile
++CLEANFILES = glusterfs-api.pc libgfchangelog.pc libgfdb.pc \
++ contrib/umountd/Makefile
+
+ gitclean: distclean
+ find . -name Makefile.in -exec rm -f {} \;
+diff --git a/configure.ac b/configure.ac
+index baa811a..633e850 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -30,6 +30,7 @@ AC_CONFIG_HEADERS([config.h site.h])
+ AC_CONFIG_FILES([Makefile
+ libglusterfs/Makefile
+ libglusterfs/src/Makefile
++ libglusterfs/src/gfdb/Makefile
+ geo-replication/src/peer_gsec_create
+ geo-replication/src/peer_mountbroker
+ geo-replication/src/peer_mountbroker.py
+@@ -121,6 +122,8 @@ AC_CONFIG_FILES([Makefile
+ xlators/features/changelog/src/Makefile
+ xlators/features/changelog/lib/Makefile
+ xlators/features/changelog/lib/src/Makefile
++ xlators/features/changetimerecorder/Makefile
++ xlators/features/changetimerecorder/src/Makefile
+ xlators/features/locks/Makefile
+ xlators/features/locks/src/Makefile
+ xlators/features/quota/Makefile
+@@ -237,6 +240,7 @@ AC_CONFIG_FILES([Makefile
+ contrib/umountd/Makefile
+ glusterfs-api.pc
+ libgfchangelog.pc
++ libgfdb.pc
+ api/Makefile
+ api/src/Makefile
+ api/examples/Makefile
+@@ -866,6 +870,33 @@ AM_CONDITIONAL([USE_FIREWALLD],test ["x${BUILD_FIREWALLD}" = "xyes"])
+
+ #endof firewald section
+
++# Data tiering requires sqlite
++AC_ARG_ENABLE([tiering],
++ AC_HELP_STRING([--disable-tiering],
++ [Disable data classification/tiering]),
++ [BUILD_GFDB="${enableval}"], [BUILD_GFDB="yes"])
++
++case $host_os in
++ darwin*)
++ SQLITE_LIBS="-lsqlite3"
++ AC_CHECK_HEADERS([sqlite3.h], AC_DEFINE(USE_GFDB, 1))
++ ;;
++ *)
++ if test "x${BUILD_GFDB}" = "xyes"; then
++ PKG_CHECK_MODULES([SQLITE], [sqlite3],
++ AC_DEFINE(USE_GFDB, 1),
++ AC_MSG_ERROR([pass --disable-tiering to build without sqlite]))
++ else
++ AC_DEFINE(USE_GFDB, 0, [no sqlite, gfdb is disabled])
++ fi
++ ;;
++esac
++
++AC_SUBST(SQLITE_CFLAGS)
++AC_SUBST(SQLITE_LIBS)
++AM_CONDITIONAL(BUILD_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes")
++AM_CONDITIONAL(USE_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes")
++
+ # xml-output
+ AC_ARG_ENABLE([xml-output],
+ AC_HELP_STRING([--disable-xml-output],
+@@ -1544,6 +1575,8 @@ GFAPI_VERSION="7."${PACKAGE_VERSION}
+ LIBGFCHANGELOG_VERSION="0.0.1"
+ AC_SUBST(GFAPI_VERSION)
+ AC_SUBST(LIBGFCHANGELOG_VERSION)
++LIBGFDB_VERSION="0.0.1"
++AC_SUBST(LIBGFDB_VERSION)
+
+ dnl libtool versioning
+ LIBGFXDR_LT_VERSION="0:1:0"
+@@ -1584,6 +1617,7 @@ echo "XML output : $BUILD_XML_OUTPUT"
+ echo "Unit Tests : $BUILD_UNITTEST"
+ echo "Track priv ports : $TRACK_PRIVPORTS"
+ echo "POSIX ACLs : $BUILD_POSIX_ACLS"
++echo "Data Classification : $BUILD_GFDB"
+ echo "firewalld-config : $BUILD_FIREWALLD"
+ echo "Events : $BUILD_EVENTS"
+ echo "EC dynamic support : $EC_DYNAMIC_SUPPORT"
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 2149f86..e0607ba 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -154,6 +154,7 @@
+ %global _without_events --disable-events
+ %global _without_georeplication --disable-georeplication
+ %global _with_gnfs %{nil}
++%global _without_tiering --disable-tiering
+ %global _without_ocf --without-ocf
+ %endif
+
+@@ -287,6 +288,9 @@ BuildRequires: libuuid-devel
+ %if ( 0%{?_with_cmocka:1} )
+ BuildRequires: libcmocka-devel >= 1.0.1
+ %endif
++%if ( 0%{!?_without_tiering:1} )
++BuildRequires: sqlite-devel
++%endif
+ %if ( 0%{!?_without_georeplication:1} )
+ BuildRequires: libattr-devel
+ %endif
+@@ -797,6 +801,7 @@ export LDFLAGS
+ %{?_without_rdma} \
+ %{?_without_server} \
+ %{?_without_syslog} \
++ %{?_without_tiering} \
+ %{?_with_ipv6default} \
+ %{?_without_libtirpc}
+
+@@ -1232,9 +1237,15 @@ exit 0
+ %if ( 0%{?_without_server:1} )
+ %exclude %{_libdir}/pkgconfig/libgfchangelog.pc
+ %exclude %{_libdir}/libgfchangelog.so
++%if ( 0%{!?_without_tiering:1} )
++%{_libdir}/pkgconfig/libgfdb.pc
++%endif
+ %else
+ %{_libdir}/pkgconfig/libgfchangelog.pc
+ %{_libdir}/libgfchangelog.so
++%if ( 0%{!?_without_tiering:1} )
++%{_libdir}/pkgconfig/libgfdb.pc
++%endif
+ %endif
+
+ %files client-xlators
+@@ -1330,6 +1341,10 @@ exit 0
+ %files libs
+ %{_libdir}/*.so.*
+ %exclude %{_libdir}/libgfapi.*
++%if ( 0%{!?_without_tiering:1} )
++# libgfdb is only needed server-side
++%exclude %{_libdir}/libgfdb.*
++%endif
+
+ %files -n python%{_pythonver}-gluster
+ # introducing glusterfs module in site packages.
+@@ -1417,6 +1432,10 @@ exit 0
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so
++%if ( 0%{!?_without_tiering:1} )
++ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so
++ %{_libdir}/libgfdb.so.*
++%endif
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
+diff --git a/libglusterfs/Makefile.am b/libglusterfs/Makefile.am
+index d471a3f..7e72f61 100644
+--- a/libglusterfs/Makefile.am
++++ b/libglusterfs/Makefile.am
+@@ -1,3 +1,3 @@
+-SUBDIRS = src
++SUBDIRS = src src/gfdb
+
+-CLEANFILES =
++CLEANFILES =
+diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h
+index 832f68c..92730a9 100644
+--- a/libglusterfs/src/glusterfs/mem-types.h
++++ b/libglusterfs/src/glusterfs/mem-types.h
+@@ -138,6 +138,7 @@ enum gf_common_mem_types_ {
+ gf_common_volfile_t,
+ gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
+ gf_common_mt_server_cmdline_t, /* used only in one location */
++ gf_mt_gfdb_query_record_t,
+ gf_common_mt_end
+ };
+ #endif
+diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
+index 56f1f2a..5532047 100644
+--- a/xlators/cluster/dht/src/Makefile.am
++++ b/xlators/cluster/dht/src/Makefile.am
+@@ -1,4 +1,7 @@
+ xlator_LTLIBRARIES = dht.la nufa.la switch.la
++if BUILD_GFDB
++ xlator_LTLIBRARIES += tier.la
++endif
+
+ AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+@@ -13,6 +16,7 @@ dht_la_SOURCES = $(dht_common_source) dht.c
+
+ nufa_la_SOURCES = $(dht_common_source) nufa.c
+ switch_la_SOURCES = $(dht_common_source) switch.c
++tier_la_SOURCES = $(dht_common_source) tier.c tier-common.c
+
+ dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+ dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+@@ -23,15 +27,21 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+ switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+ switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
++tier_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL)
++tier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
++
+ noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \
+- dht-lock.h $(top_builddir)/xlators/lib/src/libxlator.h
++ dht-lock.h tier-common.h tier.h \
++ $(top_builddir)/xlators/lib/src/libxlator.h
+
+ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
++ -I$(top_srcdir)/libglusterfs/src/gfdb \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/xlators/lib/src \
+ -DDATADIR=\"$(localstatedir)\" \
+- -DLIBDIR=\"$(libdir)\"
++ -DLIBDIR=\"$(libdir)\" \
++ -DLIBGFDB_VERSION=\"$(LIBGFDB_VERSION)\"
+
+ CLEANFILES =
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index e0f25b1..efbe8a4 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -8,6 +8,7 @@
+ cases as published by the Free Software Foundation.
+ */
+
++#include "tier.h"
+ #include "dht-common.h"
+ #include <glusterfs/xlator.h>
+ #include <glusterfs/syscall.h>
+@@ -2134,6 +2135,17 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ }
+ }
+
++ /* store size of previous migrated file */
++ if (defrag && defrag->tier_conf.is_tier) {
++ if (from != TIER_HASHED_SUBVOL) {
++ defrag->tier_conf.st_last_promoted_size = stbuf.ia_size;
++ } else {
++ /* Don't delete the linkto file on the hashed subvol */
++ delete_src_linkto = _gf_false;
++ defrag->tier_conf.st_last_demoted_size = stbuf.ia_size;
++ }
++ }
++
+ /* The src file is being unlinked after this so we don't need
+ to clean it up */
+ clean_src = _gf_false;
+diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c
+new file mode 100644
+index 0000000..b22f477
+--- /dev/null
++++ b/xlators/cluster/dht/src/tier-common.c
+@@ -0,0 +1,1199 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include <glusterfs/glusterfs.h>
++#include <glusterfs/xlator.h>
++#include "libxlator.h"
++#include "dht-common.h"
++#include <glusterfs/defaults.h>
++#include "tier-common.h"
++#include "tier.h"
++
++int
++dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, inode_t *inode, struct iatt *stbuf,
++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
++
++int
++tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, inode_t *inode, struct iatt *stbuf,
++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
++{
++ dht_local_t *local = NULL;
++ loc_t *oldloc = NULL;
++ loc_t *newloc = NULL;
++
++ local = frame->local;
++
++ oldloc = &local->loc;
++ newloc = &local->loc2;
++
++ if (op_ret == -1) {
++ /* No continuation on DHT inode missing errors, as we should
++ * then have a good stbuf that states P2 happened. We would
++ * get inode missing if, the file completed migrated between
++ * the lookup and the link call */
++ goto out;
++ }
++
++ if (local->call_cnt != 1) {
++ goto out;
++ }
++
++ local->call_cnt = 2;
++
++ /* Do this on the hot tier now */
++
++ STACK_WIND(frame, tier_link_cbk, local->cached_subvol,
++ local->cached_subvol->fops->link, oldloc, newloc, xdata);
++
++ return 0;
++
++out:
++ DHT_STRIP_PHASE1_FLAGS(stbuf);
++
++ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
++ postparent, NULL);
++
++ return 0;
++}
++
++int
++tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
++ dict_t *xdata)
++{
++ xlator_t *cached_subvol = NULL;
++ xlator_t *hashed_subvol = NULL;
++ int op_errno = -1;
++ int ret = -1;
++ dht_local_t *local = NULL;
++ dht_conf_t *conf = NULL;
++
++ VALIDATE_OR_GOTO(frame, err);
++ VALIDATE_OR_GOTO(this, err);
++ VALIDATE_OR_GOTO(oldloc, err);
++ VALIDATE_OR_GOTO(newloc, err);
++
++ conf = this->private;
++
++ local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
++ if (!local) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++ local->call_cnt = 1;
++
++ cached_subvol = local->cached_subvol;
++
++ if (!cached_subvol) {
++ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
++ oldloc->path);
++ op_errno = ENOENT;
++ goto err;
++ }
++
++ hashed_subvol = TIER_HASHED_SUBVOL;
++
++ ret = loc_copy(&local->loc2, newloc);
++ if (ret == -1) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ if (hashed_subvol == cached_subvol) {
++ STACK_WIND(frame, dht_link_cbk, cached_subvol,
++ cached_subvol->fops->link, oldloc, newloc, xdata);
++ return 0;
++ }
++
++ /* Create hardlinks to both the data file on the hot tier
++ and the linkto file on the cold tier */
++
++ gf_uuid_copy(local->gfid, oldloc->inode->gfid);
++
++ STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link,
++ oldloc, newloc, xdata);
++
++ return 0;
++err:
++ op_errno = (op_errno == -1) ? errno : op_errno;
++ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
++ return 0;
++}
++
++int
++tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int op_ret, int op_errno,
++ struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata)
++{
++ dht_local_t *local = NULL;
++
++ local = frame->local;
++
++ if (local->params) {
++ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
++ }
++
++ DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
++ NULL, NULL);
++
++ return 0;
++}
++
++int
++tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
++{
++ xlator_t *prev = NULL;
++ int ret = -1;
++ dht_local_t *local = NULL;
++ xlator_t *hashed_subvol = NULL;
++ dht_conf_t *conf = NULL;
++
++ local = frame->local;
++ conf = this->private;
++
++ hashed_subvol = TIER_HASHED_SUBVOL;
++
++ if (!local) {
++ op_ret = -1;
++ op_errno = EINVAL;
++ goto out;
++ }
++
++ if (op_ret == -1) {
++ if (local->linked == _gf_true && local->xattr_req) {
++ local->op_errno = op_errno;
++ local->op_ret = op_ret;
++ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
++ local->xattr_req);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
++ "Failed to set dictionary value to "
++ "unlink of migrating file");
++ goto out;
++ }
++
++ STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk,
++ hashed_subvol, hashed_subvol->fops->unlink, &local->loc,
++ 0, local->xattr_req);
++ return 0;
++ }
++ goto out;
++ }
++
++ prev = cookie;
++
++ if (local->loc.parent) {
++ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
++
++ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
++ }
++
++ ret = dht_layout_preset(this, prev, inode);
++ if (ret != 0) {
++ gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
++ prev->name);
++ op_ret = -1;
++ op_errno = EINVAL;
++ goto out;
++ }
++
++ local->op_errno = op_errno;
++
++ if (local->linked == _gf_true) {
++ local->stbuf = *stbuf;
++ dht_linkfile_attr_heal(frame, this);
++ }
++out:
++ if (local) {
++ if (local->xattr_req) {
++ dict_del(local->xattr_req, TIER_LINKFILE_GFID);
++ }
++ }
++
++ DHT_STRIP_PHASE1_FLAGS(stbuf);
++
++ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
++ preparent, postparent, xdata);
++
++ return 0;
++}
++
++int
++tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode,
++ struct iatt *stbuf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata)
++{
++ dht_local_t *local = NULL;
++ xlator_t *cached_subvol = NULL;
++ dht_conf_t *conf = NULL;
++ int ret = -1;
++ unsigned char *gfid = NULL;
++
++ local = frame->local;
++ if (!local) {
++ op_errno = EINVAL;
++ goto err;
++ }
++
++ if (op_ret == -1) {
++ local->op_errno = op_errno;
++ goto err;
++ }
++
++ conf = this->private;
++ if (!conf) {
++ local->op_errno = EINVAL;
++ op_errno = EINVAL;
++ goto err;
++ }
++
++ cached_subvol = TIER_UNHASHED_SUBVOL;
++
++ if (local->params) {
++ dict_del(local->params, conf->link_xattr_name);
++ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
++ }
++
++ /*
++ * We will delete the linkfile if data file creation fails.
++ * When deleting this stale linkfile, there is a possibility
++ * for a race between this linkfile deletion and a stale
++ * linkfile deletion triggered by a lookup from a different
++ * client.
++ *
++ * For eg:
++ *
++ * Client 1 Client 2
++ *
++ * 1 linkfile created for foo
++ *
++ * 2 data file creation failed
++ *
++ * 3 creating a file with same name
++ *
++ * 4 lookup before creation deleted
++ * the linkfile created by client1,
++ * considering it a stale linkfile.
++ *
++ * 5 New linkfile created for foo
++ * with different gfid.
++ *
++ * 6 Trigger linkfile deletion as
++ * data file creation failed.
++ *
++ * 7 Linkfile deleted which is
++ * created by client2.
++ *
++ * 8 Data file created.
++ *
++ * With this race, we can end up with a data file on the non-hashed
++ * subvol but no linkfile on the hashed subvol.
++ *
++ * To avoid this, we store the gfid of the linkfile created by this
++ * client; when we delete the linkfile, the posix layer validates the
++ * gfid of the existing file against this stored value.
++ *
++ * We store the gfid in local->xattr_req, since local->params is also
++ * used to create the data file; the linkfile deletion path uses the
++ * local->xattr_req dictionary.
++ */
++ if (!local->xattr_req) {
++ local->xattr_req = dict_new();
++ if (!local->xattr_req) {
++ local->op_errno = ENOMEM;
++ op_errno = ENOMEM;
++ goto err;
++ }
++ }
++
++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
++ if (!gfid) {
++ local->op_errno = ENOMEM;
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ gf_uuid_copy(gfid, stbuf->ia_gfid);
++ ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid,
++ sizeof(uuid_t));
++ if (ret) {
++ GF_FREE(gfid);
++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
++ "Failed to set dictionary value"
++ " : key = %s",
++ TIER_LINKFILE_GFID);
++ }
++
++ STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol,
++ cached_subvol->fops->create, &local->loc, local->flags,
++ local->mode, local->umask, local->fd, local->params);
++
++ return 0;
++err:
++ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
++ NULL);
++ return 0;
++}
++
++gf_boolean_t
++tier_is_hot_tier_decommissioned(xlator_t *this)
++{
++ dht_conf_t *conf = NULL;
++ xlator_t *hot_tier = NULL;
++ int i = 0;
++
++ conf = this->private;
++ hot_tier = conf->subvolumes[1];
++
++ if (conf->decommission_subvols_cnt) {
++ for (i = 0; i < conf->subvolume_cnt; i++) {
++ if (conf->decommissioned_bricks[i] &&
++ conf->decommissioned_bricks[i] == hot_tier)
++ return _gf_true;
++ }
++ }
++
++ return _gf_false;
++}
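++
++/* Note: by convention in this xlator the cold (hashed) tier is
++ * conf->subvolumes[0] and the hot (unhashed) tier is
++ * conf->subvolumes[1]; the TIER_HASHED_SUBVOL and
++ * TIER_UNHASHED_SUBVOL macros encode this layout. */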
++
++int
++tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
++{
++ int op_errno = -1;
++ dht_local_t *local = NULL;
++ dht_conf_t *conf = NULL;
++ xlator_t *hot_subvol = NULL;
++ xlator_t *cold_subvol = NULL;
++
++ VALIDATE_OR_GOTO(frame, err);
++ VALIDATE_OR_GOTO(this, err);
++ VALIDATE_OR_GOTO(loc, err);
++
++ conf = this->private;
++
++ dht_get_du_info(frame, this, loc);
++
++ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
++ if (!local) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ cold_subvol = TIER_HASHED_SUBVOL;
++ hot_subvol = TIER_UNHASHED_SUBVOL;
++
++ if (conf->subvolumes[0] != cold_subvol) {
++ hot_subvol = conf->subvolumes[0];
++ }
++ /*
++ * If the hot tier is full or being decommissioned, create the
++ * file directly on the cold tier. Otherwise create the linkto
++ * file on the cold (hashed) tier first; the data file is then
++ * created on the hot tier from tier_create_linkfile_create_cbk().
++ */
++ if (dht_is_subvol_filled(this, hot_subvol) ||
++ tier_is_hot_tier_decommissioned(this)) {
++ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
++ cold_subvol->name);
++
++ STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol,
++ cold_subvol->fops->create, loc, flags, mode, umask,
++ fd, params);
++ } else {
++ local->params = dict_ref(params);
++ local->flags = flags;
++ local->mode = mode;
++ local->umask = umask;
++ local->cached_subvol = hot_subvol;
++ local->hashed_subvol = cold_subvol;
++
++ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path,
++ hot_subvol->name, cold_subvol->name);
++
++ dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this,
++ hot_subvol, cold_subvol, loc);
++
++ goto out;
++ }
++out:
++ return 0;
++
++err:
++
++ op_errno = (op_errno == -1) ? errno : op_errno;
++ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
++ NULL);
++
++ return 0;
++}
++
++int
++tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int op_ret, int op_errno,
++ struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata)
++{
++ dht_local_t *local = NULL;
++ xlator_t *prev = NULL;
++
++ local = frame->local;
++ prev = cookie;
++
++ LOCK(&frame->lock);
++ {
++ if ((op_ret == -1) && (op_errno != ENOENT)) {
++ local->op_errno = op_errno;
++ local->op_ret = op_ret;
++ gf_msg_debug(this->name, op_errno,
++ "Unlink link: subvolume %s"
++ " returned -1",
++ prev->name);
++ goto unlock;
++ }
++
++ local->op_ret = 0;
++ }
++unlock:
++ UNLOCK(&frame->lock);
++
++ if (local->op_ret == -1)
++ goto err;
++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
++ &local->preparent, &local->postparent, NULL);
++
++ return 0;
++
++err:
++ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
++ return 0;
++}
++
++int
++tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int op_ret, int op_errno, inode_t *inode,
++ struct iatt *stbuf, dict_t *xdata,
++ struct iatt *postparent)
++{
++ dht_local_t *local = NULL;
++ xlator_t *prev = NULL;
++ dht_conf_t *conf = NULL;
++ xlator_t *hot_subvol = NULL;
++
++ local = frame->local;
++ prev = cookie;
++ conf = this->private;
++ hot_subvol = TIER_UNHASHED_SUBVOL;
++
++ if (!op_ret) {
++ /*
++ * linkfile present on hot tier. unlinking the linkfile
++ */
++ STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol,
++ hot_subvol, hot_subvol->fops->unlink, &local->loc,
++ local->flags, NULL);
++ return 0;
++ }
++
++ LOCK(&frame->lock);
++ {
++ if (op_errno == ENOENT) {
++ local->op_ret = 0;
++ local->op_errno = op_errno;
++ } else {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ }
++ gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1",
++ prev->name);
++ }
++
++ UNLOCK(&frame->lock);
++
++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
++ &local->preparent, &local->postparent, xdata);
++
++ return 0;
++}
++
++int
++tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int op_ret, int op_errno, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata)
++{
++ dht_local_t *local = NULL;
++ xlator_t *prev = NULL;
++
++ local = frame->local;
++ prev = cookie;
++
++ LOCK(&frame->lock);
++ {
++ /* Also ignore EINVAL, so that tier does not treat the file
++ being absent on the other tier as an error */
++ if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) {
++ local->op_errno = op_errno;
++ local->op_ret = op_ret;
++ gf_msg_debug(this->name, op_errno,
++ "Unlink link: subvolume %s"
++ " returned -1",
++ prev->name);
++ goto unlock;
++ }
++
++ local->op_ret = 0;
++ }
++unlock:
++ UNLOCK(&frame->lock);
++
++ if (local->op_ret == -1)
++ goto err;
++
++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
++ &local->preparent, &local->postparent, xdata);
++
++ return 0;
++
++err:
++ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
++ return 0;
++}
++
++int32_t
++tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, struct iatt *preparent, struct iatt *postparent,
++ dict_t *xdata)
++{
++ dht_local_t *local = NULL;
++ xlator_t *prev = NULL;
++ struct iatt *stbuf = NULL;
++ dht_conf_t *conf = NULL;
++ int ret = -1;
++ xlator_t *hot_tier = NULL;
++ xlator_t *cold_tier = NULL;
++
++ local = frame->local;
++ prev = cookie;
++ conf = this->private;
++
++ cold_tier = TIER_HASHED_SUBVOL;
++ hot_tier = TIER_UNHASHED_SUBVOL;
++
++ LOCK(&frame->lock);
++ {
++ if (op_ret == -1) {
++ if (op_errno == ENOENT) {
++ local->op_ret = 0;
++ } else {
++ local->op_ret = -1;
++ local->op_errno = op_errno;
++ }
++ gf_msg_debug(this->name, op_errno,
++ "Unlink: subvolume %s returned -1"
++ " with errno = %d",
++ prev->name, op_errno);
++ goto unlock;
++ }
++
++ local->op_ret = 0;
++
++ local->postparent = *postparent;
++ local->preparent = *preparent;
++
++ if (local->loc.parent) {
++ dht_inode_ctx_time_update(local->loc.parent, this,
++ &local->preparent, 0);
++ dht_inode_ctx_time_update(local->loc.parent, this,
++ &local->postparent, 1);
++ }
++ }
++unlock:
++ UNLOCK(&frame->lock);
++
++ if (local->op_ret)
++ goto out;
++
++ if (cold_tier != local->cached_subvol) {
++ /*
++ * The file is present on the hot tier, so there is
++ * a linkfile on the cold tier; delete the linkfile
++ * from the cold tier.
++ */
++ STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier,
++ cold_tier->fops->unlink, &local->loc, local->flags,
++ xdata);
++ return 0;
++ }
++
++ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
++ if (!ret && stbuf &&
++ ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) {
++ /*
++ * The file is migrating from the cold to the hot tier.
++ * Look up the destination on the hot tier and delete
++ * the linkfile there.
++ */
++ STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier,
++ hot_tier->fops->lookup, &local->loc, NULL);
++ return 0;
++ }
++
++out:
++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
++ &local->preparent, &local->postparent, xdata);
++
++ return 0;
++}
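++
++/* Unlink flow: tier_unlink() removes the data file from its cached
++ * subvol first. If the file lived on the hot tier, the cold-tier
++ * linkfile is removed next (tier_unlink_linkfile_cbk); if it was being
++ * promoted, the in-flight destination on the hot tier is looked up and
++ * removed (tier_unlink_lookup_cbk / tier_unlink_nonhashed_linkfile_cbk).
++ */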
++
++int
++tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
++ dict_t *xdata)
++{
++ xlator_t *cached_subvol = NULL;
++ xlator_t *hashed_subvol = NULL;
++ dht_conf_t *conf = NULL;
++ int op_errno = -1;
++ dht_local_t *local = NULL;
++ int ret = -1;
++
++ VALIDATE_OR_GOTO(frame, err);
++ VALIDATE_OR_GOTO(this, err);
++ VALIDATE_OR_GOTO(loc, err);
++
++ conf = this->private;
++
++ local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
++ if (!local) {
++ op_errno = ENOMEM;
++
++ goto err;
++ }
++
++ hashed_subvol = TIER_HASHED_SUBVOL;
++
++ cached_subvol = local->cached_subvol;
++ if (!cached_subvol) {
++ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
++ loc->path);
++ op_errno = EINVAL;
++ goto err;
++ }
++
++ local->flags = xflag;
++
++ /* Take our own reference on xdata for the duration of this call;
++ it is dropped after the STACK_WIND below. Without this, the
++ unconditional dict_unref() at the end would drop a reference
++ that this function never took. */
++ xdata = xdata ? dict_ref(xdata) : NULL;
++
++ if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) {
++ /*
++ * File resides in cold tier. We need to stat
++ * the file to see if it is being promoted.
++ * If yes we need to delete the destination
++ * file as well.
++ *
++ * Currently we are doing this check only for
++ * regular files.
++ */
++ if (!xdata)
++ xdata = dict_new();
++ if (xdata) {
++ ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
++ if (ret) {
++ gf_msg_debug(this->name, 0, "Failed to set dictionary key %s",
++ DHT_IATT_IN_XDATA_KEY);
++ }
++ }
++ }
++
++ /*
++ * Unlink the data file on its cached subvol first. If the
++ * file lives on the hot tier, the linkfile on the cold tier
++ * is removed from the callback.
++ */
++ STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol,
++ cached_subvol->fops->unlink, loc, xflag, xdata);
++ if (xdata)
++ dict_unref(xdata);
++ return 0;
++err:
++ op_errno = (op_errno == -1) ? errno : op_errno;
++ DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
++
++ return 0;
++}
++
++int
++tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
++{
++ gf_dirent_t entries;
++ gf_dirent_t *orig_entry = NULL;
++ gf_dirent_t *entry = NULL;
++ int count = 0;
++
++ INIT_LIST_HEAD(&entries.list);
++
++ if (op_ret < 0)
++ goto unwind;
++
++ list_for_each_entry(orig_entry, (&orig_entries->list), list)
++ {
++ entry = gf_dirent_for_name(orig_entry->d_name);
++ if (!entry) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
++ "Memory allocation failed ");
++ goto unwind;
++ }
++
++ entry->d_off = orig_entry->d_off;
++ entry->d_ino = orig_entry->d_ino;
++ entry->d_type = orig_entry->d_type;
++ entry->d_len = orig_entry->d_len;
++
++ list_add_tail(&entry->list, &entries.list);
++ count++;
++ }
++ op_ret = count;
++
++unwind:
++ if (op_ret < 0)
++ op_ret = 0;
++
++ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
++
++ gf_dirent_free(&entries);
++
++ return 0;
++}
++
++int
++tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
++{
++ dht_local_t *local = NULL;
++ gf_dirent_t entries;
++ gf_dirent_t *orig_entry = NULL;
++ gf_dirent_t *entry = NULL;
++ xlator_t *prev = NULL;
++ xlator_t *next_subvol = NULL;
++ off_t next_offset = 0;
++ int count = 0;
++ dht_conf_t *conf = NULL;
++ int ret = 0;
++ inode_table_t *itable = NULL;
++ inode_t *inode = NULL;
++
++ INIT_LIST_HEAD(&entries.list);
++ prev = cookie;
++ local = frame->local;
++ itable = local->fd ? local->fd->inode->table : NULL;
++
++ conf = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
++
++ if (op_ret < 0)
++ goto done;
++
++ list_for_each_entry(orig_entry, (&orig_entries->list), list)
++ {
++ next_offset = orig_entry->d_off;
++
++ if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
++ /* stat failed somewhere - ignore this entry */
++ continue;
++ }
++
++ entry = gf_dirent_for_name(orig_entry->d_name);
++ if (!entry) {
++ goto unwind;
++ }
++
++ entry->d_off = orig_entry->d_off;
++ entry->d_stat = orig_entry->d_stat;
++ entry->d_ino = orig_entry->d_ino;
++ entry->d_type = orig_entry->d_type;
++ entry->d_len = orig_entry->d_len;
++
++ if (orig_entry->dict)
++ entry->dict = dict_ref(orig_entry->dict);
++
++ if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
++ conf->link_xattr_name)) {
++ goto entries;
++
++ } else if (IA_ISDIR(entry->d_stat.ia_type)) {
++ if (orig_entry->inode) {
++ dht_inode_ctx_time_update(orig_entry->inode, this,
++ &entry->d_stat, 1);
++ }
++ } else {
++ if (orig_entry->inode) {
++ ret = dht_layout_preset(this, prev, orig_entry->inode);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0,
++ DHT_MSG_LAYOUT_SET_FAILED,
++ "failed to link the layout "
++ "in inode");
++
++ entry->inode = inode_ref(orig_entry->inode);
++ } else if (itable) {
++ /*
++ * orig_entry->inode might be NULL if an upper-layer
++ * xlator below the client set it to NULL, to force
++ * a lookup on the inode even when the inode is
++ * present in the inode table. In that case we just
++ * update the ctx to make sure we didn't miss
++ * anything.
++ */
++ inode = inode_find(itable, orig_entry->d_stat.ia_gfid);
++ if (inode) {
++ ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0,
++ DHT_MSG_LAYOUT_SET_FAILED,
++ "failed to link the layout"
++ " in inode");
++ inode_unref(inode);
++ inode = NULL;
++ }
++ }
++ }
++
++ entries:
++ list_add_tail(&entry->list, &entries.list);
++ count++;
++ }
++ op_ret = count;
++
++done:
++ if (count == 0) {
++ /* non-zero next_offset means that
++ EOF is not yet hit on the current subvol
++ */
++ if (next_offset != 0) {
++ next_subvol = prev;
++ } else {
++ goto unwind;
++ }
++
++ STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol,
++ next_subvol->fops->readdirp, local->fd, local->size,
++ next_offset, local->xattr);
++ return 0;
++ }
++
++unwind:
++ if (op_ret < 0)
++ op_ret = 0;
++
++ DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
++
++ gf_dirent_free(&entries);
++
++ return 0;
++}
++
++int
++tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t yoff, int whichop, dict_t *dict)
++{
++ dht_local_t *local = NULL;
++ int op_errno = -1;
++ xlator_t *hashed_subvol = NULL;
++ int ret = 0;
++ dht_conf_t *conf = NULL;
++
++ VALIDATE_OR_GOTO(frame, err);
++ VALIDATE_OR_GOTO(this, err);
++ VALIDATE_OR_GOTO(fd, err);
++ VALIDATE_OR_GOTO(this->private, err);
++
++ conf = this->private;
++
++ local = dht_local_init(frame, NULL, NULL, whichop);
++ if (!local) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ local->fd = fd_ref(fd);
++ local->size = size;
++ local->xattr_req = (dict) ? dict_ref(dict) : NULL;
++
++ hashed_subvol = TIER_HASHED_SUBVOL;
++
++ /* TODO: do proper readdir */
++ if (whichop == GF_FOP_READDIRP) {
++ if (dict)
++ local->xattr = dict_ref(dict);
++ else
++ local->xattr = dict_new();
++
++ if (local->xattr) {
++ ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
++ "Failed to set dictionary value"
++ " : key = %s",
++ conf->link_xattr_name);
++ }
++
++ STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol,
++ hashed_subvol, hashed_subvol->fops->readdirp, fd,
++ size, yoff, local->xattr);
++
++ } else {
++ STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol,
++ hashed_subvol->fops->readdir, fd, size, yoff,
++ local->xattr);
++ }
++
++ return 0;
++
++err:
++ op_errno = (op_errno == -1) ? errno : op_errno;
++ DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
++
++ return 0;
++}
++
++int
++tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t yoff, dict_t *xdata)
++{
++ int op = GF_FOP_READDIR;
++ dht_conf_t *conf = NULL;
++ int i = 0;
++
++ conf = this->private;
++ if (!conf)
++ goto out;
++
++ for (i = 0; i < conf->subvolume_cnt; i++) {
++ if (!conf->subvolume_status[i]) {
++ op = GF_FOP_READDIRP;
++ break;
++ }
++ }
++
++ if (conf->use_readdirp)
++ op = GF_FOP_READDIRP;
++
++out:
++ tier_do_readdir(frame, this, fd, size, yoff, op, NULL);
++ return 0;
++}
++
++int
++tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t yoff, dict_t *dict)
++{
++ tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
++ return 0;
++}
++
++int
++tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, struct statvfs *statvfs, dict_t *xdata)
++{
++ gf_boolean_t event = _gf_false;
++ qdstatfs_action_t action = qdstatfs_action_OFF;
++ dht_local_t *local = NULL;
++ int this_call_cnt = 0;
++ int bsize = 0;
++ int frsize = 0;
++ GF_UNUSED int ret = 0;
++ unsigned long new_usage = 0;
++ unsigned long cur_usage = 0;
++ xlator_t *prev = NULL;
++ dht_conf_t *conf = NULL;
++ tier_statvfs_t *tier_stat = NULL;
++
++ prev = cookie;
++ local = frame->local;
++ GF_ASSERT(local);
++
++ conf = this->private;
++
++ if (xdata)
++ ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
++
++ tier_stat = &local->tier_statvfs;
++
++ LOCK(&frame->lock);
++ {
++ if (op_ret == -1) {
++ local->op_errno = op_errno;
++ goto unlock;
++ }
++ if (!statvfs) {
++ local->op_errno = EINVAL;
++ local->op_ret = -1;
++ goto unlock;
++ }
++ local->op_ret = 0;
++
++ if (local->quota_deem_statfs) {
++ if (event == _gf_true) {
++ action = qdstatfs_action_COMPARE;
++ } else {
++ action = qdstatfs_action_NEGLECT;
++ }
++ } else {
++ if (event == _gf_true) {
++ action = qdstatfs_action_REPLACE;
++ local->quota_deem_statfs = _gf_true;
++ }
++ }
++
++ if (local->quota_deem_statfs) {
++ switch (action) {
++ case qdstatfs_action_NEGLECT:
++ goto unlock;
++
++ case qdstatfs_action_REPLACE:
++ local->statvfs = *statvfs;
++ goto unlock;
++
++ case qdstatfs_action_COMPARE:
++ new_usage = statvfs->f_blocks - statvfs->f_bfree;
++ cur_usage = local->statvfs.f_blocks -
++ local->statvfs.f_bfree;
++
++ /* Take the max of the usage from subvols */
++ if (new_usage >= cur_usage)
++ local->statvfs = *statvfs;
++ goto unlock;
++
++ default:
++ break;
++ }
++ }
++
++ if (local->statvfs.f_bsize != 0) {
++ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
++ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
++ dht_normalize_stats(&local->statvfs, bsize, frsize);
++ dht_normalize_stats(statvfs, bsize, frsize);
++ } else {
++ local->statvfs.f_bsize = statvfs->f_bsize;
++ local->statvfs.f_frsize = statvfs->f_frsize;
++ }
++
++ if (prev == TIER_HASHED_SUBVOL) {
++ local->statvfs.f_blocks = statvfs->f_blocks;
++ local->statvfs.f_files = statvfs->f_files;
++ local->statvfs.f_fsid = statvfs->f_fsid;
++ local->statvfs.f_flag = statvfs->f_flag;
++ local->statvfs.f_namemax = statvfs->f_namemax;
++ tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree);
++ tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail);
++ tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree);
++ tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail);
++ tier_stat->hashed_fsid = statvfs->f_fsid;
++ } else {
++ tier_stat->unhashed_fsid = statvfs->f_fsid;
++ tier_stat->unhashed_blocks_used = (statvfs->f_blocks -
++ statvfs->f_bfree);
++ tier_stat->unhashed_pblocks_used = (statvfs->f_blocks -
++ statvfs->f_bavail);
++ tier_stat->unhashed_files_used = (statvfs->f_files -
++ statvfs->f_ffree);
++ tier_stat->unhashed_pfiles_used = (statvfs->f_files -
++ statvfs->f_favail);
++ }
++ }
++unlock:
++ UNLOCK(&frame->lock);
++
++ this_call_cnt = dht_frame_return(frame);
++ if (is_last_call(this_call_cnt)) {
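++ /* Usage from the two tiers is summed only when they report
++ different fsids; identical fsids mean both subvolumes sit on
++ the same local filesystem, so adding both would double-count
++ the same usage. */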
++ if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) {
++ tier_stat->blocks_used += tier_stat->unhashed_blocks_used;
++ tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used;
++ tier_stat->files_used += tier_stat->unhashed_files_used;
++ tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used;
++ }
++ local->statvfs.f_bfree = local->statvfs.f_blocks -
++ tier_stat->blocks_used;
++ local->statvfs.f_bavail = local->statvfs.f_blocks -
++ tier_stat->pblocks_used;
++ local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used;
++ local->statvfs.f_favail = local->statvfs.f_files -
++ tier_stat->pfiles_used;
++ DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
++ &local->statvfs, xdata);
++ }
++
++ return 0;
++}
++
++int
++tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
++{
++ dht_local_t *local = NULL;
++ dht_conf_t *conf = NULL;
++ int op_errno = -1;
++ int i = -1;
++ inode_t *inode = NULL;
++ inode_table_t *itable = NULL;
++ uuid_t root_gfid = {
++ 0,
++ };
++ loc_t newloc = {
++ 0,
++ };
++
++ VALIDATE_OR_GOTO(frame, err);
++ VALIDATE_OR_GOTO(this, err);
++ VALIDATE_OR_GOTO(loc, err);
++ VALIDATE_OR_GOTO(this->private, err);
++
++ conf = this->private;
++
++ local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
++ if (!local) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
++ itable = loc->inode->table;
++ if (!itable) {
++ op_errno = EINVAL;
++ goto err;
++ }
++
++ root_gfid[15] = 1;
++
++ inode = inode_find(itable, root_gfid);
++ if (!inode) {
++ op_errno = EINVAL;
++ goto err;
++ }
++
++ dht_build_root_loc(inode, &newloc);
++ loc = &newloc;
++ }
++
++ local->call_cnt = conf->subvolume_cnt;
++
++ for (i = 0; i < conf->subvolume_cnt; i++) {
++ STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i],
++ conf->subvolumes[i],
++ conf->subvolumes[i]->fops->statfs, loc, xdata);
++ }
++
++ return 0;
++
++err:
++ op_errno = (op_errno == -1) ? errno : op_errno;
++ DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
++
++ return 0;
++}
+diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h
+new file mode 100644
+index 0000000..b1ebaa8
+--- /dev/null
++++ b/xlators/cluster/dht/src/tier-common.h
+@@ -0,0 +1,55 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef _TIER_COMMON_H_
++#define _TIER_COMMON_H_
++/* Function declarations */
++int
++tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int op_ret, int op_errno,
++ struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata);
++
++int
++tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
++
++int
++tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode,
++ struct iatt *stbuf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata);
++
++int
++tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++ mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
++
++int
++tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
++ dict_t *xdata);
++
++int
++tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t off, dict_t *dict);
++
++int
++tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t yoff, dict_t *xdata);
++
++int
++tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
++ dict_t *xdata);
++
++int
++tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
++
++#endif
+diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
+new file mode 100644
+index 0000000..94b4c63
+--- /dev/null
++++ b/xlators/cluster/dht/src/tier.c
+@@ -0,0 +1,3105 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include <dlfcn.h>
++
++#include "dht-common.h"
++#include "tier.h"
++#include "tier-common.h"
++#include <glusterfs/syscall.h>
++#include <glusterfs/events.h>
++#include "tier-ctr-interface.h"
++
++/*Hard coded DB info*/
++static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
++/*Hard coded DB info*/
++
++/*Mutex for updating the data movement stats*/
++static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
++
++/* Stores the path location of promotion query files */
++static char *promotion_qfile;
++/* Stores the path location of demotion query files */
++static char *demotion_qfile;
++
++static void *libhandle;
++static gfdb_methods_t gfdb_methods;
++
++#define DB_QUERY_RECORD_SIZE 4096
++
++/*
++ * Closes all the fds and frees the qfile_array
++ */
++static void
++qfile_array_free(tier_qfile_array_t *qfile_array)
++{
++ ssize_t i = 0;
++
++ if (qfile_array) {
++ if (qfile_array->fd_array) {
++ for (i = 0; i < qfile_array->array_size; i++) {
++ if (qfile_array->fd_array[i] != -1) {
++ sys_close(qfile_array->fd_array[i]);
++ }
++ }
++ }
++ GF_FREE(qfile_array->fd_array);
++ }
++ GF_FREE(qfile_array);
++}
++
++/* Create a new query file list with given size */
++static tier_qfile_array_t *
++qfile_array_new(ssize_t array_size)
++{
++ int ret = -1;
++ tier_qfile_array_t *qfile_array = NULL;
++ ssize_t i = 0;
++
++ GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out);
++
++ qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t),
++ gf_tier_mt_qfile_array_t);
++ if (!qfile_array) {
++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to allocate memory for tier_qfile_array_t");
++ goto out;
++ }
++
++ qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int),
++ gf_dht_mt_int32_t);
++ if (!qfile_array->fd_array) {
++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to allocate memory for "
++ "tier_qfile_array_t->fd_array");
++ goto out;
++ }
++
++ /* Init all the fds to -1 */
++ for (i = 0; i < array_size; i++) {
++ qfile_array->fd_array[i] = -1;
++ }
++
++ qfile_array->array_size = array_size;
++ qfile_array->next_index = 0;
++
++ /* Set exhausted count to list size as the list is empty */
++ qfile_array->exhausted_count = qfile_array->array_size;
++
++ ret = 0;
++out:
++ if (ret) {
++ qfile_array_free(qfile_array);
++ qfile_array = NULL;
++ }
++ return qfile_array;
++}
++
++/* Checks if the query file list is empty or totally exhausted. */
++static gf_boolean_t
++is_qfile_array_empty(tier_qfile_array_t *qfile_array)
++{
++ return (qfile_array->exhausted_count == qfile_array->array_size)
++ ? _gf_true
++ : _gf_false;
++}
++
++/* Shifts the next_fd pointer to the next available fd in the list */
++static void
++shift_next_index(tier_qfile_array_t *qfile_array)
++{
++ int qfile_fd = 0;
++ int spin_count = 0;
++
++ if (is_qfile_array_empty(qfile_array)) {
++ return;
++ }
++
++ do {
++ /* advance next_index in a rotational (wrap-around) manner */
++ if (qfile_array->next_index == (qfile_array->array_size - 1))
++ qfile_array->next_index = 0;
++ else
++ qfile_array->next_index++;
++
++ qfile_fd = (qfile_array->fd_array[qfile_array->next_index]);
++
++ spin_count++;
++
++ } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size));
++}
++
++/*
++ * This is a non-thread-safe function that reads query records
++ * from a list of query files in a round-robin manner.
++ * As and when the query files get exhausted, they are closed.
++ * Returns:
++ * 0 if all the query records in all the query files of the list are
++ * exhausted.
++ * > 0 if a query record is successfully read. Indicates the size of the query
++ * record read.
++ * < 0 if there was a failure.
++ */
++static int
++read_query_record_list(tier_qfile_array_t *qfile_array,
++ gfdb_query_record_t **query_record)
++{
++ int ret = -1;
++ int qfile_fd = 0;
++
++ GF_VALIDATE_OR_GOTO("tier", qfile_array, out);
++ GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out);
++
++ do {
++ if (is_qfile_array_empty(qfile_array)) {
++ ret = 0;
++ break;
++ }
++
++ qfile_fd = qfile_array->fd_array[qfile_array->next_index];
++ ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record);
++ if (ret <= 0) {
++ /*The qfile_fd has reached EOF or
++ * there was an error.
++ * 1. Close the exhausted fd
++ * 2. increment the exhausted count
++ * 3. shift next_qfile to next qfile
++ **/
++ sys_close(qfile_fd);
++ qfile_array->fd_array[qfile_array->next_index] = -1;
++ qfile_array->exhausted_count++;
++ /* shift next_qfile to next qfile */
++ shift_next_index(qfile_array);
++ continue;
++ } else {
++ /* shift next_qfile to next qfile */
++ shift_next_index(qfile_array);
++ break;
++ }
++ } while (1);
++out:
++ return ret;
++}
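++
++/* A minimal usage sketch, mirroring the migration loop in
++ * tier_migrate_using_query_file() below:
++ *
++ * gfdb_query_record_t *rec = NULL;
++ * while ((ret = read_query_record_list(qfile_array, &rec)) != 0) {
++ * if (ret < 0)
++ * break; // read failure
++ * // ... process one record ...
++ * gfdb_methods.gfdb_query_record_free(rec);
++ * rec = NULL;
++ * }
++ * // ret == 0 here means every query file in the list is exhausted
++ */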
++
++/* Check and update the watermark every WM_INTERVAL seconds */
++#define WM_INTERVAL 5
++#define WM_INTERVAL_EMERG 1
++
++static int
++tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
++{
++ int ret = -1;
++ dict_t *dict = NULL;
++ char *uuid_str = NULL;
++ uuid_t node_uuid = {
++ 0,
++ };
++
++ GF_VALIDATE_OR_GOTO("tier", this, out);
++ GF_VALIDATE_OR_GOTO(this->name, loc, out);
++ GF_VALIDATE_OR_GOTO(this->name, defrag, out);
++
++ if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path);
++ goto out;
++ }
++
++ if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to get node-uuids for %s", loc->path);
++ goto out;
++ }
++
++ if (gf_uuid_parse(uuid_str, node_uuid)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "uuid_parse failed for %s", loc->path);
++ goto out;
++ }
++
++ if (gf_uuid_compare(node_uuid, defrag->node_uuid)) {
++ gf_msg_debug(this->name, 0, "%s does not belong to this node",
++ loc->path);
++ ret = 1;
++ goto out;
++ }
++
++ ret = 0;
++out:
++ if (dict)
++ dict_unref(dict);
++
++ return ret;
++}
++
++int
++tier_get_fs_stat(xlator_t *this, loc_t *root_loc)
++{
++ int ret = 0;
++ gf_defrag_info_t *defrag = NULL;
++ dht_conf_t *conf = NULL;
++ dict_t *xdata = NULL;
++ struct statvfs statfs = {
++ 0,
++ };
++ gf_tier_conf_t *tier_conf = NULL;
++
++ conf = this->private;
++ if (!conf) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
++ "conf is NULL");
++ ret = -1;
++ goto exit;
++ }
++
++ defrag = conf->defrag;
++ if (!defrag) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
++ "defrag is NULL");
++ ret = -1;
++ goto exit;
++ }
++
++ tier_conf = &defrag->tier_conf;
++
++ xdata = dict_new();
++ if (!xdata) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
++ "failed to allocate dictionary");
++ ret = -1;
++ goto exit;
++ }
++
++ ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
++ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
++ ret = -1;
++ goto exit;
++ }
++
++ /* Find how much free space is on the hot subvolume. Then see if
++ * that value is less than or greater than the user-defined
++ * watermarks. Stash the results in the tier_conf data structure. */
++
++ ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS,
++ "Unable to obtain statfs.");
++ goto exit;
++ }
++
++ pthread_mutex_lock(&dm_stat_mutex);
++
++ tier_conf->block_size = statfs.f_bsize;
++ tier_conf->blocks_total = statfs.f_blocks;
++ tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree;
++
++ tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used,
++ statfs.f_blocks);
++ pthread_mutex_unlock(&dm_stat_mutex);
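++
++ /* Worked example with illustrative numbers: f_blocks = 1000 and
++ f_bfree = 300 give blocks_used = 700 and percent_full = 70, which
++ tier_check_watermark() compares against the configured low/hi
++ watermarks. */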
++
++exit:
++ if (xdata)
++ dict_unref(xdata);
++ return ret;
++}
++
++static void
++tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm,
++ tier_watermark_op_t new_wm)
++{
++ if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) {
++ if (new_wm == TIER_WM_MID) {
++ gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname);
++ } else if (new_wm == TIER_WM_HI) {
++ gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
++ }
++ } else if (old_wm == TIER_WM_MID) {
++ if (new_wm == TIER_WM_LOW) {
++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
++ } else if (new_wm == TIER_WM_HI) {
++ gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
++ }
++ } else if (old_wm == TIER_WM_HI) {
++ if (new_wm == TIER_WM_MID) {
++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname);
++ } else if (new_wm == TIER_WM_LOW) {
++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
++ }
++ }
++}
++
++int
++tier_check_watermark(xlator_t *this)
++{
++ int ret = -1;
++ gf_defrag_info_t *defrag = NULL;
++ dht_conf_t *conf = NULL;
++ gf_tier_conf_t *tier_conf = NULL;
++ tier_watermark_op_t wm = TIER_WM_NONE;
++
++ conf = this->private;
++ if (!conf)
++ goto exit;
++
++ defrag = conf->defrag;
++ if (!defrag)
++ goto exit;
++
++ tier_conf = &defrag->tier_conf;
++
++ if (tier_conf->percent_full < tier_conf->watermark_low) {
++ wm = TIER_WM_LOW;
++
++ } else if (tier_conf->percent_full < tier_conf->watermark_hi) {
++ wm = TIER_WM_MID;
++
++ } else {
++ wm = TIER_WM_HI;
++ }
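++
++ /* For example, with hypothetical watermarks low = 75 and hi = 90:
++ 60% full maps to TIER_WM_LOW, 80% to TIER_WM_MID and 95% to
++ TIER_WM_HI. */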
++
++ if (wm != tier_conf->watermark_last) {
++ tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last,
++ wm);
++
++ tier_conf->watermark_last = wm;
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Tier watermark now %d", wm);
++ }
++
++ ret = 0;
++
++exit:
++ return ret;
++}
++
++static gf_boolean_t
++is_hot_tier_full(gf_tier_conf_t *tier_conf)
++{
++ if (tier_conf && (tier_conf->mode == TIER_MODE_WM) &&
++ (tier_conf->watermark_last == TIER_WM_HI))
++ return _gf_true;
++
++ return _gf_false;
++}
++
++int
++tier_do_migration(xlator_t *this, int promote)
++{
++ gf_defrag_info_t *defrag = NULL;
++ dht_conf_t *conf = NULL;
++ long rand = 0;
++ int migrate = 0;
++ gf_tier_conf_t *tier_conf = NULL;
++
++ conf = this->private;
++ if (!conf)
++ goto exit;
++
++ defrag = conf->defrag;
++ if (!defrag)
++ goto exit;
++
++ if (tier_check_watermark(this) != 0) {
++ gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to get watermark");
++ goto exit;
++ }
++
++ tier_conf = &defrag->tier_conf;
++
++ switch (tier_conf->watermark_last) {
++ case TIER_WM_LOW:
++ migrate = promote ? 1 : 0;
++ break;
++ case TIER_WM_HI:
++ migrate = promote ? 0 : 1;
++ break;
++ case TIER_WM_MID:
++ /* coverity[DC.WEAK_CRYPTO] */
++ rand = random() % 100;
++ if (promote) {
++ migrate = (rand > tier_conf->percent_full);
++ } else {
++ migrate = (rand <= tier_conf->percent_full);
++ }
++ break;
++ }
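++
++ /* The MID band is probabilistic: at, say, 70% full a promotion
++ proceeds when rand > 70 (roughly a 30% chance) while a demotion
++ proceeds when rand <= 70 (roughly 70%), biasing data movement
++ toward the cold tier as the hot tier fills. */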
++
++exit:
++ return migrate;
++}
++
++int
++tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc,
++ gf_tier_conf_t *tier_conf)
++{
++ int ret = -1;
++
++ pthread_mutex_lock(&tier_conf->pause_mutex);
++ if (is_promotion)
++ tier_conf->promote_in_progress = 1;
++ else
++ tier_conf->demote_in_progress = 1;
++ pthread_mutex_unlock(&tier_conf->pause_mutex);
++
++ /* Data migration */
++ ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL);
++
++ pthread_mutex_lock(&tier_conf->pause_mutex);
++ if (is_promotion)
++ tier_conf->promote_in_progress = 0;
++ else
++ tier_conf->demote_in_progress = 0;
++ pthread_mutex_unlock(&tier_conf->pause_mutex);
++
++ return ret;
++}
++
++/* returns _gf_true: if file can be promoted
++ * returns _gf_false: if file cannot be promoted
++ */
++static gf_boolean_t
++tier_can_promote_file(xlator_t *this, char const *file_name,
++ struct iatt *current, gf_defrag_info_t *defrag)
++{
++ gf_boolean_t ret = _gf_false;
++ fsblkcnt_t estimated_usage = 0;
++
++ if (defrag->tier_conf.tier_max_promote_size &&
++ (current->ia_size > defrag->tier_conf.tier_max_promote_size)) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "File %s (gfid:%s) with size (%" PRIu64
++ ") exceeds maxsize "
++ "(%d) for promotion. File will not be promoted.",
++ file_name, uuid_utoa(current->ia_gfid), current->ia_size,
++ defrag->tier_conf.tier_max_promote_size);
++ goto err;
++ }
++
++ /* bypass further validations for TEST mode */
++ if (defrag->tier_conf.mode != TIER_MODE_WM) {
++ ret = _gf_true;
++ goto err;
++ }
++
++ /* convert the file size to blocks as per the block size of the
++ * destination tier
++ * NOTE: adding (block_size - 1) before dividing rounds the
++ * result up, i.e. this is a ceiling division
++ */
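++ /* e.g. ia_size = 6000 bytes and block_size = 4096:
++ (6000 + 4095) / 4096 = 2 blocks, added on top of the hot
++ tier's current blocks_used. */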
++ estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) /
++ defrag->tier_conf.block_size) +
++ defrag->tier_conf.blocks_used;
++
++ /* test if the estimated block usage goes above HI watermark */
++ if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >=
++ defrag->tier_conf.watermark_hi) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Estimated block count consumption on "
++ "hot tier (%" PRIu64
++ ") exceeds hi watermark (%d%%). "
++ "File will not be promoted.",
++ estimated_usage, defrag->tier_conf.watermark_hi);
++ goto err;
++ }
++ ret = _gf_true;
++err:
++ return ret;
++}
++
++static int
++tier_set_migrate_data(dict_t *migrate_data)
++{
++ int failed = 1;
++
++ failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force");
++ if (failed) {
++ goto bail_out;
++ }
++
++ /* Flag to suggest the xattr call is from migrator */
++ failed = dict_set_str(migrate_data, "from.migrator", "yes");
++ if (failed) {
++ goto bail_out;
++ }
++
++ /* Flag to suggest it is a tiering migration.
++ * The reason for this dict key-value is that
++ * promotions and demotions are multithreaded,
++ * so the original frame from gf_defrag_start()
++ * is not carried. A new frame will be created when
++ * we do syncop_setxattr(). This does not have the
++ * frame->root->pid of the original frame. So we pass
++ * this dict key-value when we do syncop_setxattr() to do
++ * data migration, and set the frame->root->pid to
++ * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
++ * calling dht_start_rebalance_task() */
++ failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes");
++ if (failed) {
++ goto bail_out;
++ }
++
++ failed = 0;
++
++bail_out:
++ return failed;
++}
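++
++/* After a successful call, migrate_data carries three entries:
++ *
++ * GF_XATTR_FILE_MIGRATE_KEY = "force"
++ * "from.migrator" = "yes"
++ * TIERING_MIGRATION_KEY = "yes"
++ *
++ * tier_migrate() later hands this dict to syncop_setxattr() to trigger
++ * the actual data movement. */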
++
++static char *
++tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf,
++ int *per_link_status)
++{
++ int ret = -1;
++ char *parent_path = NULL;
++ dict_t *xdata_request = NULL;
++ dict_t *xdata_response = NULL;
++
++ xdata_request = dict_new();
++ if (!xdata_request) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to create xdata_request dict");
++ goto err;
++ }
++ ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to set value to dict : key %s \n",
++ GET_ANCESTRY_PATH_KEY);
++ goto err;
++ }
++
++ ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request,
++ &xdata_response);
++ /* When the parent gfid is a stale entry, the lookup
++ * will fail and stop the demotion process.
++ * The parent gfid can be stale when a huge folder is
++ * deleted while the files within it are being migrated
++ */
++ if (ret == -ESTALE) {
++ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
++ "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid));
++ *per_link_status = 1;
++ goto err;
++ } else if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
++ "Error in parent lookup for %s", uuid_utoa(p_loc->gfid));
++ *per_link_status = -1;
++ goto err;
++ }
++ ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path);
++ if (ret || !parent_path) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to get parent path for %s", uuid_utoa(p_loc->gfid));
++ *per_link_status = -1;
++ goto err;
++ }
++
++err:
++ if (xdata_request) {
++ dict_unref(xdata_request);
++ }
++
++ if (xdata_response) {
++ dict_unref(xdata_response);
++ xdata_response = NULL;
++ }
++
++ return parent_path;
++}
++
++static int
++tier_get_file_name_and_path(xlator_t *this, uuid_t gfid,
++ gfdb_link_info_t *link_info,
++ char const *parent_path, loc_t *loc,
++ int *per_link_status)
++{
++ int ret = -1;
++
++ loc->name = gf_strdup(link_info->file_name);
++ if (!loc->name) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Memory "
++ "allocation failed for %s",
++ uuid_utoa(gfid));
++ *per_link_status = -1;
++ goto err;
++ }
++ ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to "
++ "construct file path for %s %s\n",
++ parent_path, loc->name);
++ *per_link_status = -1;
++ goto err;
++ }
++
++ ret = 0;
++
++err:
++ return ret;
++}
++
++static int
++tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current,
++ int *per_link_status)
++{
++ int ret = -1;
++
++ ret = syncop_lookup(this, loc, current, NULL, NULL, NULL);
++
++ /* The file may have been deleted even though the parent
++ * is available, in which case the lookup returns a stale
++ * entry that would stop the migration. So if it is a stale
++ * entry, skip the file and keep migrating.
++ */
++ if (ret == -ESTALE) {
++ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
++ "Stale lookup for %s", uuid_utoa(p_loc->gfid));
++ *per_link_status = 1;
++ goto err;
++ } else if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to "
++ "lookup file %s\n",
++ loc->name);
++ *per_link_status = -1;
++ goto err;
++ }
++ ret = 0;
++
++err:
++ return ret;
++}
++
++static gf_boolean_t
++tier_is_file_already_at_destination(xlator_t *src_subvol,
++ query_cbk_args_t *query_cbk_args,
++ dht_conf_t *conf, int *per_link_status)
++{
++ gf_boolean_t at_destination = _gf_true;
++
++ if (src_subvol == NULL) {
++ *per_link_status = 1;
++ goto err;
++ }
++ if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) {
++ *per_link_status = 1;
++ goto err;
++ }
++
++ if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) {
++ *per_link_status = 1;
++ goto err;
++ }
++ at_destination = _gf_false;
++
++err:
++ return at_destination;
++}
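++
++/* In short: a promotion candidate already on conf->subvolumes[1] (hot
++ * tier), or a demotion candidate already on conf->subvolumes[0] (cold
++ * tier), is treated as already at its destination and skipped, with
++ * *per_link_status set to 1 so it is not counted as a failure. */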
++
++static void
++tier_update_migration_counters(query_cbk_args_t *query_cbk_args,
++ gf_defrag_info_t *defrag,
++ uint64_t *total_migrated_bytes, int *total_files)
++{
++ if (query_cbk_args->is_promotion) {
++ defrag->total_files_promoted++;
++ *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size;
++ pthread_mutex_lock(&dm_stat_mutex);
++ defrag->tier_conf.blocks_used += defrag->tier_conf
++ .st_last_promoted_size;
++ pthread_mutex_unlock(&dm_stat_mutex);
++ } else {
++ defrag->total_files_demoted++;
++ *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size;
++ pthread_mutex_lock(&dm_stat_mutex);
++ defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size;
++ pthread_mutex_unlock(&dm_stat_mutex);
++ }
++ if (defrag->tier_conf.blocks_total) {
++ pthread_mutex_lock(&dm_stat_mutex);
++ defrag->tier_conf.percent_full = GF_PERCENTAGE(
++ defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total);
++ pthread_mutex_unlock(&dm_stat_mutex);
++ }
++
++ (*total_files)++;
++}
++
++static int
++tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid,
++ gfdb_link_info_t *link_info, gf_defrag_info_t *defrag,
++ query_cbk_args_t *query_cbk_args, dict_t *migrate_data,
++ int *per_link_status, int *total_files,
++ uint64_t *total_migrated_bytes)
++{
++ int ret = -1;
++ struct iatt current = {
++ 0,
++ };
++ struct iatt par_stbuf = {
++ 0,
++ };
++ loc_t p_loc = {
++ 0,
++ };
++ loc_t loc = {
++ 0,
++ };
++ xlator_t *src_subvol = NULL;
++ inode_t *linked_inode = NULL;
++ char *parent_path = NULL;
++
++ /* Lookup for parent and get the path of parent */
++ gf_uuid_copy(p_loc.gfid, link_info->pargfid);
++ p_loc.inode = inode_new(defrag->root_inode->table);
++ if (!p_loc.inode) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to create reference to inode"
++ " for %s",
++ uuid_utoa(p_loc.gfid));
++
++ *per_link_status = -1;
++ goto err;
++ }
++
++ parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf,
++ per_link_status);
++ if (!parent_path) {
++ goto err;
++ }
++
++ linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf);
++ inode_unref(p_loc.inode);
++ p_loc.inode = linked_inode;
++
++ /* Preparing File Inode */
++ gf_uuid_copy(loc.gfid, gfid);
++ loc.inode = inode_new(defrag->root_inode->table);
++ gf_uuid_copy(loc.pargfid, link_info->pargfid);
++ loc.parent = inode_ref(p_loc.inode);
++
++ /* Get filename and Construct file path */
++ if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc,
++ per_link_status) != 0) {
++ goto err;
++ }
++ gf_uuid_copy(loc.parent->gfid, link_info->pargfid);
++
++ /* lookup file inode */
++ if (tier_lookup_file(this, &p_loc, &loc, &current, per_link_status) != 0) {
++ goto err;
++ }
++
++ if (query_cbk_args->is_promotion) {
++ if (!tier_can_promote_file(this, link_info->file_name, &current,
++ defrag)) {
++ *per_link_status = 1;
++ goto err;
++ }
++ }
++
++ linked_inode = inode_link(loc.inode, NULL, NULL, &current);
++ inode_unref(loc.inode);
++ loc.inode = linked_inode;
++
++ /*
++ * Do not promote/demote if the file is already where it
++ * should be. It means another brick moved the file, so
++ * this is not an error. We set per_link_status = 1 so
++ * that this file is not counted for migration.
++ */
++ src_subvol = dht_subvol_get_cached(this, loc.inode);
++
++ if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf,
++ per_link_status)) {
++ goto err;
++ }
++
++ gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s",
++ (query_cbk_args->is_promotion ? "promote" : "demote"),
++ src_subvol->name, loc.path);
++
++ ret = tier_check_same_node(this, &loc, defrag);
++ if (ret != 0) {
++ if (ret < 0) {
++ *per_link_status = -1;
++ goto err;
++ }
++ ret = 0;
++ /* By setting per_link_status to 1 we are
++ * ignoring this status and will not be counting
++ * this file for migration */
++ *per_link_status = 1;
++ goto err;
++ }
++
++ gf_uuid_copy(loc.gfid, loc.inode->gfid);
++
++ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Tiering paused. "
++ "Exiting tier_migrate_link");
++ goto err;
++ }
++
++ ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc,
++ &defrag->tier_conf);
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to "
++ "migrate %s ",
++ loc.path);
++ *per_link_status = -1;
++ goto err;
++ }
++
++ tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes,
++ total_files);
++
++ ret = 0;
++
++err:
++ GF_FREE((char *)loc.name);
++ loc.name = NULL;
++ loc_wipe(&loc);
++ loc_wipe(&p_loc);
++
++ if ((*total_files >= defrag->tier_conf.max_migrate_files) ||
++ (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Reached cycle migration limit."
++ "migrated bytes %" PRId64 " files %d",
++ *total_migrated_bytes, *total_files);
++ ret = -1;
++ }
++
++ return ret;
++}
++
++static int
++tier_migrate_using_query_file(void *_args)
++{
++ int ret = -1;
++ query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args;
++ xlator_t *this = NULL;
++ gf_defrag_info_t *defrag = NULL;
++ gfdb_query_record_t *query_record = NULL;
++ gfdb_link_info_t *link_info = NULL;
++ dict_t *migrate_data = NULL;
++ /*
++ * per_file_status and per_link_status
++ * 0 : success
++ * -1 : failure
++ * 1 : ignore the status and don't count for migration
++ * */
++ int per_file_status = 0;
++ int per_link_status = 0;
++ int total_status = 0;
++ dht_conf_t *conf = NULL;
++ uint64_t total_migrated_bytes = 0;
++ int total_files = 0;
++ loc_t root_loc = {0};
++ gfdb_time_t start_time = {0};
++ gfdb_time_t current_time = {0};
++ int total_time = 0;
++ int max_time = 0;
++ gf_boolean_t emergency_demote_mode = _gf_false;
++
++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
++ this = query_cbk_args->this;
++ GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
++ GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out);
++ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
++
++ conf = this->private;
++
++ defrag = query_cbk_args->defrag;
++ migrate_data = dict_new();
++ if (!migrate_data)
++ goto out;
++
++ emergency_demote_mode = (!query_cbk_args->is_promotion &&
++ is_hot_tier_full(&defrag->tier_conf));
++
++ if (tier_set_migrate_data(migrate_data) != 0) {
++ goto out;
++ }
++
++ dht_build_root_loc(defrag->root_inode, &root_loc);
++
++ ret = gettimeofday(&start_time, NULL);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
++ "Could not get start time.");
++ goto out;
++ }
++ if (query_cbk_args->is_promotion) {
++ max_time = defrag->tier_conf.tier_promote_frequency;
++ } else {
++ max_time = defrag->tier_conf.tier_demote_frequency;
++ }
++
++ /* Per file */
++ while ((ret = read_query_record_list(query_cbk_args->qfile_array,
++ &query_record)) != 0) {
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to fetch query record "
++ "from query file");
++ goto out;
++ }
++
++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
++ ret = -1;
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Exiting tier migration as"
++ "defrag status is not started");
++ goto out;
++ }
++
++ ret = gettimeofday(&current_time, NULL);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Could not get current time.");
++ goto out;
++ }
++
++ total_time = current_time.tv_sec - start_time.tv_sec;
++ if (total_time > max_time) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Max cycle time reached. Exiting migration.");
++ goto out;
++ }
++
++ per_file_status = 0;
++ per_link_status = 0;
++
++ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Tiering paused. "
++ "Exiting tier_migrate_using_query_file");
++ break;
++ }
++
++ if (defrag->tier_conf.mode == TIER_MODE_WM) {
++ ret = tier_get_fs_stat(this, &root_loc);
++ if (ret != 0) {
++ gfdb_methods.gfdb_query_record_free(query_record);
++ query_record = NULL;
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
++ "tier_get_fs_stat() FAILED ... "
++ "skipping file migrations until next cycle");
++ break;
++ }
++
++ if (!tier_do_migration(this, query_cbk_args->is_promotion)) {
++ gfdb_methods.gfdb_query_record_free(query_record);
++ query_record = NULL;
++
++ /* We have crossed the high watermark. Stop processing
++ * files if this is a promotion cycle so demotion gets
++ * a chance to start if not already running*/
++
++ if (query_cbk_args->is_promotion &&
++ is_hot_tier_full(&defrag->tier_conf)) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "High watermark crossed during "
++ "promotion. Exiting "
++ "tier_migrate_using_query_file");
++ break;
++ }
++ continue;
++ }
++ }
++
++ per_link_status = 0;
++
++ /* For now we only support single-link migration, and we ignore
++ * the other hard links in the link info list of the query record.
++ * TODO: migrate multiple hard links */
++ if (!list_empty(&query_record->link_list)) {
++ link_info = list_first_entry(&query_record->link_list,
++ gfdb_link_info_t, list);
++ }
++ if (link_info != NULL) {
++ if (tier_migrate_link(this, conf, query_record->gfid, link_info,
++ defrag, query_cbk_args, migrate_data,
++ &per_link_status, &total_files,
++ &total_migrated_bytes) != 0) {
++ gf_msg(
++ this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "%s failed for %s(gfid:%s)",
++ (query_cbk_args->is_promotion ? "Promotion" : "Demotion"),
++ link_info->file_name, uuid_utoa(query_record->gfid));
++ }
++ }
++ per_file_status = per_link_status;
++
++ if (per_file_status < 0) { /* Failure */
++ pthread_mutex_lock(&dm_stat_mutex);
++ defrag->total_failures++;
++ pthread_mutex_unlock(&dm_stat_mutex);
++ } else if (per_file_status == 0) { /* Success */
++ pthread_mutex_lock(&dm_stat_mutex);
++ defrag->total_files++;
++ pthread_mutex_unlock(&dm_stat_mutex);
++ } else if (per_file_status == 1) { /* Ignore */
++ per_file_status = 0;
++ /* Since this attempt was ignored we
++ * decrement the lookup count*/
++ pthread_mutex_lock(&dm_stat_mutex);
++ defrag->num_files_lookedup--;
++ pthread_mutex_unlock(&dm_stat_mutex);
++ }
++ total_status = total_status + per_file_status;
++ per_link_status = 0;
++ per_file_status = 0;
++
++ gfdb_methods.gfdb_query_record_free(query_record);
++ query_record = NULL;
++
++ /* If we are demoting and this cycle started at the HI
++ * watermark, we are done with emergency demotion once the
++ * current watermark has fallen below the hi-watermark level
++ */
++ if (emergency_demote_mode) {
++ if (tier_check_watermark(this) == 0) {
++ if (!is_hot_tier_full(&defrag->tier_conf)) {
++ break;
++ }
++ }
++ }
++ }
++
++out:
++ if (migrate_data)
++ dict_unref(migrate_data);
++
++ gfdb_methods.gfdb_query_record_free(query_record);
++ query_record = NULL;
++
++ return total_status;
++}
++
++/* This is the call back function per record/file from data base */
++static int
++tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args)
++{
++ int ret = -1;
++ query_cbk_args_t *query_cbk_args = _args;
++
++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out);
++ GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out);
++
++ ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd,
++ gfdb_query_record);
++ if (ret) {
++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed writing query record to query file");
++ goto out;
++ }
++
++ pthread_mutex_lock(&dm_stat_mutex);
++ query_cbk_args->defrag->num_files_lookedup++;
++ pthread_mutex_unlock(&dm_stat_mutex);
++
++ ret = 0;
++out:
++ return ret;
++}
++
++/* Create query file in tier process */
++static int
++tier_process_self_query(tier_brick_list_t *local_brick, void *args)
++{
++ int ret = -1;
++ char *db_path = NULL;
++ query_cbk_args_t *query_cbk_args = NULL;
++ xlator_t *this = NULL;
++ gfdb_conn_node_t *conn_node = NULL;
++ dict_t *params_dict = NULL;
++ dict_t *ctr_ipc_dict = NULL;
++ gfdb_brick_info_t *gfdb_brick_info = args;
++
++ /*Init of all the essentials*/
++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
++ query_cbk_args = gfdb_brick_info->_query_cbk_args;
++
++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
++ this = query_cbk_args->this;
++
++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
++
++ db_path = local_brick->brick_db_path;
++
++    /* Prepare the DB parameters before init_db, i.e. before getting the db connection */
++ params_dict = dict_new();
++ if (!params_dict) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++               "DB params could not be initialized");
++ goto out;
++ }
++ SET_DB_PARAM_TO_DICT(this->name, params_dict,
++ (char *)gfdb_methods.get_db_path_key(), db_path, ret,
++ out);
++
++ /*Get the db connection*/
++ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
++ if (!conn_node) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "FATAL: Failed initializing db operations");
++ goto out;
++ }
++
++ /* Query for eligible files from db */
++ query_cbk_args->query_fd = open(local_brick->qfile_path,
++ O_WRONLY | O_CREAT | O_APPEND,
++ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
++ if (query_cbk_args->query_fd < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to open query file %s", local_brick->qfile_path);
++ goto out;
++ }
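++    /* Three query strategies follow: on an emergency demotion cycle
++     * (hot tier above the hi watermark) every record is fetched, capped
++     * at query_limit; otherwise, when both freq thresholds are 0, files
++     * are selected on change time alone, else the frequency-based
++     * query variants are used. */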
++ if (!gfdb_brick_info->_gfdb_promote) {
++ if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) {
++ /* emergency demotion mode */
++ ret = gfdb_methods.find_all(
++ conn_node, tier_gf_query_callback, (void *)query_cbk_args,
++ query_cbk_args->defrag->tier_conf.query_limit);
++ } else {
++ if (query_cbk_args->defrag->write_freq_threshold == 0 &&
++ query_cbk_args->defrag->read_freq_threshold == 0) {
++ ret = gfdb_methods.find_unchanged_for_time(
++ conn_node, tier_gf_query_callback, (void *)query_cbk_args,
++ gfdb_brick_info->time_stamp);
++ } else {
++ ret = gfdb_methods.find_unchanged_for_time_freq(
++ conn_node, tier_gf_query_callback, (void *)query_cbk_args,
++ gfdb_brick_info->time_stamp,
++ query_cbk_args->defrag->write_freq_threshold,
++ query_cbk_args->defrag->read_freq_threshold, _gf_false);
++ }
++ }
++ } else {
++ if (query_cbk_args->defrag->write_freq_threshold == 0 &&
++ query_cbk_args->defrag->read_freq_threshold == 0) {
++ ret = gfdb_methods.find_recently_changed_files(
++ conn_node, tier_gf_query_callback, (void *)query_cbk_args,
++ gfdb_brick_info->time_stamp);
++ } else {
++ ret = gfdb_methods.find_recently_changed_files_freq(
++ conn_node, tier_gf_query_callback, (void *)query_cbk_args,
++ gfdb_brick_info->time_stamp,
++ query_cbk_args->defrag->write_freq_threshold,
++ query_cbk_args->defrag->read_freq_threshold, _gf_false);
++ }
++ }
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "FATAL: query from db failed");
++ goto out;
++ }
++
++ /*Clear the heat on the DB entries*/
++ /*Preparing ctr_ipc_dict*/
++ ctr_ipc_dict = dict_new();
++ if (!ctr_ipc_dict) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++               "ctr_ipc_dict could not be initialized");
++ goto out;
++ }
++
++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
++ GFDB_IPC_CTR_CLEAR_OPS, ret, out);
++
++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
++ NULL);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed clearing the heat "
++ "on db %s error %d",
++ local_brick->brick_db_path, ret);
++ goto out;
++ }
++
++ ret = 0;
++out:
++ if (params_dict) {
++ dict_unref(params_dict);
++ params_dict = NULL;
++ }
++
++ if (ctr_ipc_dict) {
++ dict_unref(ctr_ipc_dict);
++ ctr_ipc_dict = NULL;
++ }
++
++ if (query_cbk_args && query_cbk_args->query_fd >= 0) {
++ sys_close(query_cbk_args->query_fd);
++ query_cbk_args->query_fd = -1;
++ }
++ gfdb_methods.fini_db(conn_node);
++
++ return ret;
++}
++
++/*Ask CTR to create the query file*/
++static int
++tier_process_ctr_query(tier_brick_list_t *local_brick, void *args)
++{
++ int ret = -1;
++ query_cbk_args_t *query_cbk_args = NULL;
++ xlator_t *this = NULL;
++ dict_t *ctr_ipc_in_dict = NULL;
++ dict_t *ctr_ipc_out_dict = NULL;
++ gfdb_brick_info_t *gfdb_brick_info = args;
++ gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
++ int count = 0;
++
++ /*Init of all the essentials*/
++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
++ query_cbk_args = gfdb_brick_info->_query_cbk_args;
++
++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
++ this = query_cbk_args->this;
++
++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
++
++ /*Preparing ctr_ipc_in_dict*/
++ ctr_ipc_in_dict = dict_new();
++ if (!ctr_ipc_in_dict) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++               "ctr_ipc_in_dict could not be initialized");
++ goto out;
++ }
++
++ ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t),
++ gf_tier_mt_ipc_ctr_params_t);
++ if (!ipc_ctr_params) {
++ goto out;
++ }
++
++ /* set all the query params*/
++ ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote;
++
++ ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag
++ ->write_freq_threshold;
++
++ ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag
++ ->read_freq_threshold;
++
++ ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit;
++
++ ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote &&
++ query_cbk_args->defrag->tier_conf
++ .watermark_last == TIER_WM_HI);
++
++ memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp,
++ sizeof(gfdb_time_t));
++
++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
++ GFDB_IPC_CTR_QUERY_OPS, ret, out);
++
++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
++ GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path,
++ ret, out);
++
++ ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
++ ipc_ctr_params, sizeof(*ipc_ctr_params));
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
++ "Failed setting %s to params dictionary",
++ GFDB_IPC_CTR_GET_QUERY_PARAMS);
++ GF_FREE(ipc_ctr_params);
++ goto out;
++ }
++ ipc_ctr_params = NULL;
++
++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict,
++ &ctr_ipc_out_dict);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR,
++ "Failed query on %s ret %d", local_brick->brick_db_path, ret);
++ goto out;
++ }
++
++ ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT,
++ &count);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed getting count "
++ "of records on %s",
++ local_brick->brick_db_path);
++ goto out;
++ }
++
++ if (count < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed query on %s", local_brick->brick_db_path);
++ ret = -1;
++ goto out;
++ }
++
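++    /* Unlike the self-query path, which increments num_files_lookedup
++     * once per record in tier_gf_query_callback, here the whole count
++     * comes back at once from the brick-side CTR query. */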
++ pthread_mutex_lock(&dm_stat_mutex);
++ query_cbk_args->defrag->num_files_lookedup = count;
++ pthread_mutex_unlock(&dm_stat_mutex);
++
++ ret = 0;
++out:
++
++ if (ctr_ipc_in_dict) {
++ dict_unref(ctr_ipc_in_dict);
++ ctr_ipc_in_dict = NULL;
++ }
++
++ if (ctr_ipc_out_dict) {
++ dict_unref(ctr_ipc_out_dict);
++ ctr_ipc_out_dict = NULL;
++ }
++
++ GF_FREE(ipc_ctr_params);
++
++ return ret;
++}
++
++/* This is the callback function for each brick in the hot/cold brick list.
++ * It picks up each brick's db and queries it for files eligible for
++ * migration. The eligible files are written to the appropriate query files. */
++static int
++tier_process_brick(tier_brick_list_t *local_brick, void *args)
++{
++ int ret = -1;
++ dict_t *ctr_ipc_in_dict = NULL;
++ dict_t *ctr_ipc_out_dict = NULL;
++ char *strval = NULL;
++
++ GF_VALIDATE_OR_GOTO("tier", local_brick, out);
++
++ GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
++
++ if (dht_tier_db_type == GFDB_SQLITE3) {
++ /*Preparing ctr_ipc_in_dict*/
++ ctr_ipc_in_dict = dict_new();
++ if (!ctr_ipc_in_dict) {
++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++                   "ctr_ipc_in_dict could not be initialized");
++ goto out;
++ }
++
++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
++ GFDB_IPC_CTR_GET_DB_PARAM_OPS);
++ if (ret) {
++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
++ "Failed to set %s "
++ "to params dictionary",
++ GFDB_IPC_CTR_KEY);
++ goto out;
++ }
++
++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, "");
++ if (ret) {
++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
++ "Failed to set %s "
++ "to params dictionary",
++ GFDB_IPC_CTR_GET_DB_PARAM_OPS);
++ goto out;
++ }
++
++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY,
++ "journal_mode");
++ if (ret) {
++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
++ "Failed to set %s "
++ "to params dictionary",
++ GFDB_IPC_CTR_GET_DB_KEY);
++ goto out;
++ }
++
++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR,
++ ctr_ipc_in_dict, &ctr_ipc_out_dict);
++ if (ret || ctr_ipc_out_dict == NULL) {
++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to get "
++ "journal_mode of sql db %s",
++ local_brick->brick_db_path);
++ goto out;
++ }
++
++ ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval);
++ if (ret) {
++            gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED,
++                   "Failed to get journal_mode "
++                   "from params dictionary");
++ goto out;
++ }
++
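++        /* If the brick db reports SQLite WAL journal mode, query it
++         * directly from this process; otherwise delegate the query to
++         * the CTR xlator inside the brick process (presumably because
++         * WAL is the journal mode that safely allows a concurrent
++         * external reader). */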
++ if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) {
++ ret = tier_process_self_query(local_brick, args);
++ if (ret) {
++ goto out;
++ }
++ } else {
++ ret = tier_process_ctr_query(local_brick, args);
++ if (ret) {
++ goto out;
++ }
++ }
++ ret = 0;
++
++ } else {
++ ret = tier_process_self_query(local_brick, args);
++ if (ret) {
++ goto out;
++ }
++ }
++
++ ret = 0;
++out:
++ if (ctr_ipc_in_dict)
++ dict_unref(ctr_ipc_in_dict);
++
++ if (ctr_ipc_out_dict)
++ dict_unref(ctr_ipc_out_dict);
++
++ return ret;
++}
++
++static int
++tier_build_migration_qfile(migration_args_t *args,
++ query_cbk_args_t *query_cbk_args,
++ gf_boolean_t is_promotion)
++{
++ gfdb_time_t current_time;
++ gfdb_brick_info_t gfdb_brick_info;
++ gfdb_time_t time_in_past;
++ int ret = -1;
++ tier_brick_list_t *local_brick = NULL;
++ int i = 0;
++ time_in_past.tv_sec = args->freq_time;
++ time_in_past.tv_usec = 0;
++
++ ret = gettimeofday(&current_time, NULL);
++ if (ret == -1) {
++ gf_msg(args->this->name, GF_LOG_ERROR, errno,
++ DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
++ goto out;
++ }
++ time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
++
++    /* The migration daemon may run a varying number of usec after the */
++ /* sleep call triggers. A file may be registered in CTR some number */
++ /* of usec X after the daemon started and missed in the subsequent */
++ /* cycle if the daemon starts Y usec after the period in seconds */
++ /* where Y>X. Normalize away this problem by always setting usec */
++ /* to 0. */
++ time_in_past.tv_usec = 0;
++
++ gfdb_brick_info.time_stamp = &time_in_past;
++ gfdb_brick_info._gfdb_promote = is_promotion;
++ gfdb_brick_info._query_cbk_args = query_cbk_args;
++
++ list_for_each_entry(local_brick, args->brick_list, list)
++ {
++        /* Construct the query file path for this brick, i.e.
++         * /var/run/gluster/xlator_name/
++         * {promote/demote}-brickname-indexinbricklist,
++         * so that no two query files have the same path even if
++         * bricks share the same name.
++         */
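++        /* For example, for a hypothetical volume "tv1" with a brick
++         * named "brick1", the first demotion query file would be
++         * /var/run/gluster/tv1-tier-dht/demote-brick1-0 (the
++         * promote/demote prefixes are built in tier_init). */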
++ snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
++ GET_QFILE_PATH(gfdb_brick_info._gfdb_promote),
++ local_brick->brick_name, i);
++
++ /* Delete any old query files for this brick */
++ sys_unlink(local_brick->qfile_path);
++
++ ret = tier_process_brick(local_brick, &gfdb_brick_info);
++ if (ret) {
++ gf_msg(args->this->name, GF_LOG_ERROR, 0,
++ DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n",
++ local_brick->brick_db_path);
++ }
++ i++;
++ }
++ ret = 0;
++out:
++ return ret;
++}
++
++static int
++tier_migrate_files_using_qfile(migration_args_t *comp,
++ query_cbk_args_t *query_cbk_args)
++{
++ int ret = -1;
++ tier_brick_list_t *local_brick = NULL;
++ tier_brick_list_t *temp = NULL;
++ gfdb_time_t current_time = {
++ 0,
++ };
++ ssize_t qfile_array_size = 0;
++ int count = 0;
++ int temp_fd = 0;
++ gf_tier_conf_t *tier_conf = NULL;
++
++ tier_conf = &(query_cbk_args->defrag->tier_conf);
++
++ /* Time for error query files */
++ gettimeofday(&current_time, NULL);
++
++ /* Build the qfile list */
++ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
++ {
++ qfile_array_size++;
++ }
++ query_cbk_args->qfile_array = qfile_array_new(qfile_array_size);
++ if (!query_cbk_args->qfile_array) {
++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to create new "
++ "qfile_array");
++ goto out;
++ }
++
++ /*Open all qfiles*/
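++    /* A failed open is tolerated below: the fd stays negative and
++     * exhausted_count is bumped, so migration proceeds with whatever
++     * query files did open. */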
++ count = 0;
++ query_cbk_args->qfile_array->exhausted_count = 0;
++ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
++ {
++ temp_fd = query_cbk_args->qfile_array->fd_array[count];
++ temp_fd = open(local_brick->qfile_path, O_RDONLY,
++ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
++ if (temp_fd < 0) {
++            gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
++                   "Failed to open query file "
++                   "%s",
++                   local_brick->qfile_path);
++ query_cbk_args->qfile_array->exhausted_count++;
++ }
++ query_cbk_args->qfile_array->fd_array[count] = temp_fd;
++ count++;
++ }
++
++    /* Move the query file index along, so that we do not pick the same
++     * query file first every cycle */
++ query_cbk_args->qfile_array
++ ->next_index = (query_cbk_args->is_promotion)
++ ? tier_conf->last_promote_qfile_index
++ : tier_conf->last_demote_qfile_index;
++ shift_next_index(query_cbk_args->qfile_array);
++ if (query_cbk_args->is_promotion) {
++ tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array
++ ->next_index;
++ } else {
++ tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array
++ ->next_index;
++ }
++
++ /* Migrate files using query file list */
++ ret = tier_migrate_using_query_file((void *)query_cbk_args);
++out:
++ qfile_array_free(query_cbk_args->qfile_array);
++
++ /* If there is an error rename all the query files to .err files
++ * with a timestamp for better debugging */
++ if (ret) {
++ struct tm tm = {
++ 0,
++ };
++ char time_str[128] = {
++ 0,
++ };
++ char query_file_path_err[PATH_MAX] = {
++ 0,
++ };
++ int32_t len = 0;
++
++ /* Time format for error query files */
++ gmtime_r(&current_time.tv_sec, &tm);
++ strftime(time_str, sizeof(time_str), "%F-%T", &tm);
++
++ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
++ {
++ /* rename error qfile*/
++ len = snprintf(query_file_path_err, sizeof(query_file_path_err),
++ "%s-%s.err", local_brick->qfile_path, time_str);
++ if ((len >= 0) && (len < sizeof(query_file_path_err))) {
++ if (sys_rename(local_brick->qfile_path, query_file_path_err) ==
++ -1)
++ gf_msg_debug("tier", 0,
++ "rename "
++ "failed");
++ }
++ }
++ }
++
++ query_cbk_args->qfile_array = NULL;
++
++ return ret;
++}
++
++int
++tier_demote(migration_args_t *demotion_args)
++{
++ query_cbk_args_t query_cbk_args;
++ int ret = -1;
++
++ GF_VALIDATE_OR_GOTO("tier", demotion_args, out);
++ GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out);
++ GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list,
++ out);
++ GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out);
++
++ THIS = demotion_args->this;
++
++ query_cbk_args.this = demotion_args->this;
++ query_cbk_args.defrag = demotion_args->defrag;
++ query_cbk_args.is_promotion = 0;
++
++ /*Build the query file using bricklist*/
++ ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false);
++ if (ret)
++ goto out;
++
++ /* Migrate files using the query file */
++ ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args);
++ if (ret)
++ goto out;
++
++out:
++ demotion_args->return_value = ret;
++ return ret;
++}
++
++int
++tier_promote(migration_args_t *promotion_args)
++{
++ int ret = -1;
++ query_cbk_args_t query_cbk_args;
++
++    GF_VALIDATE_OR_GOTO("tier", promotion_args, out);
++    GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out);
++ GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list,
++ out);
++ GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag,
++ out);
++
++ THIS = promotion_args->this;
++
++ query_cbk_args.this = promotion_args->this;
++ query_cbk_args.defrag = promotion_args->defrag;
++ query_cbk_args.is_promotion = 1;
++
++ /*Build the query file using bricklist*/
++ ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true);
++ if (ret)
++ goto out;
++
++ /* Migrate files using the query file */
++ ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args);
++ if (ret)
++ goto out;
++
++out:
++ promotion_args->return_value = ret;
++ return ret;
++}
++
++/*
++ * Command the CTR on a brick to compact the local database using an IPC
++ */
++static int
++tier_process_self_compact(tier_brick_list_t *local_brick, void *args)
++{
++ int ret = -1;
++ char *db_path = NULL;
++ query_cbk_args_t *query_cbk_args = NULL;
++ xlator_t *this = NULL;
++ gfdb_conn_node_t *conn_node = NULL;
++ dict_t *params_dict = NULL;
++ dict_t *ctr_ipc_dict = NULL;
++ gfdb_brick_info_t *gfdb_brick_info = args;
++
++ /*Init of all the essentials*/
++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
++ query_cbk_args = gfdb_brick_info->_query_cbk_args;
++
++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
++ this = query_cbk_args->this;
++
++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
++
++ db_path = local_brick->brick_db_path;
++
++    /* Prepare the DB parameters before init_db, i.e. before getting the db connection */
++ params_dict = dict_new();
++ if (!params_dict) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++               "DB params could not be initialized");
++ goto out;
++ }
++ SET_DB_PARAM_TO_DICT(this->name, params_dict,
++ (char *)gfdb_methods.get_db_path_key(), db_path, ret,
++ out);
++
++ /*Get the db connection*/
++ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
++ if (!conn_node) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "FATAL: Failed initializing db operations");
++ goto out;
++ }
++
++ ret = 0;
++
++ /*Preparing ctr_ipc_dict*/
++ ctr_ipc_dict = dict_new();
++ if (!ctr_ipc_dict) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++               "ctr_ipc_dict could not be initialized");
++ goto out;
++ }
++
++ ret = dict_set_int32(ctr_ipc_dict, "compact_active",
++ query_cbk_args->defrag->tier_conf.compact_active);
++
++ if (ret) {
++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
++ "Failed to set %s "
++ "to params dictionary",
++ "compact_active");
++ goto out;
++ }
++
++ ret = dict_set_int32(
++ ctr_ipc_dict, "compact_mode_switched",
++ query_cbk_args->defrag->tier_conf.compact_mode_switched);
++
++ if (ret) {
++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
++ "Failed to set %s "
++ "to params dictionary",
++ "compact_mode_switched");
++ goto out;
++ }
++
++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
++ GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out);
++
++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Starting Compaction IPC");
++
++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
++ NULL);
++
++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Ending Compaction IPC");
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed compaction "
++ "on db %s error %d",
++ local_brick->brick_db_path, ret);
++ goto out;
++ }
++
++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
++ "SUCCESS: %s Compaction", local_brick->brick_name);
++
++ ret = 0;
++out:
++ if (params_dict) {
++ dict_unref(params_dict);
++ params_dict = NULL;
++ }
++
++ if (ctr_ipc_dict) {
++ dict_unref(ctr_ipc_dict);
++ ctr_ipc_dict = NULL;
++ }
++
++ gfdb_methods.fini_db(conn_node);
++
++ return ret;
++}
++
++/*
++ * This is the callback function for each brick in the hot/cold brick list.
++ * It determines the database type on each brick and calls the corresponding
++ * function to prepare the compaction IPC.
++ */
++static int
++tier_compact_db_brick(tier_brick_list_t *local_brick, void *args)
++{
++ int ret = -1;
++
++ GF_VALIDATE_OR_GOTO("tier", local_brick, out);
++
++ GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
++
++ ret = tier_process_self_compact(local_brick, args);
++ if (ret) {
++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Brick %s did not compact", local_brick->brick_name);
++ goto out;
++ }
++
++ ret = 0;
++
++out:
++
++ return ret;
++}
++
++static int
++tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args)
++{
++ gfdb_time_t current_time;
++ gfdb_brick_info_t gfdb_brick_info;
++ gfdb_time_t time_in_past;
++ int ret = -1;
++ tier_brick_list_t *local_brick = NULL;
++
++ time_in_past.tv_sec = args->freq_time;
++ time_in_past.tv_usec = 0;
++
++ ret = gettimeofday(&current_time, NULL);
++ if (ret == -1) {
++ gf_msg(args->this->name, GF_LOG_ERROR, errno,
++ DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
++ goto out;
++ }
++ time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
++
++    /* The migration daemon may run a varying number of usec after the sleep
++ call triggers. A file may be registered in CTR some number of usec X
++ after the daemon started and missed in the subsequent cycle if the
++ daemon starts Y usec after the period in seconds where Y>X. Normalize
++ away this problem by always setting usec to 0. */
++ time_in_past.tv_usec = 0;
++
++ gfdb_brick_info.time_stamp = &time_in_past;
++
++ /* This is meant to say we are always compacting at this point */
++ /* We simply borrow the promotion flag to do this */
++ gfdb_brick_info._gfdb_promote = 1;
++
++ gfdb_brick_info._query_cbk_args = query_cbk_args;
++
++ list_for_each_entry(local_brick, args->brick_list, list)
++ {
++ gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Start compaction for %s", local_brick->brick_name);
++
++ ret = tier_compact_db_brick(local_brick, &gfdb_brick_info);
++ if (ret) {
++ gf_msg(args->this->name, GF_LOG_ERROR, 0,
++ DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n",
++ local_brick->brick_db_path);
++ }
++
++ gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
++ "End compaction for %s", local_brick->brick_name);
++ }
++ ret = 0;
++out:
++ return ret;
++}
++
++static int
++tier_compact(void *args)
++{
++ int ret = -1;
++ query_cbk_args_t query_cbk_args;
++ migration_args_t *compaction_args = args;
++
++    GF_VALIDATE_OR_GOTO("tier", compaction_args, out);
++    GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out);
++ GF_VALIDATE_OR_GOTO(compaction_args->this->name,
++ compaction_args->brick_list, out);
++ GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag,
++ out);
++
++ THIS = compaction_args->this;
++
++ query_cbk_args.this = compaction_args->this;
++ query_cbk_args.defrag = compaction_args->defrag;
++ query_cbk_args.is_compaction = 1;
++
++ /* Send the compaction pragma out to all the bricks on the bricklist. */
++ /* tier_get_bricklist ensures all bricks on the list are local to */
++ /* this node. */
++ ret = tier_send_compact(compaction_args, &query_cbk_args);
++ if (ret)
++ goto out;
++
++ ret = 0;
++out:
++ compaction_args->return_value = ret;
++ return ret;
++}
++
++static int
++tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head)
++{
++ xlator_list_t *child = NULL;
++ char *rv = NULL;
++ char *rh = NULL;
++ char *brickname = NULL;
++ char db_name[PATH_MAX] = "";
++ int ret = 0;
++ tier_brick_list_t *local_brick = NULL;
++ int32_t len = 0;
++
++ GF_VALIDATE_OR_GOTO("tier", xl, out);
++ GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out);
++
++ /*
++     * This function obtains remote subvolumes and keeps only
++     * those running on the same node as the tier daemon.
++ */
++ if (strcmp(xl->type, "protocol/client") == 0) {
++ ret = dict_get_str(xl->options, "remote-host", &rh);
++ if (ret < 0)
++ goto out;
++
++ if (gf_is_local_addr(rh)) {
++ local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t),
++ gf_tier_mt_bricklist_t);
++ if (!local_brick) {
++ goto out;
++ }
++
++ ret = dict_get_str(xl->options, "remote-subvolume", &rv);
++ if (ret < 0)
++ goto out;
++
++ brickname = strrchr(rv, '/') + 1;
++ snprintf(db_name, sizeof(db_name), "%s.db", brickname);
++
++ local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char);
++ if (!local_brick->brick_db_path) {
++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Failed to allocate memory for"
++ " bricklist.");
++ ret = -1;
++ goto out;
++ }
++
++ len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv,
++ GF_HIDDEN_PATH, db_name);
++ if ((len < 0) || (len >= PATH_MAX)) {
++ gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS,
++ "DB path too long");
++ ret = -1;
++ goto out;
++ }
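++            /* e.g. for remote-subvolume /bricks/b1 this yields
++             * /bricks/b1/.glusterfs/b1.db, assuming GF_HIDDEN_PATH is
++             * the brick's hidden ".glusterfs" directory. */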
++
++ local_brick->xlator = xl;
++
++ snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname);
++
++ list_add_tail(&(local_brick->list), local_bricklist_head);
++
++ ret = 0;
++ goto out;
++ }
++ }
++
++ for (child = xl->children; child; child = child->next) {
++ ret = tier_get_bricklist(child->xlator, local_bricklist_head);
++ if (ret) {
++ goto out;
++ }
++ }
++
++ ret = 0;
++out:
++
++ if (ret) {
++ if (local_brick) {
++ GF_FREE(local_brick->brick_db_path);
++ }
++ GF_FREE(local_brick);
++ }
++
++ return ret;
++}
++
++int
++tier_get_freq_demote(gf_tier_conf_t *tier_conf)
++{
++ if ((tier_conf->mode == TIER_MODE_WM) &&
++ (tier_conf->watermark_last == TIER_WM_HI))
++ return DEFAULT_DEMOTE_DEGRADED;
++ else
++ return tier_conf->tier_demote_frequency;
++}
++
++int
++tier_get_freq_promote(gf_tier_conf_t *tier_conf)
++{
++ return tier_conf->tier_promote_frequency;
++}
++
++int
++tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf)
++{
++ return tier_conf->tier_compact_hot_frequency;
++}
++
++int
++tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf)
++{
++ return tier_conf->tier_compact_cold_frequency;
++}
++
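++/* With the default promote/demote frequency of 120 seconds (see tier.h),
++ * a node fires a migration cycle whenever current_time.tv_sec is a
++ * multiple of the frequency; since the trigger is wall-clock based, node
++ * clocks must stay in sync for cycles to line up cluster-wide (see the
++ * note in tier_run). */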
++static int
++tier_check_demote(gfdb_time_t current_time, int freq)
++{
++ return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
++}
++
++static gf_boolean_t
++tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
++ int freq)
++{
++ if ((tier_conf->mode == TIER_MODE_WM) &&
++ (tier_conf->watermark_last == TIER_WM_HI))
++ return _gf_false;
++
++ else
++ return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
++}
++
++static gf_boolean_t
++tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
++ int freq_compact)
++{
++ if (!(tier_conf->compact_active || tier_conf->compact_mode_switched))
++ return _gf_false;
++
++ return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false;
++}
++
++void
++clear_bricklist(struct list_head *brick_list)
++{
++ tier_brick_list_t *local_brick = NULL;
++ tier_brick_list_t *temp = NULL;
++
++ if (list_empty(brick_list)) {
++ return;
++ }
++
++ list_for_each_entry_safe(local_brick, temp, brick_list, list)
++ {
++ list_del(&local_brick->list);
++ GF_FREE(local_brick->brick_db_path);
++ GF_FREE(local_brick);
++ }
++}
++
++static void
++set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold)
++{
++ tier_brick_list_t *local_brick = NULL;
++ int i = 0;
++
++ GF_VALIDATE_OR_GOTO("tier", brick_list, out);
++
++ list_for_each_entry(local_brick, brick_list, list)
++ {
++        /* Construct the query file path for this brick, i.e.
++         * /var/run/gluster/xlator_name/
++         * {promote/demote}-brickname-indexinbricklist,
++         * so that no two query files have the same path even if
++         * bricks share the same name.
++         */
++ snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
++ GET_QFILE_PATH(is_cold), local_brick->brick_name, i);
++ i++;
++ }
++out:
++ return;
++}
++
++static int
++tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time)
++{
++ xlator_t *this = NULL;
++ dht_conf_t *conf = NULL;
++ gf_defrag_info_t *defrag = NULL;
++ gf_tier_conf_t *tier_conf = NULL;
++ gf_boolean_t is_hot_tier = args->is_hot_tier;
++ int freq = 0;
++ int ret = -1;
++ const char *tier_type = is_hot_tier ? "hot" : "cold";
++
++ this = args->this;
++
++ conf = this->private;
++
++ defrag = conf->defrag;
++
++ tier_conf = &defrag->tier_conf;
++
++ freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf)
++ : tier_get_freq_compact_cold(tier_conf);
++
++ defrag->tier_conf.compact_mode_switched =
++ is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot
++ : defrag->tier_conf.compact_mode_switched_cold;
++
++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Compact mode %i", defrag->tier_conf.compact_mode_switched);
++
++ if (tier_check_compact(tier_conf, current_time, freq)) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Start compaction on %s tier", tier_type);
++
++ args->freq_time = freq;
++ ret = tier_compact(args);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Compaction failed on "
++ "%s tier",
++ tier_type);
++ goto out;
++ }
++
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "End compaction on %s tier", tier_type);
++
++ if (is_hot_tier) {
++ defrag->tier_conf.compact_mode_switched_hot = _gf_false;
++ } else {
++ defrag->tier_conf.compact_mode_switched_cold = _gf_false;
++ }
++ }
++
++out:
++ return ret;
++}
++
++static int
++tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm)
++{
++ if (mode == TIER_MODE_WM && wm == TIER_WM_HI)
++ return WM_INTERVAL_EMERG;
++
++ return WM_INTERVAL;
++}
++
++/*
++ * Main tiering loop. This is called from the promotion and the
++ * demotion threads spawned in tier_start().
++ *
++ * Every second, wake from sleep to perform tasks.
++ * 1. Check trigger to migrate data.
++ * 2. Check for state changes (pause, unpause, stop).
++ */
++static void *
++tier_run(void *in_args)
++{
++ dht_conf_t *conf = NULL;
++ gfdb_time_t current_time = {0};
++ int freq = 0;
++ int ret = 0;
++ xlator_t *any = NULL;
++ xlator_t *xlator = NULL;
++ gf_tier_conf_t *tier_conf = NULL;
++ loc_t root_loc = {0};
++ int check_watermark = 0;
++ gf_defrag_info_t *defrag = NULL;
++ xlator_t *this = NULL;
++ migration_args_t *args = in_args;
++ GF_VALIDATE_OR_GOTO("tier", args, out);
++ GF_VALIDATE_OR_GOTO("tier", args->brick_list, out);
++
++ this = args->this;
++ GF_VALIDATE_OR_GOTO("tier", this, out);
++
++ conf = this->private;
++ GF_VALIDATE_OR_GOTO("tier", conf, out);
++
++ defrag = conf->defrag;
++ GF_VALIDATE_OR_GOTO("tier", defrag, out);
++
++ if (list_empty(args->brick_list)) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Brick list for tier is empty. Exiting.");
++ goto out;
++ }
++
++ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
++ tier_conf = &defrag->tier_conf;
++
++ dht_build_root_loc(defrag->root_inode, &root_loc);
++
++ while (1) {
++ /*
++ * Check if a graph switch occurred. If so, stop migration
++ * thread. It will need to be restarted manually.
++ */
++ any = THIS->ctx->active->first;
++ xlator = xlator_search_by_name(any, this->name);
++
++ if (xlator != this) {
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Detected graph switch. Exiting migration "
++ "daemon.");
++ goto out;
++ }
++
++ gf_defrag_check_pause_tier(tier_conf);
++
++ sleep(1);
++
++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
++ ret = 1;
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "defrag->defrag_status != "
++ "GF_DEFRAG_STATUS_STARTED");
++ goto out;
++ }
++
++ if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
++ defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
++ ret = 0;
++ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
++ gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR,
++                   "defrag->cmd == "
++ "GF_DEFRAG_CMD_START_DETACH_TIER");
++ goto out;
++ }
++
++ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)
++ continue;
++
++        /* To have proper synchronization amongst all
++         * brick-holding nodes, so that promotions and demotions
++         * start atomically w.r.t. the promotion/demotion frequency
++         * period, all nodes should have their system time
++         * in sync with each other, either set manually or
++         * via an NTP server */
++ ret = gettimeofday(&current_time, NULL);
++ if (ret == -1) {
++ gf_msg(this->name, GF_LOG_ERROR, errno,
++ DHT_MSG_SYS_CALL_GET_TIME_FAILED,
++ "Failed to get current time");
++ goto out;
++ }
++
++ check_watermark++;
++
++ /* emergency demotion requires frequent watermark monitoring */
++ if (check_watermark >=
++ tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) {
++ check_watermark = 0;
++ if (tier_conf->mode == TIER_MODE_WM) {
++ ret = tier_get_fs_stat(this, &root_loc);
++ if (ret != 0) {
++ continue;
++ }
++ ret = tier_check_watermark(this);
++ if (ret != 0) {
++ gf_msg(this->name, GF_LOG_CRITICAL, errno,
++ DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark");
++ continue;
++ }
++ }
++ }
++
++ if (args->is_promotion) {
++ freq = tier_get_freq_promote(tier_conf);
++
++ if (tier_check_promote(tier_conf, current_time, freq)) {
++ args->freq_time = freq;
++ ret = tier_promote(args);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Promotion failed");
++ }
++ }
++ } else if (args->is_compaction) {
++ tier_prepare_compact(args, current_time);
++ } else {
++ freq = tier_get_freq_demote(tier_conf);
++
++ if (tier_check_demote(current_time, freq)) {
++ args->freq_time = freq;
++ ret = tier_demote(args);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Demotion failed");
++ }
++ }
++ }
++
++ /* Check the statfs immediately after the processing threads
++ return */
++ check_watermark = WM_INTERVAL;
++ }
++
++ ret = 0;
++out:
++
++ args->return_value = ret;
++
++ return NULL;
++}
++
++int
++tier_start(xlator_t *this, gf_defrag_info_t *defrag)
++{
++ pthread_t promote_thread;
++ pthread_t demote_thread;
++ pthread_t hot_compact_thread;
++ pthread_t cold_compact_thread;
++ int ret = -1;
++ struct list_head bricklist_hot = {0};
++ struct list_head bricklist_cold = {0};
++ migration_args_t promotion_args = {0};
++ migration_args_t demotion_args = {0};
++ migration_args_t hot_compaction_args = {0};
++ migration_args_t cold_compaction_args = {0};
++ dht_conf_t *conf = NULL;
++
++ INIT_LIST_HEAD((&bricklist_hot));
++ INIT_LIST_HEAD((&bricklist_cold));
++
++ conf = this->private;
++
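++    /* By convention subvolumes[0] is the cold (hashed) tier and
++     * subvolumes[1] the hot tier (see TIER_HASHED_SUBVOL in tier.h);
++     * demotion walks the hot brick list, promotion the cold one. */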
++ tier_get_bricklist(conf->subvolumes[1], &bricklist_hot);
++ set_brick_list_qpath(&bricklist_hot, _gf_false);
++
++ demotion_args.this = this;
++ demotion_args.brick_list = &bricklist_hot;
++ demotion_args.defrag = defrag;
++ demotion_args.is_promotion = _gf_false;
++ demotion_args.is_compaction = _gf_false;
++
++ ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args,
++ "tierdem");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to start demotion thread.");
++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
++ goto cleanup;
++ }
++
++ tier_get_bricklist(conf->subvolumes[0], &bricklist_cold);
++ set_brick_list_qpath(&bricklist_cold, _gf_true);
++
++ promotion_args.this = this;
++ promotion_args.brick_list = &bricklist_cold;
++ promotion_args.defrag = defrag;
++ promotion_args.is_promotion = _gf_true;
++
++ ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args,
++ "tierpro");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to start promotion thread.");
++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
++ goto waitforspawned;
++ }
++
++ hot_compaction_args.this = this;
++ hot_compaction_args.brick_list = &bricklist_hot;
++ hot_compaction_args.defrag = defrag;
++ hot_compaction_args.is_promotion = _gf_false;
++ hot_compaction_args.is_compaction = _gf_true;
++ hot_compaction_args.is_hot_tier = _gf_true;
++
++ ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run,
++ &hot_compaction_args, "tierhcom");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to start compaction thread.");
++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
++ goto waitforspawnedpromote;
++ }
++
++ cold_compaction_args.this = this;
++ cold_compaction_args.brick_list = &bricklist_cold;
++ cold_compaction_args.defrag = defrag;
++ cold_compaction_args.is_promotion = _gf_false;
++ cold_compaction_args.is_compaction = _gf_true;
++ cold_compaction_args.is_hot_tier = _gf_false;
++
++ ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run,
++ &cold_compaction_args, "tierccom");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Failed to start compaction thread.");
++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
++ goto waitforspawnedhotcompact;
++ }
++ pthread_join(cold_compact_thread, NULL);
++
++waitforspawnedhotcompact:
++ pthread_join(hot_compact_thread, NULL);
++
++waitforspawnedpromote:
++ pthread_join(promote_thread, NULL);
++
++waitforspawned:
++ pthread_join(demote_thread, NULL);
++
++cleanup:
++ clear_bricklist(&bricklist_cold);
++ clear_bricklist(&bricklist_hot);
++ return ret;
++}
++
++int32_t
++tier_migration_needed(xlator_t *this)
++{
++ gf_defrag_info_t *defrag = NULL;
++ dht_conf_t *conf = NULL;
++ int ret = 0;
++
++ conf = this->private;
++
++ GF_VALIDATE_OR_GOTO(this->name, conf, out);
++ GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out);
++
++ defrag = conf->defrag;
++
++ if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
++ (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER))
++ ret = 1;
++out:
++ return ret;
++}
++
++int32_t
++tier_migration_get_dst(xlator_t *this, dht_local_t *local)
++{
++ dht_conf_t *conf = NULL;
++ int32_t ret = -1;
++ gf_defrag_info_t *defrag = NULL;
++
++ GF_VALIDATE_OR_GOTO("tier", this, out);
++ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
++
++ conf = this->private;
++
++ defrag = conf->defrag;
++
++ if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
++ local->rebalance.target_node = conf->subvolumes[0];
++
++ } else if (conf->subvolumes[0] == local->cached_subvol)
++ local->rebalance.target_node = conf->subvolumes[1];
++ else
++ local->rebalance.target_node = conf->subvolumes[0];
++
++ if (local->rebalance.target_node)
++ ret = 0;
++
++out:
++ return ret;
++}
++
++xlator_t *
++tier_search(xlator_t *this, dht_layout_t *layout, const char *name)
++{
++ xlator_t *subvol = NULL;
++ dht_conf_t *conf = NULL;
++
++ GF_VALIDATE_OR_GOTO("tier", this, out);
++ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
++
++ conf = this->private;
++
++ subvol = TIER_HASHED_SUBVOL;
++
++out:
++ return subvol;
++}
++
++static int
++tier_load_externals(xlator_t *this)
++{
++ int ret = -1;
++ char *libpathfull = (LIBDIR "/libgfdb.so.0");
++ get_gfdb_methods_t get_gfdb_methods;
++
++ GF_VALIDATE_OR_GOTO("this", this, out);
++
++ libhandle = dlopen(libpathfull, RTLD_NOW);
++ if (!libhandle) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Error loading libgfdb.so %s\n", dlerror());
++ ret = -1;
++ goto out;
++ }
++
++ get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods");
++ if (!get_gfdb_methods) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Error loading get_gfdb_methods()");
++ ret = -1;
++ goto out;
++ }
++
++ get_gfdb_methods(&gfdb_methods);
++
++ ret = 0;
++
++out:
++ if (ret && libhandle)
++ dlclose(libhandle);
++
++ return ret;
++}
++
++static tier_mode_t
++tier_validate_mode(char *mode)
++{
++ int ret = -1;
++
++ if (strcmp(mode, "test") == 0) {
++ ret = TIER_MODE_TEST;
++ } else {
++ ret = TIER_MODE_WM;
++ }
++
++ return ret;
++}
++
++static gf_boolean_t
++tier_validate_compact_mode(char *mode)
++{
++ gf_boolean_t ret = _gf_false;
++
++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "tier_validate_compact_mode: mode = %s", mode);
++
++ if (!strcmp(mode, "on")) {
++ ret = _gf_true;
++ } else {
++ ret = _gf_false;
++ }
++
++    gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "tier_validate_compact_mode: ret = %i", ret);
++
++ return ret;
++}
++
++int
++tier_init_methods(xlator_t *this)
++{
++ int ret = -1;
++ dht_conf_t *conf = NULL;
++ dht_methods_t *methods = NULL;
++
++ GF_VALIDATE_OR_GOTO("tier", this, err);
++
++ conf = this->private;
++
++ methods = &(conf->methods);
++
++ methods->migration_get_dst_subvol = tier_migration_get_dst;
++ methods->migration_other = tier_start;
++ methods->migration_needed = tier_migration_needed;
++ methods->layout_search = tier_search;
++
++ ret = 0;
++err:
++ return ret;
++}
++
++static void
++tier_save_vol_name(xlator_t *this)
++{
++ dht_conf_t *conf = NULL;
++ gf_defrag_info_t *defrag = NULL;
++ char *suffix = NULL;
++ int name_len = 0;
++
++ conf = this->private;
++ defrag = conf->defrag;
++
++ suffix = strstr(this->name, "-tier-dht");
++
++ if (suffix)
++ name_len = suffix - this->name;
++ else
++ name_len = strlen(this->name);
++
++ if (name_len > GD_VOLUME_NAME_MAX)
++ name_len = GD_VOLUME_NAME_MAX;
++
++ strncpy(defrag->tier_conf.volname, this->name, name_len);
++ defrag->tier_conf.volname[name_len] = 0;
++}
++
++int
++tier_init(xlator_t *this)
++{
++ int ret = -1;
++ int freq = 0;
++ int maxsize = 0;
++ dht_conf_t *conf = NULL;
++ gf_defrag_info_t *defrag = NULL;
++ char *voldir = NULL;
++ char *mode = NULL;
++ char *paused = NULL;
++ tier_mode_t tier_mode = DEFAULT_TIER_MODE;
++ gf_boolean_t compact_mode = _gf_false;
++
++ ret = dht_init(this);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "tier_init failed");
++ goto out;
++ }
++
++ conf = this->private;
++
++ ret = tier_init_methods(this);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "tier_init_methods failed");
++ goto out;
++ }
++
++ if (conf->subvolume_cnt != 2) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Invalid number of subvolumes %d", conf->subvolume_cnt);
++ goto out;
++ }
++
++    /* If instantiated from the client side, initialization is complete. */
++ if (!conf->defrag) {
++ ret = 0;
++ goto out;
++ }
++
++    /* If instantiated from the server side, load the db libraries */
++ ret = tier_load_externals(this);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "Could not load externals. Aborting");
++ goto out;
++ }
++
++ defrag = conf->defrag;
++
++ defrag->tier_conf.last_demote_qfile_index = 0;
++ defrag->tier_conf.last_promote_qfile_index = 0;
++
++ defrag->tier_conf.is_tier = 1;
++ defrag->this = this;
++
++ ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize);
++ if (ret) {
++ maxsize = 0;
++ }
++
++ defrag->tier_conf.tier_max_promote_size = maxsize;
++
++ ret = dict_get_int32(this->options, "tier-promote-frequency", &freq);
++ if (ret) {
++ freq = DEFAULT_PROMOTE_FREQ_SEC;
++ }
++
++ defrag->tier_conf.tier_promote_frequency = freq;
++
++ ret = dict_get_int32(this->options, "tier-demote-frequency", &freq);
++ if (ret) {
++ freq = DEFAULT_DEMOTE_FREQ_SEC;
++ }
++
++ defrag->tier_conf.tier_demote_frequency = freq;
++
++ ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq);
++ if (ret) {
++ freq = DEFAULT_HOT_COMPACT_FREQ_SEC;
++ }
++
++ defrag->tier_conf.tier_compact_hot_frequency = freq;
++
++ ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq);
++ if (ret) {
++ freq = DEFAULT_COLD_COMPACT_FREQ_SEC;
++ }
++
++ defrag->tier_conf.tier_compact_cold_frequency = freq;
++
++ ret = dict_get_int32(this->options, "watermark-hi", &freq);
++ if (ret) {
++ freq = DEFAULT_WM_HI;
++ }
++
++ defrag->tier_conf.watermark_hi = freq;
++
++ ret = dict_get_int32(this->options, "watermark-low", &freq);
++ if (ret) {
++ freq = DEFAULT_WM_LOW;
++ }
++
++ defrag->tier_conf.watermark_low = freq;
++
++ ret = dict_get_int32(this->options, "write-freq-threshold", &freq);
++ if (ret) {
++ freq = DEFAULT_WRITE_FREQ_SEC;
++ }
++
++ defrag->write_freq_threshold = freq;
++
++ ret = dict_get_int32(this->options, "read-freq-threshold", &freq);
++ if (ret) {
++ freq = DEFAULT_READ_FREQ_SEC;
++ }
++
++ defrag->read_freq_threshold = freq;
++
++ ret = dict_get_int32(this->options, "tier-max-mb", &freq);
++ if (ret) {
++ freq = DEFAULT_TIER_MAX_MIGRATE_MB;
++ }
++
++ defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024;
++
++ ret = dict_get_int32(this->options, "tier-max-files", &freq);
++ if (ret) {
++ freq = DEFAULT_TIER_MAX_MIGRATE_FILES;
++ }
++
++ defrag->tier_conf.max_migrate_files = freq;
++
++ ret = dict_get_int32(this->options, "tier-query-limit",
++ &(defrag->tier_conf.query_limit));
++ if (ret) {
++ defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT;
++ }
++
++ ret = dict_get_str(this->options, "tier-compact", &mode);
++
++ if (ret) {
++ defrag->tier_conf.compact_active = DEFAULT_COMP_MODE;
++ } else {
++ compact_mode = tier_validate_compact_mode(mode);
++ /* If compaction is now active, we need to inform the bricks on
++ the hot and cold tier of this. See dht-common.h for more. */
++ defrag->tier_conf.compact_active = compact_mode;
++ if (compact_mode) {
++ defrag->tier_conf.compact_mode_switched_hot = _gf_true;
++ defrag->tier_conf.compact_mode_switched_cold = _gf_true;
++ }
++ }
++
++ ret = dict_get_str(this->options, "tier-mode", &mode);
++ if (ret) {
++ defrag->tier_conf.mode = DEFAULT_TIER_MODE;
++ } else {
++ tier_mode = tier_validate_mode(mode);
++ defrag->tier_conf.mode = tier_mode;
++ }
++
++ pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0);
++
++ gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
++
++ ret = dict_get_str(this->options, "tier-pause", &paused);
++
++ if (paused && strcmp(paused, "on") == 0)
++ gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
++
++ ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name);
++ if (ret < 0)
++ goto out;
++
++ ret = mkdir_p(voldir, 0777, _gf_true);
++ if (ret == -1 && errno != EEXIST) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "tier_init failed");
++
++ GF_FREE(voldir);
++ goto out;
++ }
++
++ GF_FREE(voldir);
++
++ ret = gf_asprintf(&promotion_qfile, "%s/%s/promote",
++ DEFAULT_VAR_RUN_DIRECTORY, this->name);
++ if (ret < 0)
++ goto out;
++
++ ret = gf_asprintf(&demotion_qfile, "%s/%s/demote",
++ DEFAULT_VAR_RUN_DIRECTORY, this->name);
++ if (ret < 0) {
++ GF_FREE(promotion_qfile);
++ goto out;
++ }
++
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "Promote/demote frequency %d/%d "
++ "Write/Read freq thresholds %d/%d",
++ defrag->tier_conf.tier_promote_frequency,
++ defrag->tier_conf.tier_demote_frequency,
++ defrag->write_freq_threshold, defrag->read_freq_threshold);
++
++ tier_save_vol_name(this);
++
++ ret = 0;
++
++out:
++
++ return ret;
++}
++
++int
++tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data)
++{
++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
++ "Migrate file paused with op_ret %d", op_ret);
++
++ return op_ret;
++}
++
++int
++tier_cli_pause(void *data)
++{
++ gf_defrag_info_t *defrag = NULL;
++ xlator_t *this = NULL;
++ dht_conf_t *conf = NULL;
++ int ret = -1;
++
++ this = data;
++
++ conf = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, conf, exit);
++
++ defrag = conf->defrag;
++ GF_VALIDATE_OR_GOTO(this->name, defrag, exit);
++
++ gf_defrag_pause_tier(this, defrag);
++
++ ret = 0;
++exit:
++ return ret;
++}
++
++int
++tier_reconfigure(xlator_t *this, dict_t *options)
++{
++ dht_conf_t *conf = NULL;
++ gf_defrag_info_t *defrag = NULL;
++ char *mode = NULL;
++ int migrate_mb = 0;
++ gf_boolean_t req_pause = _gf_false;
++ int ret = 0;
++ call_frame_t *frame = NULL;
++ gf_boolean_t last_compact_setting = _gf_false;
++
++ conf = this->private;
++
++ if (conf->defrag) {
++ defrag = conf->defrag;
++ GF_OPTION_RECONF("tier-max-promote-file-size",
++ defrag->tier_conf.tier_max_promote_size, options,
++ int32, out);
++
++ GF_OPTION_RECONF("tier-promote-frequency",
++ defrag->tier_conf.tier_promote_frequency, options,
++ int32, out);
++
++ GF_OPTION_RECONF("tier-demote-frequency",
++ defrag->tier_conf.tier_demote_frequency, options,
++ int32, out);
++
++ GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold,
++ options, int32, out);
++
++ GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold,
++ options, int32, out);
++
++ GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi,
++ options, int32, out);
++
++ GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low,
++ options, int32, out);
++
++ last_compact_setting = defrag->tier_conf.compact_active;
++
++ GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active,
++ options, bool, out);
++
++ if (last_compact_setting != defrag->tier_conf.compact_active) {
++ defrag->tier_conf.compact_mode_switched_hot = _gf_true;
++ defrag->tier_conf.compact_mode_switched_cold = _gf_true;
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++ "compact mode switched");
++ }
++
++ GF_OPTION_RECONF("tier-hot-compact-frequency",
++ defrag->tier_conf.tier_compact_hot_frequency, options,
++ int32, out);
++
++ GF_OPTION_RECONF("tier-cold-compact-frequency",
++ defrag->tier_conf.tier_compact_cold_frequency, options,
++ int32, out);
++
++ GF_OPTION_RECONF("tier-mode", mode, options, str, out);
++ defrag->tier_conf.mode = tier_validate_mode(mode);
++
++ GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out);
++ defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 *
++ 1024;
++
++ GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files,
++ options, int32, out);
++
++ GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit,
++ options, int32, out);
++
++ GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out);
++
++ if (req_pause == _gf_true) {
++ frame = create_frame(this, this->ctx->pool);
++ if (!frame)
++ goto out;
++
++ frame->root->pid = GF_CLIENT_PID_DEFRAG;
++
++ ret = synctask_new(this->ctx->env, tier_cli_pause,
++ tier_cli_pause_done, frame, this);
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "pause tier failed on reconfigure");
++ }
++ } else {
++ ret = gf_defrag_resume_tier(this, defrag);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++ "resume tier failed on reconfigure");
++ }
++ }
++ }
++
++out:
++ return dht_reconfigure(this, options);
++}
++
++void
++tier_fini(xlator_t *this)
++{
++ if (libhandle)
++ dlclose(libhandle);
++
++ GF_FREE(demotion_qfile);
++ GF_FREE(promotion_qfile);
++
++ dht_fini(this);
++}
++
++struct xlator_fops fops = {
++
++ .lookup = dht_lookup,
++ .create = tier_create,
++ .mknod = dht_mknod,
++
++ .open = dht_open,
++ .statfs = tier_statfs,
++ .opendir = dht_opendir,
++ .readdir = tier_readdir,
++ .readdirp = tier_readdirp,
++ .fsyncdir = dht_fsyncdir,
++ .symlink = dht_symlink,
++ .unlink = tier_unlink,
++ .link = tier_link,
++ .mkdir = dht_mkdir,
++ .rmdir = dht_rmdir,
++ .rename = dht_rename,
++ .entrylk = dht_entrylk,
++ .fentrylk = dht_fentrylk,
++
++ /* Inode read operations */
++ .stat = dht_stat,
++ .fstat = dht_fstat,
++ .access = dht_access,
++ .readlink = dht_readlink,
++ .getxattr = dht_getxattr,
++ .fgetxattr = dht_fgetxattr,
++ .readv = dht_readv,
++ .flush = dht_flush,
++ .fsync = dht_fsync,
++ .inodelk = dht_inodelk,
++ .finodelk = dht_finodelk,
++ .lk = dht_lk,
++
++ /* Inode write operations */
++ .fremovexattr = dht_fremovexattr,
++ .removexattr = dht_removexattr,
++ .setxattr = dht_setxattr,
++ .fsetxattr = dht_fsetxattr,
++ .truncate = dht_truncate,
++ .ftruncate = dht_ftruncate,
++ .writev = dht_writev,
++ .xattrop = dht_xattrop,
++ .fxattrop = dht_fxattrop,
++ .setattr = dht_setattr,
++ .fsetattr = dht_fsetattr,
++ .fallocate = dht_fallocate,
++ .discard = dht_discard,
++ .zerofill = dht_zerofill,
++};
++
++struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget};
++
++extern int32_t
++mem_acct_init(xlator_t *this);
++
++extern struct volume_options dht_options[];
++
++xlator_api_t xlator_api = {
++ .init = tier_init,
++ .fini = tier_fini,
++ .notify = dht_notify,
++ .reconfigure = tier_reconfigure,
++ .mem_acct_init = mem_acct_init,
++ .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */
++ .fops = &fops,
++ .cbks = &cbks,
++ .options = dht_options,
++ .identifier = "tier",
++ .category = GF_MAINTAINED,
++};
++
+diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
+new file mode 100644
+index 0000000..a20b1db
+--- /dev/null
++++ b/xlators/cluster/dht/src/tier.h
+@@ -0,0 +1,110 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef _TIER_H_
++#define _TIER_H_
++
++/******************************************************************************/
++/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */
++#include "dht-common.h"
++#include <glusterfs/xlator.h>
++#include <signal.h>
++#include <fnmatch.h>
++#include <signal.h>
++
++/*
++ * Size of the timer wheel. We will not promote or demote less
++ * frequently than once every TIMER_SECS seconds.
++ */
++#define TIMER_SECS 3600
++
++#include "gfdb_data_store.h"
++#include <ctype.h>
++#include <sys/stat.h>
++
++#define PROMOTION_QFILE "promotequeryfile"
++#define DEMOTION_QFILE "demotequeryfile"
++
++#define TIER_HASHED_SUBVOL conf->subvolumes[0]
++#define TIER_UNHASHED_SUBVOL conf->subvolumes[1]
++
++#define GET_QFILE_PATH(is_promotion)                                          \
++    ((is_promotion) ? promotion_qfile : demotion_qfile)
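++
++/* Usage sketch: GET_QFILE_PATH(1) expands to the promotion prefix;
++ * tier_build_migration_qfile() and set_brick_list_qpath() then append
++ * "-<brickname>-<index>" to form each brick's query file path. */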
++
++typedef struct tier_qfile_array {
++ int *fd_array;
++ ssize_t array_size;
++ ssize_t next_index;
++    /* Indicates the number of exhausted FDs */
++ ssize_t exhausted_count;
++} tier_qfile_array_t;
++
++typedef struct _query_cbk_args {
++ xlator_t *this;
++ gf_defrag_info_t *defrag;
++    /* fd that query records are written to */
++ int query_fd;
++ int is_promotion;
++ int is_compaction;
++    /* query file fds read back during migration */
++ tier_qfile_array_t *qfile_array;
++} query_cbk_args_t;
++
++int
++gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag);
++
++typedef struct gfdb_brick_info {
++ gfdb_time_t *time_stamp;
++ gf_boolean_t _gfdb_promote;
++ query_cbk_args_t *_query_cbk_args;
++} gfdb_brick_info_t;
++
++typedef struct brick_list {
++ xlator_t *xlator;
++ char *brick_db_path;
++ char brick_name[NAME_MAX];
++ char qfile_path[PATH_MAX];
++ struct list_head list;
++} tier_brick_list_t;
++
++typedef struct _dm_thread_args {
++ xlator_t *this;
++ gf_defrag_info_t *defrag;
++ struct list_head *brick_list;
++ int freq_time;
++ int return_value;
++ int is_promotion;
++ int is_compaction;
++ gf_boolean_t is_hot_tier;
++} migration_args_t;
++
++typedef enum tier_watermark_op_ {
++ TIER_WM_NONE = 0,
++ TIER_WM_LOW,
++ TIER_WM_HI,
++ TIER_WM_MID
++} tier_watermark_op_t;
++
++#define DEFAULT_PROMOTE_FREQ_SEC 120
++#define DEFAULT_DEMOTE_FREQ_SEC 120
++#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800
++#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800
++#define DEFAULT_DEMOTE_DEGRADED 1
++#define DEFAULT_WRITE_FREQ_SEC 0
++#define DEFAULT_READ_FREQ_SEC 0
++#define DEFAULT_WM_LOW 75
++#define DEFAULT_WM_HI 90
++#define DEFAULT_TIER_MODE TIER_MODE_TEST
++#define DEFAULT_COMP_MODE _gf_true
++#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
++#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
++#define DEFAULT_TIER_QUERY_LIMIT 100
++
++#endif
+diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
+index 194634b..545c02b 100644
+--- a/xlators/features/Makefile.am
++++ b/xlators/features/Makefile.am
+@@ -5,6 +5,6 @@ endif
+ SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
+ compress changelog gfid-access snapview-client snapview-server trash \
+ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
+- utime
++ utime changetimerecorder
+
+ CLEANFILES =
+diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/changetimerecorder/Makefile.am
+new file mode 100644
+index 0000000..a985f42
+--- /dev/null
++++ b/xlators/features/changetimerecorder/Makefile.am
+@@ -0,0 +1,3 @@
++SUBDIRS = src
++
++CLEANFILES =
+diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am
+new file mode 100644
+index 0000000..620017e
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/Makefile.am
+@@ -0,0 +1,26 @@
++xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
++
++# changetimerecorder can only be built when libgfdb is enabled
++if BUILD_GFDB
++ xlator_LTLIBRARIES = changetimerecorder.la
++endif
++
++changetimerecorder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
++
++changetimerecorder_la_SOURCES = changetimerecorder.c \
++ ctr-helper.c ctr-xlator-ctx.c
++
++changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
++ $(top_builddir)/libglusterfs/src/gfdb/libgfdb.la
++
++noinst_HEADERS = ctr-messages.h changetimerecorder.h ctr_mem_types.h \
++ ctr-helper.h ctr-xlator-ctx.h
++
++AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
++ -I$(top_srcdir)/libglusterfs/src/gfdb \
++ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
++ -DDATADIR=\"$(localstatedir)\"
++
++AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS)
++
++CLEANFILES =
+diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
+new file mode 100644
+index 0000000..f2aa4a9
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/changetimerecorder.c
+@@ -0,0 +1,2371 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++#include <ctype.h>
++#include <sys/uio.h>
++
++#include "gfdb_sqlite3.h"
++#include "ctr-helper.h"
++#include "ctr-messages.h"
++#include <glusterfs/syscall.h>
++
++#include "changetimerecorder.h"
++#include "tier-ctr-interface.h"
++
++/*******************************inode forget***********************************/
++int
++ctr_forget(xlator_t *this, inode_t *inode)
++{
++ fini_ctr_xlator_ctx(this, inode);
++ return 0;
++}
++
++/************************** Look up heal **************************************/
++/*
++Problem: The CTR xlator records file metadata (heat/hardlinks)
++in the database. This works fine for files created after the ctr
++xlator is switched ON, but for files created before the CTR xlator
++was ON, CTR cannot record either piece of metadata, i.e. heat or
++hardlinks, making those files immune to promotions/demotions.
++
++Solution: The solution implemented in this patch is to do a ctr-db
++heal of all those pre-existing files, using named lookup. For this
++purpose we use the inode-xlator context variable option in gluster.
++The inode-xlator context variable for the ctr xlator holds the
++following:
++ a. A lock for the context variable
++ b. A hardlink list: this list represents the successfully
++ looked-up hardlinks.
++These are the scenarios when the hardlink list is updated:
++1) Named-Lookup: Whenever a named lookup happens on a file, in the
++ wind path we copy all required hardlink and inode information into
++ the ctr_db_record structure, which resides in frame->local.
++ We don't update the database in the wind. During the unwind, we read
++ the information from the ctr_db_record and:
++ check if the inode context variable exists; if not, we create it;
++ check if the hard link is in the hardlink list.
++ If it is not there, we add it to the list and send an update to the
++ database using libgfdb.
++ Please note: the database transaction can fail (and we ignore the
++ failure) as a record may already exist in the db. This update heals
++ the record if it is missing.
++ If it is already in the list, we ignore it.
++2) Inode Forget: Whenever an inode forget hits, we clear the hardlink
++ list in the inode context variable and delete the context variable.
++ Please note: an inode forget may happen for two reasons,
++ a. when the inode is deleted.
++ b. the in-memory inode is evicted from the inode table due to cache
++ limits.
++3) create: whenever a create happens, we create the inode context
++ variable and add the hardlink. The database update is done as usual
++ by ctr.
++4) link: whenever a hardlink is created for the inode, we create the
++ inode context variable, if not present, and add the hardlink to the
++ list.
++5) unlink: whenever an unlink happens, we delete the hardlink from the
++ list.
++6) mknod: same as create.
++7) rename: whenever a rename happens, we update the hardlink in the
++ list. If the hardlink was not present, we add it to the list.
++
++What is pending:
++1) This solution only works for named lookups.
++2) We don't track afr-self-heal/dht-rebalancer traffic for healing.
++
++*/
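++
++/*
++ * Flow sketch (illustrative only, not compiled): how the pieces above
++ * fit together for a single named lookup; the helper names are the
++ * ones defined later in this file.
++ *
++ *   ctr_lookup()                  wind: fill ctr_db_record in
++ *       -> ctr_lookup_wind()            frame->local, no db write yet
++ *   ctr_lookup_cbk()              unwind:
++ *       -> get_ctr_xlator_ctx()   no ctx       => inode heal needed
++ *       -> add_hard_link_ctx()    new hardlink => hardlink heal needed
++ *       -> ctr_lookup_unwind()    insert_record() writes to the db
++ */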
++
++/* This function does not write anything to the db; it just creates
++ * the local variable for the frame and sets values in the
++ * ctr_db_record */
++static int
++ctr_lookup_wind(call_frame_t *frame, xlator_t *this,
++ gf_ctr_inode_context_t *ctr_inode_cx)
++{
++ int ret = -1;
++ gf_ctr_private_t *_priv = NULL;
++ gf_ctr_local_t *ctr_local = NULL;
++
++ GF_ASSERT(frame);
++ GF_ASSERT(frame->root);
++ GF_ASSERT(this);
++ IS_CTR_INODE_CX_SANE(ctr_inode_cx);
++
++ _priv = this->private;
++ GF_ASSERT(_priv);
++
++ if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
++ frame->local = init_ctr_local_t(this);
++ if (!frame->local) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++ "WIND: Error while creating ctr local");
++ goto out;
++ };
++ ctr_local = frame->local;
++ /*Definitely no internal fops will reach here*/
++ ctr_local->is_internal_fop = _gf_false;
++ /*Don't record counters*/
++ CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
++ /*Don't record time at all*/
++ CTR_DB_REC(ctr_local).do_record_times = _gf_false;
++
++ /* Copy gfid into db record*/
++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
++
++ /* Set fop_path and fop_type, required by libgfdb to make
++ * decision while inserting the record */
++ CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
++ CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
++
++ /* Copy hard link info*/
++ gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
++ *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
++ if (snprintf(CTR_DB_REC(ctr_local).file_name,
++ sizeof(CTR_DB_REC(ctr_local).file_name), "%s",
++ NEW_LINK_CX(ctr_inode_cx)->basename) >=
++ sizeof(CTR_DB_REC(ctr_local).file_name)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++ "WIND: Error copying filename of ctr local");
++ goto out;
++ }
++ /* Since we are in lookup we can ignore errors while
++ * inserting into the DB, because there may be many
++ * attempts to write to the DB for healing.
++ * We don't want to log all failed attempts and
++ * bloat the log */
++ ctr_local->gfdb_db_record.ignore_errors = _gf_true;
++ }
++
++ ret = 0;
++
++out:
++
++ if (ret) {
++ free_ctr_local(ctr_local);
++ frame->local = NULL;
++ }
++
++ return ret;
++}
++
++/* This function inserts the ctr_db_record populated by ctr_lookup_wind
++ * into the db. It also destroys the frame->local created by ctr_lookup_wind */
++static int
++ctr_lookup_unwind(call_frame_t *frame, xlator_t *this)
++{
++ int ret = -1;
++ gf_ctr_private_t *_priv = NULL;
++ gf_ctr_local_t *ctr_local = NULL;
++
++ GF_ASSERT(frame);
++ GF_ASSERT(this);
++
++ _priv = this->private;
++ GF_ASSERT(_priv);
++
++ GF_ASSERT(_priv->_db_conn);
++
++ ctr_local = frame->local;
++
++ if (ctr_local && (ctr_local->ia_inode_type != IA_IFDIR)) {
++ ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
++ if (ret == -1) {
++ gf_msg(this->name,
++ _gfdb_log_level(GF_LOG_ERROR,
++ ctr_local->gfdb_db_record.ignore_errors),
++ 0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
++ "UNWIND: Error filling ctr local");
++ goto out;
++ }
++ }
++ ret = 0;
++out:
++ free_ctr_local(ctr_local);
++ frame->local = NULL;
++ return ret;
++}
++
++/******************************************************************************
++ *
++ * FOPS HANDLING BELOW
++ *
++ * ***************************************************************************/
++
++/****************************LOOKUP********************************************/
++
++int32_t
++ctr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, inode_t *inode,
++ struct iatt *buf, dict_t *dict, struct iatt *postparent)
++{
++ int ret = -1;
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++ gf_ctr_local_t *ctr_local = NULL;
++ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR;
++ gf_boolean_t _is_heal_needed = _gf_false;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++
++ /* if the lookup failed, don't do anything */
++ if (op_ret == -1) {
++ gf_msg_trace(this->name, 0, "lookup failed with %s",
++ strerror(op_errno));
++ goto out;
++ }
++
++ /* Ignore directory lookups */
++ if (inode->ia_type == IA_IFDIR) {
++ goto out;
++ }
++
++ /* if frame->local was not set by ctr_lookup(),
++ * don't do anything */
++ if (!frame->local) {
++ goto out;
++ }
++
++ /* if the lookup is for a dht link file, do not record */
++ if (dht_is_linkfile(buf, dict)) {
++ gf_msg_trace(this->name, 0,
++ "Ignoring Lookup "
++ "for dht link file");
++ goto out;
++ }
++
++ ctr_local = frame->local;
++ /*Assign the proper inode type*/
++ ctr_local->ia_inode_type = inode->ia_type;
++
++ /* Copy gfid directly from inode */
++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid);
++
++ /* Checking if gfid and parent gfid is valid */
++ if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) ||
++ gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) {
++ gf_msg_trace(this->name, 0, "Invalid GFID");
++ goto out;
++ }
++
++ /* if this is the first entry,
++ * mark the ctr_record for create.
++ * A create will attempt to insert both a file record and a
++ * hard link record in the db */
++ ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode);
++ if (!ctr_xlator_ctx) {
++ /* This marks inode heal */
++ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
++ _is_heal_needed = _gf_true;
++ }
++
++ /* Copy the correct gfid from resolved inode */
++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid);
++
++ /* Add hard link to the list */
++ ret_val = add_hard_link_ctx(frame, this, inode);
++ if (ret_val == CTR_CTX_ERROR) {
++ gf_msg_trace(this->name, 0, "Failed adding hardlink to list");
++ goto out;
++ }
++ /* If inode needs healing then heal the hardlink also */
++ else if (ret_val & CTR_TRY_INODE_HEAL) {
++ /* This marks inode heal */
++ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
++ _is_heal_needed = _gf_true;
++ }
++ /* If hardlink needs healing */
++ else if (ret_val & CTR_TRY_HARDLINK_HEAL) {
++ _is_heal_needed = _gf_true;
++ }
++
++ /* If no heal is needed, skip the db insert */
++ if (!_is_heal_needed)
++ goto out;
++
++ /* FINALLY HEAL : Inserts the ctr_db_record populated by ctr_lookup_wind
++ * into the db. It also destroys the frame->local
++ * created by ctr_lookup_wind */
++ ret = ctr_lookup_unwind(frame, this);
++ if (ret) {
++ gf_msg_trace(this->name, 0, "Failed healing/inserting link");
++ }
++
++out:
++ free_ctr_local((gf_ctr_local_t *)frame->local);
++ frame->local = NULL;
++
++ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict,
++ postparent);
++
++ return 0;
++}
++
++int32_t
++ctr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
++{
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++ gf_ctr_link_context_t ctr_link_cx;
++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ GF_ASSERT(frame);
++ GF_ASSERT(frame->root);
++
++ /* Don't handle nameless lookups*/
++ if (!loc->parent || !loc->name)
++ goto out;
++
++ /*fill ctr link context*/
++ FILL_CTR_LINK_CX(_link_cx, loc->parent->gfid, loc->name, out);
++
++ /* Fill ctr inode context*/
++ /* IA_IFREG : We assume it is a file in the wind,
++ * but in the unwind we know whether the inode is a file
++ * or a directory.
++ * gfid: we are just filling loc->gfid, which is not correct.
++ * In the unwind we fill the correct gfid on a successful lookup */
++ FILL_CTR_INODE_CONTEXT(_inode_cx, IA_IFREG, loc->gfid, _link_cx, NULL,
++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
++
++ /* Create the frame->local and populate ctr_db_record
++ * No writing to the db yet */
++ ret = ctr_lookup_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
++ "Failed to insert link wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_lookup_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->lookup, loc, xdata);
++ return 0;
++}
++
++/****************************WRITEV********************************************/
++int32_t
++ctr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
++ struct iatt *postbuf, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_UNWIND_FAILED,
++ "Failed to insert writev unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
++ xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
++ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
++ dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
++ "Failed to insert writev wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_writev_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags,
++ iobref, xdata);
++
++ return 0;
++}
++
++/******************************setattr*****************************************/
++
++int32_t
++ctr_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf,
++ struct iatt *postop_stbuf, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
++ "Failed to insert setattr unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop_stbuf,
++ postop_stbuf, xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
++ int32_t valid, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
++ "Failed to insert setattr wind");
++ }
++out:
++
++ STACK_WIND(frame, ctr_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++
++ return 0;
++}
++
++/*************************** fsetattr ***************************************/
++int32_t
++ctr_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf,
++ struct iatt *postop_stbuf, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
++ "Failed to insert fsetattr unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, preop_stbuf,
++ postop_stbuf, xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
++ int32_t valid, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
++ "Failed to insert fsetattr wind");
++ }
++out:
++ STACK_WIND(frame, ctr_fsetattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++
++ return 0;
++}
++/****************************fremovexattr************************************/
++
++int32_t
++ctr_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
++ "Failed to insert fremovexattr unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ const char *name, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
++ "Failed to insert fremovexattr wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_fremovexattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
++ return 0;
++}
++
++/****************************removexattr*************************************/
++
++int32_t
++ctr_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
++ "Failed to insert removexattr unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ const char *name, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
++ "Failed to insert removexattr wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_removexattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
++ return 0;
++}
++
++/****************************truncate****************************************/
++
++int32_t
++ctr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
++ struct iatt *postbuf, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
++ "Failed to insert truncate unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
++ xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
++ dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_TRUNCATE_WIND_FAILED,
++ "Failed to insert truncate wind");
++ }
++out:
++ STACK_WIND(frame, ctr_truncate_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
++ return 0;
++}
++
++/****************************ftruncate***************************************/
++
++int32_t
++ctr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
++ struct iatt *postbuf, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
++ "Failed to insert ftruncate unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
++ xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++ dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED,
++ "Failed to insert ftruncate wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_ftruncate_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
++ return 0;
++}
++
++/****************************rename******************************************/
++int32_t
++ctr_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *buf,
++ struct iatt *preoldparent, struct iatt *postoldparent,
++ struct iatt *prenewparent, struct iatt *postnewparent,
++ dict_t *xdata)
++{
++ int ret = -1;
++ uint32_t remaining_links = -1;
++ gf_ctr_local_t *ctr_local = NULL;
++ gfdb_fop_type_t fop_type = GFDB_FOP_INVALID_OP;
++ gfdb_fop_path_t fop_path = GFDB_FOP_INVALID;
++
++ GF_ASSERT(frame);
++ GF_ASSERT(this);
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
++ GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
++ "Failed to insert rename unwind");
++ goto out;
++ }
++
++ if (!xdata)
++ goto out;
++ /*
++ *
++ * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator
++ * This is only set when we are overwriting hardlinks.
++ *
++ * */
++ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
++ &remaining_links);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++ "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
++ remaining_links = -1;
++ goto out;
++ }
++
++ ctr_local = frame->local;
++ if (!ctr_local) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_NULL_LOCAL,
++ "ctr_local is NULL.");
++ goto out;
++ }
++
++ /* This is not the only link */
++ if (remaining_links > 1) {
++ fop_type = GFDB_FOP_DENTRY_WRITE;
++ fop_path = GFDB_FOP_UNDEL;
++ }
++ /* Last link that was deleted */
++ else if (remaining_links == 1) {
++ fop_type = GFDB_FOP_DENTRY_WRITE;
++ fop_path = GFDB_FOP_UNDEL_ALL;
++ } else {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
++ "Invalid link count from posix");
++ goto out;
++ }
++
++ ret = ctr_delete_hard_link_from_db(
++ this, CTR_DB_REC(ctr_local).old_gfid, CTR_DB_REC(ctr_local).pargfid,
++ CTR_DB_REC(ctr_local).file_name, fop_type, fop_path);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
++ "Failed to delete records of %s",
++ CTR_DB_REC(ctr_local).old_file_name);
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent,
++ postoldparent, prenewparent, postnewparent, xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
++ dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++ gf_ctr_link_context_t new_link_cx, old_link_cx;
++ gf_ctr_link_context_t *_nlink_cx = &new_link_cx;
++ gf_ctr_link_context_t *_olink_cx = &old_link_cx;
++ int is_dict_created = 0;
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ /*Fill old link context*/
++ FILL_CTR_LINK_CX(_olink_cx, oldloc->pargfid, oldloc->name, out);
++
++ /*Fill new link context*/
++ FILL_CTR_LINK_CX(_nlink_cx, newloc->pargfid, newloc->name, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type,
++ oldloc->inode->gfid, _nlink_cx, _olink_cx,
++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
++
++ /* If the rename is an overwrite of a hardlink,
++ * rename ("file1", "file2")
++ * file1 is hardlink for gfid say 00000000-0000-0000-0000-00000000000A
++ * file2 is hardlink for gfid say 00000000-0000-0000-0000-00000000000B
++ * so we are saving file2 gfid in old_gfid so that we delete entries
++ * from the db during rename callback if the fop is successful
++ * */
++ if (newloc->inode) {
++ /* This is the GFID from where the newloc hardlink will be
++ * unlinked */
++ _inode_cx->old_gfid = &newloc->inode->gfid;
++ }
++
++ /* Is a metadata fop */
++ _inode_cx->is_metadata_fop = _gf_true;
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_WIND_FAILED,
++ "Failed to insert rename wind");
++ } else {
++ /* We update the hard link in the inode context in the wind,
++ * as we don't get the "inode" in the callback for rename */
++ ret = update_hard_link_ctx(frame, this, oldloc->inode);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED,
++ "Failed "
++ "updating hard link in ctr inode context");
++ goto out;
++ }
++
++ /* If the newloc has an inode, i.e. we are acquiring the hardlink
++ * of an existing file, i.e. overwriting a file.
++ * */
++ if (newloc->inode) {
++ /* Getting the ctr inode context variable for
++ * inode whose hardlink will be acquired during
++ * the rename
++ * */
++ ctr_xlator_ctx = get_ctr_xlator_ctx(this, newloc->inode);
++ if (!ctr_xlator_ctx) {
++ /* Since there is no ctr inode context
++ * so nothing more to do */
++ ret = 0;
++ goto out;
++ }
++
++ /* Deleting hardlink from context variable */
++ ret = ctr_delete_hard_link(this, ctr_xlator_ctx, newloc->pargfid,
++ newloc->name);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_DELETE_HARDLINK_FAILED,
++ "Failed to delete hard link");
++ goto out;
++ }
++
++ /* Requesting for number of hardlinks on the newloc
++ * inode from POSIX.
++ * */
++ is_dict_created = set_posix_link_request(this, &xdata);
++ if (is_dict_created == -1) {
++ ret = -1;
++ goto out;
++ }
++ }
++ }
++
++out:
++ STACK_WIND(frame, ctr_rename_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
++
++ if (is_dict_created == 1) {
++ dict_unref(xdata);
++ }
++
++ return 0;
++}
++
++/****************************unlink******************************************/
++int32_t
++ctr_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata)
++{
++ int ret = -1;
++ uint32_t remaining_links = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ if (!xdata)
++ goto out;
++
++ /*
++ *
++ * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator
++ *
++ * */
++ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
++ &remaining_links);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++ "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
++ remaining_links = -1;
++ }
++
++ /*This is not the only link*/
++ if (remaining_links != 1) {
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
++ GFDB_FOP_UNDEL);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
++ "Failed to insert unlink unwind");
++ }
++ }
++ /*Last link that was deleted*/
++ else if (remaining_links == 1) {
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
++ GFDB_FOP_UNDEL_ALL);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
++ "Failed to insert unlink unwind");
++ }
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
++ xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
++ dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++ gf_ctr_link_context_t ctr_link_cx;
++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
++ gf_boolean_t is_xdata_created = _gf_false;
++ struct iatt dummy_stat = {0};
++
++ GF_ASSERT(frame);
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++
++ /*Fill link context*/
++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
++ _link_cx, NULL, GFDB_FOP_DENTRY_WRITE,
++ GFDB_FOP_WDEL);
++
++ /*Internal FOP*/
++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
++
++ /* Is a metadata FOP */
++ _inode_cx->is_metadata_fop = _gf_true;
++
++ /* If it is an internal FOP on a dht link file, do not record */
++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
++ goto out;
++ }
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
++ "Failed to insert unlink wind");
++ } else {
++ /* We delete the hard link from the inode context in the wind,
++ * as we don't get the "inode" in the callback for unlink */
++ ret = delete_hard_link_ctx(frame, this, loc->inode);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
++ "Failed "
++ "deleting hard link from ctr inode context");
++ }
++ }
++
++ /*
++ *
++ * Sending GF_REQUEST_LINK_COUNT_XDATA
++ * to POSIX Xlator to send link count in unwind path
++ *
++ * */
++ /*create xdata if NULL*/
++ if (!xdata) {
++ xdata = dict_new();
++ is_xdata_created = (xdata) ? _gf_true : _gf_false;
++ }
++ if (!xdata) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
++ "xdata is NULL :Cannot send "
++ "GF_REQUEST_LINK_COUNT_XDATA to posix");
++ goto out;
++ }
++
++ ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++ "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
++ if (is_xdata_created) {
++ dict_unref(xdata);
++ }
++ goto out;
++ }
++
++out:
++ STACK_WIND(frame, ctr_unlink_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
++
++ if (is_xdata_created)
++ dict_unref(xdata);
++
++ return 0;
++}
++
++/****************************fsync******************************************/
++int32_t
++ctr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
++ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
++ dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
++ "Failed to insert fsync unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
++ dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
++ "Failed to insert fsync wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_fsync_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
++ return 0;
++}
++
++/****************************setxattr****************************************/
++
++int
++ctr_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
++ "Failed to insert setxattr unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
++
++ return 0;
++}
++
++int
++ctr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
++ int flags, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
++ "Failed to insert setxattr wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_setxattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata);
++ return 0;
++}
++/**************************** fsetxattr *************************************/
++int32_t
++ctr_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
++ "Failed to insert fsetxattr unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
++
++ return 0;
++}
++
++int32_t
++ctr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
++ int32_t flags, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
++ "Failed to insert fsetxattr wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_fsetxattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
++ return 0;
++}
++/****************************mknod*******************************************/
++
++int32_t
++ctr_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode, struct iatt *buf,
++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
++{
++ int ret = -1;
++ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ /* Add hard link to the list */
++ ret_val = add_hard_link_ctx(frame, this, inode);
++ if (ret_val == CTR_CTX_ERROR) {
++ gf_msg_trace(this->name, 0, "Failed adding hard link");
++ }
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE,
++ GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
++ "Failed to insert mknod unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
++ postparent, xdata);
++
++ return 0;
++}
++
++int
++ctr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
++ dev_t rdev, mode_t umask, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++ gf_ctr_link_context_t ctr_link_cx;
++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
++ uuid_t gfid = {
++ 0,
++ };
++ uuid_t *ptr_gfid = &gfid;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ GF_ASSERT(frame);
++ GF_ASSERT(frame->root);
++
++ /*get gfid from xdata dict*/
++ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
++ if (ret) {
++ gf_msg_debug(this->name, 0, "failed to get gfid from dict");
++ goto out;
++ }
++
++ /*fill ctr link context*/
++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx,
++ NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_WIND_FAILED,
++ "Failed to insert mknod wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_mknod_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
++ return 0;
++}
++
++/****************************create******************************************/
++int
++ctr_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = add_hard_link_ctx(frame, this, inode);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_ADD_HARDLINK_FAILED,
++ "Failed adding hard link");
++ }
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE,
++ GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
++ "Failed to insert create unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf,
++ preparent, postparent, xdata);
++
++ return 0;
++}
++
++int
++ctr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++ gf_ctr_link_context_t ctr_link_cx;
++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
++ uuid_t gfid = {
++ 0,
++ };
++ uuid_t *ptr_gfid = &gfid;
++ struct iatt dummy_stat = {0};
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++
++ GF_ASSERT(frame);
++ GF_ASSERT(frame->root);
++
++ /*Get GFID from Xdata dict*/
++ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
++ "failed to get gfid from dict");
++ goto out;
++ }
++
++ /*fill ctr link context*/
++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx,
++ NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
++
++ /*Internal FOP*/
++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
++
++ /* If it is an internal FOP on a dht link file, do not record */
++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
++ goto out;
++ }
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, &ctr_inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_WIND_FAILED,
++ "Failed to insert create wind");
++ }
++out:
++ STACK_WIND(frame, ctr_create_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
++ xdata);
++ return 0;
++}
++
++/****************************link********************************************/
++
++int
++ctr_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, inode_t *inode, struct iatt *stbuf,
++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ /* Add hard link to the list */
++ ret = add_hard_link_ctx(frame, this, inode);
++ if (ret) {
++ gf_msg_trace(this->name, 0, "Failed adding hard link");
++ }
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
++ GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
++ "Failed to insert create unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent,
++ postparent, xdata);
++ return 0;
++}
++
++int
++ctr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
++ dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++ gf_ctr_link_context_t ctr_link_cx;
++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
++ struct iatt dummy_stat = {0};
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++
++ GF_ASSERT(frame);
++ GF_ASSERT(frame->root);
++
++ /*fill ctr link context*/
++ FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type,
++ oldloc->inode->gfid, _link_cx, NULL,
++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
++
++ /*Internal FOP*/
++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
++
++ /* Is a metadata fop */
++ _inode_cx->is_metadata_fop = _gf_true;
++
++ /* If it is an internal FOP on a dht link file, do not record */
++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
++ goto out;
++ }
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
++ "Failed to insert link wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_link_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
++ return 0;
++}
++
++/******************************readv*****************************************/
++int
++ctr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
++ struct iobref *iobref, dict_t *xdata)
++{
++ int ret = -1;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
++
++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ, GFDB_FOP_UNWIND);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
++ "Failed to insert create unwind");
++ }
++
++out:
++ ctr_free_frame_local(frame);
++
++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
++ iobref, xdata);
++ return 0;
++}
++
++int
++ctr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off,
++ uint32_t flags, dict_t *xdata)
++{
++ int ret = -1;
++ gf_ctr_inode_context_t ctr_inode_cx;
++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
++
++ CTR_IS_DISABLED_THEN_GOTO(this, out);
++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
++
++ /*Fill ctr inode context*/
++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
++ NULL, GFDB_FOP_INODE_READ, GFDB_FOP_WIND);
++
++ /*record into the database*/
++ ret = ctr_insert_wind(frame, this, _inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_READV_WIND_FAILED,
++ "Failed to insert readv wind");
++ }
++
++out:
++ STACK_WIND(frame, ctr_readv_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readv, fd, size, off, flags, xdata);
++ return 0;
++}
++
++/*******************************ctr_ipc****************************************/
++
++/* This is the callback function invoked per record/file from the database */
++static int
++ctr_db_query_callback(gfdb_query_record_t *gfdb_query_record, void *args)
++{
++ int ret = -1;
++ ctr_query_cbk_args_t *query_cbk_args = args;
++
++ GF_VALIDATE_OR_GOTO("ctr", query_cbk_args, out);
++
++ ret = gfdb_write_query_record(query_cbk_args->query_fd, gfdb_query_record);
++ if (ret) {
++ gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "Failed to write to query file");
++ goto out;
++ }
++
++ query_cbk_args->count++;
++
++ ret = 0;
++out:
++ return ret;
++}
++
++/* This function does all the db queries related to tiering and
++ * generates/populates new/existing query file
++ * inputs:
++ * xlator_t *this : CTR Translator
++ * void *conn_node : Database connection
++ * char *query_file: the query file that needs to be updated
++ * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters
++ * Return:
++ * On success 0
++ * On failure -1
++ * */
++int
++ctr_db_query(xlator_t *this, void *conn_node, char *query_file,
++ gfdb_ipc_ctr_params_t *ipc_ctr_params)
++{
++ int ret = -1;
++ ctr_query_cbk_args_t query_cbk_args = {0};
++
++ GF_VALIDATE_OR_GOTO("ctr", this, out);
++ GF_VALIDATE_OR_GOTO(this->name, conn_node, out);
++ GF_VALIDATE_OR_GOTO(this->name, query_file, out);
++ GF_VALIDATE_OR_GOTO(this->name, ipc_ctr_params, out);
++
++ /*Query for eligible files from db*/
++ query_cbk_args.query_fd = open(query_file, O_WRONLY | O_CREAT | O_APPEND,
++ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
++ if (query_cbk_args.query_fd < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, CTR_MSG_FATAL_ERROR,
++ "Failed to open query file %s", query_file);
++ goto out;
++ }
++ if (!ipc_ctr_params->is_promote) {
++ if (ipc_ctr_params->emergency_demote) {
++ /* emergency demotion mode */
++ ret = find_all(conn_node, ctr_db_query_callback,
++ (void *)&query_cbk_args,
++ ipc_ctr_params->query_limit);
++ } else {
++ if (ipc_ctr_params->write_freq_threshold == 0 &&
++ ipc_ctr_params->read_freq_threshold == 0) {
++ ret = find_unchanged_for_time(conn_node, ctr_db_query_callback,
++ (void *)&query_cbk_args,
++ &ipc_ctr_params->time_stamp);
++ } else {
++ ret = find_unchanged_for_time_freq(
++ conn_node, ctr_db_query_callback, (void *)&query_cbk_args,
++ &ipc_ctr_params->time_stamp,
++ ipc_ctr_params->write_freq_threshold,
++ ipc_ctr_params->read_freq_threshold, _gf_false);
++ }
++ }
++ } else {
++ if (ipc_ctr_params->write_freq_threshold == 0 &&
++ ipc_ctr_params->read_freq_threshold == 0) {
++ ret = find_recently_changed_files(conn_node, ctr_db_query_callback,
++ (void *)&query_cbk_args,
++ &ipc_ctr_params->time_stamp);
++ } else {
++ ret = find_recently_changed_files_freq(
++ conn_node, ctr_db_query_callback, (void *)&query_cbk_args,
++ &ipc_ctr_params->time_stamp,
++ ipc_ctr_params->write_freq_threshold,
++ ipc_ctr_params->read_freq_threshold, _gf_false);
++ }
++ }
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "FATAL: query from db failed");
++ goto out;
++ }
++
++ ret = clear_files_heat(conn_node);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "FATAL: Failed to clear db entries");
++ goto out;
++ }
++
++ ret = 0;
++out:
++
++ if (!ret)
++ ret = query_cbk_args.count;
++
++ if (query_cbk_args.query_fd >= 0) {
++ sys_close(query_cbk_args.query_fd);
++ query_cbk_args.query_fd = -1;
++ }
++
++ return ret;
++}
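++
++/*
++ * Usage sketch (hypothetical caller, for illustration only; assumes
++ * gfdb_time_t is a struct timeval as in libgfdb): the tier migrator
++ * would fill gfdb_ipc_ctr_params_t and pass the query file path.
++ *
++ *   gfdb_ipc_ctr_params_t params = {0};
++ *   params.is_promote = _gf_true;
++ *   params.write_freq_threshold = 0;  // 0 and 0 => time-based query
++ *   params.read_freq_threshold = 0;
++ *   gettimeofday(&params.time_stamp, NULL);
++ *   int count = ctr_db_query(this, priv->_db_conn,
++ *                            "/path/to/promotequeryfile", &params);
++ *   // count >= 0: number of records written to the query file
++ */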
++
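++/* Compaction worker: runs compact_db() once with the flags captured at
++ * spawn time, then clears priv->compact_active and
++ * priv->compact_mode_switched under compact_lock so the next
++ * compaction IPC can be accepted. */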
++void *
++ctr_compact_thread(void *args)
++{
++ int ret = -1;
++ void *db_conn = NULL;
++
++ xlator_t *this = NULL;
++ gf_ctr_private_t *priv = NULL;
++ gf_boolean_t compact_active = _gf_false;
++ gf_boolean_t compact_mode_switched = _gf_false;
++
++ this = (xlator_t *)args;
++
++ GF_VALIDATE_OR_GOTO("ctr", this, out);
++
++ priv = this->private;
++
++ db_conn = priv->_db_conn;
++ compact_active = priv->compact_active;
++ compact_mode_switched = priv->compact_mode_switched;
++
++ gf_msg("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET, "Starting compaction");
++
++ ret = compact_db(db_conn, compact_active, compact_mode_switched);
++
++ if (ret) {
++ gf_msg("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed to perform the compaction");
++ }
++
++ ret = pthread_mutex_lock(&priv->compact_lock);
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed to acquire lock");
++ goto out;
++ }
++
++ /* We are done compacting on this brick. Set all flags to false */
++ priv->compact_active = _gf_false;
++ priv->compact_mode_switched = _gf_false;
++
++ ret = pthread_mutex_unlock(&priv->compact_lock);
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed to release lock");
++ goto out;
++ }
++
++out:
++ return NULL;
++}
++
++int
++ctr_ipc_helper(xlator_t *this, dict_t *in_dict, dict_t *out_dict)
++{
++ int ret = -1;
++ char *ctr_ipc_ops = NULL;
++ gf_ctr_private_t *priv = NULL;
++ char *db_version = NULL;
++ char *db_param_key = NULL;
++ char *db_param = NULL;
++ char *query_file = NULL;
++ gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
++ int result = 0;
++ pthread_t compact_thread;
++
++ GF_VALIDATE_OR_GOTO("ctr", this, out);
++ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv->_db_conn, out);
++ GF_VALIDATE_OR_GOTO(this->name, in_dict, out);
++ GF_VALIDATE_OR_GOTO(this->name, out_dict, out);
++
++ GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY, ctr_ipc_ops,
++ out);
++
++ /* if it is a db clear operation */
++ if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS,
++ SLEN(GFDB_IPC_CTR_CLEAR_OPS)) == 0) {
++ ret = clear_files_heat(priv->_db_conn);
++ if (ret)
++ goto out;
++
++ } /* if it is a query operation, in which case it is query + clear db */
++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS,
++ SLEN(GFDB_IPC_CTR_QUERY_OPS)) == 0) {
++ ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_QFILE_PATH, &query_file);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed extracting query file path");
++ goto out;
++ }
++
++ ret = dict_get_bin(in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
++ (void *)&ipc_ctr_params);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed extracting query parameters");
++ goto out;
++ }
++
++ ret = ctr_db_query(this, priv->_db_conn, query_file, ipc_ctr_params);
++
++ ret = dict_set_int32(out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ret);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed setting query reply");
++ goto out;
++ }
++
++ } /* if it is a query for the db version */
++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS,
++ SLEN(GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) {
++ ret = get_db_version(priv->_db_conn, &db_version);
++ if (ret == -1 || !db_version) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed extracting db version ");
++ goto out;
++ }
++
++ SET_DB_PARAM_TO_DICT(this->name, out_dict, GFDB_IPC_CTR_RET_DB_VERSION,
++ db_version, ret, error);
++
++ } /* if it is a query for a db setting */
++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS,
++ SLEN(GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) {
++ ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_DB_KEY, &db_param_key);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed extracting db param key");
++ goto out;
++ }
++
++ ret = get_db_params(priv->_db_conn, db_param_key, &db_param);
++ if (ret == -1 || !db_param) {
++ goto out;
++ }
++
++ SET_DB_PARAM_TO_DICT(this->name, out_dict, db_param_key, db_param, ret,
++ error);
++ } /* if it is an attempt to compact the database */
++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA,
++ SLEN(GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) {
++ ret = pthread_mutex_lock(&priv->compact_lock);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed to acquire lock for compaction");
++ goto out;
++ }
++
++ if ((priv->compact_active || priv->compact_mode_switched)) {
++ /* Compaction in progress. LEAVE */
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Compaction already in progress.");
++ pthread_mutex_unlock(&priv->compact_lock);
++ goto out;
++ }
++ /* At this point, we should be the only one compacting on the brick */
++
++ /* Grab the arguments from the dictionary */
++ ret = dict_get_int32(in_dict, "compact_active", &result);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed to get compaction type");
++ goto out;
++ }
++
++ if (result) {
++ priv->compact_active = _gf_true;
++ }
++
++ ret = dict_get_int32(in_dict, "compact_mode_switched", &result);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed to see if compaction switched");
++ goto out;
++ }
++
++ if (result) {
++ priv->compact_mode_switched = _gf_true;
++ gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
++ "Pre-thread: Compact mode switch is true");
++ } else {
++ gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
++ "Pre-thread: Compact mode switch is false");
++ }
++
++ ret = pthread_mutex_unlock(&priv->compact_lock);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed to release lock for compaction");
++ goto out;
++ }
++
++ ret = gf_thread_create(&compact_thread, NULL, ctr_compact_thread,
++ (void *)this, "ctrcomp");
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed to spawn compaction thread");
++ goto out;
++ }
++
++ goto out;
++ } /* default case */
++ else {
++ goto out;
++ }
++
++ ret = 0;
++ goto out;
++error:
++ GF_FREE(db_param_key);
++ GF_FREE(db_param);
++ GF_FREE(db_version);
++out:
++ return ret;
++}
++
++/* IPC call from the tier migrator to the CTR xlator: clear heat, query the
++ * db, fetch the db version/params, or trigger compaction on the DB (an
++ * illustrative caller sketch follows ctr_ipc below) */
++int32_t
++ctr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *in_dict)
++{
++ int ret = -1;
++ gf_ctr_private_t *priv = NULL;
++ dict_t *out_dict = NULL;
++
++ GF_ASSERT(this);
++ priv = this->private;
++ GF_ASSERT(priv);
++ GF_ASSERT(priv->_db_conn);
++ GF_VALIDATE_OR_GOTO(this->name, in_dict, wind);
++
++ if (op != GF_IPC_TARGET_CTR)
++ goto wind;
++
++ out_dict = dict_new();
++ if (!out_dict) {
++ goto out;
++ }
++
++ ret = ctr_ipc_helper(this, in_dict, out_dict);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
++ "Failed in ctr_ipc_helper");
++ }
++out:
++
++ STACK_UNWIND_STRICT(ipc, frame, ret, 0, out_dict);
++
++ if (out_dict)
++ dict_unref(out_dict);
++
++ return 0;
++
++wind:
++ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->ipc, op, in_dict);
++
++ return 0;
++}
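++
++/* Illustrative (hypothetical) caller side of the IPC interface above: a
++ * minimal sketch of how the tier migrator might request a heat clear,
++ * assuming the GFDB_IPC_CTR_KEY / GFDB_IPC_CTR_CLEAR_OPS keys dispatched on
++ * in ctr_ipc_helper() and a syncop-style ipc wrapper (assumed here as
++ * syncop_ipc). Not part of this patch; it only documents the expected
++ * in_dict layout:
++ *
++ * dict_t *in_dict = dict_new();
++ * if (!in_dict)
++ * goto out;
++ * ret = dict_set_str(in_dict, GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_CLEAR_OPS);
++ * if (!ret)
++ * ret = syncop_ipc(subvol, GF_IPC_TARGET_CTR, in_dict, NULL);
++ * dict_unref(in_dict);
++ */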
++
++/* Call to initialize db for ctr xlator while ctr is enabled */
++int32_t
++initialize_ctr_resource(xlator_t *this, gf_ctr_private_t *priv)
++{
++ int ret_db = -1;
++ dict_t *params_dict = NULL;
++
++ if (!priv)
++ goto error;
++
++ /* For compaction */
++ priv->compact_active = _gf_false;
++ priv->compact_mode_switched = _gf_false;
++ ret_db = pthread_mutex_init(&priv->compact_lock, NULL);
++
++ if (ret_db) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "FATAL: Failed initializing compaction mutex");
++ goto error;
++ }
++
++ params_dict = dict_new();
++ if (!params_dict) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INIT_DB_PARAMS_FAILED,
++ "DB Params cannot initialized!");
++ goto error;
++ }
++
++ /*Extract db params options*/
++ ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type);
++ if (ret_db) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
++ "Failed extracting db params options");
++ goto error;
++ }
++
++ /*Create a memory pool for ctr xlator*/
++ this->local_pool = mem_pool_new(gf_ctr_local_t, 64);
++ if (!this->local_pool) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
++ "failed to create local memory pool");
++ goto error;
++ }
++
++ /*Initialize Database Connection*/
++ priv->_db_conn = init_db(params_dict, priv->gfdb_db_type);
++ if (!priv->_db_conn) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "FATAL: Failed initializing data base");
++ goto error;
++ }
++
++ ret_db = 0;
++ goto out;
++
++error:
++ if (this)
++ mem_pool_destroy(this->local_pool);
++
++ if (priv) {
++ GF_FREE(priv->ctr_db_path);
++ }
++ GF_FREE(priv);
++ ret_db = -1;
++out:
++ if (params_dict)
++ dict_unref(params_dict);
++
++ return ret_db;
++}
++
++/******************************************************************************/
++int
++reconfigure(xlator_t *this, dict_t *options)
++{
++ char *temp_str = NULL;
++ int ret = 0;
++ gf_ctr_private_t *priv = NULL;
++
++ priv = this->private;
++
++ if (dict_get_str(options, "changetimerecorder.frequency", &temp_str)) {
++ gf_msg(this->name, GF_LOG_TRACE, 0, CTR_MSG_SET, "set");
++ }
++
++ GF_OPTION_RECONF("ctr-enabled", priv->enabled, options, bool, out);
++ if (!priv->enabled) {
++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
++ "CTR Xlator is not enabled so skip ctr reconfigure");
++ goto out;
++ }
++
++ /* If ctr was disabled at init time (so initialization was skipped) and
++ is enabled now, call initialize_ctr_resource during the reconfigure
++ phase to allocate resources for the xlator
++ */
++ if (priv->enabled && !priv->_db_conn) {
++ ret = initialize_ctr_resource(this, priv);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "FATAL: Failed ctr initialize resource");
++ goto out;
++ }
++ }
++
++ GF_OPTION_RECONF("record-counters", priv->ctr_record_counter, options, bool,
++ out);
++
++ GF_OPTION_RECONF("ctr-record-metadata-heat", priv->ctr_record_metadata_heat,
++ options, bool, out);
++
++ GF_OPTION_RECONF("ctr_link_consistency", priv->ctr_link_consistency,
++ options, bool, out);
++
++ GF_OPTION_RECONF("ctr_lookupheal_inode_timeout",
++ priv->ctr_lookupheal_inode_timeout, options, uint64, out);
++
++ GF_OPTION_RECONF("ctr_lookupheal_link_timeout",
++ priv->ctr_lookupheal_link_timeout, options, uint64, out);
++
++ GF_OPTION_RECONF("record-exit", priv->ctr_record_unwind, options, bool,
++ out);
++
++ GF_OPTION_RECONF("record-entry", priv->ctr_record_wind, options, bool, out);
++
++ /* If database is sqlite */
++ if (priv->gfdb_db_type == GFDB_SQLITE3) {
++ /* AUTOCHECKPOINT */
++ if (dict_get_str(options, GFDB_SQL_PARAM_WAL_AUTOCHECK, &temp_str) ==
++ 0) {
++ ret = set_db_params(priv->_db_conn, "wal_autocheckpoint", temp_str);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
++ "Failed to set %s", GFDB_SQL_PARAM_WAL_AUTOCHECK);
++ }
++ }
++
++ /* CACHE_SIZE */
++ if (dict_get_str(options, GFDB_SQL_PARAM_CACHE_SIZE, &temp_str) == 0) {
++ ret = set_db_params(priv->_db_conn, "cache_size", temp_str);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
++ "Failed to set %s", GFDB_SQL_PARAM_CACHE_SIZE);
++ }
++ }
++ }
++
++ ret = 0;
++
++out:
++
++ return ret;
++}
++
++/****************************init********************************************/
++
++int32_t
++init(xlator_t *this)
++{
++ gf_ctr_private_t *priv = NULL;
++ int ret_db = -1;
++
++ if (!this) {
++ gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "FATAL: ctr this is not initialized");
++ return -1;
++ }
++
++ if (!this->children || this->children->next) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "FATAL: ctr should have exactly one child");
++ return -1;
++ }
++
++ if (!this->parents) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DANGLING_VOLUME,
++ "dangling volume. check volfile ");
++ }
++
++ priv = GF_CALLOC(1, sizeof(*priv), gf_ctr_mt_private_t);
++ if (!priv) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
++ "Calloc did not work!!!");
++ return -1;
++ }
++
++ /*Default values for the translator*/
++ priv->ctr_record_wind = _gf_true;
++ priv->ctr_record_unwind = _gf_false;
++ priv->ctr_hot_brick = _gf_false;
++ priv->gfdb_db_type = GFDB_SQLITE3;
++ priv->gfdb_sync_type = GFDB_DB_SYNC;
++ priv->_db_conn = NULL;
++ priv->ctr_lookupheal_link_timeout = CTR_DEFAULT_HARDLINK_EXP_PERIOD;
++ priv->ctr_lookupheal_inode_timeout = CTR_DEFAULT_INODE_EXP_PERIOD;
++
++ /*Extract ctr xlator options*/
++ ret_db = extract_ctr_options(this, priv);
++ if (ret_db) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
++ "Failed extracting ctr xlator options");
++ GF_FREE(priv);
++ return -1;
++ }
++
++ if (!priv->enabled) {
++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
++ "CTR Xlator is not enabled so skip ctr init");
++ goto out;
++ }
++
++ ret_db = initialize_ctr_resource(this, priv);
++ if (ret_db) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
++ "FATAL: Failed ctr initialize resource");
++ return -1;
++ }
++
++out:
++ this->private = (void *)priv;
++ return 0;
++}
++
++int
++notify(xlator_t *this, int event, void *data, ...)
++{
++ gf_ctr_private_t *priv = NULL;
++ int ret = 0;
++
++ priv = this->private;
++
++ if (!priv)
++ goto out;
++
++ ret = default_notify(this, event, data);
++
++out:
++ return ret;
++}
++
++int32_t
++mem_acct_init(xlator_t *this)
++{
++ int ret = -1;
++
++ GF_VALIDATE_OR_GOTO("ctr", this, out);
++
++ ret = xlator_mem_acct_init(this, gf_ctr_mt_end + 1);
++
++ if (ret != 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_MEM_ACC_INIT_FAILED,
++ "Memory accounting init"
++ "failed");
++ return ret;
++ }
++out:
++ return ret;
++}
++
++void
++fini(xlator_t *this)
++{
++ gf_ctr_private_t *priv = NULL;
++
++ priv = this->private;
++
++ if (priv && priv->enabled) {
++ if (fini_db(priv->_db_conn)) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED,
++ "Failed closing "
++ "db connection");
++ }
++
++ if (priv->_db_conn)
++ priv->_db_conn = NULL;
++
++ GF_FREE(priv->ctr_db_path);
++ if (pthread_mutex_destroy(&priv->compact_lock)) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED,
++ "Failed to "
++ "destroy the compaction mutex");
++ }
++ }
++ GF_FREE(priv);
++ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
++
++ return;
++}
++
++struct xlator_fops fops = {
++ /*lookup*/
++ .lookup = ctr_lookup,
++ /*write fops */
++ .mknod = ctr_mknod,
++ .create = ctr_create,
++ .truncate = ctr_truncate,
++ .ftruncate = ctr_ftruncate,
++ .setxattr = ctr_setxattr,
++ .fsetxattr = ctr_fsetxattr,
++ .removexattr = ctr_removexattr,
++ .fremovexattr = ctr_fremovexattr,
++ .unlink = ctr_unlink,
++ .link = ctr_link,
++ .rename = ctr_rename,
++ .writev = ctr_writev,
++ .setattr = ctr_setattr,
++ .fsetattr = ctr_fsetattr,
++ /*read fops*/
++ .readv = ctr_readv,
++ /* IPC call*/
++ .ipc = ctr_ipc};
++
++struct xlator_cbks cbks = {.forget = ctr_forget};
++
++struct volume_options options[] = {
++ {.key =
++ {
++ "ctr-enabled",
++ },
++ .type = GF_OPTION_TYPE_BOOL,
++ .value = {"on", "off"},
++ .default_value = "off",
++ .description = "Enables the CTR",
++ .flags = OPT_FLAG_SETTABLE},
++ {.key = {"record-entry"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .value = {"on", "off"},
++ .default_value = "on"},
++ {.key = {"record-exit"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .value = {"on", "off"},
++ .default_value = "off"},
++ {.key = {"record-counters"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .value = {"on", "off"},
++ .default_value = "off",
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .flags = OPT_FLAG_SETTABLE,
++ .tags = {}},
++ {.key = {"ctr-record-metadata-heat"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .value = {"on", "off"},
++ .default_value = "off",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .tags = {}},
++ {.key = {"ctr_link_consistency"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .value = {"on", "off"},
++ .default_value = "off",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .tags = {}},
++ {.key = {"ctr_lookupheal_link_timeout"},
++ .type = GF_OPTION_TYPE_INT,
++ .default_value = "300",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_2},
++ .tags = {}},
++ {.key = {"ctr_lookupheal_inode_timeout"},
++ .type = GF_OPTION_TYPE_INT,
++ .default_value = "300",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_2},
++ .tags = {}},
++ {.key = {"hot-brick"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .value = {"on", "off"},
++ .default_value = "off"},
++ {.key = {"db-type"},
++ .type = GF_OPTION_TYPE_STR,
++ .value = {"hashfile", "rocksdb", "changelog", "sqlite3", "hyperdex"},
++ .default_value = "sqlite3",
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .flags = OPT_FLAG_SETTABLE,
++ .tags = {}},
++ {.key = {"db-sync"},
++ .type = GF_OPTION_TYPE_STR,
++ .value = {"sync", "async"},
++ .default_value = "sync"},
++ {.key = {"db-path"}, .type = GF_OPTION_TYPE_PATH},
++ {.key = {"db-name"}, .type = GF_OPTION_TYPE_STR},
++ {.key = {GFDB_SQL_PARAM_SYNC},
++ .type = GF_OPTION_TYPE_STR,
++ .value = {"off", "normal", "full"},
++ .default_value = "normal"},
++ {.key = {GFDB_SQL_PARAM_JOURNAL_MODE},
++ .type = GF_OPTION_TYPE_STR,
++ .value = {"delete", "truncate", "persist", "memory", "wal", "off"},
++ .default_value = "wal",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .tags = {}},
++ {.key = {GFDB_SQL_PARAM_AUTO_VACUUM},
++ .type = GF_OPTION_TYPE_STR,
++ .value = {"off", "full", "incr"},
++ .default_value = "off",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .tags = {}},
++ {.key = {GFDB_SQL_PARAM_WAL_AUTOCHECK},
++ .type = GF_OPTION_TYPE_INT,
++ .default_value = "25000",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .tags = {}},
++ {.key = {GFDB_SQL_PARAM_CACHE_SIZE},
++ .type = GF_OPTION_TYPE_INT,
++ .default_value = "12500",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .tags = {}},
++ {.key = {GFDB_SQL_PARAM_PAGE_SIZE},
++ .type = GF_OPTION_TYPE_INT,
++ .default_value = "4096",
++ .flags = OPT_FLAG_SETTABLE,
++ .op_version = {GD_OP_VERSION_3_7_0},
++ .tags = {}},
++ {.key = {NULL}},
++};
++
++xlator_api_t xlator_api = {
++ .init = init,
++ .fini = fini,
++ .notify = notify,
++ .reconfigure = reconfigure,
++ .mem_acct_init = mem_acct_init,
++ .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */
++ .fops = &fops,
++ .cbks = &cbks,
++ .identifier = "changetimerecorder",
++ .category = GF_MAINTAINED,
++ .options = options,
++};
+diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h
+new file mode 100644
+index 0000000..0150a1c
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/changetimerecorder.h
+@@ -0,0 +1,21 @@
++/*
++ Copyright (c) 2006-2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef __CTR_H
++#define __CTR_H
++
++#include <glusterfs/glusterfs.h>
++#include <glusterfs/xlator.h>
++#include <glusterfs/logging.h>
++#include <glusterfs/common-utils.h>
++#include "ctr_mem_types.h"
++#include "ctr-helper.h"
++
++#endif /* __CTR_H */
+diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c
+new file mode 100644
+index 0000000..e1e6573
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr-helper.c
+@@ -0,0 +1,293 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include "gfdb_sqlite3.h"
++#include "ctr-helper.h"
++#include "ctr-messages.h"
++
++/*******************************************************************************
++ *
++ * Fill unwind into db record
++ *
++ ******************************************************************************/
++int
++fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
++ gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path)
++{
++ int ret = -1;
++ gfdb_time_t *ctr_uwtime = NULL;
++ gf_ctr_private_t *_priv = NULL;
++
++ GF_ASSERT(this);
++ _priv = this->private;
++ GF_ASSERT(_priv);
++
++ GF_ASSERT(ctr_local);
++
++ /* If it's not an unwind path, error out */
++ if (!isunwindpath(fop_path)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
++ "Wrong fop_path. Should be unwind");
++ goto out;
++ }
++
++ ctr_uwtime = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time;
++ CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path;
++ CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type;
++
++ ret = gettimeofday(ctr_uwtime, NULL);
++ if (ret == -1) {
++ gf_msg(this->name, GF_LOG_ERROR, errno,
++ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
++ "Error "
++ "filling unwind time record %s",
++ strerror(errno));
++ goto out;
++ }
++
++ /* Special case: if it's a tier rebalance
++ * + cold tier brick
++ * + a create/mknod FOP,
++ * we record the unwind time as zero */
++ if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG &&
++ (!_priv->ctr_hot_brick) && isdentrycreatefop(fop_type)) {
++ memset(ctr_uwtime, 0, sizeof(*ctr_uwtime));
++ }
++ ret = 0;
++out:
++ return ret;
++}
++
++/*******************************************************************************
++ *
++ * Fill wind into db record
++ *
++ ******************************************************************************/
++int
++fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
++ gf_ctr_inode_context_t *ctr_inode_cx)
++{
++ int ret = -1;
++ gfdb_time_t *ctr_wtime = NULL;
++ gf_ctr_private_t *_priv = NULL;
++
++ GF_ASSERT(this);
++ _priv = this->private;
++ GF_ASSERT(_priv);
++ GF_ASSERT(ctr_local);
++ IS_CTR_INODE_CX_SANE(ctr_inode_cx);
++
++ /* If it's not a wind path, error out! */
++ if (!iswindpath(ctr_inode_cx->fop_path)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
++ "Wrong fop_path. Should be wind");
++ goto out;
++ }
++
++ ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time;
++ CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
++ CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
++ CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency;
++
++ ret = gettimeofday(ctr_wtime, NULL);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno,
++ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
++ "Error filling wind time record %s", strerror(errno));
++ goto out;
++ }
++
++ /* Special case: if it's a tier rebalance
++ * + cold tier brick
++ * + a create/mknod FOP,
++ * we record the wind time as zero */
++ if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG &&
++ (!_priv->ctr_hot_brick) && isdentrycreatefop(ctr_inode_cx->fop_type)) {
++ memset(ctr_wtime, 0, sizeof(*ctr_wtime));
++ }
++
++ /* Copy gfid into db record */
++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
++
++ /* Copy older gfid if any */
++ if (ctr_inode_cx->old_gfid &&
++ (!gf_uuid_is_null(*(ctr_inode_cx->old_gfid)))) {
++ gf_uuid_copy(CTR_DB_REC(ctr_local).old_gfid, *(ctr_inode_cx->old_gfid));
++ }
++
++ /*Hard Links*/
++ if (isdentryfop(ctr_inode_cx->fop_type)) {
++ /*new link fop*/
++ if (NEW_LINK_CX(ctr_inode_cx)) {
++ gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
++ *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
++ strcpy(CTR_DB_REC(ctr_local).file_name,
++ NEW_LINK_CX(ctr_inode_cx)->basename);
++ }
++ /*rename fop*/
++ if (OLD_LINK_CX(ctr_inode_cx)) {
++ gf_uuid_copy(CTR_DB_REC(ctr_local).old_pargfid,
++ *((OLD_LINK_CX(ctr_inode_cx))->pargfid));
++ strcpy(CTR_DB_REC(ctr_local).old_file_name,
++ OLD_LINK_CX(ctr_inode_cx)->basename);
++ }
++ }
++
++ ret = 0;
++out:
++ /*On error roll back and clean the record*/
++ if (ret == -1) {
++ CLEAR_CTR_DB_RECORD(ctr_local);
++ }
++ return ret;
++}
++
++/******************************************************************************
++ *
++ * CTR xlator init related functions
++ *
++ *
++ * ****************************************************************************/
++static int
++extract_sql_params(xlator_t *this, dict_t *params_dict)
++{
++ int ret = -1;
++ char *db_path = NULL;
++ char *db_name = NULL;
++ char *db_full_path = NULL;
++
++ GF_ASSERT(this);
++ GF_ASSERT(params_dict);
++
++ /*Extract the path of the db*/
++ db_path = NULL;
++ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path",
++ db_path, "/var/run/gluster/");
++
++ /*Extract the name of the db*/
++ db_name = NULL;
++ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name",
++ db_name, "gf_ctr_db.db");
++
++ /*Construct full path of the db*/
++ ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name);
++ if (ret < 0) {
++ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
++ CTR_MSG_CONSTRUCT_DB_PATH_FAILED,
++ "Construction of full db path failed!");
++ goto out;
++ }
++
++ /*Setting the SQL DB Path*/
++ SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH,
++ db_full_path, ret, out);
++
++ /*Extract rest of the sql params*/
++ ret = gfdb_set_sql_params(this->name, this->options, params_dict);
++ if (ret) {
++ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
++ "Failed setting values to sql param dict!");
++ }
++
++ ret = 0;
++
++out:
++ if (ret)
++ GF_FREE(db_full_path);
++ return ret;
++}
++
++int
++extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type)
++{
++ int ret = -1;
++
++ GF_ASSERT(this);
++ GF_ASSERT(params_dict);
++
++ switch (db_type) {
++ case GFDB_SQLITE3:
++ ret = extract_sql_params(this, params_dict);
++ if (ret)
++ goto out;
++ break;
++ case GFDB_ROCKS_DB:
++ case GFDB_HYPERDEX:
++ case GFDB_HASH_FILE_STORE:
++ case GFDB_INVALID_DB:
++ case GFDB_DB_END:
++ goto out;
++ }
++ ret = 0;
++out:
++ return ret;
++}
++
++int
++extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv)
++{
++ int ret = -1;
++ char *_val_str = NULL;
++
++ GF_ASSERT(this);
++ GF_ASSERT(_priv);
++
++ /*Checking if the CTR Translator is enabled. By default its disabled*/
++ _priv->enabled = _gf_false;
++ GF_OPTION_INIT("ctr-enabled", _priv->enabled, bool, out);
++ if (!_priv->enabled) {
++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
++ "CTR Xlator is disabled.");
++ ret = 0;
++ goto out;
++ }
++
++ /*Extract db type*/
++ GF_OPTION_INIT("db-type", _val_str, str, out);
++ _priv->gfdb_db_type = gf_string2gfdbdbtype(_val_str);
++
++ /*Extract flag for record on wind*/
++ GF_OPTION_INIT("record-entry", _priv->ctr_record_wind, bool, out);
++
++ /*Extract flag for record on unwind*/
++ GF_OPTION_INIT("record-exit", _priv->ctr_record_unwind, bool, out);
++
++ /*Extract flag for record on counters*/
++ GF_OPTION_INIT("record-counters", _priv->ctr_record_counter, bool, out);
++
++ /* Extract flag for record metadata heat */
++ GF_OPTION_INIT("ctr-record-metadata-heat", _priv->ctr_record_metadata_heat,
++ bool, out);
++
++ /*Extract flag for link consistency*/
++ GF_OPTION_INIT("ctr_link_consistency", _priv->ctr_link_consistency, bool,
++ out);
++
++ /*Extract ctr_lookupheal_inode_timeout */
++ GF_OPTION_INIT("ctr_lookupheal_inode_timeout",
++ _priv->ctr_lookupheal_inode_timeout, uint64, out);
++
++ /*Extract ctr_lookupheal_link_timeout*/
++ GF_OPTION_INIT("ctr_lookupheal_link_timeout",
++ _priv->ctr_lookupheal_link_timeout, uint64, out);
++
++ /*Extract flag for hot tier brick*/
++ GF_OPTION_INIT("hot-brick", _priv->ctr_hot_brick, bool, out);
++
++ /*Extract flag for sync mode*/
++ GF_OPTION_INIT("db-sync", _val_str, str, out);
++ _priv->gfdb_sync_type = gf_string2gfdbdbsync(_val_str);
++
++ ret = 0;
++
++out:
++ return ret;
++}
+diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h
+new file mode 100644
+index 0000000..517fbb0
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr-helper.h
+@@ -0,0 +1,854 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef __CTR_HELPER_H
++#define __CTR_HELPER_H
++
++#include <glusterfs/xlator.h>
++#include "ctr_mem_types.h"
++#include <glusterfs/iatt.h>
++#include <glusterfs/glusterfs.h>
++#include <glusterfs/xlator.h>
++#include <glusterfs/defaults.h>
++#include <glusterfs/logging.h>
++#include <glusterfs/common-utils.h>
++#include <time.h>
++#include <sys/time.h>
++#include <pthread.h>
++
++#include "gfdb_data_store.h"
++#include "ctr-xlator-ctx.h"
++#include "ctr-messages.h"
++
++#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */
++#define CTR_DEFAULT_INODE_EXP_PERIOD 300 /* Five mins */
++
++typedef struct ctr_query_cbk_args {
++ int query_fd;
++ int count;
++} ctr_query_cbk_args_t;
++
++/*CTR Xlator Private structure*/
++typedef struct gf_ctr_private {
++ gf_boolean_t enabled;
++ char *ctr_db_path;
++ gf_boolean_t ctr_hot_brick;
++ gf_boolean_t ctr_record_wind;
++ gf_boolean_t ctr_record_unwind;
++ gf_boolean_t ctr_record_counter;
++ gf_boolean_t ctr_record_metadata_heat;
++ gf_boolean_t ctr_link_consistency;
++ gfdb_db_type_t gfdb_db_type;
++ gfdb_sync_type_t gfdb_sync_type;
++ gfdb_conn_node_t *_db_conn;
++ uint64_t ctr_lookupheal_link_timeout;
++ uint64_t ctr_lookupheal_inode_timeout;
++ gf_boolean_t compact_active;
++ gf_boolean_t compact_mode_switched;
++ pthread_mutex_t compact_lock;
++} gf_ctr_private_t;
++
++/*
++ * gf_ctr_local_t is the ctr xlator local data structure that is stored in
++ * the call_frame of each FOP.
++ *
++ * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is
++ * used by the insert_record() api from libgfdb. The gfdb_db_record object
++ * will contain all the inode and hardlink information (hardlinks only for
++ * dentry fops: create, mknod, link, unlink, rename). The ctr_local is kept
++ * alive till the unwind call and will be released during the unwind. The
++ * same gfdb_db_record will be used for the unwind insert_record() api, to
++ * record the unwind in the database.
++ *
++ * ia_inode_type in gf_ctr_local will tell the type of the inode. This is
++ * important during the unwind path, as we will not have the inode there.
++ * We would have included this in the gfdb_db_record itself, but currently
++ * we record only file inode information.
++ *
++ * is_internal_fop in gf_ctr_local will tell us if this is an internal fop so
++ * that we can take special/no action. We don't record change/access times or
++ * increment the heat counter for internal fops from the rebalancer.
++ * */
++typedef struct gf_ctr_local {
++ gfdb_db_record_t gfdb_db_record;
++ ia_type_t ia_inode_type;
++ gf_boolean_t is_internal_fop;
++ gf_special_pid_t client_pid;
++} gf_ctr_local_t;
++/*
++ * Easy access of gfdb_db_record of ctr_local
++ * */
++#define CTR_DB_REC(ctr_local) (ctr_local->gfdb_db_record)
++
++/*Clear db record*/
++#define CLEAR_CTR_DB_RECORD(ctr_local) \
++ do { \
++ ctr_local->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID; \
++ memset(&(ctr_local->gfdb_db_record.gfdb_wind_change_time), 0, \
++ sizeof(gfdb_time_t)); \
++ memset(&(ctr_local->gfdb_db_record.gfdb_unwind_change_time), 0, \
++ sizeof(gfdb_time_t)); \
++ gf_uuid_clear(ctr_local->gfdb_db_record.gfid); \
++ gf_uuid_clear(ctr_local->gfdb_db_record.pargfid); \
++ memset(ctr_local->gfdb_db_record.file_name, 0, GF_NAME_MAX + 1); \
++ memset(ctr_local->gfdb_db_record.old_file_name, 0, GF_NAME_MAX + 1); \
++ ctr_local->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP; \
++ ctr_local->ia_inode_type = IA_INVAL; \
++ } while (0)
++
++static gf_ctr_local_t *
++init_ctr_local_t(xlator_t *this)
++{
++ gf_ctr_local_t *ctr_local = NULL;
++
++ GF_ASSERT(this);
++
++ ctr_local = mem_get0(this->local_pool);
++ if (!ctr_local) {
++ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
++ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++ "Error while creating ctr local");
++ goto out;
++ }
++
++ CLEAR_CTR_DB_RECORD(ctr_local);
++out:
++ return ctr_local;
++}
++
++static void
++free_ctr_local(gf_ctr_local_t *ctr_local)
++{
++ if (ctr_local)
++ mem_put(ctr_local);
++}
++
++/******************************************************************************
++ *
++ *
++ * Context Carrier Structures
++ *
++ *
++ * ****************************************************************************/
++
++/*
++ * Context Carrier structures are used to carry relevant information about
++ * inodes and links from the fop calls to ctr_insert_wind.
++ * These structures just hold pointers to the original data and do not
++ * make a deep copy of any data. This info is deep-copied into
++ * ctr_local->gfdb_db_record and passed to the insert_record() api of libgfdb.
++ * This info remains persistent for the unwind in ctr_local->gfdb_db_record
++ * and once used will be destroyed. (An illustrative usage sketch follows
++ * the fill macros below.)
++ *
++ * gf_ctr_link_context_t : Context structure for hard links
++ * gf_ctr_inode_context_t : Context structure for inodes
++ *
++ * */
++
++/*Context Carrier Structure for hard links*/
++typedef struct gf_ctr_link_context {
++ uuid_t *pargfid;
++ const char *basename;
++} gf_ctr_link_context_t;
++
++/*Context Carrier Structure for inodes*/
++typedef struct gf_ctr_inode_context {
++ ia_type_t ia_type;
++ uuid_t *gfid;
++ uuid_t *old_gfid;
++ gf_ctr_link_context_t *new_link_cx;
++ gf_ctr_link_context_t *old_link_cx;
++ gfdb_fop_type_t fop_type;
++ gfdb_fop_path_t fop_path;
++ gf_boolean_t is_internal_fop;
++ /* Indicating metadata fops */
++ gf_boolean_t is_metadata_fop;
++} gf_ctr_inode_context_t;
++
++/*******************Util Macros for Context Carrier Structures*****************/
++
++/*Checks if ctr_link_cx is sane!*/
++#define IS_CTR_LINK_CX_SANE(ctr_link_cx) \
++ do { \
++ if (ctr_link_cx) { \
++ if (ctr_link_cx->pargfid) \
++ GF_ASSERT(*(ctr_link_cx->pargfid)); \
++ GF_ASSERT(ctr_link_cx->basename); \
++ }; \
++ } while (0)
++
++/*Clear and fill the ctr_link_context with values*/
++#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, label) \
++ do { \
++ GF_VALIDATE_OR_GOTO("ctr", ctr_link_cx, label); \
++ GF_VALIDATE_OR_GOTO("ctr", _pargfid, label); \
++ GF_VALIDATE_OR_GOTO("ctr", _basename, label); \
++ memset(ctr_link_cx, 0, sizeof(*ctr_link_cx)); \
++ ctr_link_cx->pargfid = &_pargfid; \
++ ctr_link_cx->basename = _basename; \
++ } while (0)
++
++#define NEW_LINK_CX(ctr_inode_cx) ctr_inode_cx->new_link_cx
++
++#define OLD_LINK_CX(ctr_inode_cx) ctr_inode_cx->old_link_cx
++
++/*Checks if ctr_inode_cx is sane!*/
++#define IS_CTR_INODE_CX_SANE(ctr_inode_cx) \
++ do { \
++ GF_ASSERT(ctr_inode_cx); \
++ GF_ASSERT(ctr_inode_cx->gfid); \
++ GF_ASSERT(*(ctr_inode_cx->gfid)); \
++ GF_ASSERT(ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP); \
++ GF_ASSERT(ctr_inode_cx->fop_path != GFDB_FOP_INVALID); \
++ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \
++ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \
++ } while (0)
++
++/*Clear and fill the ctr_inode_context with values*/
++#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx, _ia_type, _gfid, _new_link_cx, \
++ _old_link_cx, _fop_type, _fop_path) \
++ do { \
++ GF_ASSERT(ctr_inode_cx); \
++ GF_ASSERT(_gfid); \
++ GF_ASSERT(_fop_type != GFDB_FOP_INVALID_OP); \
++ GF_ASSERT(_fop_path != GFDB_FOP_INVALID); \
++ memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx)); \
++ ctr_inode_cx->ia_type = _ia_type; \
++ ctr_inode_cx->gfid = &_gfid; \
++ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \
++ if (_new_link_cx) \
++ NEW_LINK_CX(ctr_inode_cx) = _new_link_cx; \
++ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \
++ if (_old_link_cx) \
++ OLD_LINK_CX(ctr_inode_cx) = _old_link_cx; \
++ ctr_inode_cx->fop_type = _fop_type; \
++ ctr_inode_cx->fop_path = _fop_path; \
++ } while (0)
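++
++/* Illustrative (hypothetical) use of the carrier structures and fill macros
++ * above, e.g. from a create-style fop handler; a sketch only, assuming the
++ * GFDB_FOP_CREATE_WRITE/GFDB_FOP_WIND values from libgfdb and a loc_t that
++ * carries pargfid, name and inode:
++ *
++ * gf_ctr_link_context_t new_link_cx = {0}, *new_link_cx_p = &new_link_cx;
++ * gf_ctr_inode_context_t inode_cx = {0}, *inode_cx_p = &inode_cx;
++ *
++ * FILL_CTR_LINK_CX(new_link_cx_p, loc->pargfid, loc->name, out);
++ * FILL_CTR_INODE_CONTEXT(inode_cx_p, loc->inode->ia_type,
++ * loc->inode->gfid, new_link_cx_p, NULL,
++ * GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
++ * ret = ctr_insert_wind(frame, this, inode_cx_p);
++ */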
++
++/******************************************************************************
++ *
++ * Util functions or macros used by
++ * insert wind and insert unwind
++ *
++ * ****************************************************************************/
++/* Free ctr frame local */
++static inline void
++ctr_free_frame_local(call_frame_t *frame)
++{
++ if (frame) {
++ free_ctr_local((gf_ctr_local_t *)frame->local);
++ frame->local = NULL;
++ }
++}
++
++/* Sets GF_REQUEST_LINK_COUNT_XDATA in the dict
++ * that is sent to the POSIX xlator, asking it to send
++ * the link count back in the unwind path.
++ * Returns 0 for success without creation of the dict,
++ * 1 for success with creation of the dict,
++ * and -1 for failure. (An illustrative caller sketch
++ * follows the function below.)
++ * */
++static inline int
++set_posix_link_request(xlator_t *this, dict_t **xdata)
++{
++ int ret = -1;
++ gf_boolean_t is_created = _gf_false;
++
++ GF_VALIDATE_OR_GOTO("ctr", this, out);
++ GF_VALIDATE_OR_GOTO(this->name, xdata, out);
++
++ /*create xdata if NULL*/
++ if (!*xdata) {
++ *xdata = dict_new();
++ is_created = _gf_true;
++ ret = 1;
++ } else {
++ ret = 0;
++ }
++
++ if (!*xdata) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
++ "xdata is NULL :Cannot send "
++ "GF_REQUEST_LINK_COUNT_XDATA to posix");
++ ret = -1;
++ goto out;
++ }
++
++ ret = dict_set_int32(*xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++ "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
++ ret = -1;
++ goto out;
++ }
++ ret = 0;
++out:
++ if (ret == -1) {
++ if (*xdata && is_created) {
++ dict_unref(*xdata);
++ }
++ }
++ return ret;
++}
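++
++/* Illustrative (hypothetical) caller of set_posix_link_request(); a sketch
++ * only, showing how the 0/1/-1 contract described above is meant to be
++ * consumed (ctr_lookup_cbk is an assumed callback name):
++ *
++ * dict_t *xdata = NULL;
++ * int is_xdata_created = set_posix_link_request(this, &xdata);
++ * if (is_xdata_created == -1)
++ * goto out;
++ * STACK_WIND(frame, ctr_lookup_cbk, FIRST_CHILD(this),
++ * FIRST_CHILD(this)->fops->lookup, loc, xdata);
++ * if (is_xdata_created)
++ * dict_unref(xdata);
++ */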
++
++/*
++ * If a bitrot fop
++ * */
++#define BITROT_FOP(frame) \
++ (frame->root->pid == GF_CLIENT_PID_BITD || \
++ frame->root->pid == GF_CLIENT_PID_SCRUB)
++
++/*
++ * If a rebalancer fop
++ * */
++#define REBALANCE_FOP(frame) (frame->root->pid == GF_CLIENT_PID_DEFRAG)
++
++/*
++ * If it's a tiering rebalancer fop
++ * */
++#define TIER_REBALANCE_FOP(frame) \
++ (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)
++
++/*
++ * If it's an AFR self-heal fop
++ * */
++#define AFR_SELF_HEAL_FOP(frame) (frame->root->pid == GF_CLIENT_PID_SELF_HEALD)
++
++/*
++ * If it's a rebalancer fop, goto the given label
++ * */
++#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label) \
++ do { \
++ if (REBALANCE_FOP(frame)) \
++ goto label; \
++ } while (0)
++
++/*
++ * Internal fop
++ *
++ * */
++static inline gf_boolean_t
++is_internal_fop(call_frame_t *frame, dict_t *xdata)
++{
++ gf_boolean_t ret = _gf_false;
++
++ GF_ASSERT(frame);
++ GF_ASSERT(frame->root);
++
++ if (AFR_SELF_HEAL_FOP(frame)) {
++ ret = _gf_true;
++ }
++ if (BITROT_FOP(frame)) {
++ ret = _gf_true;
++ }
++ if (REBALANCE_FOP(frame) || TIER_REBALANCE_FOP(frame)) {
++ ret = _gf_true;
++ if (xdata && dict_get(xdata, CTR_ATTACH_TIER_LOOKUP)) {
++ ret = _gf_false;
++ }
++ }
++ if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
++ ret = _gf_true;
++ }
++
++ return ret;
++}
++
++#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \
++ do { \
++ if (is_internal_fop(frame, dict)) \
++ goto label; \
++ } while (0)
++
++/* If the fop has failed, exit */
++#define CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, label) \
++ do { \
++ if (op_ret == -1) { \
++ gf_msg_trace(this->name, 0, "Failed fop with %s", \
++ strerror(op_errno)); \
++ goto label; \
++ }; \
++ } while (0)
++
++/*
++ * If the CTR xlator is disabled, then goto the given label
++ * */
++#define CTR_IS_DISABLED_THEN_GOTO(this, label) \
++ do { \
++ gf_ctr_private_t *_priv = NULL; \
++ GF_ASSERT(this); \
++ GF_ASSERT(this->private); \
++ _priv = this->private; \
++ if (!_priv->_db_conn) \
++ goto label; \
++ } while (0)
++
++/*
++ * If CTR record-metadata-heat is disabled, then goto the given label
++ * */
++#define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label) \
++ do { \
++ gf_ctr_private_t *_priv = NULL; \
++ GF_ASSERT(this); \
++ GF_ASSERT(this->private); \
++ _priv = this->private; \
++ if (!_priv->ctr_record_metadata_heat) \
++ goto label; \
++ } while (0)
++
++int
++fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
++ gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path);
++
++int
++fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
++ gf_ctr_inode_context_t *ctr_inode_cx);
++
++/*******************************************************************************
++ * CTR INSERT WIND
++ * *****************************************************************************
++ * Function used to insert/update a record in the database during a wind fop.
++ * This function creates the ctr_local structure in the frame of the fop
++ * call.
++ * ****************************************************************************/
++
++static inline int
++ctr_insert_wind(call_frame_t *frame, xlator_t *this,
++ gf_ctr_inode_context_t *ctr_inode_cx)
++{
++ int ret = -1;
++ gf_ctr_private_t *_priv = NULL;
++ gf_ctr_local_t *ctr_local = NULL;
++
++ GF_ASSERT(frame);
++ GF_ASSERT(frame->root);
++ GF_ASSERT(this);
++ IS_CTR_INODE_CX_SANE(ctr_inode_cx);
++
++ _priv = this->private;
++ GF_ASSERT(_priv);
++
++ GF_ASSERT(_priv->_db_conn);
++
++ /* If the record_wind option of CTR is on, record the wind for
++ * non-directory inodes only */
++ if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
++ frame->local = init_ctr_local_t(this);
++ if (!frame->local) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++ "WIND: Error while creating ctr local");
++ goto out;
++ };
++ ctr_local = frame->local;
++ ctr_local->client_pid = frame->root->pid;
++ ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop;
++
++ /* Decide whether to record counters or not */
++ CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
++ /* If record counter is enabled */
++ if (_priv->ctr_record_counter) {
++ /* If it's not an internal fop */
++ if (!(ctr_local->is_internal_fop)) {
++ /* If it's a metadata fop AND
++ * record-metadata-heat is on,
++ * OR
++ * it's NOT a metadata fop */
++ if ((ctr_inode_cx->is_metadata_fop &&
++ _priv->ctr_record_metadata_heat) ||
++ (!ctr_inode_cx->is_metadata_fop)) {
++ CTR_DB_REC(ctr_local).do_record_counters = _gf_true;
++ }
++ }
++ }
++
++ /* Decide whether to record times or not
++ * For non internal FOPS record times as usual*/
++ CTR_DB_REC(ctr_local).do_record_times = _gf_false;
++ if (!ctr_local->is_internal_fop) {
++ /* If it's a metadata fop AND
++ * record-metadata-heat is on,
++ * OR
++ * it's NOT a metadata fop */
++ if ((ctr_inode_cx->is_metadata_fop &&
++ _priv->ctr_record_metadata_heat) ||
++ (!ctr_inode_cx->is_metadata_fop)) {
++ CTR_DB_REC(ctr_local).do_record_times =
++ (_priv->ctr_record_wind || _priv->ctr_record_unwind);
++ }
++ }
++ /* when it's an internal FOP */
++ else {
++ /* Record times only for create
++ * i.e when the inode is created */
++ CTR_DB_REC(ctr_local).do_record_times = (isdentrycreatefop(
++ ctr_inode_cx->fop_type))
++ ? _gf_true
++ : _gf_false;
++ }
++
++ /*Fill the db record for insertion*/
++ ret = fill_db_record_for_wind(this, ctr_local, ctr_inode_cx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
++ "WIND: Error filling ctr local");
++ goto out;
++ }
++
++ /*Insert the db record*/
++ ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_INSERT_RECORD_WIND_FAILED,
++ "WIND: Inserting of record failed!");
++ goto out;
++ }
++ }
++ ret = 0;
++out:
++
++ if (ret) {
++ free_ctr_local(ctr_local);
++ frame->local = NULL;
++ }
++
++ return ret;
++}
++
++/*******************************************************************************
++ * CTR INSERT UNWIND
++ * *****************************************************************************
++ * Function used to insert/update a record in the database during an unwind
++ * fop. This function destroys the ctr_local structure in the frame of the
++ * fop call at the end.
++ * ****************************************************************************/
++static inline int
++ctr_insert_unwind(call_frame_t *frame, xlator_t *this, gfdb_fop_type_t fop_type,
++ gfdb_fop_path_t fop_path)
++{
++ int ret = -1;
++ gf_ctr_private_t *_priv = NULL;
++ gf_ctr_local_t *ctr_local = NULL;
++
++ GF_ASSERT(frame);
++ GF_ASSERT(this);
++
++ _priv = this->private;
++ GF_ASSERT(_priv);
++
++ GF_ASSERT(_priv->_db_conn);
++
++ ctr_local = frame->local;
++
++ if (ctr_local && (_priv->ctr_record_unwind || isdentryfop(fop_type)) &&
++ (ctr_local->ia_inode_type != IA_IFDIR)) {
++ CTR_DB_REC(ctr_local).do_record_uwind_time = _priv->ctr_record_unwind;
++
++ ret = fill_db_record_for_unwind(this, ctr_local, fop_type, fop_path);
++ if (ret == -1) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
++ "UNWIND: Error filling ctr local");
++ goto out;
++ }
++
++ ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
++ if (ret == -1) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
++ "UNWIND: Error filling ctr local");
++ goto out;
++ }
++ }
++ ret = 0;
++out:
++ return ret;
++}
++
++/******************************************************************************
++ * Delete file/hardlink record(s) from the db (an illustrative call is
++ * sketched after the function below)
++ * ****************************************************************************/
++static inline int
++ctr_delete_hard_link_from_db(xlator_t *this, uuid_t gfid, uuid_t pargfid,
++ char *basename, gfdb_fop_type_t fop_type,
++ gfdb_fop_path_t fop_path)
++{
++ int ret = -1;
++ gfdb_db_record_t gfdb_db_record;
++ gf_ctr_private_t *_priv = NULL;
++
++ _priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, _priv, out);
++ GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(gfid)), out);
++ GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(pargfid)), out);
++ GF_VALIDATE_OR_GOTO(this->name, (fop_type == GFDB_FOP_DENTRY_WRITE), out);
++ GF_VALIDATE_OR_GOTO(
++ this->name,
++ (fop_path == GFDB_FOP_UNDEL || fop_path == GFDB_FOP_UNDEL_ALL), out);
++
++ /* Set gfdb_db_record to 0 */
++ memset(&gfdb_db_record, 0, sizeof(gfdb_db_record));
++
++ /* Copy basename */
++ if (snprintf(gfdb_db_record.file_name, GF_NAME_MAX, "%s", basename) >=
++ GF_NAME_MAX)
++ goto out;
++
++ /* Copy gfid into db record */
++ gf_uuid_copy(gfdb_db_record.gfid, gfid);
++
++ /* Copy pargid into db record */
++ gf_uuid_copy(gfdb_db_record.pargfid, pargfid);
++
++ gfdb_db_record.gfdb_fop_path = fop_path;
++ gfdb_db_record.gfdb_fop_type = fop_type;
++
++ /*send delete request to db*/
++ ret = insert_record(_priv->_db_conn, &gfdb_db_record);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RECORD_WIND_FAILED,
++ "Failed to delete record. %s", basename);
++ goto out;
++ }
++
++ ret = 0;
++out:
++ return ret;
++}
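++
++/* Illustrative (hypothetical) call of ctr_delete_hard_link_from_db(), e.g.
++ * from an unlink path removing a single hard link record; a sketch only,
++ * using the GFDB_FOP_* values validated above (loc is an assumed loc_t):
++ *
++ * ret = ctr_delete_hard_link_from_db(this, inode->gfid, loc->pargfid,
++ * (char *)loc->name,
++ * GFDB_FOP_DENTRY_WRITE,
++ * GFDB_FOP_UNDEL);
++ */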
++
++/******************************* Hard link function ***************************/
++
++static inline gf_boolean_t
++__is_inode_expired(ctr_xlator_ctx_t *ctr_xlator_ctx, gf_ctr_private_t *_priv,
++ gfdb_time_t *current_time)
++{
++ gf_boolean_t ret = _gf_false;
++ uint64_t time_diff = 0;
++
++ GF_ASSERT(ctr_xlator_ctx);
++ GF_ASSERT(_priv);
++ GF_ASSERT(current_time);
++
++ time_diff = current_time->tv_sec - ctr_xlator_ctx->inode_heal_period;
++
++ ret = (time_diff >= _priv->ctr_lookupheal_inode_timeout) ? _gf_true
++ : _gf_false;
++ return ret;
++}
++
++static inline gf_boolean_t
++__is_hardlink_expired(ctr_hard_link_t *ctr_hard_link, gf_ctr_private_t *_priv,
++ gfdb_time_t *current_time)
++{
++ gf_boolean_t ret = _gf_false;
++ uint64_t time_diff = 0;
++
++ GF_ASSERT(ctr_hard_link);
++ GF_ASSERT(_priv);
++ GF_ASSERT(current_time);
++
++ time_diff = current_time->tv_sec - ctr_hard_link->hardlink_heal_period;
++
++ ret = (time_diff >= _priv->ctr_lookupheal_link_timeout) ? _gf_true
++ : _gf_false;
++
++ return ret;
++}
++
++/* Return values of heal (an illustrative consumer sketch follows
++ * add_hard_link_ctx below) */
++typedef enum ctr_heal_ret_val {
++ CTR_CTX_ERROR = -1,
++ /* No healing required */
++ CTR_TRY_NO_HEAL = 0,
++ /* Try healing hard link */
++ CTR_TRY_HARDLINK_HEAL = 1,
++ /* Try healing inode */
++ CTR_TRY_INODE_HEAL = 2,
++} ctr_heal_ret_val_t;
++
++/**
++ * @brief Function to add a hard link to the inode context variable.
++ * The inode context maintains an in-memory list, which is used for
++ * smart healing of the database.
++ * @param frame frame of the FOP
++ * @param this the xlator instance
++ * @param inode the inode being linked
++ * @return a ctr_heal_ret_val_t value
++ */
++
++static inline ctr_heal_ret_val_t
++add_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++ ctr_heal_ret_val_t ret_val = CTR_TRY_NO_HEAL;
++ int ret = -1;
++ gf_ctr_local_t *ctr_local = NULL;
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++ ctr_hard_link_t *ctr_hard_link = NULL;
++ gf_ctr_private_t *_priv = NULL;
++ gfdb_time_t current_time = {0};
++
++ GF_ASSERT(frame);
++ GF_ASSERT(this);
++ GF_ASSERT(inode);
++ GF_ASSERT(this->private);
++
++ _priv = this->private;
++
++ ctr_local = frame->local;
++ if (!ctr_local) {
++ goto out;
++ }
++
++ ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
++ if (!ctr_xlator_ctx) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
++ "Failed accessing ctr inode context");
++ goto out;
++ }
++
++ LOCK(&ctr_xlator_ctx->lock);
++
++ /* Check if the hard link already exists
++ * in the ctr inode context*/
++ ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx,
++ CTR_DB_REC(ctr_local).pargfid,
++ CTR_DB_REC(ctr_local).file_name);
++ /* if it's already present, only refresh the heal periods if expired */
++ if (ctr_hard_link) {
++ ret = gettimeofday(&current_time, NULL);
++ if (ret == -1) {
++ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++ ret_val = CTR_CTX_ERROR;
++ goto unlock;
++ }
++
++ if (__is_hardlink_expired(ctr_hard_link, _priv, &current_time)) {
++ ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++ ret_val = ret_val | CTR_TRY_HARDLINK_HEAL;
++ }
++
++ if (__is_inode_expired(ctr_xlator_ctx, _priv, &current_time)) {
++ ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
++ ret_val = ret_val | CTR_TRY_INODE_HEAL;
++ }
++
++ goto unlock;
++ }
++
++ /* Add the hard link to the list*/
++ ret = ctr_add_hard_link(this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
++ CTR_DB_REC(ctr_local).file_name);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED,
++ "Failed to add hardlink to the ctr inode context");
++ ret_val = CTR_CTX_ERROR;
++ goto unlock;
++ }
++
++ ret_val = CTR_TRY_NO_HEAL;
++unlock:
++ UNLOCK(&ctr_xlator_ctx->lock);
++out:
++ return ret_val;
++}
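++
++/* Illustrative (hypothetical) consumer of the heal return value above; a
++ * sketch only. CTR_TRY_HARDLINK_HEAL and CTR_TRY_INODE_HEAL are OR-ed into
++ * the return value as bit flags, so a caller is expected to test each bit:
++ *
++ * ret_val = add_hard_link_ctx(frame, this, inode);
++ * if (ret_val == CTR_CTX_ERROR)
++ * goto out;
++ * if (ret_val & CTR_TRY_HARDLINK_HEAL)
++ * ... insert the hard link record into the db again ...
++ * if (ret_val & CTR_TRY_INODE_HEAL)
++ * ... insert the inode record into the db again ...
++ */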
++
++static inline int
++delete_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++ int ret = -1;
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++ gf_ctr_local_t *ctr_local = NULL;
++
++ GF_ASSERT(frame);
++ GF_ASSERT(this);
++ GF_ASSERT(inode);
++
++ ctr_local = frame->local;
++ if (!ctr_local) {
++ goto out;
++ }
++
++ ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode);
++ if (!ctr_xlator_ctx) {
++ /* Since there is no ctr inode context, there is nothing more to do */
++ ret = 0;
++ goto out;
++ }
++
++ ret = ctr_delete_hard_link(this, ctr_xlator_ctx,
++ CTR_DB_REC(ctr_local).pargfid,
++ CTR_DB_REC(ctr_local).file_name);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
++ "Failed to delete hard link");
++ goto out;
++ }
++
++ ret = 0;
++
++out:
++ return ret;
++}
++
++static inline int
++update_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++ int ret = -1;
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++ gf_ctr_local_t *ctr_local = NULL;
++
++ GF_ASSERT(frame);
++ GF_ASSERT(this);
++ GF_ASSERT(inode);
++
++ ctr_local = frame->local;
++ if (!ctr_local) {
++ goto out;
++ }
++
++ ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
++ if (!ctr_xlator_ctx) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
++ "Failed accessing ctr inode context");
++ goto out;
++ }
++
++ ret = ctr_update_hard_link(
++ this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
++ CTR_DB_REC(ctr_local).file_name, CTR_DB_REC(ctr_local).old_pargfid,
++ CTR_DB_REC(ctr_local).old_file_name);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED,
++ "Failed to update hard link");
++ goto out;
++ }
++
++ ret = 0;
++
++out:
++ return ret;
++}
++
++/******************************************************************************
++ *
++ * CTR xlator init related functions
++ *
++ *
++ * ****************************************************************************/
++int
++extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type);
++
++int
++extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv);
++
++#endif
+diff --git a/xlators/features/changetimerecorder/src/ctr-messages.h b/xlators/features/changetimerecorder/src/ctr-messages.h
+new file mode 100644
+index 0000000..23adf0a
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr-messages.h
+@@ -0,0 +1,61 @@
++/*
++ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++ */
++
++#ifndef _CTR_MESSAGES_H_
++#define _CTR_MESSAGES_H_
++
++#include <glusterfs/glfs-message-id.h>
++
++/* To add new message IDs, append new identifiers at the end of the list.
++ *
++ * Never remove a message ID. If it's not used anymore, you can rename it or
++ * leave it as it is, but not delete it. This is to prevent reutilization of
++ * IDs by other messages.
++ *
++ * The component name must match one of the entries defined in
++ * glfs-message-id.h.
++ */
++
++GLFS_MSGID(
++ CTR, CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++ CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
++ CTR_MSG_INSERT_LINK_WIND_FAILED, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
++ CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
++ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
++ CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
++ CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
++ CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
++ CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
++ CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
++ CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
++ CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, CTR_MSG_INSERT_RENAME_WIND_FAILED,
++ CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
++ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, CTR_MSG_ADD_HARDLINK_FAILED,
++ CTR_MSG_DELETE_HARDLINK_FAILED, CTR_MSG_UPDATE_HARDLINK_FAILED,
++ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, CTR_MSG_INSERT_UNLINK_WIND_FAILED,
++ CTR_MSG_XDATA_NULL, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
++ CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
++ CTR_MSG_INSERT_MKNOD_WIND_FAILED, CTR_MSG_INSERT_CREATE_WIND_FAILED,
++ CTR_MSG_INSERT_CREATE_UNWIND_FAILED, CTR_MSG_INSERT_RECORD_WIND_FAILED,
++ CTR_MSG_INSERT_READV_WIND_FAILED, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
++ CTR_MSG_SET, CTR_MSG_FATAL_ERROR, CTR_MSG_DANGLING_VOLUME,
++ CTR_MSG_CALLOC_FAILED, CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
++ CTR_MSG_INIT_DB_PARAMS_FAILED, CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
++ CTR_MSG_MEM_ACC_INIT_FAILED, CTR_MSG_CLOSE_DB_CONN_FAILED,
++ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, CTR_MSG_WRONG_FOP_PATH,
++ CTR_MSG_CONSTRUCT_DB_PATH_FAILED, CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
++ CTR_MSG_XLATOR_DISABLED, CTR_MSG_HARDLINK_MISSING_IN_LIST,
++ CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, CTR_MSG_INIT_LOCK_FAILED,
++ CTR_MSG_COPY_FAILED, CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
++ CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, CTR_MSG_NULL_LOCAL);
++
++#endif /* !_CTR_MESSAGES_H_ */
+diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
+new file mode 100644
+index 0000000..b6b66d5
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
+@@ -0,0 +1,362 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include "ctr-xlator-ctx.h"
++#include "ctr-messages.h"
++#include <time.h>
++#include <sys/time.h>
++
++#define IS_THE_ONLY_HARDLINK(ctr_hard_link) \
++ (ctr_hard_link->list.next == ctr_hard_link->list.prev)
++
++static void
++fini_ctr_hard_link(ctr_hard_link_t **ctr_hard_link)
++{
++ GF_ASSERT(ctr_hard_link);
++
++ if (!*ctr_hard_link)
++ return;
++ GF_FREE((*ctr_hard_link)->base_name);
++ GF_FREE(*ctr_hard_link);
++ *ctr_hard_link = NULL;
++}
++
++/* Please lock the ctr_xlator_ctx before using this function */
++ctr_hard_link_t *
++ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++ uuid_t pgfid, const char *base_name)
++{
++ ctr_hard_link_t *_hard_link = NULL;
++ ctr_hard_link_t *searched_hardlink = NULL;
++
++ GF_ASSERT(this);
++ GF_ASSERT(ctr_xlator_ctx);
++
++ if (pgfid == NULL || base_name == NULL)
++ goto out;
++
++ /*linear search*/
++ list_for_each_entry(_hard_link, &ctr_xlator_ctx->hardlink_list, list)
++ {
++ if (gf_uuid_compare(_hard_link->pgfid, pgfid) == 0 &&
++ _hard_link->base_name &&
++ strcmp(_hard_link->base_name, base_name) == 0) {
++ searched_hardlink = _hard_link;
++ break;
++ }
++ }
++
++out:
++ return searched_hardlink;
++}
++
++/* Please lock the ctr_xlator_ctx before using this function */
++int
++ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++ uuid_t pgfid, const char *base_name)
++{
++ int ret = -1;
++ ctr_hard_link_t *ctr_hard_link = NULL;
++ struct timeval current_time = {0};
++
++ GF_ASSERT(this);
++ GF_ASSERT(ctr_xlator_ctx);
++
++ if (pgfid == NULL || base_name == NULL)
++ goto out;
++
++ ctr_hard_link = GF_CALLOC(1, sizeof(*ctr_hard_link), gf_ctr_mt_hard_link_t);
++ if (!ctr_hard_link) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
++ "Failed allocating "
++ "ctr_hard_link");
++ goto out;
++ }
++
++ /* Initialize the ctr_hard_link object and
++ * assign the values: parent GFID and basename */
++ INIT_LIST_HEAD(&ctr_hard_link->list);
++ gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
++ ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
++ "Failed copying basename"
++ "to ctr_hard_link");
++ goto error;
++ }
++
++ ret = gettimeofday(&current_time, NULL);
++ if (ret == -1) {
++ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++ goto error;
++ }
++
++ /*Add the hard link to the list*/
++ list_add_tail(&ctr_hard_link->list, &ctr_xlator_ctx->hardlink_list);
++
++ ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++
++ /* all is well! */
++ ret = 0;
++ goto out;
++error:
++ GF_FREE(ctr_hard_link);
++out:
++ return ret;
++}
++
++static void
++__delete_hard_link_from_list(ctr_hard_link_t **ctr_hard_link)
++{
++ GF_ASSERT(ctr_hard_link);
++ GF_ASSERT(*ctr_hard_link);
++
++ /*Remove hard link from list*/
++ list_del(&(*ctr_hard_link)->list);
++ fini_ctr_hard_link(ctr_hard_link);
++}
++
++int
++ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++ uuid_t pgfid, const char *base_name)
++{
++ int ret = -1;
++ ctr_hard_link_t *ctr_hard_link = NULL;
++
++ GF_ASSERT(this);
++ GF_ASSERT(ctr_xlator_ctx);
++
++ LOCK(&ctr_xlator_ctx->lock);
++
++ /*Check if the hard link is present */
++ ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, pgfid,
++ base_name);
++ if (!ctr_hard_link) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_HARDLINK_MISSING_IN_LIST,
++ "Hard link doesn't exist in the list");
++ goto out;
++ }
++
++ __delete_hard_link_from_list(&ctr_hard_link);
++ ctr_hard_link = NULL;
++
++ ret = 0;
++out:
++ UNLOCK(&ctr_xlator_ctx->lock);
++
++ return ret;
++}
++
++int
++ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++ uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
++ const char *old_base_name)
++{
++ int ret = -1;
++ ctr_hard_link_t *ctr_hard_link = NULL;
++ struct timeval current_time = {0};
++
++ GF_ASSERT(this);
++ GF_ASSERT(ctr_xlator_ctx);
++
++ LOCK(&ctr_xlator_ctx->lock);
++
++ /*Check if the hard link is present */
++ ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, old_pgfid,
++ old_base_name);
++ if (!ctr_hard_link) {
++ gf_msg_trace(this->name, 0,
++ "Hard link doesn't exist"
++ " in the list");
++ /* Since the hard link is not present in the list
++ * we add it to the list */
++ ret = ctr_add_hard_link(this, ctr_xlator_ctx, pgfid, base_name);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED,
++ "Failed adding hard link to the list");
++ goto out;
++ }
++ ret = 0;
++ goto out;
++ }
++
++ /* update the hard link */
++ gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
++ GF_FREE(ctr_hard_link->base_name);
++ ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
++ "Failed copying basename"
++ "to ctr_hard_link");
++ /* delete the corrupted entry */
++ __delete_hard_link_from_list(&ctr_hard_link);
++ ctr_hard_link = NULL;
++ goto out;
++ }
++
++ ret = gettimeofday(&current_time, NULL);
++ if (ret == -1) {
++ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++ ctr_hard_link->hardlink_heal_period = 0;
++ } else {
++ ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++ }
++
++ ret = 0;
++
++out:
++ UNLOCK(&ctr_xlator_ctx->lock);
++
++ return ret;
++}
++
++/* Delete all hardlinks */
++static int
++ctr_delete_all_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx)
++{
++ int ret = -1;
++ ctr_hard_link_t *ctr_hard_link = NULL;
++ ctr_hard_link_t *tmp = NULL;
++
++ GF_ASSERT(ctr_xlator_ctx);
++
++ LOCK(&ctr_xlator_ctx->lock);
++
++ list_for_each_entry_safe(ctr_hard_link, tmp, &ctr_xlator_ctx->hardlink_list,
++ list)
++ {
++ /*Remove hard link from list*/
++ __delete_hard_link_from_list(&ctr_hard_link);
++ ctr_hard_link = NULL;
++ }
++
++ UNLOCK(&ctr_xlator_ctx->lock);
++
++ ret = 0;
++
++ return ret;
++}
++
++/* Please lock the inode before using this function */
++static ctr_xlator_ctx_t *
++__get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++ int ret = 0;
++ uint64_t _addr = 0;
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++
++ GF_ASSERT(this);
++ GF_ASSERT(inode);
++
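++    /* The inode ctx slot stores the ctr_xlator_ctx_t pointer cast to a
++     * uint64_t; zero means no context has been set for this inode. */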
++ ret = __inode_ctx_get(inode, this, &_addr);
++ if (ret < 0)
++ _addr = 0;
++ if (_addr != 0) {
++ ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
++ }
++
++ return ctr_xlator_ctx;
++}
++
++ctr_xlator_ctx_t *
++init_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++ int ret = -1;
++ uint64_t _addr = 0;
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++ struct timeval current_time = {0};
++
++ GF_ASSERT(this);
++ GF_ASSERT(inode);
++
++ LOCK(&inode->lock);
++ {
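++        /* Look up first and allocate only if absent, all under the inode
++         * lock, so two racing fops cannot create duplicate contexts. */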
++ ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
++ if (ctr_xlator_ctx) {
++ ret = 0;
++ goto out;
++ }
++ ctr_xlator_ctx = GF_CALLOC(1, sizeof(*ctr_xlator_ctx),
++ gf_ctr_mt_xlator_ctx);
++ if (!ctr_xlator_ctx)
++ goto out;
++
++ ret = LOCK_INIT(&ctr_xlator_ctx->lock);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, ret, CTR_MSG_INIT_LOCK_FAILED,
++ "Failed init lock %s", strerror(ret));
++ goto out;
++ }
++ _addr = (uint64_t)(uintptr_t)ctr_xlator_ctx;
++
++ ret = __inode_ctx_set(inode, this, &_addr);
++ if (ret) {
++ goto out;
++ }
++
++ INIT_LIST_HEAD(&ctr_xlator_ctx->hardlink_list);
++
++ ret = gettimeofday(&current_time, NULL);
++ if (ret == -1) {
++ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++ goto out;
++ }
++
++ ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
++ }
++ ret = 0;
++out:
++ if (ret) {
++ GF_FREE(ctr_xlator_ctx);
++ ctr_xlator_ctx = NULL;
++ }
++
++ UNLOCK(&inode->lock);
++
++ return ctr_xlator_ctx;
++}
++
++void
++fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++ int ret = 0;
++ uint64_t _addr = 0;
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++
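++    /* Detach the context from the inode first, then free its hardlink
++     * list and destroy its lock before releasing the context itself. */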
++ inode_ctx_del(inode, this, &_addr);
++ if (!_addr)
++ return;
++
++ ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
++
++ ret = ctr_delete_all_hard_link(this, ctr_xlator_ctx);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
++ "Failed deleting all "
++ "hard links from inode context");
++ }
++
++ LOCK_DESTROY(&ctr_xlator_ctx->lock);
++
++ GF_FREE(ctr_xlator_ctx);
++}
++
++ctr_xlator_ctx_t *
++get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++
++ LOCK(&inode->lock);
++ ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
++ UNLOCK(&inode->lock);
++
++ return ctr_xlator_ctx;
++}
+diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
+new file mode 100644
+index 0000000..4e3bf7e
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
+@@ -0,0 +1,68 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef __CTR_XLATOR_CTX_H
++#define __CTR_XLATOR_CTX_H
++
++#include "ctr_mem_types.h"
++#include <glusterfs/iatt.h>
++#include <glusterfs/glusterfs.h>
++#include <glusterfs/xlator.h>
++#include <glusterfs/logging.h>
++#include <glusterfs/locking.h>
++#include <glusterfs/common-utils.h>
++#include <time.h>
++#include <sys/time.h>
++
++typedef struct ctr_hard_link {
++ uuid_t pgfid;
++ char *base_name;
++ /* Hardlink expiry : Defines the expiry period after which a
++ * database heal is attempted. */
++ uint64_t hardlink_heal_period;
++ struct list_head list;
++} ctr_hard_link_t;
++
++typedef struct ctr_xlator_ctx {
++    /* This represents the looked-up hardlinks.
++     * NOTE: This doesn't represent all physical hardlinks of the inode */
++ struct list_head hardlink_list;
++ uint64_t inode_heal_period;
++ gf_lock_t lock;
++} ctr_xlator_ctx_t;
++
++ctr_hard_link_t *
++ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++ uuid_t pgfid, const char *base_name);
++
++int
++ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++ uuid_t pgfid, const char *base_name);
++
++int
++ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++ uuid_t pgfid, const char *base_name);
++
++int
++ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++ uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
++ const char *old_base_name);
++
++ctr_xlator_ctx_t *
++get_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
++
++ctr_xlator_ctx_t *
++init_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
++
++void
++fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
++
++#endif
+diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/changetimerecorder/src/ctr_mem_types.h
+new file mode 100644
+index 0000000..7b8f531
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr_mem_types.h
+@@ -0,0 +1,22 @@
++/*
++ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef __CTR_MEM_TYPES_H__
++#define __CTR_MEM_TYPES_H__
++
++#include "gfdb_mem-types.h"
++
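++/* CTR mem-type IDs start right after the gfdb range so that the two
++ * components' memory-accounting types never overlap. */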
++enum gf_ctr_mem_types_ {
++ gf_ctr_mt_private_t = gfdb_mt_end + 1,
++ gf_ctr_mt_xlator_ctx,
++ gf_ctr_mt_hard_link_t,
++ gf_ctr_mt_end
++};
++#endif
+--
+1.8.3.1
+
diff --git a/0086-Revert-tiering-remove-the-translator-from-build-and-.patch b/0086-Revert-tiering-remove-the-translator-from-build-and-.patch
new file mode 100644
index 0000000..b612ddf
--- /dev/null
+++ b/0086-Revert-tiering-remove-the-translator-from-build-and-.patch
@@ -0,0 +1,3194 @@
+From 06adac5dbac7b2067232270cbee12931400f7824 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Sat, 6 Apr 2019 17:00:47 +0530
+Subject: [PATCH 086/124] Revert "tiering: remove the translator from build and
+ glusterd"
+
+This reverts commit 55a6ba56bea9ec0d3316c005300c514ea3ab0e54.
+Add the test files and glusterd related changes.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ib704b7142a82cb1e94538a48af916730992a5701
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166246
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ MAINTAINERS | 18 +
+ tests/basic/afr/granular-esh/cli.t | 30 +-
+ ...1214222-directories_missing_after_attach_tier.t | 61 ++
+ ...60185-donot-allow-detach-commit-unnecessarily.t | 47 ++
+ tests/basic/tier/ctr-rename-overwrite.t | 50 ++
+ tests/basic/tier/file_lock.c | 72 ++
+ tests/basic/tier/file_with_spaces.t | 71 ++
+ tests/basic/tier/fops-during-migration-pause.t | 89 +++
+ tests/basic/tier/fops-during-migration.t | 105 +++
+ tests/basic/tier/frequency-counters.t | 82 +++
+ tests/basic/tier/legacy-many.t | 92 +++
+ tests/basic/tier/locked_file_migration.t | 80 +++
+ tests/basic/tier/new-tier-cmds.t | 129 ++++
+ tests/basic/tier/readdir-during-migration.t | 65 ++
+ tests/basic/tier/record-metadata-heat.t | 106 +++
+ tests/basic/tier/tier-heald.t | 98 +++
+ tests/basic/tier/tier-snapshot.t | 47 ++
+ tests/basic/tier/tier.t | 219 +++++++
+ tests/basic/tier/tier_lookup_heal.t | 69 ++
+ tests/basic/tier/tierd_check.t | 128 ++++
+ tests/basic/tier/unlink-during-migration.t | 92 +++
+ ...03028-Rebalance-glusterd-rpc-connection-issue.t | 78 +++
+ tests/bugs/quota/bug-1288474.t | 51 ++
+ .../bug-1290965-detect-bitrotten-objects.t | 53 ++
+ .../tier/bug-1205545-CTR-and-trash-integration.t | 72 ++
+ tests/bugs/tier/bug-1279376-rename-demoted-file.t | 93 +++
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 75 +++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 723 +++++++++++++++++++++
+ 28 files changed, 2894 insertions(+), 1 deletion(-)
+ create mode 100755 tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t
+ create mode 100644 tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t
+ create mode 100755 tests/basic/tier/ctr-rename-overwrite.t
+ create mode 100644 tests/basic/tier/file_lock.c
+ create mode 100755 tests/basic/tier/file_with_spaces.t
+ create mode 100755 tests/basic/tier/fops-during-migration-pause.t
+ create mode 100755 tests/basic/tier/fops-during-migration.t
+ create mode 100644 tests/basic/tier/frequency-counters.t
+ create mode 100644 tests/basic/tier/legacy-many.t
+ create mode 100755 tests/basic/tier/locked_file_migration.t
+ create mode 100644 tests/basic/tier/new-tier-cmds.t
+ create mode 100644 tests/basic/tier/readdir-during-migration.t
+ create mode 100755 tests/basic/tier/record-metadata-heat.t
+ create mode 100644 tests/basic/tier/tier-heald.t
+ create mode 100644 tests/basic/tier/tier-snapshot.t
+ create mode 100755 tests/basic/tier/tier.t
+ create mode 100755 tests/basic/tier/tier_lookup_heal.t
+ create mode 100644 tests/basic/tier/tierd_check.t
+ create mode 100755 tests/basic/tier/unlink-during-migration.t
+ create mode 100644 tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t
+ create mode 100755 tests/bugs/quota/bug-1288474.t
+ create mode 100644 tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t
+ create mode 100644 tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t
+ create mode 100755 tests/bugs/tier/bug-1279376-rename-demoted-file.t
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index b1fc0ee..1f4c93a 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -103,6 +103,12 @@ P: Kotresh HR <khiremat@redhat.com>
+ S: Maintained
+ F: xlators/features/changelog/
+
++Changetimerecorder
++M: Shyamsundar Ranganathan <srangana@redhat.com>
++P: Hari Gowtham <hgowtham@redhat.com>
++S: Maintained
++F: xlators/features/changetimerecorder/
++
+ Decompounder
+ M: Krutika Dhananjay <kdhananj@redhat.com>
+ P: Pranith Karampuri <pkarampu@redhat.com>
+@@ -248,6 +254,12 @@ P: Xavier Hernandez <xhernandez@redhat.com>
+ S: Maintained
+ F: xlators/features/shard/
+
++Tiering
++M: Shyamsundar Ranganathan <srangana@redhat.com>
++P: Hari Gowtham <hgowtham@redhat.com>
++S: Maintained
++F: xlators/cluster/dht/src/tier.c
++
+ Trash
+ M: Anoop C S <anoopcs@redhat.com>
+ M: Jiffin Tony Thottan <jthottan@redhat.com>
+@@ -327,6 +339,12 @@ P: Soumya Koduri <skoduri@redhat.com>
+ S: Maintained
+ F: api/
+
++libgfdb
++M: Shyamsundar Ranganathan <srangana@redhat.com>
++P: Hari Gowtham <hgowtham@redhat.com>
++S: Maintained
++F: libglusterfs/src/gfdb/
++
+ libglusterfs
+ M: Amar Tumballi <amarts@redhat.com>
+ M: Jeff Darcy <jeff@pl.atyp.us>
+diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t
+index 10b6c63..995d93e 100644
+--- a/tests/basic/afr/granular-esh/cli.t
++++ b/tests/basic/afr/granular-esh/cli.t
+@@ -11,7 +11,7 @@ TESTS_EXPECTED_IN_LOOP=4
+ TEST glusterd
+ TEST pidof glusterd
+
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+ # Test that enabling the option should work on a newly created volume
+ TEST $CLI volume set $V0 cluster.granular-entry-heal on
+ TEST $CLI volume set $V0 cluster.granular-entry-heal off
+@@ -25,6 +25,34 @@ TEST $CLI volume start $V1
+ TEST ! $CLI volume heal $V1 granular-entry-heal enable
+ TEST ! $CLI volume heal $V1 granular-entry-heal disable
+
++#######################
++###### TIER TEST ######
++#######################
++# Execute the same command on a disperse + replicate tiered volume and make
++# sure the option is set on the replicate leg of the volume
++TEST $CLI volume tier $V1 attach replica 2 $H0:$B0/${V1}{3,4}
++TEST $CLI volume heal $V1 granular-entry-heal enable
++EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal
++TEST $CLI volume heal $V1 granular-entry-heal disable
++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal
++
++# Kill a disperse brick so that heal is pending on the volume.
++TEST kill_brick $V1 $H0 $B0/${V1}0
++
++# Now make sure that one offline brick in disperse does not affect enabling the
++# option on the volume.
++TEST $CLI volume heal $V1 granular-entry-heal enable
++EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal
++TEST $CLI volume heal $V1 granular-entry-heal disable
++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal
++
++# Now kill a replicate brick.
++TEST kill_brick $V1 $H0 $B0/${V1}3
++# Now make sure that one offline brick in replicate causes the command to
++# fail.
++TEST ! $CLI volume heal $V1 granular-entry-heal enable
++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal
++
+ ######################
+ ### REPLICATE TEST ###
+ ######################
+diff --git a/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t b/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t
+new file mode 100755
+index 0000000..f9166d7
+--- /dev/null
++++ b/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t
+@@ -0,0 +1,61 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++LAST_BRICK=1
++CACHE_BRICK_FIRST=4
++CACHE_BRICK_LAST=5
++DEMOTE_TIMEOUT=12
++PROMOTE_TIMEOUT=5
++MIGRATION_TIMEOUT=10
++cleanup
++
++
++TEST glusterd
++
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Basic operations.
++cd $M0
++TEST stat .
++TEST mkdir d1
++TEST [ -d d1 ]
++TEST touch file1
++TEST [ -e file1 ]
++
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++TEST $CLI volume set $V0 features.ctr-enabled on
++
++# Check whether the directories and files are present on the mount.
++TEST [ -d d1 ]
++TEST [ -e file1 ]
++
++cd
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
++
++tier_status ()
++{
++ $CLI volume tier $V0 detach status | grep progress | wc -l
++}
++
++TEST $CLI volume tier $V0 detach start
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_status
++TEST $CLI volume tier $V0 detach commit
++
++EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK_FIRST}
++
++EXPECT_WITHIN $REBALANCE_TIMEOUT "0" confirm_vol_stopped $V0
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+diff --git a/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t b/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t
+new file mode 100644
+index 0000000..6efbe32
+--- /dev/null
++++ b/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t
+@@ -0,0 +1,47 @@
++#!/bin/bash
++
++## Test case for BZ: 1260185
++## Do not allow detach-tier commit without the "force" option or when the
++## user has not started a "detach-tier start" operation
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../cluster.rc
++. $(dirname $0)/../../tier.rc
++
++cleanup;
++
++## Start glusterd
++TEST glusterd;
++TEST pidof glusterd;
++
++## Lets create and start the volume
++TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}
++TEST $CLI volume start $V0
++
++## Perform attach-tier operation on volume $V0
++TEST $CLI volume tier $V0 attach $H0:$B0/${V0}{3..4}
++
++## detach-tier commit operation without force option on volume $V0
++## should not succeed
++TEST ! $CLI --mode=script volume tier $V0 detach commit
++
++## detach-tier commit operation with force option on volume $V0
++## should succeed
++TEST $CLI volume tier $V0 detach force
++
++sleep 3
++
++## Again performing attach-tier operation on volume $V0
++TEST $CLI volume tier $V0 attach $H0:$B0/${V0}{5..6}
++
++## Do detach-tier start on volume $V0
++TEST $CLI volume tier $V0 detach start
++
++## Now detach-tier commit on volume $V0 should succeed.
++## wait for the detach to complete
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_commit_for_single_node
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1517961
+diff --git a/tests/basic/tier/ctr-rename-overwrite.t b/tests/basic/tier/ctr-rename-overwrite.t
+new file mode 100755
+index 0000000..73ee758
+--- /dev/null
++++ b/tests/basic/tier/ctr-rename-overwrite.t
+@@ -0,0 +1,50 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++LAST_BRICK=1
++CACHE_BRICK_FIRST=4
++CACHE_BRICK_LAST=5
++
++DEMOTE_FREQ=5
++PROMOTE_FREQ=5
++
++cleanup
++
++# Start glusterd
++TEST glusterd
++TEST pidof glusterd
++
++# Set-up tier cluster
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++TEST $CLI volume start $V0
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++
++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++
++# Start and mount the volume after enabling CTR
++TEST $CLI volume set $V0 features.ctr-enabled on
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# create two files
++echo "hello world" > $M0/file1
++echo "hello world" > $M0/file2
++
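++# Each file contributes one row to gf_file_tb and one to gf_flink_tb,
++# so two files yield 4 rows in total.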
++# The db on the hot brick shows 4 records: 2 for file1 and 2 for file2
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 4 ]
++
++#overwrite file2 with file1
++mv -f $M0/file1 $M0/file2
++
++# Now the db in hot tier should have only 2 records for file1.
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++cleanup
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/file_lock.c b/tests/basic/tier/file_lock.c
+new file mode 100644
+index 0000000..20fdbc0
+--- /dev/null
++++ b/tests/basic/tier/file_lock.c
+@@ -0,0 +1,72 @@
++#include <stdio.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <fcntl.h>
++#include <string.h> /* strcmp() */
++
++void
++usage(void)
++{
++ printf("Usage: testlock <filepath> [R|W]\n");
++ return;
++}
++
++int
++main(int argc, char *argv[])
++{
++ char *file_path = NULL;
++ int fd = -1;
++ struct flock lock = {0};
++ int ret = -1;
++ int c = 0;
++
++ if (argc != 3) {
++ usage();
++ exit(1);
++ }
++
++ file_path = argv[1];
++ fd = open(file_path, O_RDWR);
++
++ if (-1 == fd) {
++ printf("Failed to open file %s. %m\n", file_path);
++ exit(1);
++ }
++
++ /* TODO: Check for invalid input*/
++
++ if (!strcmp(argv[2], "W")) {
++ lock.l_type = F_WRLCK;
++ printf("Taking write lock\n");
++
++ } else {
++ lock.l_type = F_RDLCK;
++ printf("Taking read lock\n");
++ }
++
++ lock.l_whence = SEEK_SET;
++ lock.l_start = 0;
++ lock.l_len = 0;
++ lock.l_pid = getpid();
++
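++    /* F_SETLK is non-blocking: it fails immediately if a conflicting
++     * lock is held instead of waiting for it to be released. */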
++ printf("Acquiring lock on %s\n", file_path);
++ ret = fcntl(fd, F_SETLK, &lock);
++ if (ret) {
++ printf("Failed to acquire lock on %s (%m)\n", file_path);
++ close(fd);
++ exit(1);
++ }
++
++ sleep(10);
++
++ /*Unlock*/
++
++ printf("Releasing lock on %s\n", file_path);
++ lock.l_type = F_UNLCK;
++ ret = fcntl(fd, F_SETLK, &lock);
++ if (ret) {
++ printf("Failed to release lock on %s (%m)\n", file_path);
++ }
++
++ close(fd);
++ return ret;
++}
+diff --git a/tests/basic/tier/file_with_spaces.t b/tests/basic/tier/file_with_spaces.t
+new file mode 100755
+index 0000000..919b900
+--- /dev/null
++++ b/tests/basic/tier/file_with_spaces.t
+@@ -0,0 +1,71 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++NUM_BRICKS=3
++DEMOTE_FREQ=5
++DEMOTE_TIMEOUT=10
++PROMOTE_FREQ=5
++
++FILE_SPACE="Testing filenames with spaces.log"
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_tier_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume set $V0 features.ctr-enabled on
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.tier-mode test
++}
++
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume info
++
++
++#Create and start a tiered volume
++create_dist_tier_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++
++# The file will be created on the hot tier
++
++touch "$M0/$FILE_SPACE"
++
++# Get the path of the file on the hot tier
++HPATH=`find $B0/hot/ -name "$FILE_SPACE"`
++echo "File path on hot tier: "$HPATH
++
++EXPECT "yes" exists_and_regular_file $HPATH
++
++# Wait for the tier process to demote the file
++sleep $DEMOTE_TIMEOUT
++
++# Get the path of the file on the cold tier
++CPATH=`find $B0/cold/ -name "$FILE_SPACE"`
++echo "File path on cold tier: "$CPATH
++
++EXPECT "yes" exists_and_regular_file $CPATH
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/fops-during-migration-pause.t b/tests/basic/tier/fops-during-migration-pause.t
+new file mode 100755
+index 0000000..46fc6e4
+--- /dev/null
++++ b/tests/basic/tier/fops-during-migration-pause.t
+@@ -0,0 +1,89 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++NUM_BRICKS=3
++DEMOTE_FREQ=10
++PROMOTE_FREQ=10
++
++TEST_STR="Testing write and truncate fops on tier migration"
++
++function is_sticky_set () {
++ echo $1
++ if [ -k $1 ];
++ then
++ echo "yes"
++ else
++ echo "no"
++ fi
++}
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_tier_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume set $V0 features.ctr-enabled on
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.tier-mode test
++}
++
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume info
++
++
++#Create and start a tiered volume
++create_dist_tier_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++TEST mkdir $M0/dir1
++
++# Create a large file (1.25 GB), so that rebalance takes time
++# The file will be created on the hot tier
++sleep_until_mid_cycle $DEMOTE_FREQ
++dd if=/dev/zero of=$M0/dir1/FILE1 bs=256k count=5120
++
++# Get the path of the file on the hot tier
++HPATH=`find $B0/hot/ -name FILE1`
++echo "File path on hot tier: "$HPATH
++
++
++# Wait for the tier process to demote the file
++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH
++
++TEST $CLI volume set $V0 cluster.tier-pause on
++
++# Wait for the tier process to finish migrating the file
++EXPECT_WITHIN $REBALANCE_TIMEOUT "no" is_sticky_set $HPATH
++
++# Get the path of the file on the cold tier
++CPATH=`find $B0/cold/ -name FILE1`
++
++# make sure destination is empty
++TEST ! test -s $CPATH
++
++# make sure source exists and not empty
++TEST test -s $HPATH
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/fops-during-migration.t b/tests/basic/tier/fops-during-migration.t
+new file mode 100755
+index 0000000..458c01e
+--- /dev/null
++++ b/tests/basic/tier/fops-during-migration.t
+@@ -0,0 +1,105 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++NUM_BRICKS=3
++DEMOTE_FREQ=5
++PROMOTE_FREQ=5
++
++TEST_STR="Testing write and truncate fops on tier migration"
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_tier_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume set $V0 features.ctr-enabled on
++ TEST $CLI volume set $V0 cluster.force-migration on
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.tier-mode test
++}
++
++
++# Checks that the contents of the file matches the input string
++#$1 : file_path
++#$2 : comparison string
++
++function check_file_content () {
++ contents=`cat $1`
++ echo $contents
++ if [ "$contents" = "$2" ]; then
++ echo "1"
++ else
++ echo "0"
++ fi
++}
++
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++
++#Create and start a tiered volume
++create_dist_tier_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++$CLI volume set $V0 diagnostics.client-log-level DEBUG
++
++TEST mkdir $M0/dir1
++
++# Create a large file (320MB), so that rebalance takes time
++# The file will be created on the hot tier
++
++dd if=/dev/zero of=$M0/dir1/FILE1 bs=64k count=5120
++
++# Get the path of the file on the hot tier
++HPATH=`find $B0/hot/ -name FILE1`
++echo "File path on hot tier: "$HPATH
++
++
++# Wait for the tier process to demote the file
++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH
++
++# Get the path of the file on the cold tier
++CPATH=`find $B0/cold/ -name FILE1`
++echo "File path on cold tier: "$CPATH
++
++# Test setxattr
++TEST setfattr -n "user.test_xattr" -v "qwerty" $M0/dir1/FILE1
++
++# Change the file contents while it is being migrated
++echo $TEST_STR > $M0/dir1/FILE1
++
++# The file contents should have changed even if the file
++# is not done migrating
++EXPECT "1" check_file_content $M0/dir1/FILE1 "$TEST_STR"
++
++
++# Wait for the tier process to finish migrating the file
++EXPECT_WITHIN $REBALANCE_TIMEOUT "no" is_sticky_set $CPATH
++
++# The file contents should have changed
++EXPECT "1" check_file_content $M0/dir1/FILE1 "$TEST_STR"
++
++
++TEST getfattr -n "user.test_xattr" $M0/dir1/FILE1
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+diff --git a/tests/basic/tier/frequency-counters.t b/tests/basic/tier/frequency-counters.t
+new file mode 100644
+index 0000000..08e05df
+--- /dev/null
++++ b/tests/basic/tier/frequency-counters.t
+@@ -0,0 +1,82 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++NUM_BRICKS=3
++DEMOTE_FREQ=10
++PROMOTE_FREQ=10
++NUM_FILES=5
++TEST_DIR=test
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume start $V0
++}
++
++function create_dist_tier_vol () {
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++ TEST $CLI volume set $V0 cluster.tier-mode test
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++ TEST $CLI volume set $V0 features.record-counters on
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 2
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 2
++}
++
++cleanup;
++
++
++TEST glusterd
++
++#Create and start a tiered volume
++create_dist_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++# create some files
++mkdir $M0/$TEST_DIR
++cd $M0/${TEST_DIR}
++
++date > file1
++touch file2
++
++# attach tier
++create_dist_tier_vol $NUM_BRICKS
++
++sleep_until_mid_cycle $PROMOTE_FREQ
++
++# check that promotion does not happen on a single hit
++date >> file2
++cat file1
++drop_cache $M0
++sleep $PROMOTE_FREQ
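++# check_counters (defined in tier.rc) is expected to compare the promoted
++# and demoted file counts from tier status against its two arguments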
++EXPECT "0" check_counters 0 0
++
++# check that promotion happens on a double hit; this should succeed
++sleep_until_mid_cycle $PROMOTE_FREQ
++date >> file2
++drop_cache $M0
++cat file1
++date >> file2
++drop_cache $M0
++cat file1
++
++EXPECT_WITHIN $PROMOTE_FREQ "0" check_counters 2 0
++
++TEST ! $CLI volume set $V0 features.record-counters off
++
++cd /
++
++cleanup
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/legacy-many.t b/tests/basic/tier/legacy-many.t
+new file mode 100644
+index 0000000..5795428
+--- /dev/null
++++ b/tests/basic/tier/legacy-many.t
+@@ -0,0 +1,92 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++LAST_BRICK=3
++CACHE_BRICK_FIRST=4
++CACHE_BRICK_LAST=5
++DEMOTE_TIMEOUT=12
++PROMOTE_TIMEOUT=12
++MIGRATION_TIMEOUT=10
++DEMOTE_FREQ=60
++PROMOTE_FREQ=10
++TEST_DIR="test_files"
++NUM_FILES=15
++
++function read_all {
++ for file in *
++ do
++ cat $file
++ done
++}
++
++function tier_status () {
++ $CLI volume tier $V0 status | grep "success" | wc -l
++}
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++
++# Create distributed replica volume
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++TEST $CLI volume start $V0
++
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 features.ctr-enabled on
++
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Create a number of "legacy" files before attaching tier
++mkdir $M0/${TEST_DIR}
++cd $M0/${TEST_DIR}
++TEST create_many_files file $NUM_FILES
++wait
++
++# Attach tier
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++
++TEST $CLI volume set $V0 cluster.tier-mode test
++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++TEST $CLI volume set $V0 cluster.read-freq-threshold 0
++TEST $CLI volume set $V0 cluster.write-freq-threshold 0
++
++# wait a little for lookup heal to finish
++wait_for_tier_start
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status
++
++# make sure fix layout completed
++CPATH=$B0/${V0}0
++echo $CPATH > /tmp/out
++TEST getfattr -n "trusted.tier.fix.layout.complete" $CPATH
++
++# Read "legacy" files
++drop_cache $M0
++
++sleep_until_mid_cycle $DEMOTE_FREQ
++
++TEST read_all
++
++# Test to make sure files were promoted as expected
++sleep $PROMOTE_TIMEOUT
++EXPECT_WITHIN $PROMOTE_TIMEOUT "0" check_counters $NUM_FILES 0
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" detach_start $V0
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}${CACHE_BRICK_FIRST}"
++
++TEST $CLI volume tier $V0 detach commit
++
++# fix layout flag should be cleared
++TEST ! getfattr -n "trusted.tier.fix.layout.complete" $CPATH
++
++cd;
++cleanup
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/locked_file_migration.t b/tests/basic/tier/locked_file_migration.t
+new file mode 100755
+index 0000000..7fb1717
+--- /dev/null
++++ b/tests/basic/tier/locked_file_migration.t
+@@ -0,0 +1,80 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++NUM_BRICKS=3
++DEMOTE_FREQ=7
++PROMOTE_FREQ=30
++DEMOTE_TIMEOUT=15
++
++TEST_STR="Testing write and truncate fops on tier migration"
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_tier_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume set $V0 features.ctr-enabled on
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++
++#We don't want promotes to happen in this test
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 10
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 10
++ TEST $CLI volume set $V0 cluster.tier-mode test
++}
++
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume info
++
++
++# Create and start a tiered volume
++create_dist_tier_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++TEST mkdir $M0/dir1
++build_tester $(dirname $0)/file_lock.c -o file_lock
++cp $(dirname $0)/file_lock $M0/file_lock
++
++# The files will be created on the hot tier
++touch $M0/dir1/FILE1
++touch $M0/dir1/FILE2
++
++# For FILE1, take a POSIX write lock on the entire file.
++# Don't take a lock on FILE2
++
++./file_lock $M0/dir1/FILE1 W &
++
++sleep $DEMOTE_FREQ
++
++# Wait for the tier process to demote the file
++# Only FILE2 and file_lock should be demoted
++# FILE1 should be skipped because of the lock held
++# on it
++
++EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 0 2
++
++sleep 10
++
++rm $(dirname $0)/file_lock
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t
+new file mode 100644
+index 0000000..b9c9390
+--- /dev/null
++++ b/tests/basic/tier/new-tier-cmds.t
+@@ -0,0 +1,129 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++. $(dirname $0)/../../cluster.rc
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function check_peers {
++ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
++}
++
++function create_dist_tier_vol () {
++ TEST $CLI_1 volume create $V0 disperse 6 redundancy 2 $H1:$B1/${V0}_b1 $H2:$B2/${V0}_b2 $H3:$B3/${V0}_b3 $H1:$B1/${V0}_b4 $H2:$B2/${V0}_b5 $H3:$B3/${V0}_b6
++ TEST $CLI_1 volume start $V0
++ TEST $CLI_1 volume tier $V0 attach replica 2 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3 $H1:$B1/${V0}_h4 $H2:$B2/${V0}_h5 $H3:$B3/${V0}_h6
++}
++
++function tier_daemon_status {
++ local _VAR=CLI_$1
++ local xpath_sel='//node[hostname="Tier Daemon"][path="localhost"]/status'
++ ${!_VAR} --xml volume status $V0 \
++ | xmllint --xpath "$xpath_sel" - \
++ | sed -n '/.*<status>\([0-9]*\).*/s//\1/p'
++}
++
++function detach_xml_status {
++ $CLI_1 volume tier $V0 detach status --xml | sed -n \
++ '/.*<opErrstr>Detach tier status successful/p' | wc -l
++}
++
++cleanup;
++
++#setup cluster and test volume
++TEST launch_cluster 3; # start 3-node virtual cluster
++TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli
++TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli
++
++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
++
++#Create and start a tiered volume
++create_dist_tier_vol
++
++########### check failure for older commands #############
++
++TEST ! $CLI_1 volume rebalance $V0 tier status
++
++# failure for older command can be removed in 3.11
++
++##########################################################
++
++#Issue detach tier on the tiered volume
++#Will throw error saying detach tier not started
++
++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status
++
++EXPECT "0" detach_xml_status
++
++#kill a node
++TEST kill_node 2
++
++# Check that the remaining nodes are printed in the output of detach status
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status_node_down
++
++TEST $glusterd_2;
++
++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
++
++# After starting detach-tier, the status command should display the detach status
++sleep 2
++$CLI_1 volume status
++TEST $CLI_1 volume tier $V0 detach start
++
++EXPECT "1" detach_xml_status
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status
++
++#kill a node
++TEST kill_node 2
++
++# Check that the remaining nodes are printed in the output of detach status
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status_node_down
++
++TEST $glusterd_2;
++
++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
++# Make sure we check that the *bricks* are up and not just the node. >:-(
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_b2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_h2
++
++# Parsing normal output doesn't work because of line-wrap issues on our
++# regression machines, and the version of xmllint there doesn't support --xpath
++# so we can't do it that way either. In short, there's no way for us to detect
++# when we can stop waiting, so we just have to wait the maximum time every time
++# and hope any failures will show up later in the script.
++sleep $PROCESS_UP_TIMEOUT
++#XPECT_WITHIN $PROCESS_UP_TIMEOUT 1 tier_daemon_status 2
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status
++
++TEST $CLI_1 volume tier $V0 detach stop
++
++# If detach-tier is stopped, the detach status command will fail
++
++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status
++
++TEST $CLI_1 volume tier $V0 detach start
++
++#wait for the detach to complete
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_commit
++
++# If detach-tier is committed, then detach status should fail with an error
++# saying it is not a tiered volume
++
++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status
++
++########### check failure for older commands #############
++
++TEST ! $CLI_1 volume rebalance $V0 tier start
++
++# failure for older command can be removed in 3.11
++
++##########################################################
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/readdir-during-migration.t b/tests/basic/tier/readdir-during-migration.t
+new file mode 100644
+index 0000000..292ca88
+--- /dev/null
++++ b/tests/basic/tier/readdir-during-migration.t
+@@ -0,0 +1,65 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++NUM_BRICKS=3
++DEMOTE_FREQ=5
++PROMOTE_FREQ=5
++NUM_FILES=30
++TEST_DIR=test
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_tier_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++ TEST $CLI volume set $V0 cluster.tier-mode test
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0
++}
++
++function check_file_count() {
++ if [ $(ls -1 | wc -l) == $1 ]; then
++ echo "1"
++ else
++ echo "0"
++ fi
++}
++
++cleanup;
++
++
++TEST glusterd
++
++#Create and start a tiered volume
++create_dist_tier_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++# Create a number of "legacy" files before attaching tier
++mkdir $M0/${TEST_DIR}
++cd $M0/${TEST_DIR}
++TEST create_many_files tfile $NUM_FILES
++
++EXPECT "1" check_file_count $NUM_FILES
++
++sleep $DEMOTE_FREQ
++
++EXPECT "1" check_file_count $NUM_FILES
++
++cd /
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/record-metadata-heat.t b/tests/basic/tier/record-metadata-heat.t
+new file mode 100755
+index 0000000..f6f35a8
+--- /dev/null
++++ b/tests/basic/tier/record-metadata-heat.t
+@@ -0,0 +1,106 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++NUM_BRICKS=3
++DEMOTE_FREQ=5
++DEMOTE_TIMEOUT=10
++PROMOTE_FREQ=5
++
++FILE="file1.txt"
++FILE_LINK="file2.txt"
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_tier_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume set $V0 features.ctr-enabled on
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++ TEST $CLI volume set $V0 cluster.tier-mode test
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 4
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 4
++}
++
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++
++#Create and start a tiered volume
++create_dist_tier_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++
++# The file will be created on the hot tier
++touch "$M0/$FILE"
++
++# Get the path of the file on the hot tier
++HPATH=`find $B0/hot/ -name "$FILE"`
++echo "File path on hot tier: "$HPATH
++
++############################################
++# as per the changes on b8b050c3
++# To test the xttr set by EC
++TEST ! getfattr -n "trusted.ec.size" $HPATH
++############################################
++
++# Expecting the file to be on the hot tier
++EXPECT "yes" exists_and_regular_file $HPATH
++
++sleep_until_mid_cycle $DEMOTE_FREQ
++
++# Try to heat the file using 5 metadata operations
++# WITHOUT setting ctr-record-metadata-heat on
++touch "$M0/$FILE"
++chmod +x "$M0/$FILE"
++chown root "$M0/$FILE"
++ln "$M0/$FILE" "$M0/$FILE_LINK"
++rm -rf "$M0/$FILE_LINK"
++
++# Wait for the tier process to demote the file
++sleep $DEMOTE_TIMEOUT
++
++# Get the path of the file on the cold tier
++CPATH=`find $B0/cold/ -name "$FILE"`
++echo "File path on cold tier: "$CPATH
++
++# Expecting the file to be on cold tier
++EXPECT "yes" exists_and_regular_file $CPATH
++
++#Set ctr-record-metadata-heat on
++TEST $CLI volume set $V0 ctr-record-metadata-heat on
++
++sleep_until_mid_cycle $DEMOTE_FREQ
++
++# Heating the file using 5 metadata operations
++touch "$M0/$FILE"
++chmod +x "$M0/$FILE"
++chown root "$M0/$FILE"
++ln "$M0/$FILE" "$M0/$FILE_LINK"
++rm -rf "$M0/$FILE_LINK"
++
++# Wait for the tier process to demote the file
++sleep $DEMOTE_TIMEOUT
++
++# Get the path of the file on the hot tier
++echo "File path on hot tier: "$HPATH
++
++# Expecting the file to be on the hot tier
++EXPECT "yes" exists_and_regular_file $HPATH
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+diff --git a/tests/basic/tier/tier-heald.t b/tests/basic/tier/tier-heald.t
+new file mode 100644
+index 0000000..a8e634f
+--- /dev/null
++++ b/tests/basic/tier/tier-heald.t
+@@ -0,0 +1,98 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++# This test contains volume heal commands handled by glusterd.
++# Covers enable/disable at the moment. Will be enhanced later to include
++# the other commands as well.
++
++cleanup;
++TEST glusterd
++TEST pidof glusterd
++
++volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol"
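++# glustershd-server.vol holds the self-heal daemon graph; the checks below
++# assert which subvolumes appear in it as volumes start and stop.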
++
++# Commands should fail when both tiers are not of distribute type.
++# Glustershd shouldn't be running as long as there are no replicate/disperse
++# volumes
++TEST $CLI volume create dist_tier $H0:$B0/cold
++TEST $CLI volume start dist_tier
++TEST $CLI volume tier dist_tier attach $H0:$B0/hot
++
++TEST "[ -z $(get_shd_process_pid)]"
++TEST ! $CLI volume heal dist_tier enable
++TEST ! $CLI volume heal dist_tier disable
++
++# Commands should work on replicate/disperse volume.
++TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
++TEST "[ -z $(get_shd_process_pid)]"
++TEST $CLI volume start r2
++
++TEST $CLI volume tier r2 attach $H0:$B0/r2_hot
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++TEST $CLI volume heal r2 enable
++EXPECT "enable" volume_option r2 "cluster.self-heal-daemon"
++EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++TEST $CLI volume heal r2 disable
++EXPECT "disable" volume_option r2 "cluster.self-heal-daemon"
++EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++# Commands should work on disperse volume.
++TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2
++TEST $CLI volume start ec2
++
++TEST $CLI volume tier ec2 attach replica 2 $H0:$B0/ec2_hot{1..4}
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++TEST $CLI volume heal ec2 enable
++EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon"
++EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++TEST $CLI volume heal ec2 disable
++EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon"
++EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++
++#Check that shd graph is rewritten correctly on volume stop/start
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++TEST $CLI volume stop r2
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++TEST $CLI volume stop ec2
++# When both the volumes are stopped glustershd volfile is not modified just the
++# process is stopped
++TEST "[ -z $(get_shd_process_pid) ]"
++
++TEST $CLI volume start r2
++EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++
++TEST $CLI volume start ec2
++
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate
++
++TEST $CLI volume tier ec2 detach force
++
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "N" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate
++
++TEST $CLI volume set r2 self-heal-daemon on
++TEST $CLI volume set r2 cluster.self-heal-daemon off
++TEST ! $CLI volume set ec2 self-heal-daemon off
++TEST ! $CLI volume set ec2 cluster.self-heal-daemon on
++TEST ! $CLI volume set dist self-heal-daemon off
++TEST ! $CLI volume set dist cluster.self-heal-daemon on
++
++TEST $CLI volume set ec2 disperse-self-heal-daemon off
++TEST $CLI volume set ec2 cluster.disperse-self-heal-daemon on
++TEST ! $CLI volume set r2 disperse-self-heal-daemon on
++TEST ! $CLI volume set r2 cluster.disperse-self-heal-daemon off
++TEST ! $CLI volume set dist disperse-self-heal-daemon off
++TEST ! $CLI volume set dist cluster.disperse-self-heal-daemon on
++
++cleanup
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/tier-snapshot.t b/tests/basic/tier/tier-snapshot.t
+new file mode 100644
+index 0000000..8747c5d
+--- /dev/null
++++ b/tests/basic/tier/tier-snapshot.t
+@@ -0,0 +1,47 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../snapshot.rc
++
++cleanup;
++
++TEST init_n_bricks 4;
++TEST setup_lvm 4;
++
++TEST glusterd;
++
++TEST $CLI volume create $V0 replica 2 $H0:$L1 $H0:$L2 ;
++
++TEST $CLI volume start $V0;
++
++TEST $CLI volume tier $V0 attach replica 2 $H0:$L3 $H0:$L4 ;
++
++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
++
++for i in {1..10} ; do echo "file" > $M0/file$i ; done
++
++TEST $CLI snapshot config activate-on-create enable
++
++TEST $CLI snapshot create snap1 $V0 no-timestamp;
++
++for i in {11..20} ; do echo "file" > $M0/file$i ; done
++
++TEST $CLI snapshot create snap2 $V0 no-timestamp;
++
++mkdir $M0/dir1;
++mkdir $M0/dir2;
++
++for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
++for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
++
++TEST $CLI snapshot create snap3 $V0 no-timestamp;
++
++for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
++for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
++
++TEST $CLI snapshot create snap4 $V0 no-timestamp;
++
++TEST $CLI snapshot delete all;
++
++cleanup;
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t
+new file mode 100755
+index 0000000..1798541
+--- /dev/null
++++ b/tests/basic/tier/tier.t
+@@ -0,0 +1,219 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++LAST_BRICK=3
++CACHE_BRICK_FIRST=4
++CACHE_BRICK_LAST=5
++DEMOTE_TIMEOUT=12
++PROMOTE_TIMEOUT=5
++MIGRATION_TIMEOUT=10
++DEMOTE_FREQ=4
++PROMOTE_FREQ=12
++
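++# Both helpers echo "0" if the file is on the given tier with a matching
++# fingerprint, "1" if it is absent, and "2" if the content does not match.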
++function file_on_slow_tier {
++ found=0
++
++ for i in `seq 0 $LAST_BRICK`; do
++ test -e "$B0/${V0}${i}/$1" && found=1 && break;
++ done
++
++ if [ "$found" == "1" ]
++ then
++ slow_hash1=$2
++ slow_hash2=$(fingerprint "$B0/${V0}${i}/$1")
++
++ if [ "$slow_hash1" == "$slow_hash2" ]
++ then
++ echo "0"
++ else
++ echo "2"
++ fi
++ else
++ echo "1"
++ fi
++
++ # temporarily disable non-Linux tests.
++ case $OSTYPE in
++ NetBSD | FreeBSD | Darwin)
++ echo "0"
++ ;;
++ esac
++}
++
++function file_on_fast_tier {
++ found=0
++
++ for j in `seq $CACHE_BRICK_FIRST $CACHE_BRICK_LAST`; do
++ test -e "$B0/${V0}${j}/$1" && found=1 && break;
++ done
++
++
++ if [ "$found" == "1" ]
++ then
++ fast_hash1=$2
++ fast_hash2=$(fingerprint "$B0/${V0}${j}/$1")
++
++ if [ "$fast_hash1" == "$fast_hash2" ]
++ then
++ echo "0"
++ else
++ echo "2"
++ fi
++ else
++ echo "1"
++ fi
++}
++
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++# testing bug 1215122, ie should fail if replica count and bricks are not compatible.
++
++TEST ! $CLI volume tier $V0 attach replica 5 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++
++TEST $CLI volume start $V0
++
++# The following two commands instigate a graph switch. Do them
++# before attaching the tier. If done on a tiered volume the rebalance
++# daemon will terminate and must be restarted manually.
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++
++#Not a tier volume
++TEST ! $CLI volume set $V0 cluster.tier-demote-frequency 4
++
++#testing bug #1228112, glusterd crashed when trying to detach-tier commit force on a non-tiered volume.
++TEST ! $CLI volume tier $V0 detach commit force
++
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++
++TEST $CLI volume set $V0 cluster.tier-mode test
++
++# create a file, make sure it can be deleted after attach tier.
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++cd $M0
++TEST touch delete_me.txt
++TEST rm -f delete_me.txt
++
++# confirm watermark CLI works
++TEST $CLI volume set $V0 cluster.watermark-hi 85
++TEST $CLI volume set $V0 cluster.watermark-low 75
++TEST $CLI volume set $V0 cluster.tier-max-mb 1000
++TEST $CLI volume set $V0 cluster.tier-max-files 1000
++TEST $CLI volume set $V0 cluster.tier-max-promote-file-size 1000
++TEST ! $CLI volume set $V0 cluster.tier-max-files -3
++TEST ! $CLI volume set $V0 cluster.watermark-low 90
++TEST ! $CLI volume set $V0 cluster.watermark-hi 75
++TEST ! $CLI volume set $V0 cluster.read-freq-threshold -12
++TEST ! $CLI volume set $V0 cluster.write-freq-threshold -12
++
++#check for watermark reset
++TEST $CLI volume set $V0 cluster.watermark-low 10
++TEST $CLI volume set $V0 cluster.watermark-hi 30
++TEST ! $CLI volume reset $V0 cluster.watermark-low
++TEST $CLI volume reset $V0 cluster.watermark-hi
++TEST $CLI volume reset $V0 cluster.watermark-low
++
++# stop the volume and restart it. The rebalance daemon should restart.
++cd /tmp
++umount $M0
++TEST $CLI volume stop $V0
++TEST $CLI volume start $V0
++
++wait_for_tier_start
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++cd $M0
++
++sleep_first_cycle $DEMOTE_FREQ
++$CLI volume tier $V0 status
++
++#Tier options expect non-negative value
++TEST ! $CLI volume set $V0 cluster.tier-promote-frequency -1
++
++#Tier options expect non-negative value
++TEST ! $CLI volume set $V0 cluster.read-freq-threshold qwerty
++
++
++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++TEST $CLI volume set $V0 cluster.read-freq-threshold 0
++TEST $CLI volume set $V0 cluster.write-freq-threshold 0
++
++# Basic operations.
++TEST stat .
++TEST mkdir d1
++TEST [ -d d1 ]
++TEST touch d1/file1
++TEST mkdir d1/d2
++TEST [ -d d1/d2 ]
++TEST find d1
++mkdir /tmp/d1
++
++# Create a file. It should be on the fast tier.
++uuidgen > /tmp/d1/data.txt
++md5data=$(fingerprint /tmp/d1/data.txt)
++mv /tmp/d1/data.txt ./d1/data.txt
++
++TEST file_on_fast_tier d1/data.txt $md5data
++
++uuidgen > /tmp/d1/data2.txt
++md5data2=$(fingerprint /tmp/d1/data2.txt)
++cp /tmp/d1/data2.txt ./d1/data2.txt
++
++#File with spaces and special characters.
++SPACE_FILE="file with spaces & $peci@l ch@r@cter$ @!@$%^$#@^^*&%$#$%.txt"
++
++uuidgen > "/tmp/d1/$SPACE_FILE"
++md5space=$(fingerprint "/tmp/d1/$SPACE_FILE")
++mv "/tmp/d1/$SPACE_FILE" "./d1/$SPACE_FILE"
++
++# Check auto-demotion on write new.
++sleep $DEMOTE_TIMEOUT
++
++# Check auto-promotion on write append.
++UUID=$(uuidgen)
++echo $UUID >> /tmp/d1/data2.txt
++md5data2=$(fingerprint /tmp/d1/data2.txt)
++
++sleep_until_mid_cycle $DEMOTE_FREQ
++drop_cache $M0
++
++echo $UUID >> ./d1/data2.txt
++cat "./d1/$SPACE_FILE"
++
++sleep $PROMOTE_TIMEOUT
++sleep $DEMOTE_FREQ
++EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 2 6
++
++# stop glusterd; when it comes back, the info file should still describe a tiered volume
++killall glusterd
++TEST glusterd
++
++EXPECT "0" file_on_slow_tier d1/data.txt $md5data
++EXPECT "0" file_on_slow_tier d1/data2.txt $md5data2
++EXPECT "0" file_on_slow_tier "./d1/$SPACE_FILE" $md5space
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" detach_start $V0
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}${CACHE_BRICK_FIRST}"
++
++TEST $CLI volume tier $V0 detach commit
++
++EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK_FIRST}
++
++confirm_vol_stopped $V0
++
++cd;
++
++cleanup
++rm -rf /tmp/d1
++
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/tier_lookup_heal.t b/tests/basic/tier/tier_lookup_heal.t
+new file mode 100755
+index 0000000..c7c7f27
+--- /dev/null
++++ b/tests/basic/tier/tier_lookup_heal.t
+@@ -0,0 +1,69 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++LAST_BRICK=1
++CACHE_BRICK_FIRST=2
++CACHE_BRICK_LAST=3
++PROMOTE_TIMEOUT=5
++
++function file_on_fast_tier {
++ local ret="1"
++
++ s1=$(md5sum $1 | awk '{print $1}')
++ s2=$(md5sum $B0/${V0}${CACHE_BRICK_FIRST}/$1 | awk '{print $1}')
++
++ if [ -e $B0/${V0}${CACHE_BRICK_FIRST}/$1 ] && [ "$s1" == "$s2" ]; then
++ echo "0"
++ else
++ echo "1"
++ fi
++}
++
++cleanup
++
++
++TEST glusterd
++
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Create files before CTR xlator is on.
++cd $M0
++TEST stat .
++TEST touch file1
++TEST stat file1
++
++#Attach tier and switch ON CTR Xlator.
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++TEST $CLI volume set $V0 features.ctr-enabled on
++TEST $CLI volume set $V0 cluster.tier-demote-frequency 4
++TEST $CLI volume set $V0 cluster.tier-promote-frequency 4
++TEST $CLI volume set $V0 cluster.read-freq-threshold 0
++TEST $CLI volume set $V0 cluster.write-freq-threshold 0
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 cluster.tier-mode test
++
++#The lookup should heal the database.
++TEST ls file1
++
++# gf_file_tb and gf_flink_tb should NOT be empty
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++ sqlite3 $B0/${V0}$LAST_BRICK/.glusterfs/${V0}$LAST_BRICK.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++# Heat-up the file
++uuidgen > file1
++sleep 5
++
++#Check if the file is promoted
++EXPECT_WITHIN $PROMOTE_TIMEOUT "0" file_on_fast_tier file1
++
++cd;
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+diff --git a/tests/basic/tier/tierd_check.t b/tests/basic/tier/tierd_check.t
+new file mode 100644
+index 0000000..5701fa9
+--- /dev/null
++++ b/tests/basic/tier/tierd_check.t
+@@ -0,0 +1,128 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++. $(dirname $0)/../../cluster.rc
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function check_peers {
++ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
++}
++
++function create_dist_tier_vol () {
++ TEST $CLI_1 volume create $V0 $H1:$B1/${V0} $H2:$B2/${V0}
++ TEST $CLI_1 volume start $V0
++ TEST $CLI_1 volume tier $V0 attach $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2
++}
++
++function tier_status () {
++ #$CLI_1 volume tier $V0 status | grep progress | wc -l
++ # I don't want to disable the entire test, but this part of it seems
++ # highly suspect. *Why* do we always expect the number of lines to be
++ # exactly two? What would it mean for it to be otherwise? Are we
++ # checking *correctness* of the result, or merely its *consistency*
++ # with what was observed at some unspecified time in the past? Does
++ # this check only serve to inhibit actual improvements? Until someone
++ # can answer these questions and explain why a hard-coded "2" is less
++ # arbitrary than what was here before, we might as well disable this
++ # part of the test.
++ echo "2"
++}
++
++function tier_daemon_kill () {
++ pkill -f "tierd/$V0"
++ echo "$?"
++}
++
++cleanup;
++
++#setup cluster and test volume
++TEST launch_cluster 3; # start 3-node virtual cluster
++TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli
++TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli
++
++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
++
++#Create and start a tiered volume
++create_dist_tier_vol
++
++wait_for_tier_start
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_check
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_kill
++
++TEST $CLI_1 volume tier $V0 start
++
++wait_for_tier_start
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_kill
++
++TEST $CLI_3 volume tier $V0 start force
++
++wait_for_tier_start
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check
++
++#The pattern "progress" should occur exactly twice.
++#It shouldn't show up on the third node, which runs no tierd,
++#even after "tier start force" is issued on the node
++#without tierd.
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status
++
++#kill the node on which tier is not supposed to run
++TEST kill_node 3
++
++#bring the node back, it should not have tierd running on it
++TEST $glusterd_3;
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status
++
++#after volume restart, check for tierd
++
++TEST $CLI_3 volume stop $V0
++
++TEST $CLI_3 volume start $V0
++
++wait_for_tier_start
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status
++
++#check for detach start and stop
++
++TEST $CLI_3 volume tier $V0 detach start
++
++TEST $CLI_3 volume tier $V0 detach stop
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status
++
++TEST $CLI_1 volume tier $V0 start force
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check
++
++# To test for detach start fail while the brick is down
++
++TEST pkill -f "$B1/$V0"
++
++TEST ! $CLI_1 volume tier $V0 detach start
++
++cleanup
++# This test isn't worth keeping. Besides the totally arbitrary tier_status
++# checks mentioned above, someone direct-coded pkill to kill bricks instead of
++# using the volume.rc function we already had. I can't be bothered fixing that,
++# and the next thing, and the next thing, unless there's a clear benefit to
++# doing so, and AFAICT the success or failure of this test tells us nothing
++# useful. Therefore, it's disabled until further notice.
++#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=000000
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/basic/tier/unlink-during-migration.t b/tests/basic/tier/unlink-during-migration.t
+new file mode 100755
+index 0000000..1330092
+--- /dev/null
++++ b/tests/basic/tier/unlink-during-migration.t
+@@ -0,0 +1,92 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++DEMOTE_FREQ=5
++PROMOTE_FREQ=5
++
++function create_dist_rep_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 replica 2 $H0:$B0/cold/${V0}{0..3}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume set $V0 features.ctr-enabled on
++ TEST $CLI volume start $V0
++}
++
++function attach_dist_rep_tier () {
++ TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/hot/${V0}{0..3}
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0
++ TEST $CLI volume set $V0 cluster.tier-mode test
++}
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume info
++
++
++#Create and start a volume
++create_dist_rep_vol
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++# Create a large file (320MB), so that rebalance takes time
++TEST dd if=/dev/zero of=$M0/foo bs=64k count=5120
++
++# Get the path of the file on the cold tier
++CPATH=`find $B0/cold/ -name foo`
++echo "File path on cold tier: "$CPATH
++
++#Now attach the tier
++attach_dist_rep_tier
++
++#Write into the file to promote it
++echo "good morning">>$M0/foo
++
++# Wait for the tier process to promote the file
++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $CPATH
++
++# Get the path of the file on the hot tier
++HPATH=`find $B0/hot/ -name foo`
++
++echo "File path on hot tier: "$HPATH
++TEST rm -rf $M0/foo
++TEST ! stat $HPATH
++TEST ! stat $CPATH
++
++#unlink during demotion
++HPATH="";
++CPATH="";
++
++# Create a large file (320MB), so that rebalance takes time
++TEST dd if=/dev/zero of=$M0/foo1 bs=64k count=5120
++
++# Get the path of the file on the hot tier
++HPATH=`find $B0/hot/ -name foo1`
++echo "File path on hot tier : "$HPATH
++
++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH
++
++# Get the path of the file on the cold tier
++CPATH=`find $B0/cold/ -name foo1`
++echo "File path on cold tier : "$CPATH
++
++TEST rm -rf $M0/foo1
++
++TEST ! stat $HPATH
++TEST ! stat $CPATH
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t b/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t
+new file mode 100644
+index 0000000..3b62a45
+--- /dev/null
++++ b/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t
+@@ -0,0 +1,78 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_tier_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{1..3}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{1..2}
++ TEST $CLI volume set $V0 cluster.tier-mode test
++}
++
++function non_zero_check () {
++ if [ "$1" -ne 0 ]
++ then
++ echo "0"
++ else
++ echo "1"
++ fi
++}
++
++function num_bricks_up {
++ local b
++ local n_up=0
++
++ for b in $B0/hot/${V0}{1..2} $B0/cold/${V0}{1..3}; do
++ if [ x"$(brick_up_status $V0 $H0 $b)" = x"1" ]; then
++ n_up=$((n_up+1))
++ fi
++ done
++
++ echo $n_up
++}
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume status
++
++
++#Create and start a tiered volume
++create_dist_tier_vol
++# Wait for the bricks to come up, *then* the tier daemon.
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 5 num_bricks_up
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_check
++sleep 5 #give the tier daemon some time to run
++time_before_restarting=$(rebalance_run_time $V0);
++
++#the elapsed time should be non-zero once the daemon has run.
++EXPECT "0" non_zero_check $time_before_restarting;
++
++#Difference of elapsed time should be positive
++
++kill -9 $(pidof glusterd);
++TEST glusterd;
++sleep 2;
++# Wait for the bricks to come up, *then* the tier daemon.
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 5 num_bricks_up
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check;
++sleep 1;
++time1=$(rebalance_run_time $V0);
++EXPECT "0" non_zero_check $time1;
++sleep 2;
++time2=$(rebalance_run_time $V0);
++EXPECT "0" non_zero_check $time2;
++diff=`expr $time2 - $time1`
++EXPECT "0" non_zero_check $diff;
+diff --git a/tests/bugs/quota/bug-1288474.t b/tests/bugs/quota/bug-1288474.t
+new file mode 100755
+index 0000000..b8f4ba3
+--- /dev/null
++++ b/tests/bugs/quota/bug-1288474.t
+@@ -0,0 +1,51 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++NUM_BRICKS=2
++
++function create_dist_tier_vol () {
++ mkdir -p $B0/cold/${V0}{0..$1}
++ mkdir -p $B0/hot/${V0}{0..$1}
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 nfs.disable false
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++}
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++
++#Create and start a tiered volume
++create_dist_tier_vol $NUM_BRICKS
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++touch $M0/foobar
++
++TEST $CLI volume quota $V0 enable
++TEST $CLI volume quota $V0 limit-usage / 10MB
++
++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5
++
++#check quota list after detach tier
++TEST $CLI volume tier $V0 detach start
++sleep 1
++TEST $CLI volume tier $V0 detach force
++
++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5
++
++#check quota list after attach tier
++rm -rf $B0/hot
++mkdir $B0/hot
++TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$NUM_BRICKS}
++
++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5
++
++TEST umount $M0
++
++cleanup;
++
+diff --git a/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t b/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t
+new file mode 100644
+index 0000000..9863834
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t
+@@ -0,0 +1,53 @@
++#!/bin/bash
++#Self-heal tests
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
++TEST $CLI volume set $V0 self-heal-daemon off
++TEST $CLI volume set $V0 entry-self-heal off
++TEST $CLI volume set $V0 metadata-self-heal off
++TEST $CLI volume set $V0 data-self-heal off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume start $V0
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/brick{2,3}
++TEST $CLI volume bitrot $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
++TEST $CLI volume bitrot $V0 scrub-frequency hourly
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++TEST dd if=/dev/urandom of=$M0/FILE bs=1024 count=1
++
++#Corrupt file from back-end
++TEST stat $B0/brick3/FILE
++echo "Corrupted data" >> $B0/brick3/FILE
++#Manually set bad-file xattr since we can't wait for an hour.
++TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/brick3/FILE
++
++TEST $CLI volume stop $V0
++TEST $CLI volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status';
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick3
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 3
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
++#Trigger lookup so that bitrot xlator marks file as bad in its inode context.
++stat $M0/FILE
++# Remove hot-tier
++TEST $CLI volume tier $V0 detach start
++sleep 1
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" detach_tier_status_field_complete $V0
++TEST $CLI volume tier $V0 detach commit
++#Test that file has migrated to cold tier.
++EXPECT "1024" stat -c "%s" $B0/brick0/FILE
++EXPECT "1024" stat -c "%s" $B0/brick1/FILE
++TEST umount $M0
++cleanup
+diff --git a/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t b/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t
+new file mode 100644
+index 0000000..b2d382a
+--- /dev/null
++++ b/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t
+@@ -0,0 +1,72 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++LAST_BRICK=3
++CACHE_BRICK_FIRST=4
++CACHE_BRICK_LAST=5
++
++cleanup
++
++# Start glusterd [1-2]
++TEST glusterd
++TEST pidof glusterd
++
++# Set-up tier cluster [3-4]
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++TEST $CLI volume start $V0
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++
++# Start and mount the volume after enabling CTR and trash [5-8]
++TEST $CLI volume set $V0 features.ctr-enabled on
++TEST $CLI volume set $V0 features.trash on
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Create an empty file
++touch $M0/foo
++
++# gf_file_tb and gf_flink_tb should contain one entry each [9]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++# Create two hard links
++ln $M0/foo $M0/lnk1
++ln $M0/foo $M0/lnk2
++
++# Now gf_flink_tb should contain 3 entries [10]
++ENTRY_COUNT=$(echo "select * from gf_flink_tb;" | \
++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 3 ]
++
++# Delete the hard link
++rm -rf $M0/lnk1
++
++# Corresponding hard link entry must be removed from gf_flink_tb
++# but gf_file_tb should still contain the file entry [11]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 3 ]
++
++# Remove the file
++rm -rf $M0/foo
++
++# Another hardlink removed [12]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++# Remove the last hardlink
++rm -rf $M0/lnk2
++
++# All entries must be removed from gf_flink_tb and gf_file_tb [13]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 0 ]
++
++cleanup
++
++
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/bugs/tier/bug-1279376-rename-demoted-file.t b/tests/bugs/tier/bug-1279376-rename-demoted-file.t
+new file mode 100755
+index 0000000..c4a50d9
+--- /dev/null
++++ b/tests/bugs/tier/bug-1279376-rename-demoted-file.t
+@@ -0,0 +1,93 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++NUM_BRICKS=2
++DEMOTE_FREQ=15
++DEMOTE_TIMEOUT=10
++PROMOTE_FREQ=500
++
++
++#Both src and dst files must hash to the same hot tier subvol
++SRC_FILE="file1.txt"
++DST_FILE="newfile1.txt"
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
++
++function create_dist_tier_vol () {
++ mkdir $B0/cold
++ mkdir $B0/hot
++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1}
++ TEST $CLI volume set $V0 performance.quick-read off
++ TEST $CLI volume set $V0 performance.io-cache off
++ TEST $CLI volume start $V0
++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1}
++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
++ TEST $CLI volume set $V0 cluster.tier-mode test
++
++#We do not want any files to be promoted during this test
++ TEST $CLI volume set $V0 features.record-counters on
++ TEST $CLI volume set $V0 cluster.read-freq-threshold 50
++ TEST $CLI volume set $V0 cluster.write-freq-threshold 50
++}
++
++
++cleanup;
++
++#Basic checks
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume info
++
++
++#Create and start a tiered volume
++create_dist_tier_vol $NUM_BRICKS
++
++# Mount FUSE
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++
++
++# The file will be created on the hot tier
++
++TEST touch "$M0/$SRC_FILE"
++
++# Get the path of the file on the hot tier
++HPATH=`find $B0/hot/ -name "$SRC_FILE"`
++echo "File path on hot tier: "$HPATH
++
++
++EXPECT "yes" exists_and_regular_file $HPATH
++
++# Wait for the tier process to demote the file
++sleep $DEMOTE_FREQ
++
++# Get the path of the file on the cold tier
++CPATH=`find $B0/cold/ -name "$SRC_FILE"`
++echo "File path on cold tier: "$CPATH
++
++EXPECT_WITHIN $DEMOTE_TIMEOUT "yes" exists_and_regular_file $CPATH
++
++#We don't want $DST_FILE to get demoted
++TEST $CLI volume set $V0 cluster.tier-demote-frequency $PROMOTE_FREQ
++
++#This will be created on the hot tier
++
++touch "$M0/$DST_FILE"
++HPATH=`find $B0/hot/ -name "$DST_FILE"`
++echo "File path on hot tier: "$HPATH
++
++TEST mv $M0/$SRC_FILE $M0/$DST_FILE
++
++# We expect a single file to exist at this point
++# when viewed on the mountpoint
++EXPECT 1 echo $(ls -l $M0 | grep $DST_FILE | wc -l)
++
++cleanup;
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index b7c7bd9..ed24858 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -1859,6 +1859,78 @@ out:
+ return ret;
+ }
+
++#if USE_GFDB /* only add changetimerecorder when GFDB is enabled */
++static int
++brick_graph_add_changetimerecorder(volgen_graph_t *graph,
++ glusterd_volinfo_t *volinfo,
++ dict_t *set_dict,
++ glusterd_brickinfo_t *brickinfo)
++{
++ xlator_t *xl = NULL;
++ int ret = -1;
++ char *brickname = NULL;
++ char *path = NULL;
++ char index_basepath[PATH_MAX] = {0};
++ char *hotbrick = NULL;
++
++ if (!graph || !volinfo || !set_dict || !brickinfo)
++ goto out;
++
++ path = brickinfo->path;
++
++ xl = volgen_graph_add(graph, "features/changetimerecorder",
++ volinfo->volname);
++ if (!xl)
++ goto out;
++
++ ret = xlator_set_fixed_option(xl, "db-type", "sqlite3");
++ if (ret)
++ goto out;
++
++ if (!set_dict || dict_get_str(set_dict, "hot-brick", &hotbrick))
++ hotbrick = "off";
++
++ ret = xlator_set_fixed_option(xl, "hot-brick", hotbrick);
++ if (ret)
++ goto out;
++
++ brickname = strrchr(path, '/') + 1;
++ snprintf(index_basepath, sizeof(index_basepath), "%s.db", brickname);
++ ret = xlator_set_fixed_option(xl, "db-name", index_basepath);
++ if (ret)
++ goto out;
++
++ snprintf(index_basepath, sizeof(index_basepath), "%s/%s", path,
++ ".glusterfs/");
++ ret = xlator_set_fixed_option(xl, "db-path", index_basepath);
++ if (ret)
++ goto out;
++
++ ret = xlator_set_fixed_option(xl, "record-exit", "off");
++ if (ret)
++ goto out;
++
++ ret = xlator_set_fixed_option(xl, "ctr_link_consistency", "off");
++ if (ret)
++ goto out;
++
++ ret = xlator_set_fixed_option(xl, "ctr_lookupheal_link_timeout", "300");
++ if (ret)
++ goto out;
++
++ ret = xlator_set_fixed_option(xl, "ctr_lookupheal_inode_timeout", "300");
++ if (ret)
++ goto out;
++
++ ret = xlator_set_fixed_option(xl, "record-entry", "on");
++ if (ret)
++ goto out;
++
++out:
++ return ret;
++}
++#endif /* USE_GFDB */
++
+ static int
+ brick_graph_add_acl(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+@@ -2615,6 +2687,9 @@ static volgen_brick_xlator_t server_graph_table[] = {
+ {brick_graph_add_acl, "acl"},
+ {brick_graph_add_bitrot_stub, "bitrot-stub"},
+ {brick_graph_add_changelog, "changelog"},
++#if USE_GFDB /* changetimerecorder depends on gfdb */
++ {brick_graph_add_changetimerecorder, "changetimerecorder"},
++#endif
+ {brick_graph_add_bd, "bd"},
+ {brick_graph_add_trash, "trash"},
+ {brick_graph_add_arbiter, "arbiter"},
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index c8f6e67..a877805 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -11,6 +11,474 @@ cases as published by the Free Software Foundation.
+ #include "glusterd-volgen.h"
+ #include "glusterd-utils.h"
+
++#if USE_GFDB /* no GFDB means tiering is disabled */
++
++static int
++get_tier_freq_threshold(glusterd_volinfo_t *volinfo, char *threshold_key)
++{
++ int threshold = 0;
++ char *str_threshold = NULL;
++ int ret = -1;
++ xlator_t *this = NULL;
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ glusterd_volinfo_get(volinfo, threshold_key, &str_threshold);
++ if (str_threshold) {
++ ret = gf_string2int(str_threshold, &threshold);
++ if (ret == -1) {
++ threshold = ret;
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "Failed to convert "
++ "string to integer");
++ }
++ }
++
++ return threshold;
++}
++
++/*
++ * Validation function for record-counters
++ * if write-freq-threshold and read-freq-threshold both have non-zero values
++ * record-counters cannot be set to off
++ * if record-counters is set to on,
++ * check whether both frequency thresholds are zero; if so, emit
++ * a note, but do not fail the volume set.
++ * */
++static int
++validate_tier_counters(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
++ char *value, char **op_errstr)
++{
++ char errstr[2048] = "";
++ int ret = -1;
++ xlator_t *this = NULL;
++ gf_boolean_t origin_val = -1;
++ int current_wt = 0;
++ int current_rt = 0;
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
++ snprintf(errstr, sizeof(errstr),
++ "Volume %s is not a tier "
++ "volume. Option %s is only valid for tier volume.",
++ volinfo->volname, key);
++ goto out;
++ }
++
++ ret = gf_string2boolean(value, &origin_val);
++ if (ret) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a compatible "
++ "value. %s expects a boolean value",
++ value, key);
++ goto out;
++ }
++
++ current_rt = get_tier_freq_threshold(volinfo,
++ "cluster.read-freq-threshold");
++ if (current_rt == -1) {
++ snprintf(errstr, sizeof(errstr),
++ " Failed to retrieve value"
++ " of cluster.read-freq-threshold");
++ goto out;
++ }
++ current_wt = get_tier_freq_threshold(volinfo,
++ "cluster.write-freq-threshold");
++ if (current_wt == -1) {
++ snprintf(errstr, sizeof(errstr),
++ " Failed to retrieve value "
++ "of cluster.write-freq-threshold");
++ goto out;
++ }
++ /* If record-counters is set to off */
++ if (!origin_val) {
++ /* Both the thresholds should be zero to set
++ * record-counters to off*/
++ if (current_rt || current_wt) {
++ snprintf(errstr, sizeof(errstr),
++ "Cannot set features.record-counters to \"%s\""
++ " as cluster.write-freq-threshold is %d"
++ " and cluster.read-freq-threshold is %d. Please"
++ " set both cluster.write-freq-threshold and "
++ " cluster.read-freq-threshold to 0, to set "
++ " features.record-counters to \"%s\".",
++ value, current_wt, current_rt, value);
++ ret = -1;
++ goto out;
++ }
++ }
++ /* TODO give a warning message to the user. errstr without ret = -1 will
++ * not result in a warning on the cli for now.
++ else {
++ if (!current_rt && !current_wt) {
++ snprintf (errstr, sizeof (errstr),
++ " Note : cluster.write-freq-threshold is %d"
++ " and cluster.read-freq-threshold is %d. Please"
++ " set both cluster.write-freq-threshold and "
++ " cluster.read-freq-threshold to"
++ " appropriate positive values.",
++ current_wt, current_rt);
++ }
++ }*/
++
++ ret = 0;
++out:
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "%s", errstr);
++ *op_errstr = gf_strdup(errstr);
++ }
++
++ return ret;
++}
++
++/*
++ * Validation function for ctr sql params
++ * features.ctr-sql-db-cachesize (Range: 1000 to 262144 pages)
++ * features.ctr-sql-db-wal-autocheckpoint (Range: 1000 to 262144 pages)
++ * */
++static int
++validate_ctr_sql_params(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
++ char *value, char **op_errstr)
++{
++ int ret = -1;
++ xlator_t *this = NULL;
++ char errstr[2048] = "";
++ int origin_val = -1;
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ ret = gf_string2int(value, &origin_val);
++ if (ret) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a compatible "
++ "value. %s expects an integer value.",
++ value, key);
++ ret = -1;
++ goto out;
++ }
++
++ if (origin_val < 0) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a "
++ "compatible value. %s expects a positive "
++ "integer value.",
++ value, key);
++ ret = -1;
++ goto out;
++ }
++
++ if (strstr(key, "sql-db-cachesize") ||
++ strstr(key, "sql-db-wal-autocheckpoint")) {
++ if ((origin_val < 1000) || (origin_val > 262144)) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a "
++ "compatible value. %s "
++ "expects a value between "
++ "1000 and 262144.",
++ value, key);
++ ret = -1;
++ goto out;
++ }
++ }
++
++ ret = 0;
++out:
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "%s", errstr);
++ *op_errstr = gf_strdup(errstr);
++ }
++ return ret;
++}
++
++/* Validation for tiering frequency thresholds
++ * If any of the frequency thresholds are set to a non-zero value,
++ * switch record-counters on, if not already on
++ * If both the frequency thresholds are set to zero,
++ * switch record-counters off, if not already off
++ * */
++static int
++validate_tier_thresholds(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
++ char *value, char **op_errstr)
++{
++ char errstr[2048] = "";
++ int ret = -1;
++ xlator_t *this = NULL;
++ int origin_val = -1;
++ gf_boolean_t current_rc = _gf_false;
++ int current_wt = 0;
++ int current_rt = 0;
++ gf_boolean_t is_set_rc = _gf_false;
++ char *proposed_rc = NULL;
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
++ snprintf(errstr, sizeof(errstr),
++ "Volume %s is not a tier "
++ "volume. Option %s is only valid for tier volume.",
++ volinfo->volname, key);
++ goto out;
++ }
++
++ ret = gf_string2int(value, &origin_val);
++ if (ret) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a compatible "
++ "value. %s expects an integer value.",
++ value, key);
++ ret = -1;
++ goto out;
++ }
++
++ if (origin_val < 0) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a "
++ "compatible value. %s expects a positive "
++ "integer value.",
++ value, key);
++ ret = -1;
++ goto out;
++ }
++
++ /* Get the record-counters value */
++ ret = glusterd_volinfo_get_boolean(volinfo, "features.record-counters");
++ if (ret == -1) {
++ snprintf(errstr, sizeof(errstr),
++ "Failed to retrieve the value of "
++ "features.record-counters from volume info");
++ goto out;
++ }
++ current_rc = ret;
++
++ /* if any of the thresholds are set to a non-zero value
++ * switch record-counters on, if not already on*/
++ if (origin_val > 0) {
++ if (!current_rc) {
++ is_set_rc = _gf_true;
++ current_rc = _gf_true;
++ }
++ } else {
++ /* if the set is for write-freq-threshold */
++ if (strstr(key, "write-freq-threshold")) {
++ current_rt = get_tier_freq_threshold(volinfo,
++ "cluster.read-freq-threshold");
++ if (current_rt == -1) {
++ snprintf(errstr, sizeof(errstr),
++ "Failed to retrieve the value of "
++ "cluster.read-freq-threshold");
++ goto out;
++ }
++ current_wt = origin_val;
++ }
++ /* else it should be read-freq-threshold */
++ else {
++ current_wt = get_tier_freq_threshold(
++ volinfo, "cluster.write-freq-threshold");
++ if (current_wt == -1) {
++ snprintf(errstr, sizeof(errstr),
++ "Failed to retrieve the value of "
++ "cluster.write-freq-threshold");
++ goto out;
++ }
++ current_rt = origin_val;
++ }
++
++ /* Since both the thresholds are zero, set record-counters
++ * to off, if not already off */
++ if (current_rt == 0 && current_wt == 0) {
++ if (current_rc) {
++ is_set_rc = _gf_true;
++ current_rc = _gf_false;
++ }
++ }
++ }
++
++ /* if record-counter has to be set to proposed value */
++ if (is_set_rc) {
++ if (current_rc) {
++ ret = gf_asprintf(&proposed_rc, "on");
++ } else {
++ ret = gf_asprintf(&proposed_rc, "off");
++ }
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "Failed to allocate memory to dict_value");
++ goto error;
++ }
++ ret = dict_set_str(volinfo->dict, "features.record-counters",
++ proposed_rc);
++ error:
++ if (ret) {
++ snprintf(errstr, sizeof(errstr),
++ "Failed to set features.record-counters "
++ "to \"%s\" automatically. "
++ "Please try to set features.record-counters "
++ "to \"%s\" manually. The options "
++ "cluster.write-freq-threshold and "
++ "cluster.read-freq-threshold can only "
++ "be set to a non-zero value if "
++ "features.record-counters is "
++ "set to \"on\".",
++ proposed_rc, proposed_rc);
++ goto out;
++ }
++ }
++ ret = 0;
++out:
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "%s", errstr);
++ *op_errstr = gf_strdup(errstr);
++ if (proposed_rc)
++ GF_FREE(proposed_rc);
++ }
++ return ret;
++}
++
++static int
++validate_tier(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value,
++ char **op_errstr)
++{
++ char errstr[2048] = "";
++ int ret = 0;
++ xlator_t *this = NULL;
++ int origin_val = -1;
++ char *current_wm_hi = NULL;
++ char *current_wm_low = NULL;
++ uint64_t wm_hi = 0;
++ uint64_t wm_low = 0;
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
++ snprintf(errstr, sizeof(errstr),
++ "Volume %s is not a tier "
++ "volume. Option %s is only valid for tier volume.",
++ volinfo->volname, key);
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "%s", errstr);
++ *op_errstr = gf_strdup(errstr);
++ ret = -1;
++ goto out;
++ }
++
++ if (strstr(key, "cluster.tier-mode")) {
++ if (strcmp(value, "test") && strcmp(value, "cache")) {
++ ret = -1;
++ goto out;
++ }
++ goto out;
++ } else if (strstr(key, "tier-pause")) {
++ if (strcmp(value, "off") && strcmp(value, "on")) {
++ ret = -1;
++ goto out;
++ }
++ goto out;
++ } else if (strstr(key, "tier-compact")) {
++ if (strcmp(value, "on") && strcmp(value, "off")) {
++ ret = -1;
++ goto out;
++ }
++
++ goto out;
++ }
++
++ /*
++ * The rest of the tier volume set options expect a positive
++ * integer. Change the function accordingly if this constraint
++ * changes.
++ */
++ ret = gf_string2int(value, &origin_val);
++ if (ret) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a compatible "
++ "value. %s expects an integer value.",
++ value, key);
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "%s", errstr);
++ *op_errstr = gf_strdup(errstr);
++ ret = -1;
++ goto out;
++ }
++
++ if (strstr(key, "watermark-hi") || strstr(key, "watermark-low")) {
++ if ((origin_val < 1) || (origin_val > 99)) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a "
++ "compatible value. %s expects a "
++ "percentage from 1-99.",
++ value, key);
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "%s", errstr);
++ *op_errstr = gf_strdup(errstr);
++ ret = -1;
++ goto out;
++ }
++
++ if (strstr(key, "watermark-hi")) {
++ wm_hi = origin_val;
++ } else {
++ glusterd_volinfo_get(volinfo, "cluster.watermark-hi",
++ &current_wm_hi);
++ gf_string2bytesize_uint64(current_wm_hi, &wm_hi);
++ }
++
++ if (strstr(key, "watermark-low")) {
++ wm_low = origin_val;
++ } else {
++ glusterd_volinfo_get(volinfo, "cluster.watermark-low",
++ &current_wm_low);
++ gf_string2bytesize_uint64(current_wm_low, &wm_low);
++ }
++ if (wm_low >= wm_hi) {
++ snprintf(errstr, sizeof(errstr),
++ "lower watermark "
++ "cannot equal or exceed the upper "
++ "watermark.");
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "%s", errstr);
++ *op_errstr = gf_strdup(errstr);
++ ret = -1;
++ goto out;
++ }
++ } else if (strstr(key, "tier-promote-frequency") ||
++ strstr(key, "tier-max-mb") ||
++ strstr(key, "tier-max-promote-file-size") ||
++ strstr(key, "tier-max-files") ||
++ strstr(key, "tier-demote-frequency") ||
++ strstr(key, "tier-hot-compact-frequency") ||
++ strstr(key, "tier-cold-compact-frequency") ||
++ strstr(key, "tier-query-limit")) {
++ if (origin_val < 1) {
++ snprintf(errstr, sizeof(errstr),
++ "%s is not a "
++ "compatible value. %s expects a positive "
++ "integer value greater than 0.",
++ value, key);
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++ "%s", errstr);
++ *op_errstr = gf_strdup(errstr);
++ ret = -1;
++ goto out;
++ }
++ }
++out:
++ gf_msg_debug(this->name, 0, "Returning %d", ret);
++
++ return ret;
++}
++
++#endif /* End for USE_GFDB */
++
+ static int
+ validate_cache_max_min_size(glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *key, char *value, char **op_errstr)
+@@ -2485,6 +2953,261 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ "/var/run/gluster/shared_storage on enabling this "
+ "option. Unmount and delete the shared storage volume "
+ " on disabling this option."},
++#if USE_GFDB /* no GFDB means tiering is disabled */
++ /* tier translator - global tunables */
++ {.key = "cluster.write-freq-threshold",
++ .voltype = "cluster/tier",
++ .value = "0",
++ .option = "write-freq-threshold",
++ .op_version = GD_OP_VERSION_3_7_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier_thresholds,
++ .description = "Defines the number of writes, in a promotion/demotion"
++ " cycle, that would mark a file HOT for promotion. Any"
++ " file that has fewer write hits than this value will "
++ "be considered COLD and will be demoted."},
++ {.key = "cluster.read-freq-threshold",
++ .voltype = "cluster/tier",
++ .value = "0",
++ .option = "read-freq-threshold",
++ .op_version = GD_OP_VERSION_3_7_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier_thresholds,
++ .description = "Defines the number of reads, in a promotion/demotion "
++ "cycle, that would mark a file HOT for promotion. Any "
++ "file that has fewer read hits than this value will be "
++ "considered COLD and will be demoted."},
++ {
++ .key = "cluster.tier-pause",
++ .voltype = "cluster/tier",
++ .option = "tier-pause",
++ .op_version = GD_OP_VERSION_3_7_6,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ },
++ {
++ .key = "cluster.tier-promote-frequency",
++ .voltype = "cluster/tier",
++ .value = "120",
++ .option = "tier-promote-frequency",
++ .op_version = GD_OP_VERSION_3_7_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ },
++ {
++ .key = "cluster.tier-demote-frequency",
++ .voltype = "cluster/tier",
++ .value = "3600",
++ .option = "tier-demote-frequency",
++ .op_version = GD_OP_VERSION_3_7_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ },
++ {.key = "cluster.watermark-hi",
++ .voltype = "cluster/tier",
++ .value = "90",
++ .option = "watermark-hi",
++ .op_version = GD_OP_VERSION_3_7_6,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ .description =
++ "Upper % watermark for promotion. If hot tier fills"
++ " above this percentage, no promotion will happen and demotion will "
++ "happen with high probability."},
++ {.key = "cluster.watermark-low",
++ .voltype = "cluster/tier",
++ .value = "75",
++ .option = "watermark-low",
++ .op_version = GD_OP_VERSION_3_7_6,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ .description =
++ "Lower % watermark. If hot tier is less "
++ "full than this, promotion will happen and demotion will not happen. "
++ "If greater than this, promotion/demotion will happen at a "
++ "probability "
++ "relative to how full the hot tier is."},
++ {.key = "cluster.tier-mode",
++ .voltype = "cluster/tier",
++ .option = "tier-mode",
++ .value = "cache",
++ .op_version = GD_OP_VERSION_3_7_6,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ .description =
++ "Either 'test' or 'cache'. Test mode periodically"
++ " demotes or promotes files automatically based on access."
++ " Cache mode does so based on whether the cache is full or not,"
++ " as specified with watermarks."},
++ {.key = "cluster.tier-max-promote-file-size",
++ .voltype = "cluster/tier",
++ .option = "tier-max-promote-file-size",
++ .value = "0",
++ .op_version = GD_OP_VERSION_3_7_10,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ .description =
++ "The maximum file size in bytes that is promoted. If 0, there"
++ " is no maximum size (default)."},
++ {.key = "cluster.tier-max-mb",
++ .voltype = "cluster/tier",
++ .option = "tier-max-mb",
++ .value = "4000",
++ .op_version = GD_OP_VERSION_3_7_6,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ .description = "The maximum number of MB that may be migrated"
++ " in any direction in a given cycle by a single node."},
++ {.key = "cluster.tier-max-files",
++ .voltype = "cluster/tier",
++ .option = "tier-max-files",
++ .value = "10000",
++ .op_version = GD_OP_VERSION_3_7_6,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ .description = "The maximum number of files that may be migrated"
++ " in any direction in a given cycle by a single node."},
++ {.key = "cluster.tier-query-limit",
++ .voltype = "cluster/tier",
++ .option = "tier-query-limit",
++ .value = "100",
++ .op_version = GD_OP_VERSION_3_9_1,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ .type = NO_DOC,
++ .description = "The maximum number of files that may be migrated "
++ "during an emergency demote. An emergency condition "
++ "is flagged when writes breach the hi-watermark."},
++ {.key = "cluster.tier-compact",
++ .voltype = "cluster/tier",
++ .option = "tier-compact",
++ .value = "on",
++ .op_version = GD_OP_VERSION_3_9_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ .description = "Activate or deactivate the compaction of the DB"
++ " for the volume's metadata."},
++ {
++ .key = "cluster.tier-hot-compact-frequency",
++ .voltype = "cluster/tier",
++ .value = "604800",
++ .option = "tier-hot-compact-frequency",
++ .op_version = GD_OP_VERSION_3_9_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ },
++ {
++ .key = "cluster.tier-cold-compact-frequency",
++ .voltype = "cluster/tier",
++ .value = "604800",
++ .option = "tier-cold-compact-frequency",
++ .op_version = GD_OP_VERSION_3_9_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT,
++ .validate_fn = validate_tier,
++ },
++ {.key = "features.ctr-enabled",
++ .voltype = "features/changetimerecorder",
++ .value = "off",
++ .option = "ctr-enabled",
++ .op_version = GD_OP_VERSION_3_7_0,
++ .description = "Enable CTR xlator"},
++ {.key = "features.record-counters",
++ .voltype = "features/changetimerecorder",
++ .value = "off",
++ .option = "record-counters",
++ .op_version = GD_OP_VERSION_3_7_0,
++ .validate_fn = validate_tier_counters,
++ .description = "A Change Time Recorder Xlator option to "
++ "enable recording write "
++ "and read heat counters. The default is disabled. "
++ "If enabled, \"cluster.write-freq-threshold\" and "
++ "\"cluster.read-freq-threshold\" define the number "
++ "of writes (or reads) to a given file needed "
++ "before triggering migration."},
++ {.key = "features.ctr-record-metadata-heat",
++ .voltype = "features/changetimerecorder",
++ .value = "off",
++ .option = "ctr-record-metadata-heat",
++ .op_version = GD_OP_VERSION_3_7_0,
++ .type = NO_DOC,
++ .description = "A Change Time Recorder Xlator option to "
++ "enable recording write heat on the metadata of a file. "
++ "The default is disabled. "
++ "Metadata means inode attributes like atime, mtime,"
++ " permissions etc. and "
++ "extended attributes of a file."},
++ {.key = "features.ctr_link_consistency",
++ .voltype = "features/changetimerecorder",
++ .value = "off",
++ .option = "ctr_link_consistency",
++ .op_version = GD_OP_VERSION_3_7_0,
++ .type = NO_DOC,
++ .description = "Enable a crash consistent way of recording hardlink "
++ "updates by Change Time Recorder Xlator. "
++ "When recording in a crash "
++ "consistent way the data operations will "
++ "experience more latency."},
++ {.key = "features.ctr_lookupheal_link_timeout",
++ .voltype = "features/changetimerecorder",
++ .value = "300",
++ .option = "ctr_lookupheal_link_timeout",
++ .op_version = GD_OP_VERSION_3_7_2,
++ .type = NO_DOC,
++ .description = "Defines the expiry period of the in-memory "
++ "hardlink of an inode, "
++ "used by lookup heal in Change Time Recorder. "
++ "Once the expiry period "
++ "elapses, an attempt to heal the database per "
++ "hardlink is made and the "
++ "in-memory hardlink period is reset."},
++ {.key = "features.ctr_lookupheal_inode_timeout",
++ .voltype = "features/changetimerecorder",
++ .value = "300",
++ .option = "ctr_lookupheal_inode_timeout",
++ .op_version = GD_OP_VERSION_3_7_2,
++ .type = NO_DOC,
++ .description = "Defines the expiry period of the in-memory inode, "
++ "used by lookup heal in Change Time Recorder. "
++ "Once the expiry period "
++ "elapses, an attempt to heal the database per "
++ "inode is made."},
++ {.key = "features.ctr-sql-db-cachesize",
++ .voltype = "features/changetimerecorder",
++ .value = "12500",
++ .option = "sql-db-cachesize",
++ .validate_fn = validate_ctr_sql_params,
++ .op_version = GD_OP_VERSION_3_7_7,
++ .description = "Defines the cache size of the sqlite database of "
++ "the changetimerecorder xlator. "
++ "The input to this option is in pages. "
++ "Each page is 4096 bytes. The default value is 12500 "
++ "pages. "
++ "The max value is 262144 pages, i.e. 1 GB, and "
++ "the min value is 1000 pages, i.e. ~4 MB."},
++ {.key = "features.ctr-sql-db-wal-autocheckpoint",
++ .voltype = "features/changetimerecorder",
++ .value = "25000",
++ .option = "sql-db-wal-autocheckpoint",
++ .validate_fn = validate_ctr_sql_params,
++ .op_version = GD_OP_VERSION_3_7_7,
++ .description = "Defines the autocheckpoint of the sqlite database "
++ "of the changetimerecorder xlator. "
++ "The input to this option is in pages. "
++ "Each page is 4096 bytes. The default value is 25000 "
++ "pages. "
++ "The max value is 262144 pages, i.e. 1 GB, and "
++ "the min value is 1000 pages, i.e. ~4 MB."},
++ {.key = VKEY_FEATURES_SELINUX,
++ .voltype = "features/selinux",
++ .type = NO_DOC,
++ .value = "on",
++ .op_version = GD_OP_VERSION_3_11_0,
++ .description = "Convert security.selinux xattrs to "
++ "trusted.gluster.selinux on the bricks. Recommended "
++ "to have enabled when clients and/or bricks support "
++ "SELinux."},
++
++#endif /* USE_GFDB */
+ {
+ .key = "locks.trace",
+ .voltype = "features/locks",
+--
+1.8.3.1
+
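The tier option table and validate_tier() above encode one invariant that the tier.t script near the top of this patch exercises directly: watermark-hi 85 with watermark-low 75 is accepted, while watermark-low 90 or watermark-hi 75 is rejected. As a rough standalone sketch of that check (check_watermarks is a hypothetical name, simplified from validate_tier(), not part of the patch):

    #include <stdint.h>

    /* A watermark pair is accepted only when both values are valid
     * percentages and the low mark stays strictly below the high mark. */
    static int
    check_watermarks(uint64_t wm_low, uint64_t wm_hi)
    {
        if (wm_low < 1 || wm_low > 99 || wm_hi < 1 || wm_hi > 99)
            return -1; /* outside the 1-99 percentage range */
        if (wm_low >= wm_hi)
            return -1; /* lower watermark cannot equal or exceed upper */
        return 0;
    }

Note that validate_tier() compares the proposed value against the watermark currently stored in volinfo, so a set that is inconsistent with the current pair is rejected even if a later set would restore consistency.
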
diff --git a/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch b/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch
new file mode 100644
index 0000000..1f4a767
--- /dev/null
+++ b/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch
@@ -0,0 +1,89 @@
+From 144f2eb56d1bbecc9c455065755f41ec81974e3e Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Sun, 7 Apr 2019 21:54:07 +0530
+Subject: [PATCH 087/124] ganesha : fixing minor issues after the backport from
+ 3.4
+
+label : DOWNSTREAM ONLY
+
+Change-Id: Ib0f6d8728d2e33da63ed4baab0bb981a0b06a8e0
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167168
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 20 +++++++++++++++++---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +-
+ 2 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index e0607ba..f6b823d 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -883,6 +883,15 @@ sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sha
+ install -D -p -m 0644 extras/glusterfs-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
+
++# ganesha ghosts
++%if ( 0%{!?_without_server:1} )
++mkdir -p %{buildroot}%{_sysconfdir}/ganesha
++touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf
++mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/
++touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
++touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
++%endif
++
+ %if ( 0%{!?_without_georeplication:1} )
+ mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
+ touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+@@ -1197,7 +1206,7 @@ exit 0
+
+ %if ( 0%{?_without_server:1} )
+ #exclude ganesha related files
+-%exclude %{_sysconfdir}/ganesha/*
++%exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+ %exclude %{_libexecdir}/ganesha/*
+ %exclude %{_prefix}/lib/ocf/resource.d/heartbeat/*
+ %endif
+@@ -1376,9 +1385,15 @@ exit 0
+
+ %if ( 0%{!?_without_server:1} )
+ %files ganesha
+-%{_sysconfdir}/ganesha/*
++%dir %{_libexecdir}/ganesha
++%{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+ %{_libexecdir}/ganesha/*
+ %{_prefix}/lib/ocf/resource.d/heartbeat/*
++%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
++%ghost %attr(0644,-,-) %config(noreplace) %{_sysconfdir}/ganesha/ganesha-ha.conf
++%ghost %dir %attr(0755,-,-) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha
++%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
++%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+ %endif
+
+ %if ( 0%{!?_without_ocf:1} )
+@@ -1508,7 +1523,6 @@ exit 0
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh
+- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index d882105..0a16925 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -278,7 +278,7 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+ goto out;
+ }
+
+- if (strcmp(value, "enable")) {
++ if (strcmp(value, "enable") == 0) {
+ ret = start_ganesha(op_errstr);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL,
+--
+1.8.3.1
+
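The one-line glusterd-ganesha.c change above fixes a classic strcmp() inversion: strcmp() returns 0 on a match, so the unfixed `if (strcmp(value, "enable"))` took the start path for every value except "enable". A minimal illustration (hypothetical program, not from the patch):

    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        const char *value = "disable";

        if (strcmp(value, "enable"))      /* buggy: true for "disable" */
            printf("old check: start path taken\n");
        if (strcmp(value, "enable") == 0) /* fixed: only for "enable" */
            printf("new check: start path taken\n");
        return 0;
    }

With value = "disable" only the first line prints; with value = "enable" only the second does.
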
diff --git a/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch b/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch
new file mode 100644
index 0000000..db831a2
--- /dev/null
+++ b/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch
@@ -0,0 +1,74 @@
+From bbcfd7e28b43845bac675dcc486bde09b0953f64 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Thu, 11 Apr 2019 14:40:11 +0530
+Subject: [PATCH 088/124] tier: fix failures noticed during tier start and tier
+ restart.
+
+Problem 1: when tier is started using the tier start command,
+the output was skipped on failure. Failures don't carry a
+transaction id; this id was checked, and if it was missing the
+output was skipped.
+
+Fix: remove the unnecessary jump for that case.
+
+Problem 2: when tier was restarted, tierd didn't come online.
+This was because certain values that were supposed to be stored
+in glusterd (glusterd-store.c) are needed during restart to
+restore the original state.
+These values were not stored, and as they were missing, tierd
+didn't come online.
+
+Fix: store the values and make them available during the start.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I7df898fa4c3b72fe8ded4adbf573307a59a37e5e
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167653
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ cli/src/cli-rpc-ops.c | 1 -
+ xlators/mgmt/glusterd/src/glusterd-store.c | 13 +++++++++++++
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 736cd18..b167e26 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -1973,7 +1973,6 @@ gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "failed to get %s from dict",
+ GF_REBALANCE_TID_KEY);
+- goto out;
+ }
+ if (rsp.op_ret && strcmp(rsp.op_errstr, "")) {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 351bd9e..4889217 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -3336,6 +3336,19 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
+ break;
+
+ case GF_CLUSTER_TYPE_TIER:
++ if (volinfo->tier_info.cold_type ==
++ GF_CLUSTER_TYPE_DISPERSE)
++ volinfo->tier_info.cold_dist_leaf_count
++ = volinfo->disperse_count;
++ else
++ volinfo->tier_info.cold_dist_leaf_count
++ = glusterd_calc_dist_leaf_count (
++ volinfo->tier_info.
++ cold_replica_count,
++ 1);
++
++ break;
++
+ case GF_CLUSTER_TYPE_STRIPE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ gf_msg(this->name, GF_LOG_CRITICAL, ENOTSUP,
+--
+1.8.3.1
+
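The glusterd-store.c hunk above makes the commit message concrete: on restart the cold tier's distribute leaf count has to be recomputed, or tierd cannot rebuild its view of the volume. A rough model of the stored value, assuming glusterd_calc_dist_leaf_count(replica, stripe) simply multiplies its arguments (which the fixed stripe count of 1 in the patch suggests; the disperse enum value here is illustrative only, not the real glusterd constant):

    /* Hypothetical sketch, not the glusterd API. */
    enum { HYPO_TYPE_DISPERSE = 1 };

    static int
    cold_dist_leaf_count(int cold_type, int disperse_count, int replica_count)
    {
        if (cold_type == HYPO_TYPE_DISPERSE)
            return disperse_count;    /* a disperse set contributes all members */
        return replica_count * 1;     /* replica set with a stripe count of 1 */
    }
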
diff --git a/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch b/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch
new file mode 100644
index 0000000..6bdb7fc
--- /dev/null
+++ b/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch
@@ -0,0 +1,85 @@
+From 8ba7e04362019ea2d0e80e67eb214d53dca58774 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Thu, 17 Nov 2016 12:44:38 +0530
+Subject: [PATCH 089/124] glusterd/gNFS : On post upgrade to 3.2, disable gNFS
+ for all volumes
+
+Currently on 3.2 gNFS is disabled for newly created volumes, or old volumes
+with the default value. There will be volumes which have explicitly turned
+off the nfs.disable option. This change disables gNFS for those volumes as well.
+
+label : DOWNSTREAM ONLY
+
+Change-Id: I4ddeb23690271034b0bbb3fc50b359350b5eae87
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/90425
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: Atin Mukherjee <amukherj@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167573
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 43 ++++++++++++++++++------------
+ 1 file changed, 26 insertions(+), 17 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 10e2d48..06ea8cf 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -2501,26 +2501,35 @@ glusterd_update_volumes_dict(glusterd_volinfo_t *volinfo)
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* 3.9.0 onwards gNFS will be disabled by default. In case of an upgrade
+- * from anything below than 3.9.0 to 3.9.x the volume's dictionary will
+- * not have 'nfs.disable' key set which means the same will not be set
+- * to on until explicitly done. setnfs.disable to 'on' at op-version
+- * bump up flow is the ideal way here. The same is also applicable for
+- * transport.address-family where if the transport type is set to tcp
+- * then transport.address-family is defaulted to 'inet'.
++ * from anything below 3.9.0 to 3.9.x, the value for nfs.disable is
++ * set to 'on' for all volumes even if it was explicitly set to 'off' in
++ * the previous version. This change is only applicable to downstream code.
++ * Setting nfs.disable to 'on' at op-version bump up flow is the ideal
++ * way here. The same is also applicable for transport.address-family
++ * where if the transport type is set to tcp then transport.address-family
++ * is defaulted to 'inet'.
+ */
+ if (conf->op_version >= GD_OP_VERSION_3_9_0) {
+- if (dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 1)) {
+- ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY,
+- "on");
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+- "Failed to set "
+- "option ' NFS_DISABLE_MAP_KEY ' on "
+- "volume %s",
+- volinfo->volname);
+- goto out;
+- }
++ if (!(dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 0))) {
++ gf_msg(this->name, GF_LOG_INFO, 0, 0,
++ "Gluster NFS is"
++ " being deprecated in favor of NFS-Ganesha, "
++ "hence setting nfs.disable to 'on' for volume "
++ "%s. Please re-enable it if requires",
++ volinfo->volname);
++ }
++
++ ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY,
++ "on");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
++ "Failed to set "
++ "option ' NFS_DISABLE_MAP_KEY ' on "
++ "volume %s",
++ volinfo->volname);
++ goto out;
+ }
++
+ ret = dict_get_strn(volinfo->dict, "transport.address-family",
+ SLEN("transport.address-family"),
+ &address_family_str);
+--
+1.8.3.1
+
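The behavioural pivot in the hunk above is the third argument to dict_get_str_boolean(): it is the value returned when the key is absent from the volume dict. The old code used a default of 1, so an unset nfs.disable was treated as already "on"; the new code uses a default of 0 only to decide whether to log the deprecation message, then sets the key to "on" unconditionally. A self-contained model of that contract (simplified; not the libglusterfs implementation):

#include <string.h>

/* Simplified model of dict_get_str_boolean(): return the caller's
 * default when the key is unset, otherwise parse the stored string.
 * The real libglusterfs helper also handles error cases; this is an
 * illustration only. */
static int
get_str_boolean(const char *val /* NULL when the key is unset */, int def)
{
    if (val == NULL)
        return def;
    return (strcmp(val, "on") == 0 || strcmp(val, "true") == 0 ||
            strcmp(val, "yes") == 0 || strcmp(val, "1") == 0);
}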
diff --git a/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch b/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch
new file mode 100644
index 0000000..cbe1403
--- /dev/null
+++ b/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch
@@ -0,0 +1,307 @@
+From 19210e4fc551893d1545e719fa26d9ad2d2f5cba Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Mon, 13 Nov 2017 18:41:58 +0530
+Subject: [PATCH 090/124] Revert "build: conditionally build legacy gNFS server
+ and associated sub-packaging"
+
+This reverts commit 83abcba6b42f94eb5a6495a634d4055362a9d79d.
+
+label : DOWNSTREAM ONLY
+
+Change-Id: If1c02d80b746e0a5b5e2c9a3625909158eff55d5
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167575
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac | 13 -------
+ extras/LinuxRPM/Makefile.am | 4 +--
+ glusterfs.spec.in | 54 ++++++----------------------
+ xlators/Makefile.am | 5 +--
+ xlators/mgmt/glusterd/src/glusterd-nfs-svc.c | 27 ++++++--------
+ 5 files changed, 24 insertions(+), 79 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index 633e850..521671b 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -1359,18 +1359,6 @@ if test -n "$LIBAIO"; then
+ BUILD_LIBAIO=yes
+ fi
+
+-dnl gnfs section
+-BUILD_GNFS="no"
+-AC_ARG_ENABLE([gnfs],
+- AC_HELP_STRING([--enable-gnfs],
+- [Enable legacy gnfs server xlator.]))
+-if test "x${with_server}" = "xyes" -a "x$enable_gnfs" = "xyes"; then
+- BUILD_GNFS="yes"
+-fi
+-AM_CONDITIONAL([BUILD_GNFS], [test x$BUILD_GNFS = xyes])
+-AC_SUBST(BUILD_GNFS)
+-dnl end gnfs section
+-
+ dnl Check for userspace-rcu
+ PKG_CHECK_MODULES([URCU], [liburcu-bp], [],
+ [AC_CHECK_HEADERS([urcu-bp.h],
+@@ -1624,7 +1612,6 @@ echo "EC dynamic support : $EC_DYNAMIC_SUPPORT"
+ echo "Use memory pools : $USE_MEMPOOL"
+ echo "Nanosecond m/atimes : $BUILD_NANOSECOND_TIMESTAMPS"
+ echo "Server components : $with_server"
+-echo "Legacy gNFS server : $BUILD_GNFS"
+ echo "IPV6 default : $with_ipv6_default"
+ echo "Use TIRPC : $with_libtirpc"
+ echo "With Python : ${PYTHON_VERSION}"
+diff --git a/extras/LinuxRPM/Makefile.am b/extras/LinuxRPM/Makefile.am
+index f028537..61fd6da 100644
+--- a/extras/LinuxRPM/Makefile.am
++++ b/extras/LinuxRPM/Makefile.am
+@@ -18,7 +18,7 @@ autogen:
+ cd ../.. && \
+ rm -rf autom4te.cache && \
+ ./autogen.sh && \
+- ./configure --enable-gnfs --with-previous-options
++ ./configure --with-previous-options
+
+ prep:
+ $(MAKE) -C ../.. dist;
+@@ -36,7 +36,7 @@ srcrpm:
+ mv rpmbuild/SRPMS/* .
+
+ rpms:
+- rpmbuild --define '_topdir $(shell pwd)/rpmbuild' --with gnfs -bb rpmbuild/SPECS/glusterfs.spec
++ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bb rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/RPMS/*/* .
+
+ # EPEL-5 does not like new versions of rpmbuild and requires some
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index f6b823d..cb17eaa 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -52,11 +52,6 @@
+ # rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without georeplication
+ %{?_without_georeplication:%global _without_georeplication --disable-georeplication}
+
+-# gnfs
+-# if you wish to compile an rpm with the legacy gNFS server xlator
+-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with gnfs
+-%{?_with_gnfs:%global _with_gnfs --enable-gnfs}
+-
+ # ipv6default
+ # if you wish to compile an rpm with IPv6 default...
+ # rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with ipv6default
+@@ -153,7 +148,6 @@
+ %if 0%{?_without_server:1}
+ %global _without_events --disable-events
+ %global _without_georeplication --disable-georeplication
+-%global _with_gnfs %{nil}
+ %global _without_tiering --disable-tiering
+ %global _without_ocf --without-ocf
+ %endif
+@@ -525,25 +519,6 @@ is in userspace and easily manageable.
+ This package provides support to geo-replication.
+ %endif
+
+-%if ( 0%{?_with_gnfs:1} )
+-%package gnfs
+-Summary: GlusterFS gNFS server
+-Requires: %{name}%{?_isa} = %{version}-%{release}
+-Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+-Requires: nfs-utils
+-
+-%description gnfs
+-GlusterFS is a distributed file-system capable of scaling to several
+-petabytes. It aggregates various storage bricks over Infiniband RDMA
+-or TCP/IP interconnect into one large parallel network file
+-system. GlusterFS is one of the most sophisticated file systems in
+-terms of features and extensibility. It borrows a powerful concept
+-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+-is in user space and easily manageable.
+-
+-This package provides the glusterfs legacy gNFS server xlator
+-%endif
+-
+ %package libs
+ Summary: GlusterFS common libraries
+
+@@ -659,6 +634,7 @@ Requires: %{name}-api%{?_isa} = %{version}-%{release}
+ Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+ # lvm2 for snapshot, and nfs-utils and rpcbind/portmap for gnfs server
+ Requires: lvm2
++Requires: nfs-utils
+ %if ( 0%{?_with_systemd:1} )
+ %{?systemd_requires}
+ %else
+@@ -789,7 +765,6 @@ export LDFLAGS
+ %{?_with_cmocka} \
+ %{?_with_debug} \
+ %{?_with_firewalld} \
+- %{?_with_gnfs} \
+ %{?_with_tmpfilesdir} \
+ %{?_with_tsan} \
+ %{?_with_valgrind} \
+@@ -1286,17 +1261,6 @@ exit 0
+ %{_bindir}/fusermount-glusterfs
+ %endif
+
+-%if ( 0%{?_with_gnfs:1} && 0%{!?_without_server:1} )
+-%files gnfs
+-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs
+- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/server.so
+-%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs
+-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+-%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
+-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+-%endif
+-
+ %files thin-arbiter
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+@@ -1409,11 +1373,6 @@ exit 0
+ %config(noreplace) %{_sysconfdir}/glusterfs
+ %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
+ %exclude %{_sysconfdir}/glusterfs/eventsconfig.json
+-%exclude %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+-%exclude %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+-%if ( 0%{?_with_gnfs:1} )
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/*
+-%endif
+ %config(noreplace) %{_sysconfdir}/sysconfig/glusterd
+ %if ( 0%{_for_fedora_koji_builds} )
+ %config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd
+@@ -1461,6 +1420,7 @@ exit 0
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so
++ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs*
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol
+@@ -1477,6 +1437,7 @@ exit 0
+
+ # /var/lib/glusterd, e.g. hookscripts, etc.
+ %ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info
++%ghost %attr(0600,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/options
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/bitd
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups
+@@ -1529,7 +1490,11 @@ exit 0
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S30samba-stop.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S29CTDB-teardown.sh
+-%config(noreplace) %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options
++%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs
++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
++%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub
+@@ -1995,6 +1960,9 @@ fi
+ %endif
+
+ %changelog
++* Sun Apr 7 2019 Jiffin Tony Thottan <jthottan@redhat.com>
++- DOWNSTREAM ONLY - revert of 83abcb(gnfs in an optional subpackage)
++
+ * Sun Apr 7 2019 Soumya Koduri <skoduri@redhat.com>
+ - As an interim fix add dependency on netstat(/net-tools) for glusterfs-ganesha package (#1395574)
+
+diff --git a/xlators/Makefile.am b/xlators/Makefile.am
+index ef20cbb..be54eb3 100644
+--- a/xlators/Makefile.am
++++ b/xlators/Makefile.am
+@@ -1,12 +1,9 @@
+-if BUILD_GNFS
+- GNFS_DIR = nfs
+-endif
+
+ DIST_SUBDIRS = cluster storage protocol performance debug features \
+ mount nfs mgmt system playground meta
+
+ SUBDIRS = cluster storage protocol performance debug features \
+- mount ${GNFS_DIR} mgmt system playground meta
++ mount nfs mgmt system playground meta
+
+ EXTRA_DIST = xlator.sym
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
+index 36e9052..3960031 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
+@@ -18,6 +18,8 @@
+ #include "glusterd-messages.h"
+ #include "glusterd-svc-helper.h"
+
++static char *nfs_svc_name = "nfs";
++
+ static gf_boolean_t
+ glusterd_nfssvc_need_start()
+ {
+@@ -41,6 +43,12 @@ glusterd_nfssvc_need_start()
+ return start;
+ }
+
++int
++glusterd_nfssvc_init(glusterd_svc_t *svc)
++{
++ return glusterd_svc_init(svc, nfs_svc_name);
++}
++
+ static int
+ glusterd_nfssvc_create_volfile()
+ {
+@@ -49,7 +57,7 @@ glusterd_nfssvc_create_volfile()
+ };
+ glusterd_conf_t *conf = THIS->private;
+
+- glusterd_svc_build_volfile_path(conf->nfs_svc.name, conf->workdir, filepath,
++ glusterd_svc_build_volfile_path(nfs_svc_name, conf->workdir, filepath,
+ sizeof(filepath));
+ return glusterd_create_global_volfile(build_nfs_graph, filepath, NULL);
+ }
+@@ -60,7 +68,7 @@ glusterd_nfssvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ int ret = -1;
+
+ if (!svc->inited) {
+- ret = glusterd_svc_init(svc, "nfs");
++ ret = glusterd_nfssvc_init(svc);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_NFSSVC,
+ "Failed to init nfs service");
+@@ -75,13 +83,6 @@ glusterd_nfssvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ if (ret)
+ goto out;
+
+- /* not an error, or a (very) soft error at best */
+- if (sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) {
+- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED,
+- "nfs/server.so xlator is not installed");
+- goto out;
+- }
+-
+ ret = glusterd_nfssvc_create_volfile();
+ if (ret)
+ goto out;
+@@ -155,14 +156,6 @@ glusterd_nfssvc_reconfigure()
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+- /* not an error, or a (very) soft error at best */
+- if (sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) {
+- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED,
+- "nfs/server.so xlator is not installed");
+- ret = 0;
+- goto out;
+- }
+-
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+--
+1.8.3.1
+
diff --git a/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch b/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch
new file mode 100644
index 0000000..292b3f5
--- /dev/null
+++ b/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch
@@ -0,0 +1,110 @@
+From ca3a4ebeddfef8c6909ff5388787a91ee52fd675 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Thu, 15 Dec 2016 17:14:01 +0530
+Subject: [PATCH 091/124] glusterd/gNFS : explicitly set "nfs.disable" to "off"
+ after 3.2 upgrade
+
+Gluster NFS was enabled by default for all volumes till 3.1. But 3.2 onwards
+for the new volumes it will be disabled by setting "nfs.disable" to "on".
+This take patch will take care of existing volume in such a way that if the
+option is not configured, it will set "nfs.disable" to "off" during op-version
+bump up.
+
+Also this patch removes the warning message while enabling gluster NFS for
+a volume.
+
+label : DOWNSTREAM ONLY
+
+Change-Id: Ib199c3180204f917791b4627c58d846750d18a5a
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/93146
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167574
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 16 ---------------
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 31 ++++++++++++------------------
+ 2 files changed, 12 insertions(+), 35 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index f85958b..92ceb8e 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -1678,22 +1678,6 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ goto out;
+ }
+ }
+- if ((!strcmp(key, "nfs.disable")) && (!strcmp(value, "off"))) {
+- question =
+- "Gluster NFS is being deprecated in favor "
+- "of NFS-Ganesha Enter \"yes\" to continue "
+- "using Gluster NFS";
+- answer = cli_cmd_get_confirmation(state, question);
+- if (GF_ANSWER_NO == answer) {
+- gf_log("cli", GF_LOG_ERROR,
+- "Operation "
+- "cancelled, exiting");
+- *op_errstr = gf_strdup("Aborted by user.");
+- ret = -1;
+- goto out;
+- }
+- }
+-
+ if ((strcmp(key, "cluster.brick-multiplex") == 0)) {
+ question =
+ "Brick-multiplexing is supported only for "
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 06ea8cf..df8a6ab 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -2502,32 +2502,25 @@ glusterd_update_volumes_dict(glusterd_volinfo_t *volinfo)
+
+ /* 3.9.0 onwards gNFS will be disabled by default. In case of an upgrade
+ * from anything below than 3.9.0 to 3.9.x, the value for nfs.disable is
+- * set to 'on' for all volumes even if it is explicitly set to 'off' in
++ * set to 'off' for all volumes even if it is explicitly set to 'on' in
+ * previous version. This change is only applicable to downstream code.
+- * Setting nfs.disable to 'on' at op-version bump up flow is the ideal
++ * Setting nfs.disable to 'off' at op-version bump up flow is the ideal
+ * way here. The same is also applicable for transport.address-family
+ * where if the transport type is set to tcp then transport.address-family
+ * is defaulted to 'inet'.
+ */
+ if (conf->op_version >= GD_OP_VERSION_3_9_0) {
+ if (!(dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 0))) {
+- gf_msg(this->name, GF_LOG_INFO, 0, 0,
+- "Gluster NFS is"
+- " being deprecated in favor of NFS-Ganesha, "
+- "hence setting nfs.disable to 'on' for volume "
+- "%s. Please re-enable it if requires",
+- volinfo->volname);
+- }
+-
+- ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY,
+- "on");
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+- "Failed to set "
+- "option ' NFS_DISABLE_MAP_KEY ' on "
+- "volume %s",
+- volinfo->volname);
+- goto out;
++ ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY,
++ "off");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
++ "Failed to set "
++ "option ' NFS_DISABLE_MAP_KEY ' off "
++ "volume %s",
++ volinfo->volname);
++ goto out;
++ }
+ }
+
+ ret = dict_get_strn(volinfo->dict, "transport.address-family",
+--
+1.8.3.1
+
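Both this patch and 0089 hook into the same spot: glusterd_update_volumes_dict() is invoked for every volume when the cluster op-version is bumped, which is when the nfs.disable defaulting happens. A hedged sketch of the rough shape of that surrounding walk (illustrative only; the real caller in glusterd-op-sm.c carries more error handling):

/* Illustrative only: apply glusterd_update_volumes_dict() to every
 * volume at op-version bump, stopping on the first failure. */
static int
update_all_volume_dicts(glusterd_conf_t *conf)
{
    glusterd_volinfo_t *volinfo = NULL;
    int ret = 0;

    cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
    {
        ret = glusterd_update_volumes_dict(volinfo);
        if (ret)
            break; /* stop on the first failing volume */
    }
    return ret;
}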
diff --git a/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch b/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch
new file mode 100644
index 0000000..f4a39aa
--- /dev/null
+++ b/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch
@@ -0,0 +1,41 @@
+From 82d7c8e057b9e22d13ca89f2a75e65a42878b7c3 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 2 Apr 2019 10:45:15 +0530
+Subject: [PATCH 092/124] logging: Fix GF_LOG_OCCASSIONALLY API
+
+GF_LOG_OCCASSIONALLY doesn't log on the first instance, only on every
+42nd iteration. This isn't effective because in some cases the code flow
+might not hit the same log site as many as 42 times, and we'd end
+up suppressing the log entirely.
+
+>upstream fix : https://review.gluster.org/#/c/glusterfs/+/22475/
+>Fixes: bz#1694925
+>Change-Id: Iee293281d25a652b64df111d59b13de4efce06fa
+>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1691620
+Change-Id: Iee293281d25a652b64df111d59b13de4efce06fa
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167822
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/logging.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/glusterfs/logging.h b/libglusterfs/src/glusterfs/logging.h
+index c81e19b..3655b1d 100644
+--- a/libglusterfs/src/glusterfs/logging.h
++++ b/libglusterfs/src/glusterfs/logging.h
+@@ -300,7 +300,7 @@ _gf_log_eh(const char *function, const char *fmt, ...)
+
+ /* Log once in GF_UNIVERSAL_ANSWER times */
+ #define GF_LOG_OCCASIONALLY(var, args...) \
+- if (!(var++ % GF_UNIVERSAL_ANSWER)) { \
++ if (var++ == 0 || !((var - 1) % GF_UNIVERSAL_ANSWER)) { \
+ gf_log(args); \
+ }
+
+--
+1.8.3.1
+
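To see the cadence the patched macro produces, the predicate can be lifted out of gf_log() and driven by a plain loop. A self-contained sketch, assuming a zero-initialized counter as at the macro's call sites:

#include <stdio.h>

#define GF_UNIVERSAL_ANSWER 42

/* The patched predicate, isolated from gf_log(): for a zero-initialized
 * counter it fires on the first call and then on every 42nd call. */
#define SHOULD_LOG(var) (var++ == 0 || !((var - 1) % GF_UNIVERSAL_ANSWER))

int
main(void)
{
    int counter = 0;
    int call;

    for (call = 0; call < 100; call++) {
        if (SHOULD_LOG(counter))
            printf("logged on call %d\n", call); /* calls 0, 42, 84 */
    }
    return 0;
}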
diff --git a/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch b/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch
new file mode 100644
index 0000000..7c3782c
--- /dev/null
+++ b/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch
@@ -0,0 +1,106 @@
+From 713f55b4a5cc582d06a10a1c9a0cdf71a4636a10 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 15 Apr 2019 07:57:40 +0530
+Subject: [PATCH 093/124] glusterd: Change op-version of cache-invalidation in
+ quick-read
+
+In upstream, the cache-invalidation option in quick-read was introduced with
+4.0. There are two problems with it:
+
+1. The key duplicates the md-cache xlator's key, so setting the same
+option actually enables this feature on both md-cache and quick-read.
+2. As the op-version tagged to this key was GD_OP_VERSION_4_0_0, on an RHGS
+3.5 cluster, when an md-cache profile is set on a particular volume, an old
+client fails to mount, which is wrong.
+
+Solving 1 without breaking backward compatibility in upstream is quite hard.
+This patch addresses both problems in downstream by (a) changing the
+op-version to GD_OP_VERSION_6_0 and (b) renaming this key to
+quick-read-cache-invalidation. The fix is currently made downstream only until a
+proper solution is identified upstream.
+
+Label: DOWNSTREAM ONLY
+BUG: 1697820
+Change-Id: I1251424576d6ebbdb2a761400fd20f0aff0c80a2
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167823
+Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/quick-read-with-upcall.t | 1 +
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 +++---
+ xlators/performance/quick-read/src/quick-read.c | 11 ++++++-----
+ 3 files changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/tests/basic/quick-read-with-upcall.t b/tests/basic/quick-read-with-upcall.t
+index 318e93a..647dacf 100644
+--- a/tests/basic/quick-read-with-upcall.t
++++ b/tests/basic/quick-read-with-upcall.t
+@@ -58,6 +58,7 @@ EXPECT "$D0" cat $M1/test1.txt
+ sleep 60
+ EXPECT "$D1" cat $M1/test1.txt
+
++TEST $CLI volume set $V0 performance.quick-read-cache-invalidation on
+ TEST $CLI volume set $V0 performance.cache-invalidation on
+
+ TEST write_to "$M0/test2.txt" "$D0"
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index a877805..42ca9bb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -1690,10 +1690,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .option = "cache-timeout",
+ .op_version = 1,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+- {.key = "performance.cache-invalidation",
++ {.key = "performance.quick-read-cache-invalidation",
+ .voltype = "performance/quick-read",
+- .option = "cache-invalidation",
+- .op_version = GD_OP_VERSION_4_0_0,
++ .option = "quick-read-cache-invalidation",
++ .op_version = GD_OP_VERSION_6_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "performance.ctime-invalidation",
+ .voltype = "performance/quick-read",
+diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
+index 244e8c8..59553c0 100644
+--- a/xlators/performance/quick-read/src/quick-read.c
++++ b/xlators/performance/quick-read/src/quick-read.c
+@@ -1218,8 +1218,8 @@ qr_reconfigure(xlator_t *this, dict_t *options)
+
+ GF_OPTION_RECONF("cache-timeout", conf->cache_timeout, options, int32, out);
+
+- GF_OPTION_RECONF("cache-invalidation", conf->qr_invalidation, options, bool,
+- out);
++ GF_OPTION_RECONF("quick-read-cache-invalidation", conf->qr_invalidation,
++ options, bool, out);
+
+ GF_OPTION_RECONF("ctime-invalidation", conf->ctime_invalidation, options,
+ bool, out);
+@@ -1369,7 +1369,8 @@ qr_init(xlator_t *this)
+
+ GF_OPTION_INIT("cache-timeout", conf->cache_timeout, int32, out);
+
+- GF_OPTION_INIT("cache-invalidation", conf->qr_invalidation, bool, out);
++ GF_OPTION_INIT("quick-read-cache-invalidation", conf->qr_invalidation, bool,
++ out);
+
+ GF_OPTION_INIT("cache-size", conf->cache_size, size_uint64, out);
+ if (!check_cache_size_ok(this, conf->cache_size)) {
+@@ -1615,10 +1616,10 @@ struct volume_options qr_options[] = {
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+ {
+- .key = {"cache-invalidation"},
++ .key = {"quick-read-cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+- .op_version = {GD_OP_VERSION_4_0_0},
++ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "When \"on\", invalidates/updates the metadata cache,"
+ " on receiving the cache-invalidation notifications",
+--
+1.8.3.1
+
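The second problem described in the commit message comes down to an op-version check: glusterd honors a volume option only once the cluster runs at least the op-version the option is tagged with, and a volfile carrying a newer option breaks older clients. A stripped-down model of the gate, under assumed names (this is not the glusterd_volopt_map machinery verbatim):

/* Illustrative model: an option is settable only once every peer in the
 * cluster runs at least the op-version the option was introduced in.
 * Names and layout are assumptions, not the real glusterd structures. */
struct volopt_entry {
    const char *key;
    unsigned int op_version; /* version the option was introduced in */
};

static int
option_settable(const struct volopt_entry *e, unsigned int cluster_op_version)
{
    return cluster_op_version >= e->op_version;
}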
diff --git a/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch b/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch
new file mode 100644
index 0000000..727f8b5
--- /dev/null
+++ b/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch
@@ -0,0 +1,45 @@
+From dab37dc78d21762ac3379ad505f8fc4ec996d0f7 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 9 Apr 2019 14:58:29 +0530
+Subject: [PATCH 094/124] glusterd: load ctime in the client graph only if it's
+ not turned off
+
+Considering ctime is a client-side feature, we can't blindly load the ctime
+xlator into the client graph if it's explicitly turned off; that'd
+result in a backward-compatibility issue where an old client can't mount
+a volume configured on a server which has the ctime feature.
+
+> Upstream patch : https://review.gluster.org/#/c/glusterfs/+/22536/
+>Fixes: bz#1697907
+>Change-Id: I6ae7b96d056073aa6746de9a449cf319786d45cc
+>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1697820
+Change-Id: I6ae7b96d056073aa6746de9a449cf319786d45cc
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167815
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index ed24858..012f38e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4358,7 +4358,8 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ }
+ }
+
+- if (conf->op_version >= GD_OP_VERSION_5_0) {
++ if (conf->op_version >= GD_OP_VERSION_5_0 &&
++ !dict_get_str_boolean(set_dict, "features.ctime", _gf_false)) {
+ xl = volgen_graph_add(graph, "features/utime", volname);
+ if (!xl) {
+ ret = -1;
+--
+1.8.3.1
+
diff --git a/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch b/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch
new file mode 100644
index 0000000..0e17c44
--- /dev/null
+++ b/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch
@@ -0,0 +1,204 @@
+From cca418b78ec976aa69eacd56b0e6127ea7e3dd26 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Thu, 4 Apr 2019 15:31:56 +0530
+Subject: [PATCH 095/124] cluster/afr: Remove local from owners_list on failure
+ of lock-acquisition
+
+ Backport of https://review.gluster.org/c/glusterfs/+/22515
+
+When eager-lock lock acquisition fails because of, say, network failures, the
+local is not removed from owners_list. This leads to an accumulation of
+waiting frames, and the application will hang, because the waiting frames
+assume that another transaction is in the process of acquiring the lock
+since the owner list is not empty. This patch handles that case as well.
+It also adds asserts to make it easier to find these problems in the future.
+
+Change-Id: I3101393265e9827755725b1f2d94a93d8709e923
+fixes: bz#1688395
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167859
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/replicate/bug-1696599-io-hang.t | 47 ++++++++++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-common.c | 8 ++---
+ xlators/cluster/afr/src/afr-lk-common.c | 1 -
+ xlators/cluster/afr/src/afr-transaction.c | 19 +++++-------
+ xlators/cluster/afr/src/afr.h | 4 +--
+ 5 files changed, 61 insertions(+), 18 deletions(-)
+ create mode 100755 tests/bugs/replicate/bug-1696599-io-hang.t
+
+diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t
+new file mode 100755
+index 0000000..869cdb9
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1696599-io-hang.t
+@@ -0,0 +1,47 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fileio.rc
++
++#Tests that local structures in afr are removed from granted/blocked list of
++#locks when inodelk fails on all bricks
++
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume set $V0 performance.client-io-threads off
++TEST $CLI volume set $V0 delay-gen locks
++TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
++TEST $CLI volume set $V0 delay-gen.delay-percentage 100
++TEST $CLI volume set $V0 delay-gen.enable finodelk
++
++TEST $CLI volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status'
++
++TEST $GFS -s $H0 --volfile-id $V0 $M0
++TEST touch $M0/file
++#Trigger write and stop bricks so inodelks fail on all bricks leading to
++#lock failure condition
++echo abc >> $M0/file &
++
++TEST $CLI volume stop $V0
++TEST $CLI volume reset $V0 delay-gen
++wait
++TEST $CLI volume start $V0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
++#Test that only one write succeeded, this tests that delay-gen worked as
++#expected
++echo abc >> $M0/file
++EXPECT "abc" cat $M0/file
++
++cleanup;
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 45b96e3..47a5d3a 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -5763,6 +5763,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
++ INIT_LIST_HEAD(&local->transaction.wait_list);
++ INIT_LIST_HEAD(&local->transaction.owner_list);
++ INIT_LIST_HEAD(&local->ta_waitq);
++ INIT_LIST_HEAD(&local->ta_onwireq);
+ ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
+ if (ret < 0)
+ goto out;
+@@ -5800,10 +5804,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
+ goto out;
+
+ ret = 0;
+- INIT_LIST_HEAD(&local->transaction.wait_list);
+- INIT_LIST_HEAD(&local->transaction.owner_list);
+- INIT_LIST_HEAD(&local->ta_waitq);
+- INIT_LIST_HEAD(&local->ta_onwireq);
+ out:
+ return ret;
+ }
+diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
+index 4091671..bc8eabe 100644
+--- a/xlators/cluster/afr/src/afr-lk-common.c
++++ b/xlators/cluster/afr/src/afr-lk-common.c
+@@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this)
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count) {
+- GF_ASSERT(!local->transaction.do_eager_unlock);
+ gf_msg_trace(this->name, 0, "No internal locks unlocked");
+ int_lock->lock_cbk(frame, this);
+ goto out;
+diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
+index 229820b..15f3a7e 100644
+--- a/xlators/cluster/afr/src/afr-transaction.c
++++ b/xlators/cluster/afr/src/afr-transaction.c
+@@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this)
+ }
+ local->transaction.unwind(frame, this);
+
++ GF_ASSERT(list_empty(&local->transaction.owner_list));
++ GF_ASSERT(list_empty(&local->transaction.wait_list));
+ AFR_STACK_DESTROY(frame);
+
+ return 0;
+@@ -393,7 +395,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
+ }
+
+ static void
+-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
++afr_handle_lock_acquire_failure(afr_local_t *local)
+ {
+ struct list_head shared;
+ afr_lock_t *lock = NULL;
+@@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
+ afr_lock_fail_shared(local, &shared);
+ local->transaction.do_eager_unlock = _gf_true;
+ out:
+- if (locked) {
+- local->internal_lock.lock_cbk = afr_transaction_done;
+- afr_unlock(local->transaction.frame, local->transaction.frame->this);
+- } else {
+- afr_transaction_done(local->transaction.frame,
+- local->transaction.frame->this);
+- }
++ local->internal_lock.lock_cbk = afr_transaction_done;
++ afr_unlock(local->transaction.frame, local->transaction.frame->this);
+ }
+
+ call_frame_t *
+@@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
+ failure_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
+ if (failure_count == priv->child_count) {
+- afr_handle_lock_acquire_failure(local, _gf_true);
++ afr_handle_lock_acquire_failure(local);
+ return 0;
+ } else {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+@@ -2092,7 +2089,7 @@ err:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+- afr_handle_lock_acquire_failure(local, _gf_true);
++ afr_handle_lock_acquire_failure(local);
+
+ if (xdata_req)
+ dict_unref(xdata_req);
+@@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
+ } else {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (local->internal_lock.lock_op_ret < 0) {
+- afr_handle_lock_acquire_failure(local, _gf_false);
++ afr_handle_lock_acquire_failure(local);
+ } else {
+ lock->event_generation = local->event_generation;
+ afr_changelog_pre_op(frame, this);
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index 2cc3797..e731cfa 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -1091,8 +1091,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
+ #define AFR_FRAME_INIT(frame, op_errno) \
+ ({ \
+ frame->local = mem_get0(THIS->local_pool); \
+- if (afr_local_init(frame->local, THIS->private, &op_errno)) { \
+- afr_local_cleanup(frame->local, THIS); \
++ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \
++ afr_local_cleanup(frame->local, frame->this); \
+ mem_put(frame->local); \
+ frame->local = NULL; \
+ }; \
+--
+1.8.3.1
+
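The core of the fix above is ordering: the list heads must be initialized before the first failure path can run, so that cleanup code and the new asserts always see well-defined (empty) heads. A self-contained model of that pattern (simplified list primitives and structure names; not the glusterfs implementation):

#include <stdio.h>

/* Minimal doubly linked list, for illustration only. */
struct list_head {
    struct list_head *next, *prev;
};

static void
init_list_head(struct list_head *h)
{
    h->next = h->prev = h;
}

static int
list_empty(const struct list_head *h)
{
    return h->next == h;
}

struct txn_local {
    struct list_head wait_list;
    struct list_head owner_list;
};

static int
txn_local_init(struct txn_local *local, int simulate_failure)
{
    /* Mirroring the patch: init the heads first, so that even if we
     * bail out below, cleanup can safely inspect or unlink them. */
    init_list_head(&local->wait_list);
    init_list_head(&local->owner_list);

    if (simulate_failure)
        return -1; /* before the fix, the heads were still garbage here */

    return 0;
}

int
main(void)
{
    struct txn_local local;

    if (txn_local_init(&local, 1) != 0)
        printf("init failed; owner list safely empty: %d\n",
               list_empty(&local.owner_list));
    return 0;
}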
diff --git a/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch b/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch
new file mode 100644
index 0000000..cc2a448
--- /dev/null
+++ b/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch
@@ -0,0 +1,94 @@
+From 3bcffadcd77eebe6b4f7e5015ad41ec7c1d1ec3e Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Thu, 11 Apr 2019 20:38:53 +0530
+Subject: [PATCH 096/124] core: Brick is not able to detach successfully in
+ brick_mux environment
+
+Problem: In a brick_mux environment, while volumes are stopped in a
+         loop, bricks are not detached successfully. Bricks are not
+         detached because xprtrefcnt does not reach 0 for the detached brick.
+         At the time of initiating the brick detach process, server_notify
+         saves xprtrefcnt on the detached brick, and once the counter reaches
+         0, server_rpc_notify spawns a server_graph_janitor_threads
+         to clean up brick resources. xprtrefcnt does not reach 0 because
+         the socket framework is not working, due to 0 being assigned as a fd for a socket.
+         In commit dc25d2c1eeace91669052e3cecc083896e7329b2
+         there was a change in changelog fini to close htime_fd if htime_fd is not
+         negative; by default htime_fd is 0, so it closes fd 0 as well.
+
+Solution: Initialize htime_fd to -1 just after allocating changelog_priv
+          with GF_CALLOC.
+
+> Fixes: bz#1699025
+> Change-Id: I5f7ca62a0eb1c0510c3e9b880d6ab8af8d736a25
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit b777d83001d8006420b6c7d2d88fe68950aa7e00)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22549/
+
+Fixes: bz#1698919
+Change-Id: Ib5b74aa0818235625f8aac7c23d4daa599da3fd1
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167826
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bug-1699025-brick-mux-detach-brick-fd-issue.t | 33 ++++++++++++++++++++++
+ xlators/features/changelog/src/changelog.c | 1 +
+ 2 files changed, 34 insertions(+)
+ create mode 100644 tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+
+diff --git a/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+new file mode 100644
+index 0000000..1acbaa8
+--- /dev/null
++++ b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+@@ -0,0 +1,33 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../cluster.rc
++
++function count_brick_processes {
++ pgrep glusterfsd | wc -l
++}
++
++cleanup
++
++#bug-1444596 - validating brick mux
++
++TEST glusterd
++TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
++TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
++
++TEST $CLI volume set all cluster.brick-multiplex on
++
++TEST $CLI volume start $V0
++TEST $CLI volume start $V1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
++EXPECT 1 count_brick_processes
++
++TEST $CLI volume stop $V1
++# At the time initialize brick daemon it always keeps open
++# standard fd's (0, 1 , 2) so after stop 1 volume fd's should
++# be open
++nofds=$(ls -lrth /proc/`pgrep glusterfsd`/fd | grep dev/null | wc -l)
++TEST [ $((nofds)) -eq 3 ]
++
++cleanup
+diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
+index 1f22a97..d9025f3 100644
+--- a/xlators/features/changelog/src/changelog.c
++++ b/xlators/features/changelog/src/changelog.c
+@@ -2740,6 +2740,7 @@ init(xlator_t *this)
+ GF_ATOMIC_INIT(priv->clntcnt, 0);
+ GF_ATOMIC_INIT(priv->xprtcnt, 0);
+ INIT_LIST_HEAD(&priv->xprt_list);
++ priv->htime_fd = -1;
+
+ ret = changelog_init_options(this, priv);
+ if (ret)
+--
+1.8.3.1
+
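The underlying bug is easy to reproduce in miniature: GF_CALLOC zero-fills the freshly allocated private structure, 0 is a valid file descriptor, and a teardown path that treats any non-negative fd as open will close a descriptor it never owned. A minimal sketch, with an assumed structure name:

#include <stdlib.h>
#include <unistd.h>

/* Illustrative structure; not the changelog xlator's real private. */
struct priv_sketch {
    int htime_fd;
};

int
main(void)
{
    struct priv_sketch *priv = calloc(1, sizeof(*priv));

    if (!priv)
        return 1;

    priv->htime_fd = -1; /* the fix: explicitly mark "no fd yet" */

    /* fini path: only close descriptors this object actually opened;
     * without the line above, htime_fd would be 0 and fd 0 (possibly a
     * live socket in a daemon) would be closed here. */
    if (priv->htime_fd >= 0)
        close(priv->htime_fd);

    free(priv);
    return 0;
}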
diff --git a/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch b/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch
new file mode 100644
index 0000000..42f1e47
--- /dev/null
+++ b/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch
@@ -0,0 +1,61 @@
+From 302f3f87c9aa00c17ec3b49a81c8a4441d2bdf5f Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Mon, 15 Apr 2019 10:01:40 +0530
+Subject: [PATCH 097/124] glusterd/tier: while doing an attach tier, the self
+ heal daemon is not getting started
+
+Problem: on a replicated volume, if attach tier is done,
+the shd will be restarted. But here the restart fails because the
+graph is not generated properly. The dict which is used for graph
+creation doesn't get the values copied properly in prepare_shd_volume_options():
+glusterd_prepare_shd_volume_options_for_tier() fails and skips the copy.
+
+This patch reverts the changes back to the way they were in 3.4 and
+helps fix the issue. Using the old dict_set_str works.
+
+label: DOWNSTREAM ONLY
+
+Change-Id: I21534ca177511e018ba76886e899b3b1a4ac4716
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167825
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 012f38e..1f53beb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4895,14 +4895,21 @@ glusterd_prepare_shd_volume_options_for_tier(glusterd_volinfo_t *volinfo,
+ dict_t *set_dict)
+ {
+ int ret = -1;
++ char *key = NULL;
+
+- ret = volgen_set_shd_key_enable(set_dict, volinfo->tier_info.cold_type);
+- if (ret)
+- goto out;
++ key = volgen_get_shd_key (volinfo->tier_info.cold_type);
++ if (key) {
++ ret = dict_set_str (set_dict, key, "enable");
++ if (ret)
++ goto out;
++ }
+
+- ret = volgen_set_shd_key_enable(set_dict, volinfo->tier_info.hot_type);
+- if (ret)
+- goto out;
++ key = volgen_get_shd_key (volinfo->tier_info.hot_type);
++ if (key) {
++ ret = dict_set_str (set_dict, key, "enable");
++ if (ret)
++ goto out;
++ }
+ out:
+ return ret;
+ }
+--
+1.8.3.1
+
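The reverted flow is small enough to restate: look up the shd key for the tier's cold or hot type and, only when a key exists, set it to "enable" with plain dict_set_str(). A compact illustrative wrapper (the patch inlines this logic twice, as shown above; the wrapper name is an assumption):

/* Illustrative wrapper: volgen_get_shd_key() returns NULL for volume
 * types that have no self-heal key, in which case nothing is set and
 * the call still succeeds. */
static int
set_shd_key_enable(dict_t *set_dict, int type)
{
    char *key = volgen_get_shd_key(type);

    if (!key)
        return 0; /* type needs no shd key; not an error */

    return dict_set_str(set_dict, key, "enable");
}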
diff --git a/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch b/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch
new file mode 100644
index 0000000..64d198d
--- /dev/null
+++ b/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch
@@ -0,0 +1,4617 @@
+From 2cede2b87fb3e3e0673be9cf67e7d6eec3f7879c Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Mon, 25 Feb 2019 10:05:32 +0530
+Subject: [PATCH 098/124] mgmt/shd: Implement multiplexing in self heal daemon
+
+Problem:
+
+The shd daemon is per node, which means it creates one graph
+with all volumes on it. While this is great for utilizing
+resources, it is not so good in terms of performance and manageability.
+
+Self-heal daemons don't have the capability to automatically
+reconfigure their graphs, so each time any configuration
+change happens to the volumes (replicate/disperse), we need to restart
+shd to bring the changes into the graph.
+
+Because of this, all ongoing heals for all other volumes have to be
+stopped in the middle and restarted all over again.
+
+Solution:
+
+This change makes shd a per-volume daemon, so that the graph
+is generated for each volume.
+
+When we want to start/reconfigure shd for a volume, we first search
+for an existing shd running on the node; if there is none, we
+start a new process. If a shd daemon is already running, then
+we simply detach the volume's graph and reattach the updated
+graph for that volume. This won't touch any of the ongoing operations
+for any other volumes on the shd daemon.
+
+Example of an shd graph when it is per volume
+
+ graph
+ -----------------------
+ | debug-iostat |
+ -----------------------
+ / | \
+ / | \
+ --------- --------- ----------
+ | AFR-1 | | AFR-2 | | AFR-3 |
+ -------- --------- ----------
+
+A running shd daemon with 3 volumes will be like-->
+
+ graph
+ -----------------------
+ | debug-iostat |
+ -----------------------
+ / | \
+ / | \
+ ------------ ------------ ------------
+ | volume-1 | | volume-2 | | volume-3 |
+ ------------ ------------ ------------
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22075
+
+>Change-Id: Idcb2698be3eeb95beaac47125565c93370afbd99
+>fixes: bz#1659708
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I38b6e2bb62edd818e460ccf6e9740364dc676876
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167830
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfsd/src/glusterfsd-messages.h | 3 +-
+ glusterfsd/src/glusterfsd-mgmt.c | 238 ++++++-
+ glusterfsd/src/glusterfsd.c | 18 -
+ libglusterfs/src/defaults-tmpl.c | 19 +-
+ libglusterfs/src/glusterfs/glusterfs.h | 7 +
+ libglusterfs/src/glusterfs/libglusterfs-messages.h | 4 +-
+ libglusterfs/src/glusterfs/xlator.h | 3 +
+ libglusterfs/src/graph.c | 451 +++++++++++++
+ libglusterfs/src/graph.y | 3 +
+ libglusterfs/src/libglusterfs.sym | 5 +
+ libglusterfs/src/statedump.c | 3 +-
+ libglusterfs/src/xlator.c | 16 +
+ rpc/rpc-lib/src/protocol-common.h | 2 +
+ tests/basic/glusterd/heald.t | 49 +-
+ .../reset-brick-and-daemons-follow-quorum.t | 8 +-
+ tests/volume.rc | 6 +-
+ xlators/mgmt/glusterd/src/Makefile.am | 6 +-
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 42 ++
+ xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 11 +-
+ xlators/mgmt/glusterd/src/glusterd-handshake.c | 21 +
+ xlators/mgmt/glusterd/src/glusterd-mem-types.h | 1 +
+ xlators/mgmt/glusterd/src/glusterd-messages.h | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 84 ++-
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 140 ++++
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 45 ++
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 540 ++++++++++++++--
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.h | 17 +-
+ xlators/mgmt/glusterd/src/glusterd-sm.c | 12 +-
+ xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-statedump.c | 3 -
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 715 ++++++++++++++++++++-
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.h | 40 +-
+ xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 246 +++++--
+ xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 27 +
+ xlators/mgmt/glusterd/src/glusterd-tier.c | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-tierd-svc.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 137 +++-
+ xlators/mgmt/glusterd/src/glusterd-utils.h | 4 +
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 60 +-
+ xlators/mgmt/glusterd/src/glusterd-volgen.h | 11 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd.c | 12 +-
+ xlators/mgmt/glusterd/src/glusterd.h | 30 +-
+ xlators/protocol/client/src/client.c | 31 +-
+ 47 files changed, 2810 insertions(+), 291 deletions(-)
+ create mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+ create mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+
+diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h
+index 602cd9e..94312a5 100644
+--- a/glusterfsd/src/glusterfsd-messages.h
++++ b/glusterfsd/src/glusterfsd-messages.h
+@@ -35,6 +35,7 @@ GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3,
+ glusterfsd_msg_28, glusterfsd_msg_29, glusterfsd_msg_30,
+ glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33,
+ glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36,
+- glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39);
++ glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39,
++ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42);
+
+ #endif /* !_GLUSTERFSD_MESSAGES_H_ */
+diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
+index a6c3db5..a89c980 100644
+--- a/glusterfsd/src/glusterfsd-mgmt.c
++++ b/glusterfsd/src/glusterfsd-mgmt.c
+@@ -48,7 +48,20 @@ int
+ glusterfs_graph_unknown_options(glusterfs_graph_t *graph);
+ int
+ emancipate(glusterfs_ctx_t *ctx, int ret);
++int
++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
++ char *volfile_id, char *checksum);
++int
++glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
++ gf_volfile_t *volfile_obj, char *checksum);
++int
++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
++ char *volfile_id, char *checksum);
++int
++glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj);
+
++gf_boolean_t
++mgmt_is_multiplexed_daemon(char *name);
+ int
+ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+ {
+@@ -62,6 +75,96 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+ }
+
+ int
++mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id)
++{
++ glusterfs_ctx_t *ctx = NULL;
++ int ret = 0;
++ FILE *tmpfp = NULL;
++ gf_volfile_t *volfile_obj = NULL;
++ gf_volfile_t *volfile_tmp = NULL;
++ char sha256_hash[SHA256_DIGEST_LENGTH] = {
++ 0,
++ };
++ int tmp_fd = -1;
++ char template[] = "/tmp/glfs.volfile.XXXXXX";
++
++ glusterfs_compute_sha256((const unsigned char *)volfile, size, sha256_hash);
++ ctx = THIS->ctx;
++ LOCK(&ctx->volfile_lock);
++ {
++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
++ {
++ if (!strcmp(volfile_id, volfile_obj->vol_id)) {
++ if (!memcmp(sha256_hash, volfile_obj->volfile_checksum,
++ sizeof(volfile_obj->volfile_checksum))) {
++ UNLOCK(&ctx->volfile_lock);
++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_40,
++ "No change in volfile, continuing");
++ goto out;
++ }
++ volfile_tmp = volfile_obj;
++ break;
++ }
++ }
++
++ /* coverity[secure_temp] mkstemp uses 0600 as the mode */
++ tmp_fd = mkstemp(template);
++ if (-1 == tmp_fd) {
++ UNLOCK(&ctx->volfile_lock);
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
++ "Unable to create temporary file: %s", template);
++ ret = -1;
++ goto out;
++ }
++
++ /* Calling unlink so that when the file is closed or program
++ * terminates the temporary file is deleted.
++ */
++ ret = sys_unlink(template);
++ if (ret < 0) {
++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
++ "Unable to delete temporary file: %s", template);
++ ret = 0;
++ }
++
++ tmpfp = fdopen(tmp_fd, "w+b");
++ if (!tmpfp) {
++ ret = -1;
++ goto unlock;
++ }
++
++ fwrite(volfile, size, 1, tmpfp);
++ fflush(tmpfp);
++ if (ferror(tmpfp)) {
++ ret = -1;
++ goto unlock;
++ }
++
++ if (!volfile_tmp) {
++ /* There is no checksum in the list, which means simple attach
++ * the volfile
++ */
++ ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id,
++ sha256_hash);
++ goto unlock;
++ }
++ ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj,
++ sha256_hash);
++ if (ret < 0) {
++ gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!");
++ }
++ }
++unlock:
++ UNLOCK(&ctx->volfile_lock);
++out:
++ if (tmpfp)
++ fclose(tmpfp);
++ else if (tmp_fd != -1)
++ sys_close(tmp_fd);
++ return ret;
++}
++
++int
+ mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data)
+ {
+ return 0;
+@@ -966,6 +1069,110 @@ glusterfs_handle_attach(rpcsvc_request_t *req)
+ }
+
+ int
++glusterfs_handle_svc_attach(rpcsvc_request_t *req)
++{
++ int32_t ret = -1;
++ gd1_mgmt_brick_op_req xlator_req = {
++ 0,
++ };
++ xlator_t *this = NULL;
++ glusterfs_ctx_t *ctx = NULL;
++
++ GF_ASSERT(req);
++ this = THIS;
++ GF_ASSERT(this);
++
++ ctx = this->ctx;
++ ret = xdr_to_generic(req->msg[0], &xlator_req,
++ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
++
++ if (ret < 0) {
++ /*failed to decode msg;*/
++ req->rpc_err = GARBAGE_ARGS;
++ goto out;
++ }
++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41,
++ "received attach "
++ "request for volfile-id=%s",
++ xlator_req.name);
++ ret = 0;
++
++ if (ctx->active) {
++ ret = mgmt_process_volfile(xlator_req.input.input_val,
++ xlator_req.input.input_len, xlator_req.name);
++ } else {
++ gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42,
++ "got attach for %s but no active graph", xlator_req.name);
++ }
++out:
++ if (xlator_req.input.input_val)
++ free(xlator_req.input.input_val);
++ if (xlator_req.name)
++ free(xlator_req.name);
++ glusterfs_translator_info_response_send(req, ret, NULL, NULL);
++ return 0;
++}
++
++int
++glusterfs_handle_svc_detach(rpcsvc_request_t *req)
++{
++ gd1_mgmt_brick_op_req xlator_req = {
++ 0,
++ };
++ ssize_t ret;
++ glusterfs_ctx_t *ctx = NULL;
++ gf_volfile_t *volfile_obj = NULL;
++ gf_volfile_t *volfile_tmp = NULL;
++
++ ret = xdr_to_generic(req->msg[0], &xlator_req,
++ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
++ if (ret < 0) {
++ req->rpc_err = GARBAGE_ARGS;
++ return -1;
++ }
++ ctx = glusterfsd_ctx;
++
++ LOCK(&ctx->volfile_lock);
++ {
++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
++ {
++ if (!strcmp(xlator_req.name, volfile_obj->vol_id)) {
++ volfile_tmp = volfile_obj;
++ break;
++ }
++ }
++
++ if (!volfile_tmp) {
++ UNLOCK(&ctx->volfile_lock);
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_41,
++ "can't detach %s - not found", xlator_req.name);
++ /*
++ * Used to be -ENOENT. However, the caller asked us to
++ * make sure it's down and if it's already down that's
++ * good enough.
++ */
++ ret = 0;
++ goto out;
++ }
++ ret = glusterfs_process_svc_detach(ctx, volfile_tmp);
++ if (ret) {
++ UNLOCK(&ctx->volfile_lock);
++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, glusterfsd_msg_41,
++ "Could not detach "
++ "old graph. Aborting the reconfiguration operation");
++ goto out;
++ }
++ }
++ UNLOCK(&ctx->volfile_lock);
++out:
++ glusterfs_terminate_response_send(req, ret);
++ free(xlator_req.name);
++ xlator_req.name = NULL;
++
++ return 0;
++}
++
++int
+ glusterfs_handle_dump_metrics(rpcsvc_request_t *req)
+ {
+ int32_t ret = -1;
+@@ -1849,6 +2056,13 @@ rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = {
+
+ [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", GLUSTERD_DUMP_METRICS,
+ glusterfs_handle_dump_metrics, NULL, 0, DRC_NA},
++
++ [GLUSTERD_SVC_ATTACH] = {"ATTACH CLIENT", GLUSTERD_SVC_ATTACH,
++ glusterfs_handle_svc_attach, NULL, 0, DRC_NA},
++
++ [GLUSTERD_SVC_DETACH] = {"DETACH CLIENT", GLUSTERD_SVC_DETACH,
++ glusterfs_handle_svc_detach, NULL, 0, DRC_NA},
++
+ };
+
+ struct rpcsvc_program glusterfs_mop_prog = {
+@@ -1996,14 +2210,17 @@ mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ }
+
+ volfile:
+- ret = 0;
+ size = rsp.op_ret;
++ volfile_id = frame->local;
++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
++ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id);
++ goto post_graph_mgmt;
++ }
+
++ ret = 0;
+ glusterfs_compute_sha256((const unsigned char *)rsp.spec, size,
+ sha256_hash);
+
+- volfile_id = frame->local;
+-
+ LOCK(&ctx->volfile_lock);
+ {
+ locked = 1;
+@@ -2105,6 +2322,7 @@ volfile:
+ }
+
+ INIT_LIST_HEAD(&volfile_tmp->volfile_list);
++ volfile_tmp->graph = ctx->active;
+ list_add(&volfile_tmp->volfile_list, &ctx->volfile_list);
+ snprintf(volfile_tmp->vol_id, sizeof(volfile_tmp->vol_id), "%s",
+ volfile_id);
+@@ -2116,6 +2334,7 @@ volfile:
+
+ locked = 0;
+
++post_graph_mgmt:
+ if (!is_mgmt_rpc_reconnect) {
+ need_emancipate = 1;
+ glusterfs_mgmt_pmap_signin(ctx);
+@@ -2269,10 +2488,21 @@ glusterfs_volfile_fetch(glusterfs_ctx_t *ctx)
+ {
+ xlator_t *server_xl = NULL;
+ xlator_list_t *trav;
+- int ret;
++ gf_volfile_t *volfile_obj = NULL;
++ int ret = 0;
+
+ LOCK(&ctx->volfile_lock);
+ {
++ if (ctx->active &&
++ mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
++ {
++ ret |= glusterfs_volfile_fetch_one(ctx, volfile_obj->vol_id);
++ }
++ UNLOCK(&ctx->volfile_lock);
++ return ret;
++ }
++
+ if (ctx->active) {
+ server_xl = ctx->active->first;
+ if (strcmp(server_xl->type, "protocol/server") != 0) {
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index c983882..3aa89ca 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -2589,24 +2589,6 @@ out:
+ #endif
+
+ int
+-glusterfs_graph_fini(glusterfs_graph_t *graph)
+-{
+- xlator_t *trav = NULL;
+-
+- trav = graph->first;
+-
+- while (trav) {
+- if (trav->init_succeeded) {
+- trav->fini(trav);
+- trav->init_succeeded = 0;
+- }
+- trav = trav->next;
+- }
+-
+- return 0;
+-}
+-
+-int
+ glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp)
+ {
+ glusterfs_graph_t *graph = NULL;
+diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
+index 5bf64e8..82e7f78 100644
+--- a/libglusterfs/src/defaults-tmpl.c
++++ b/libglusterfs/src/defaults-tmpl.c
+@@ -127,6 +127,12 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
+ GF_UNUSED int ret = 0;
+ xlator_t *victim = data;
+
++ glusterfs_graph_t *graph = NULL;
++
++ GF_VALIDATE_OR_GOTO("notify", this, out);
++ graph = this->graph;
++ GF_VALIDATE_OR_GOTO(this->name, graph, out);
++
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ case GF_EVENT_PARENT_DOWN: {
+@@ -159,6 +165,17 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
+ xlator_notify(parent->xlator, event, this, NULL);
+ parent = parent->next;
+ }
++
++ if (event == GF_EVENT_CHILD_DOWN &&
++ !(this->ctx && this->ctx->master) && (graph->top == this)) {
++ /* Make sure this is not a daemon with master xlator */
++ pthread_mutex_lock(&graph->mutex);
++ {
++ graph->used = 0;
++ pthread_cond_broadcast(&graph->child_down_cond);
++ }
++ pthread_mutex_unlock(&graph->mutex);
++ }
+ } break;
+ case GF_EVENT_UPCALL: {
+ xlator_list_t *parent = this->parents;
+@@ -205,7 +222,7 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
+ * nothing to do with readability.
+ */
+ }
+-
++out:
+ return 0;
+ }
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 7c6af09..deec5ba 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -590,6 +590,10 @@ struct _glusterfs_graph {
+ int used; /* Should be set when fuse gets
+ first CHILD_UP */
+ uint32_t volfile_checksum;
++ void *last_xl; /* Stores the last xl of the graph, as of now only populated
++ in client multiplexed code path */
++ pthread_mutex_t mutex;
++ pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
+ };
+ typedef struct _glusterfs_graph glusterfs_graph_t;
+
+@@ -732,6 +736,7 @@ typedef struct {
+ char volfile_checksum[SHA256_DIGEST_LENGTH];
+ char vol_id[NAME_MAX + 1];
+ struct list_head volfile_list;
++ glusterfs_graph_t *graph;
+
+ } gf_volfile_t;
+
+@@ -815,4 +820,6 @@ gf_free_mig_locks(lock_migration_info_t *locks);
+
+ int
+ glusterfs_read_secure_access_file(void);
++int
++glusterfs_graph_fini(glusterfs_graph_t *graph);
+ #endif /* _GLUSTERFS_H */
+diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h
+index 1b72f6d..ea2aa60 100644
+--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h
++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h
+@@ -109,6 +109,8 @@ GLFS_MSGID(
+ LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST,
+ LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED,
+ LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG,
+- LG_MSG_XXH64_TO_GFID_FAILED);
++ LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE,
++ LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED,
++ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED);
+
+ #endif /* !_LG_MESSAGES_H_ */
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index 7002657..06152ec 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -1087,4 +1087,7 @@ handle_default_options(xlator_t *xl, dict_t *options);
+
+ void
+ gluster_graph_take_reference(xlator_t *tree);
++
++gf_boolean_t
++mgmt_is_multiplexed_daemon(char *name);
+ #endif /* _XLATOR_H */
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index bb5e67a..a492dd8 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -114,6 +114,53 @@ out:
+ return cert_depth;
+ }
+
++xlator_t *
++glusterfs_get_last_xlator(glusterfs_graph_t *graph)
++{
++ xlator_t *trav = graph->first;
++ if (!trav)
++ return NULL;
++
++ while (trav->next)
++ trav = trav->next;
++
++ return trav;
++}
++
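++/* Unlink child xlator cxl from parent pxl's children list and re-stitch
++ * the flat next/prev chain of the multiplexed graph around the removed
++ * subtree. Returns the next child after the unlinked one, if any. */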
++xlator_t *
++glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl)
++{
++ xlator_list_t *unlink = NULL;
++ xlator_list_t *prev = NULL;
++ xlator_list_t **tmp = NULL;
++ xlator_t *next_child = NULL;
++ xlator_t *xl = NULL;
++
++ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) {
++ if ((*tmp)->xlator == cxl) {
++ unlink = *tmp;
++ *tmp = (*tmp)->next;
++ if (*tmp)
++ next_child = (*tmp)->xlator;
++ break;
++ }
++ prev = *tmp;
++ }
++
++ if (!prev)
++ xl = pxl;
++ else if (prev->xlator)
++ xl = prev->xlator->graph->last_xl;
++
++ if (xl)
++ xl->next = next_child;
++ if (next_child)
++ next_child->prev = xl;
++
++ GF_FREE(unlink);
++ return next_child;
++}
++
+ int
+ glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl)
+ {
+@@ -1092,6 +1139,8 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph)
+ ret = xlator_tree_free_memacct(graph->first);
+
+ list_del_init(&graph->list);
++ pthread_mutex_destroy(&graph->mutex);
++ pthread_cond_destroy(&graph->child_down_cond);
+ GF_FREE(graph);
+
+ return ret;
+@@ -1134,6 +1183,25 @@ out:
+ }
+
+ int
++glusterfs_graph_fini(glusterfs_graph_t *graph)
++{
++ xlator_t *trav = NULL;
++
++ trav = graph->first;
++
++ while (trav) {
++ if (trav->init_succeeded) {
++ trav->cleanup_starting = 1;
++ trav->fini(trav);
++ trav->init_succeeded = 0;
++ }
++ trav = trav->next;
++ }
++
++ return 0;
++}
++
++int
+ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
+ glusterfs_graph_t **newgraph)
+ {
+@@ -1256,3 +1324,386 @@ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
+
+ return 0;
+ }
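++/* Tear down the synthetic parent graph that hosts multiplexed services:
++ * destroy its xlator tree and reset ctx->active. */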
++int
++glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx,
++ glusterfs_graph_t *parent_graph)
++{
++ if (parent_graph) {
++ if (parent_graph->first) {
++ xlator_destroy(parent_graph->first);
++ }
++ ctx->active = NULL;
++ GF_FREE(parent_graph);
++ parent_graph = NULL;
++ }
++ return 0;
++}
++
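++/* Detached-thread entry point that tears down one multiplexed graph:
++ * drive it down with PARENT_DOWN, wait for CHILD_DOWN and for all
++ * notifiers to drain, then fini and destroy it. */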
++void *
++glusterfs_graph_cleanup(void *arg)
++{
++ glusterfs_graph_t *graph = NULL;
++ glusterfs_ctx_t *ctx = THIS->ctx;
++ int ret = -1;
++ graph = arg;
++
++ if (!graph)
++ return NULL;
++
++ /* To destroy the graph, first send a GF_EVENT_PARENT_DOWN,
++ * then wait for GF_EVENT_CHILD_DOWN on the top xl. Once we
++ * have the GF_EVENT_CHILD_DOWN event, proceed to fini.
++ *
++ * During the fini call, this takes the last unref on the rpc
++ * and rpc_transport object.
++ */
++ if (graph->first)
++ default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first);
++
++ ret = pthread_mutex_lock(&graph->mutex);
++ if (ret != 0) {
++ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
++ "Failed to aquire a lock");
++ goto out;
++ }
++ /* Check and wait for CHILD_DOWN on the top xlator */
++ while (graph->used) {
++ ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex);
++ if (ret != 0)
++ gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED,
++ "cond wait failed ");
++ }
++
++ ret = pthread_mutex_unlock(&graph->mutex);
++ if (ret != 0) {
++ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
++ "Failed to release a lock");
++ }
++
++ /* Though we got a child down on the top xlator, we have to wait
++ * until all notifiers exit, because no threads should still be
++ * accessing xl variables.
++ */
++ pthread_mutex_lock(&ctx->notify_lock);
++ {
++ while (ctx->notifying)
++ pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock);
++ }
++ pthread_mutex_unlock(&ctx->notify_lock);
++
++ glusterfs_graph_fini(graph);
++ glusterfs_graph_destroy(graph);
++out:
++ return NULL;
++}
++
++glusterfs_graph_t *
++glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name,
++ char *type)
++{
++ glusterfs_graph_t *parent_graph = NULL;
++ xlator_t *ixl = NULL;
++ int ret = -1;
++ parent_graph = GF_CALLOC(1, sizeof(*parent_graph),
++ gf_common_mt_glusterfs_graph_t);
++ if (!parent_graph)
++ goto out;
++
++ INIT_LIST_HEAD(&parent_graph->list);
++
++ ctx->active = parent_graph;
++ ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t);
++ if (!ixl)
++ goto out;
++
++ ixl->ctx = ctx;
++ ixl->graph = parent_graph;
++ ixl->options = dict_new();
++ if (!ixl->options)
++ goto out;
++
++ ixl->name = gf_strdup(name);
++ if (!ixl->name)
++ goto out;
++
++ ixl->is_autoloaded = 1;
++
++ if (xlator_set_type(ixl, type) == -1) {
++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
++ "%s (%s) set type failed", name, type);
++ goto out;
++ }
++
++ glusterfs_graph_set_first(parent_graph, ixl);
++ parent_graph->top = ixl;
++ ixl = NULL;
++
++ gettimeofday(&parent_graph->dob, NULL);
++ fill_uuid(parent_graph->graph_uuid, 128);
++ parent_graph->id = ctx->graph_id++;
++ ret = 0;
++out:
++ if (ixl)
++ xlator_destroy(ixl);
++
++ if (ret) {
++ glusterfs_muxsvc_cleanup_parent(ctx, parent_graph);
++ parent_graph = NULL;
++ }
++ return parent_graph;
++}
++
++int
++glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
++{
++ xlator_t *last_xl = NULL;
++ glusterfs_graph_t *graph = NULL;
++ glusterfs_graph_t *parent_graph = NULL;
++ pthread_t clean_graph = {
++ 0,
++ };
++ int ret = -1;
++ xlator_t *xl = NULL;
++
++ if (!ctx || !ctx->active || !volfile_obj)
++ goto out;
++ parent_graph = ctx->active;
++ graph = volfile_obj->graph;
++ if (graph && graph->first)
++ xl = graph->first;
++
++ last_xl = graph->last_xl;
++ if (last_xl)
++ last_xl->next = NULL;
++ if (!xl || xl->cleanup_starting)
++ goto out;
++
++ xl->cleanup_starting = 1;
++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
++ "detaching child %s", volfile_obj->vol_id);
++
++ list_del_init(&volfile_obj->volfile_list);
++ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
++ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
++ parent_graph->xl_count -= graph->xl_count;
++ parent_graph->leaf_count -= graph->leaf_count;
++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
++ parent_graph->id++;
++ ret = 0;
++out:
++ if (!ret) {
++ list_del_init(&volfile_obj->volfile_list);
++ if (graph) {
++ ret = gf_thread_create_detached(
++ &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean");
++ if (ret) {
++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
++ LG_MSG_GRAPH_CLEANUP_FAILED,
++ "%s failed to create clean "
++ "up thread",
++ volfile_obj->vol_id);
++ ret = 0;
++ }
++ }
++ GF_FREE(volfile_obj);
++ }
++ return ret;
++}
++
++int
++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
++ char *volfile_id, char *checksum)
++{
++ glusterfs_graph_t *graph = NULL;
++ glusterfs_graph_t *parent_graph = NULL;
++ glusterfs_graph_t *clean_graph = NULL;
++ int ret = -1;
++ xlator_t *xl = NULL;
++ xlator_t *last_xl = NULL;
++ gf_volfile_t *volfile_obj = NULL;
++ pthread_t thread_id = {
++ 0,
++ };
++
++ if (!ctx)
++ goto out;
++ parent_graph = ctx->active;
++ graph = glusterfs_graph_construct(fp);
++ if (!graph) {
++ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
++ "failed to construct the graph");
++ goto out;
++ }
++ graph->last_xl = glusterfs_get_last_xlator(graph);
++
++ for (xl = graph->first; xl; xl = xl->next) {
++ if (strcmp(xl->type, "mount/fuse") == 0) {
++ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL,
++ LG_MSG_GRAPH_ATTACH_FAILED,
++ "fuse xlator cannot be specified in volume file");
++ goto out;
++ }
++ }
++
++ graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph));
++ xl = graph->first;
++ /* TODO: memory leaks everywhere; need to free the graph in case of error */
++ if (glusterfs_graph_prepare(graph, ctx, xl->name)) {
++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
++ "failed to prepare graph for xlator %s", xl->name);
++ ret = -1;
++ goto out;
++ } else if (glusterfs_graph_init(graph)) {
++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
++ "failed to initialize graph for xlator %s", xl->name);
++ ret = -1;
++ goto out;
++ } else if (glusterfs_graph_parent_up(graph)) {
++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
++ "failed to link the graphs for xlator %s ", xl->name);
++ ret = -1;
++ goto out;
++ }
++
++ if (!parent_graph) {
++ parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd",
++ "debug/io-stats");
++ if (!parent_graph)
++ goto out;
++ ((xlator_t *)parent_graph->top)->next = xl;
++ clean_graph = parent_graph;
++ } else {
++ last_xl = parent_graph->last_xl;
++ if (last_xl)
++ last_xl->next = xl;
++ xl->prev = last_xl;
++ }
++ parent_graph->last_xl = graph->last_xl;
++
++ ret = glusterfs_xlator_link(parent_graph->top, xl);
++ if (ret) {
++ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
++ "parent up notification failed");
++ goto out;
++ }
++ parent_graph->xl_count += graph->xl_count;
++ parent_graph->leaf_count += graph->leaf_count;
++ parent_graph->id++;
++
++ if (!volfile_obj) {
++ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
++ if (!volfile_obj) {
++ ret = -1;
++ goto out;
++ }
++ }
++
++ graph->used = 1;
++ parent_graph->id++;
++ list_add(&graph->list, &ctx->graphs);
++ INIT_LIST_HEAD(&volfile_obj->volfile_list);
++ volfile_obj->graph = graph;
++ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
++ volfile_id);
++ memcpy(volfile_obj->volfile_checksum, checksum,
++ sizeof(volfile_obj->volfile_checksum));
++ list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list);
++
++ gf_log_dump_graph(fp, graph);
++ graph = NULL;
++
++ ret = 0;
++out:
++ if (ret) {
++ if (graph) {
++ gluster_graph_take_reference(graph->first);
++ ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup,
++ graph, "graph_clean");
++ if (ret) {
++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
++ LG_MSG_GRAPH_CLEANUP_FAILED,
++ "%s failed to create clean "
++ "up thread",
++ volfile_id);
++ ret = 0;
++ }
++ }
++ if (clean_graph)
++ glusterfs_muxsvc_cleanup_parent(ctx, clean_graph);
++ }
++ return ret;
++}
++
++int
++glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
++ gf_volfile_t *volfile_obj, char *checksum)
++{
++ glusterfs_graph_t *oldvolfile_graph = NULL;
++ glusterfs_graph_t *newvolfile_graph = NULL;
++
++ int ret = -1;
++
++ if (!ctx) {
++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
++ "ctx is NULL");
++ goto out;
++ }
++
++ /* Change the message id */
++ if (!volfile_obj) {
++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
++ "failed to get volfile object");
++ goto out;
++ }
++
++ oldvolfile_graph = volfile_obj->graph;
++ if (!oldvolfile_graph) {
++ goto out;
++ }
++
++ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
++
++ if (!newvolfile_graph) {
++ goto out;
++ }
++ newvolfile_graph->last_xl = glusterfs_get_last_xlator(newvolfile_graph);
++
++ glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first);
++
++ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
++ ret = glusterfs_process_svc_detach(ctx, volfile_obj);
++ if (ret) {
++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL,
++ LG_MSG_GRAPH_CLEANUP_FAILED,
++ "Could not detach "
++ "old graph. Aborting the reconfiguration operation");
++ goto out;
++ }
++ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp,
++ volfile_obj->vol_id, checksum);
++ goto out;
++ }
++
++ gf_msg_debug("glusterfsd-mgmt", 0,
++ "Only options have changed in the"
++ " new graph");
++
++ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
++ if (ret) {
++ gf_msg_debug("glusterfsd-mgmt", 0,
++ "Could not reconfigure "
++ "new options in old graph");
++ goto out;
++ }
++ memcpy(volfile_obj->volfile_checksum, checksum,
++ sizeof(volfile_obj->volfile_checksum));
++
++ ret = 0;
++out:
++
++ if (newvolfile_graph)
++ glusterfs_graph_destroy(newvolfile_graph);
++
++ return ret;
++}
+diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y
+index 5b92985..c60ff38 100644
+--- a/libglusterfs/src/graph.y
++++ b/libglusterfs/src/graph.y
+@@ -542,6 +542,9 @@ glusterfs_graph_new ()
+
+ INIT_LIST_HEAD (&graph->list);
+
++ pthread_mutex_init(&graph->mutex, NULL);
++ pthread_cond_init(&graph->child_down_cond, NULL);
++
+ gettimeofday (&graph->dob, NULL);
+
+ return graph;
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index e33d5cf..fa2025e 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1154,3 +1154,8 @@ gf_changelog_register_generic
+ gf_gfid_generate_from_xxh64
+ find_xlator_option_in_cmd_args_t
+ gf_d_type_from_ia_type
++glusterfs_graph_fini
++glusterfs_process_svc_attach_volfp
++glusterfs_mux_volfile_reconfigure
++glusterfs_process_svc_detach
++mgmt_is_multiplexed_daemon
+diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
+index d18b50f..0cf80c0 100644
+--- a/libglusterfs/src/statedump.c
++++ b/libglusterfs/src/statedump.c
+@@ -810,7 +810,8 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+ if (!ctx)
+ goto out;
+
+- if (ctx && ctx->active) {
++ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) &&
++ (ctx && ctx->active)) {
+ top = ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ brick_count++;
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index dc1e887..5d6f8d2 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -1463,3 +1463,19 @@ gluster_graph_take_reference(xlator_t *tree)
+ }
+ return;
+ }
++
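++/* Return true if the given process name is a daemon that glusterd
++ * multiplexes into a single process (currently only glustershd). */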
++gf_boolean_t
++mgmt_is_multiplexed_daemon(char *name)
++{
++ const char *mux_daemons[] = {"glustershd", NULL};
++ int i;
++
++ if (!name)
++ return _gf_false;
++
++ for (i = 0; mux_daemons[i]; i++) {
++ if (!strcmp(name, mux_daemons[i]))
++ return _gf_true;
++ }
++ return _gf_false;
++}
+diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
+index 779878f..7275d75 100644
+--- a/rpc/rpc-lib/src/protocol-common.h
++++ b/rpc/rpc-lib/src/protocol-common.h
+@@ -245,6 +245,8 @@ enum glusterd_brick_procnum {
+ GLUSTERD_NODE_BITROT,
+ GLUSTERD_BRICK_ATTACH,
+ GLUSTERD_DUMP_METRICS,
++ GLUSTERD_SVC_ATTACH,
++ GLUSTERD_SVC_DETACH,
+ GLUSTERD_BRICK_MAXVALUE,
+ };
+
+diff --git a/tests/basic/glusterd/heald.t b/tests/basic/glusterd/heald.t
+index ca112ad..7dae3c3 100644
+--- a/tests/basic/glusterd/heald.t
++++ b/tests/basic/glusterd/heald.t
+@@ -7,11 +7,16 @@
+ # Covers enable/disable at the moment. Will be enhanced later to include
+ # the other commands as well.
+
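++# Count glustershd processes matching the given pid (prints 1 if the
++# pid is still running, 0 otherwise).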
++function is_pid_running {
++ local pid=$1
++ num=`ps auxww | grep glustershd | grep $pid | grep -v grep | wc -l`
++ echo $num
++}
++
+ cleanup;
+ TEST glusterd
+ TEST pidof glusterd
+
+-volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol"
+ #Commands should fail when volume doesn't exist
+ TEST ! $CLI volume heal non-existent-volume enable
+ TEST ! $CLI volume heal non-existent-volume disable
+@@ -20,51 +25,55 @@ TEST ! $CLI volume heal non-existent-volume disable
+ # volumes
+ TEST $CLI volume create dist $H0:$B0/dist
+ TEST $CLI volume start dist
+-TEST "[ -z $(get_shd_process_pid)]"
++TEST "[ -z $(get_shd_process_pid dist)]"
+ TEST ! $CLI volume heal dist enable
+ TEST ! $CLI volume heal dist disable
+
+ # Commands should work on replicate/disperse volume.
+ TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
+-TEST "[ -z $(get_shd_process_pid)]"
++TEST "[ -z $(get_shd_process_pid r2)]"
+ TEST $CLI volume start r2
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
+ TEST $CLI volume heal r2 enable
+ EXPECT "enable" volume_option r2 "cluster.self-heal-daemon"
+-EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++volfiler2=$(gluster system:: getwd)"/vols/r2/r2-shd.vol"
++EXPECT "enable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
++pid=$( get_shd_process_pid r2 )
+ TEST $CLI volume heal r2 disable
+ EXPECT "disable" volume_option r2 "cluster.self-heal-daemon"
+-EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++EXPECT "disable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT "1" is_pid_running $pid
+
+ # Commands should work on disperse volume.
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2
+ TEST $CLI volume start ec2
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
+ TEST $CLI volume heal ec2 enable
+ EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+-EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++volfileec2=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol"
++EXPECT "enable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
++pid=$(get_shd_process_pid ec2)
+ TEST $CLI volume heal ec2 disable
+ EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+-EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
++EXPECT "disable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT "1" is_pid_running $pid
+
+ #Check that shd graph is rewritten correctly on volume stop/start
+-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
+-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
++
++EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
+ TEST $CLI volume stop r2
+-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
+-EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
+ TEST $CLI volume stop ec2
+ # When both the volumes are stopped glustershd volfile is not modified just the
+ # process is stopped
+-TEST "[ -z $(get_shd_process_pid) ]"
++TEST "[ -z $(get_shd_process_pid dist) ]"
++TEST "[ -z $(get_shd_process_pid ec2) ]"
+
+ TEST $CLI volume start r2
+-EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
+-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
+
+ TEST $CLI volume set r2 self-heal-daemon on
+ TEST $CLI volume set r2 cluster.self-heal-daemon off
+diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
+index cdb1a33..e6e65c4 100644
+--- a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
++++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
+@@ -55,9 +55,9 @@ TEST kill_glusterd 1
+ #Bring back 1st glusterd
+ TEST $glusterd_1
+
+-# We need to wait till PROCESS_UP_TIMEOUT and then check shd service does not
+-# come up on node 2
+-sleep $PROCESS_UP_TIMEOUT
+-EXPECT "N" shd_up_status_2
++# We need to wait till PROCESS_UP_TIMEOUT and then check that the shd
++# service started on node 2, because once glusterd regains quorum it
++# will restart all volume-level daemons.
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
+
+ cleanup;
+diff --git a/tests/volume.rc b/tests/volume.rc
+index 87ca958..289b197 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -237,11 +237,13 @@ function ec_child_up_count_shd {
+ }
+
+ function get_shd_process_pid {
+- ps auxww | grep glusterfs | grep -E "glustershd/glustershd.pid" | awk '{print $2}' | head -1
++ local vol=$1
++ ps auxww | grep "process-name\ glustershd" | awk '{print $2}' | head -1
+ }
+
+ function generate_shd_statedump {
+- generate_statedump $(get_shd_process_pid)
++ local vol=$1
++ generate_statedump $(get_shd_process_pid $vol)
+ }
+
+ function generate_nfs_statedump {
+diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
+index 5fe5156..11ae189 100644
+--- a/xlators/mgmt/glusterd/src/Makefile.am
++++ b/xlators/mgmt/glusterd/src/Makefile.am
+@@ -18,11 +18,12 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \
+ glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \
+ glusterd-snapshot-utils.c glusterd-conn-mgmt.c \
+- glusterd-proc-mgmt.c glusterd-svc-mgmt.c glusterd-shd-svc.c \
++ glusterd-proc-mgmt.c glusterd-svc-mgmt.c \
+ glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \
+ glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \
+ glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \
+ glusterd-reset-brick.c glusterd-tierd-svc.c glusterd-tierd-svc-helper.c \
++ glusterd-shd-svc.c glusterd-shd-svc-helper.c \
+ glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c
+
+
+@@ -38,11 +39,12 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
+ glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \
+ glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \
+ glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \
+- glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \
++ glusterd-svc-mgmt.h glusterd-nfs-svc.h \
+ glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \
+ glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \
+ glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \
+ glusterd-tierd-svc.h glusterd-tierd-svc-helper.h \
++ glusterd-shd-svc.h glusterd-shd-svc-helper.h \
+ glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \
+ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+index ad9a572..042a805 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+@@ -2863,7 +2863,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
+ }
+
+ if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) {
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL,
+ "Unable to reconfigure NFS-Server");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+index e80e152..052438c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+@@ -132,3 +132,45 @@ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
+ glusterd_set_socket_filepath(sockfilepath, socketpath, len);
+ return 0;
+ }
++
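++/* Common rpc notify handler for multiplexed service connections: frees
++ * the mux proc on RPC_CLNT_DESTROY and otherwise forwards the event to
++ * the proc's own notify callback under attach_lock. */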
++int
++__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
++ rpc_clnt_event_t event, void *data)
++{
++ glusterd_conf_t *conf = THIS->private;
++ glusterd_svc_proc_t *mux_proc = mydata;
++ int ret = -1;
++
++ /* Silently ignoring this error, exactly like the current
++ * implementation */
++ if (!mux_proc)
++ return 0;
++
++ if (event == RPC_CLNT_DESTROY) {
++ /* RPC_CLNT_DESTROY will only be called after mux_proc is detached
++ * from the list, so it is safe to handle it without the lock.
++ * Processing RPC_CLNT_DESTROY under a lock would lead to deadlock.
++ */
++ if (mux_proc->data) {
++ glusterd_volinfo_unref(mux_proc->data);
++ mux_proc->data = NULL;
++ }
++ GF_FREE(mux_proc);
++ ret = 0;
++ } else {
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ ret = mux_proc->notify(mux_proc, event);
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
++ }
++ return ret;
++}
++
++int
++glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
++ rpc_clnt_event_t event, void *data)
++{
++ return glusterd_big_locked_notify(rpc, mydata, event, data,
++ __glusterd_muxsvc_conn_common_notify);
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
+index 602c0ba..d1c4607 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
+@@ -43,9 +43,11 @@ glusterd_conn_disconnect(glusterd_conn_t *conn);
+ int
+ glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
++int
++glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
++ rpc_clnt_event_t event, void *data);
+
+ int32_t
+ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
+ int len);
+-
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+index f9c8617..b01fd4d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+@@ -370,6 +370,7 @@ int
+ glusterd_gfproxydsvc_restart()
+ {
+ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_volinfo_t *tmp = NULL;
+ int ret = -1;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+@@ -380,7 +381,7 @@ glusterd_gfproxydsvc_restart()
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
+ {
+ /* Start per volume gfproxyd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 528993c..1cb9013 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -5928,6 +5928,11 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+
+ GF_FREE(rebal_data);
+
++ fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count,
++ volinfo->shd.svc.online ? "Online" : "Offline");
++ fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count,
++ volinfo->shd.svc.inited ? "True" : "False");
++
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ ret = glusterd_volume_get_hot_tier_type_str(volinfo,
+ &hot_tier_type_str);
+@@ -5997,12 +6002,6 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+
+ fprintf(fp, "\n[Services]\n");
+
+- if (priv->shd_svc.inited) {
+- fprintf(fp, "svc%d.name: %s\n", ++count, priv->shd_svc.name);
+- fprintf(fp, "svc%d.online_status: %s\n\n", count,
+- priv->shd_svc.online ? "Online" : "Offline");
+- }
+-
+ if (priv->nfs_svc.inited) {
+ fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name);
+ fprintf(fp, "svc%d.online_status: %s\n\n", count,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+index 5599a63..1ba58c3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+@@ -30,6 +30,7 @@
+ #include "rpcsvc.h"
+ #include "rpc-common-xdr.h"
+ #include "glusterd-gfproxyd-svc-helper.h"
++#include "glusterd-shd-svc-helper.h"
+
+ extern struct rpc_clnt_program gd_peer_prog;
+ extern struct rpc_clnt_program gd_mgmt_prog;
+@@ -328,6 +329,26 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
+ goto out;
+ }
+
++ volid_ptr = strstr(volume_id, "shd/");
++ if (volid_ptr) {
++ volid_ptr = strchr(volid_ptr, '/');
++ if (!volid_ptr) {
++ ret = -1;
++ goto out;
++ }
++ volid_ptr++;
++
++ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
++ if (ret == -1) {
++ gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
++ goto out;
++ }
++
++ glusterd_svc_build_shd_volfile_path(volinfo, path, path_len);
++ ret = 0;
++ goto out;
++ }
++
+ volid_ptr = strstr(volume_id, "/snaps/");
+ if (volid_ptr) {
+ ret = get_snap_volname_and_volinfo(volid_ptr, &volname, &volinfo);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+index 7a784db..17052ce 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
++++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+@@ -51,6 +51,7 @@ typedef enum gf_gld_mem_types_ {
+ gf_gld_mt_missed_snapinfo_t,
+ gf_gld_mt_snap_create_args_t,
+ gf_gld_mt_glusterd_brick_proc_t,
++ gf_gld_mt_glusterd_svc_proc_t,
+ gf_gld_mt_end,
+ } gf_gld_mem_types_t;
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
+index c7b3ca8..424e15f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
+@@ -298,6 +298,8 @@ GLFS_MSGID(
+ GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE,
+ GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL,
+ GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED,
+- GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL);
++ GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL,
++ GD_MSG_SHD_START_FAIL, GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL,
++ GD_MSG_ATTACH_INFO, GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL);
+
+ #endif /* !_GLUSTERD_MESSAGES_H_ */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index df8a6ab..95f9707 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -44,6 +44,7 @@
+ #include "glusterd-snapshot-utils.h"
+ #include "glusterd-svc-mgmt.h"
+ #include "glusterd-svc-helper.h"
++#include "glusterd-shd-svc-helper.h"
+ #include "glusterd-shd-svc.h"
+ #include "glusterd-nfs-svc.h"
+ #include "glusterd-quotad-svc.h"
+@@ -2223,6 +2224,11 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
+ if (ret)
+ goto out;
+
++ svc = &(volinfo->shd.svc);
++ ret = svc->reconfigure(volinfo);
++ if (ret)
++ goto out;
++
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+@@ -2237,7 +2243,7 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret)
+ goto out;
+ }
+@@ -2693,6 +2699,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
+ if (ret)
+ goto out;
+
++ svc = &(volinfo->shd.svc);
++ ret = svc->reconfigure(volinfo);
++ if (ret)
++ goto out;
++
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+@@ -2706,7 +2717,7 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
+ }
+ }
+ if (svcs_reconfigure) {
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart "
+@@ -3091,6 +3102,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ if (ret)
+ goto out;
+
++ svc = &(volinfo->shd.svc);
++ ret = svc->reconfigure(volinfo);
++ if (ret)
++ goto out;
++
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+@@ -3106,7 +3122,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart services");
+@@ -3139,6 +3155,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ if (ret)
+ goto out;
+
++ svc = &(volinfo->shd.svc);
++ ret = svc->reconfigure(volinfo);
++ if (ret)
++ goto out;
++
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+@@ -3154,7 +3175,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart services");
+@@ -3361,7 +3382,7 @@ glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret)
+ goto out;
+ }
+@@ -3644,14 +3665,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ other_count++;
+ node_count++;
+
+- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, 0,
+- vol_opts);
+- if (ret)
+- goto out;
+- other_count++;
+- node_count++;
+-
+ } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
+ ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0,
+ vol_opts);
+@@ -3685,6 +3698,12 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ goto out;
+ other_count++;
+ node_count++;
++ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
++ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index);
++ if (ret)
++ goto out;
++ other_count++;
++ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
+ if (ret)
+@@ -3747,6 +3766,19 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ node_count++;
+ }
+
++ if (glusterd_is_shd_compatible_volume(volinfo)) {
++ shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
++ if (shd_enabled) {
++ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict,
++ other_index);
++ if (ret)
++ goto out;
++ other_count++;
++ other_index++;
++ node_count++;
++ }
++ }
++
+ nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
+ _gf_false);
+ if (!nfs_disabled) {
+@@ -3759,18 +3791,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ node_count++;
+ }
+
+- if (glusterd_is_shd_compatible_volume(volinfo))
+- shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
+- if (shd_enabled) {
+- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict,
+- other_index, vol_opts);
+- if (ret)
+- goto out;
+- other_count++;
+- node_count++;
+- other_index++;
+- }
+-
+ if (glusterd_is_volume_quota_enabled(volinfo)) {
+ ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict,
+ other_index, vol_opts);
+@@ -6875,16 +6895,18 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
++ glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
++ svc = &(volinfo->shd.svc);
+
+ switch (heal_op) {
+ case GF_SHD_OP_INDEX_SUMMARY:
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+- if (!priv->shd_svc.online) {
++ if (!svc->online) {
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
+ "Received "
+@@ -6905,7 +6927,7 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
+ break;
+
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+- if (!priv->shd_svc.online) {
++ if (!svc->online) {
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
+ "Received "
+@@ -7040,7 +7062,7 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr,
+ ret = -1;
+ goto out;
+ } else {
+- pending_node->node = &(priv->shd_svc);
++ pending_node->node = &(volinfo->shd.svc);
+ pending_node->type = GD_NODE_SHD;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+@@ -7174,6 +7196,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
+ glusterd_pending_node_t *pending_node = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
++ glusterd_svc_t *svc = NULL;
+
+ GF_ASSERT(dict);
+
+@@ -7269,7 +7292,8 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
+
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+- if (!priv->shd_svc.online) {
++ svc = &(volinfo->shd.svc);
++ if (!svc->online) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED,
+ "Self-heal daemon is not running");
+@@ -7281,7 +7305,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
+ ret = -1;
+ goto out;
+ }
+- pending_node->node = &(priv->shd_svc);
++ pending_node->node = svc;
+ pending_node->type = GD_NODE_SHD;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+new file mode 100644
+index 0000000..9196758
+--- /dev/null
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+@@ -0,0 +1,140 @@
++/*
++ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include "glusterd.h"
++#include "glusterd-utils.h"
++#include "glusterd-shd-svc-helper.h"
++#include "glusterd-messages.h"
++#include "glusterd-volgen.h"
++
++void
++glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
++ int path_len)
++{
++ char sockfilepath[PATH_MAX] = {
++ 0,
++ };
++ char rundir[PATH_MAX] = {
++ 0,
++ };
++ int32_t len = 0;
++ glusterd_conf_t *priv = THIS->private;
++
++ if (!priv)
++ return;
++
++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
++ len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir,
++ uuid_utoa(MY_UUID));
++ if ((len < 0) || (len >= sizeof(sockfilepath))) {
++ sockfilepath[0] = 0;
++ }
++
++ glusterd_set_socket_filepath(sockfilepath, path, path_len);
++}
++
++void
++glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
++ int path_len)
++{
++ char rundir[PATH_MAX] = {
++ 0,
++ };
++ glusterd_conf_t *priv = THIS->private;
++
++ if (!priv)
++ return;
++
++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
++
++ snprintf(path, path_len, "%s/%s-shd.pid", rundir, volinfo->volname);
++}
++
++void
++glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
++ int path_len)
++{
++ char workdir[PATH_MAX] = {
++ 0,
++ };
++ glusterd_conf_t *priv = THIS->private;
++
++ if (!priv)
++ return;
++
++ GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv);
++
++ snprintf(path, path_len, "%s/%s-shd.vol", workdir, volinfo->volname);
++}
++
++void
++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len)
++{
++ snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname);
++}
++
++void
++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len)
++{
++ snprintf(logfile, len, "%s/shd.log", logdir);
++}
++
++void
++glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
++{
++ glusterd_svc_proc_t *svc_proc = NULL;
++ glusterd_svc_t *svc = NULL;
++ glusterd_conf_t *conf = NULL;
++ gf_boolean_t need_unref = _gf_false;
++ rpc_clnt_t *rpc = NULL;
++
++ conf = THIS->private;
++ if (!conf)
++ return;
++
++ GF_VALIDATE_OR_GOTO(THIS->name, conf, out);
++ GF_VALIDATE_OR_GOTO(THIS->name, shd, out);
++
++ svc = &shd->svc;
++ shd->attached = _gf_false;
++
++ if (svc->conn.rpc) {
++ rpc_clnt_unref(svc->conn.rpc);
++ svc->conn.rpc = NULL;
++ }
++
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ svc_proc = svc->svc_proc;
++ svc->svc_proc = NULL;
++ svc->inited = _gf_false;
++ cds_list_del_init(&svc->mux_svc);
++ glusterd_unlink_file(svc->proc.pidfile);
++
++ if (svc_proc && cds_list_empty(&svc_proc->svcs)) {
++ cds_list_del_init(&svc_proc->svc_proc_list);
++ /* We cannot free the svc_proc list from here, because
++ * pending events on the rpc may still access the
++ * corresponding svc_proc. Unrefing the rpc and then
++ * cleaning up the memory is therefore carried out from
++ * the notify function upon RPC_CLNT_DESTROY.
++ */
++ need_unref = _gf_true;
++ rpc = svc_proc->rpc;
++ svc_proc->rpc = NULL;
++ }
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
++ /*rpc unref has to be performed outside the lock*/
++ if (need_unref && rpc)
++ rpc_clnt_unref(rpc);
++out:
++ return;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+new file mode 100644
+index 0000000..c70702c
+--- /dev/null
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+@@ -0,0 +1,45 @@
++/*
++ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef _GLUSTERD_SHD_SVC_HELPER_H_
++#define _GLUSTERD_SHD_SVC_HELPER_H_
++
++#include "glusterd.h"
++#include "glusterd-svc-mgmt.h"
++
++void
++glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
++ int path_len);
++
++void
++glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
++ int path_len);
++
++void
++glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
++ int path_len);
++
++void
++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len);
++
++void
++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len);
++
++void
++glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd);
++
++int
++glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
++ glusterd_svc_t *svc, int flags);
++
++int
++glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo);
++
++#endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 69e27cb..937ea30 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -13,9 +13,10 @@
+ #include "glusterd.h"
+ #include "glusterd-utils.h"
+ #include "glusterd-volgen.h"
+-#include "glusterd-svc-mgmt.h"
+ #include "glusterd-shd-svc.h"
++#include "glusterd-shd-svc-helper.h"
+ #include "glusterd-svc-helper.h"
++#include "glusterd-store.h"
+
+ #define GD_SHD_PROCESS_NAME "--process-name"
+ char *shd_svc_name = "glustershd";
+@@ -23,27 +24,145 @@ char *shd_svc_name = "glustershd";
+ void
+ glusterd_shdsvc_build(glusterd_svc_t *svc)
+ {
++ int ret = -1;
++ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
++ if (ret < 0)
++ return;
++
++ CDS_INIT_LIST_HEAD(&svc->mux_svc);
+ svc->manager = glusterd_shdsvc_manager;
+ svc->start = glusterd_shdsvc_start;
+- svc->stop = glusterd_svc_stop;
++ svc->stop = glusterd_shdsvc_stop;
++ svc->reconfigure = glusterd_shdsvc_reconfigure;
+ }
+
+ int
+-glusterd_shdsvc_init(glusterd_svc_t *svc)
++glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
++ glusterd_svc_proc_t *mux_svc)
+ {
+- return glusterd_svc_init(svc, shd_svc_name);
++ int ret = -1;
++ char rundir[PATH_MAX] = {
++ 0,
++ };
++ char sockpath[PATH_MAX] = {
++ 0,
++ };
++ char pidfile[PATH_MAX] = {
++ 0,
++ };
++ char volfile[PATH_MAX] = {
++ 0,
++ };
++ char logdir[PATH_MAX] = {
++ 0,
++ };
++ char logfile[PATH_MAX] = {
++ 0,
++ };
++ char volfileid[256] = {0};
++ glusterd_svc_t *svc = NULL;
++ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_conf_t *priv = NULL;
++ glusterd_muxsvc_conn_notify_t notify = NULL;
++ xlator_t *this = NULL;
++ char *volfileserver = NULL;
++ int32_t len = 0;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
++
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++ volinfo = data;
++ GF_VALIDATE_OR_GOTO(this->name, data, out);
++ GF_VALIDATE_OR_GOTO(this->name, mux_svc, out);
++
++ svc = &(volinfo->shd.svc);
++
++ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
++ if (ret < 0)
++ goto out;
++
++ notify = glusterd_muxsvc_common_rpc_notify;
++ glusterd_store_perform_node_state_store(volinfo);
++
++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
++ glusterd_svc_create_rundir(rundir);
++
++ glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir));
++ glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile));
++
++ /* Initialize the connection mgmt */
++ if (mux_conn && mux_svc->rpc) {
++ /* multiplexed svc */
++ svc->conn.frame_timeout = mux_conn->frame_timeout;
++ /* This will be unrefed from glusterd_shd_svcproc_cleanup */
++ svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc);
++ ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s",
++ mux_conn->sockpath);
++ } else {
++ ret = mkdir_p(logdir, 0755, _gf_true);
++ if ((ret == -1) && (EEXIST != errno)) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
++ "Unable to create logdir %s", logdir);
++ goto out;
++ }
++
++ glusterd_svc_build_shd_socket_filepath(volinfo, sockpath,
++ sizeof(sockpath));
++ ret = glusterd_muxsvc_conn_init(&(svc->conn), mux_svc, sockpath, 600,
++ notify);
++ if (ret)
++ goto out;
++ /* This will be unrefed when the last svc is detached from the list */
++ if (!mux_svc->rpc)
++ mux_svc->rpc = rpc_clnt_ref(svc->conn.rpc);
++ }
++
++ /* Initialize the process mgmt */
++ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
++ glusterd_svc_build_shd_volfile_path(volinfo, volfile, PATH_MAX);
++ len = snprintf(volfileid, sizeof(volfileid), "shd/%s", volinfo->volname);
++ if ((len < 0) || (len >= sizeof(volfileid))) {
++ ret = -1;
++ goto out;
++ }
++
++ if (dict_get_strn(this->options, "transport.socket.bind-address",
++ SLEN("transport.socket.bind-address"),
++ &volfileserver) != 0) {
++ volfileserver = "localhost";
++ }
++ ret = glusterd_proc_init(&(svc->proc), shd_svc_name, pidfile, logdir,
++ logfile, volfile, volfileid, volfileserver);
++ if (ret)
++ goto out;
++
++out:
++ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
++ return ret;
+ }
+
+-static int
+-glusterd_shdsvc_create_volfile()
++int
++glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
+ {
+ char filepath[PATH_MAX] = {
+ 0,
+ };
++
+ int ret = -1;
+- glusterd_conf_t *conf = THIS->private;
+ dict_t *mod_dict = NULL;
+
++ glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX);
++ if (!glusterd_is_shd_compatible_volume(volinfo)) {
++ /* If the volfile exists, delete it. This case happens when we
++ * change from replica/ec to distribute.
++ */
++ (void)glusterd_unlink_file(filepath);
++ ret = 0;
++ goto out;
++ }
+ mod_dict = dict_new();
+ if (!mod_dict)
+ goto out;
+@@ -64,9 +183,7 @@ glusterd_shdsvc_create_volfile()
+ if (ret)
+ goto out;
+
+- glusterd_svc_build_volfile_path(shd_svc_name, conf->workdir, filepath,
+- sizeof(filepath));
+- ret = glusterd_create_global_volfile(build_shd_graph, filepath, mod_dict);
++ ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfile");
+@@ -81,26 +198,89 @@ out:
+ return ret;
+ }
+
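++/* Return true only when every shd-compatible volume attached to this
++ * svc's multiplexed process is stopped. */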
++gf_boolean_t
++glusterd_svcs_shd_compatible_volumes_stopped(glusterd_svc_t *svc)
++{
++ glusterd_svc_proc_t *svc_proc = NULL;
++ glusterd_shdsvc_t *shd = NULL;
++ glusterd_svc_t *temp_svc = NULL;
++ glusterd_volinfo_t *volinfo = NULL;
++ gf_boolean_t comp = _gf_false;
++ glusterd_conf_t *conf = THIS->private;
++
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ svc_proc = svc->svc_proc;
++ if (!svc_proc)
++ goto unlock;
++ cds_list_for_each_entry(temp_svc, &svc_proc->svcs, mux_svc)
++ {
++ /* Get volinfo->shd from svc object */
++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
++ if (!shd) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
++ "Failed to get shd object "
++ "from shd service");
++ goto unlock;
++ }
++
++ /* Get volinfo from shd */
++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
++ if (!volinfo) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++ "Failed to get volinfo from "
++ "from shd");
++ goto unlock;
++ }
++ if (!glusterd_is_shd_compatible_volume(volinfo))
++ continue;
++ if (volinfo->status == GLUSTERD_STATUS_STARTED)
++ goto unlock;
++ }
++ comp = _gf_true;
++ }
++unlock:
++ pthread_mutex_unlock(&conf->attach_lock);
++out:
++ return comp;
++}
++
+ int
+ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ {
+- int ret = 0;
++ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+
+- if (!svc->inited) {
+- ret = glusterd_shdsvc_init(svc);
+- if (ret) {
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
+- "Failed to init shd "
+- "service");
+- goto out;
+- } else {
+- svc->inited = _gf_true;
+- gf_msg_debug(THIS->name, 0, "shd service initialized");
++ volinfo = data;
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
++
++ if (volinfo)
++ glusterd_volinfo_ref(volinfo);
++
++ ret = glusterd_shdsvc_create_volfile(volinfo);
++ if (ret)
++ goto out;
++
++ if (!glusterd_is_shd_compatible_volume(volinfo)) {
++ ret = 0;
++ if (svc->inited) {
++ /* This means the shd was running for this volume and it has
++ * now been converted to a non-shd volume, so just stop the shd.
++ */
++ ret = svc->stop(svc, SIGTERM);
+ }
++ goto out;
+ }
+
+- volinfo = data;
++ ret = glusterd_shd_svc_mux_init(volinfo, svc);
++ if (ret) {
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
++ "Failed to init shd service");
++ goto out;
++ }
+
+ /* If all the volumes are stopped or all shd compatible volumes
+ * are stopped then stop the service if:
+@@ -110,31 +290,26 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ * - volinfo is NULL or
+ * - volinfo is present and volume is shd compatible
+ */
+- if (glusterd_are_all_volumes_stopped() ||
+- glusterd_all_shd_compatible_volumes_stopped()) {
+- if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) {
+- ret = svc->stop(svc, SIGTERM);
+- }
+- } else {
+- if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) {
+- ret = glusterd_shdsvc_create_volfile();
+- if (ret)
+- goto out;
+-
+- ret = svc->stop(svc, SIGTERM);
+- if (ret)
+- goto out;
++ if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) {
++ /* TODO
++ * Take a lock and detach all svcs to stop the process;
++ * also reset the init flag.
++ */
++ ret = svc->stop(svc, SIGTERM);
++ } else if (volinfo) {
++ ret = svc->stop(svc, SIGTERM);
++ if (ret)
++ goto out;
+
++ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = svc->start(svc, flags);
+ if (ret)
+ goto out;
+-
+- ret = glusterd_conn_connect(&(svc->conn));
+- if (ret)
+- goto out;
+ }
+ }
+ out:
++ if (volinfo)
++ glusterd_volinfo_unref(volinfo);
+ if (ret)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+@@ -143,7 +318,7 @@ out:
+ }
+
+ int
+-glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
++glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
+ {
+ int ret = -1;
+ char glusterd_uuid_option[PATH_MAX] = {0};
+@@ -178,31 +353,136 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+ goto out;
+
+ ret = glusterd_svc_start(svc, flags, cmdline);
++ if (ret)
++ goto out;
+
++ ret = glusterd_conn_connect(&(svc->conn));
+ out:
+ if (cmdline)
+ dict_unref(cmdline);
++ return ret;
++}
+
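++/* Recovery path: if attaching a shd graph to an existing multiplexed
++ * process fails, clean up the stale svc state and start a fresh
++ * process for this volume's shd. */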
++int
++glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
++ glusterd_svc_t *svc, int flags)
++{
++ int ret = -1;
++ glusterd_svc_proc_t *mux_proc = NULL;
++ glusterd_conf_t *conf = NULL;
++
++ conf = THIS->private;
++
++ if (!conf || !volinfo || !svc)
++ return -1;
++ glusterd_shd_svcproc_cleanup(&volinfo->shd);
++ mux_proc = glusterd_svcprocess_new();
++ if (!mux_proc) {
++ return -1;
++ }
++ ret = glusterd_shdsvc_init(volinfo, NULL, mux_proc);
++ if (ret)
++ return -1;
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs);
++ svc->svc_proc = mux_proc;
++ cds_list_del_init(&svc->mux_svc);
++ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs);
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
++
++ ret = glusterd_new_shd_svc_start(svc, flags);
++ if (!ret) {
++ volinfo->shd.attached = _gf_true;
++ }
++ return ret;
++}
++
++int
++glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
++{
++ int ret = -1;
++ glusterd_shdsvc_t *shd = NULL;
++ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_conf_t *conf = NULL;
++
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ conf = THIS->private;
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++
++ /* Get volinfo->shd from svc object */
++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
++ if (!shd) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
++ "Failed to get shd object "
++ "from shd service");
++ return -1;
++ }
++
++ /* Get volinfo from shd */
++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
++ if (!volinfo) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++ "Failed to get volinfo from "
++ "from shd");
++ return -1;
++ }
++
++ if (volinfo->status != GLUSTERD_STATUS_STARTED)
++ return -1;
++
++ glusterd_volinfo_ref(volinfo);
++ if (!svc->inited) {
++ ret = glusterd_shd_svc_mux_init(volinfo, svc);
++ if (ret)
++ goto out;
++ }
++
++ if (shd->attached) {
++ ret = glusterd_attach_svc(svc, volinfo, flags);
++ if (ret) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++ "Failed to attach shd svc(volume=%s) to pid=%d. Starting"
++ "a new process",
++ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++ ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags);
++ }
++ goto out;
++ }
++ ret = glusterd_new_shd_svc_start(svc, flags);
++ if (!ret) {
++ shd->attached = _gf_true;
++ }
++out:
++ if (volinfo)
++ glusterd_volinfo_unref(volinfo);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+ }
+
+ int
+-glusterd_shdsvc_reconfigure()
++glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
+ {
+ int ret = -1;
+ xlator_t *this = NULL;
+- glusterd_conf_t *priv = NULL;
+ gf_boolean_t identical = _gf_false;
++ dict_t *mod_dict = NULL;
++ glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+- priv = this->private;
+- GF_VALIDATE_OR_GOTO(this->name, priv, out);
++ if (!volinfo) {
++ /* reconfigure will be called separately */
++ ret = 0;
++ goto out;
++ }
+
+- if (glusterd_all_shd_compatible_volumes_stopped())
++ glusterd_volinfo_ref(volinfo);
++ svc = &(volinfo->shd.svc);
++ if (glusterd_svcs_shd_compatible_volumes_stopped(svc))
+ goto manager;
+
+ /*
+@@ -210,8 +490,42 @@ glusterd_shdsvc_reconfigure()
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+- ret = glusterd_svc_check_volfile_identical(priv->shd_svc.name,
+- build_shd_graph, &identical);
++
++ if (!glusterd_is_shd_compatible_volume(volinfo)) {
++ if (svc->inited)
++ goto manager;
++
++ /* Nothing to do if not shd compatible */
++ ret = 0;
++ goto out;
++ }
++ mod_dict = dict_new();
++ if (!mod_dict)
++ goto out;
++
++ ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
++ if (ret)
++ goto out;
++
++ ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
++ if (ret)
++ goto out;
++
++ ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
++ if (ret)
++ goto out;
++
++ ret = dict_set_int32(mod_dict, "graph-check", 1);
++ if (ret)
++ goto out;
++
++ ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
++ if (ret)
++ goto out;
++
++ ret = glusterd_volume_svc_check_volfile_identical(
++ "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
++ &identical);
+ if (ret)
+ goto out;
+
+@@ -226,8 +540,9 @@ glusterd_shdsvc_reconfigure()
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+- ret = glusterd_svc_check_topology_identical(priv->shd_svc.name,
+- build_shd_graph, &identical);
++ ret = glusterd_volume_svc_check_topology_identical(
++ "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
++ &identical);
+ if (ret)
+ goto out;
+
+@@ -235,7 +550,7 @@ glusterd_shdsvc_reconfigure()
+ * options to shd volfile, so that shd will be reconfigured.
+ */
+ if (identical) {
+- ret = glusterd_shdsvc_create_volfile();
++ ret = glusterd_shdsvc_create_volfile(volinfo);
+ if (ret == 0) { /* Only if above PASSES */
+ ret = glusterd_fetchspec_notify(THIS);
+ }
+@@ -243,12 +558,129 @@ glusterd_shdsvc_reconfigure()
+ }
+ manager:
+ /*
+- * shd volfile's topology has been changed. shd server needs
+- * to be RESTARTED to ACT on the changed volfile.
++ * shd volfile's topology has been changed. volfile needs
++ * to be RECONFIGURED to ACT on the changed volfile.
+ */
+- ret = priv->shd_svc.manager(&(priv->shd_svc), NULL, PROC_START_NO_WAIT);
++ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+
+ out:
++ if (volinfo)
++ glusterd_volinfo_unref(volinfo);
++ if (mod_dict)
++ dict_unref(mod_dict);
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+ }
++
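++/* Invoked on glusterd startup (via glusterd_spawn_daemons) to bring up
++ * the shd svc of every started volume. volume_lock is dropped around
++ * each manager call, with a ref held on the current volinfo so it
++ * cannot vanish in the meantime. */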
++int
++glusterd_shdsvc_restart()
++{
++ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_volinfo_t *tmp = NULL;
++ int ret = -1;
++ xlator_t *this = THIS;
++ glusterd_conf_t *conf = NULL;
++ glusterd_svc_t *svc = NULL;
++
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++
++ conf = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, conf, out);
++
++ pthread_mutex_lock(&conf->volume_lock);
++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
++ {
++ glusterd_volinfo_ref(volinfo);
++ pthread_mutex_unlock(&conf->volume_lock);
++ /* Start per volume shd svc */
++ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
++ svc = &(volinfo->shd.svc);
++ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHD_START_FAIL,
++ "Couldn't start shd for "
++ "vol: %s on restart",
++ volinfo->volname);
++ gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s",
++ volinfo->volname, svc->name);
++ glusterd_volinfo_unref(volinfo);
++ goto out;
++ }
++ }
++ glusterd_volinfo_unref(volinfo);
++ pthread_mutex_lock(&conf->volume_lock);
++ }
++ pthread_mutex_unlock(&conf->volume_lock);
++out:
++ return ret;
++}
++
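++/* Stop the shd svc of one volume: if it is the last svc muxed into
++ * the process, stop the whole process; otherwise only send a detach
++ * request and leave the process running for the remaining volumes. */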
++int
++glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
++{
++ int ret = -1;
++ glusterd_svc_proc_t *svc_proc = NULL;
++ glusterd_shdsvc_t *shd = NULL;
++ glusterd_volinfo_t *volinfo = NULL;
++ gf_boolean_t empty = _gf_false;
++ glusterd_conf_t *conf = NULL;
++ int pid = -1;
++
++ conf = THIS->private;
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ svc_proc = svc->svc_proc;
++ GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out);
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++
++ /* Get volinfo->shd from svc object */
++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
++ if (!shd) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
++ "Failed to get shd object "
++ "from shd service");
++ return -1;
++ }
++
++ /* Get volinfo from shd */
++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
++ if (!volinfo) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++ "Failed to get volinfo from "
++ "from shd");
++ return -1;
++ }
++
++ glusterd_volinfo_ref(volinfo);
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ gf_is_service_running(svc->proc.pidfile, &pid);
++ cds_list_del_init(&svc->mux_svc);
++ empty = cds_list_empty(&svc_proc->svcs);
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
++ if (empty) {
++ /* Unref will happen when destroying the connection */
++ glusterd_volinfo_ref(volinfo);
++ svc_proc->data = volinfo;
++ ret = glusterd_svc_stop(svc, sig);
++ }
++ if (!empty && pid != -1) {
++ ret = glusterd_detach_svc(svc, volinfo, sig);
++ if (ret)
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
++ "shd service is failed to detach volume %s from pid %d",
++ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++ else
++ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS,
++ "Shd service is detached for volume %s from pid %d",
++ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++ }
++ svc->online = _gf_false;
++ (void)glusterd_unlink_file((char *)svc->proc.pidfile);
++ glusterd_shd_svcproc_cleanup(shd);
++ ret = 0;
++ glusterd_volinfo_unref(volinfo);
++out:
++ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
+index 775a9d4..55b409f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
+@@ -12,12 +12,20 @@
+ #define _GLUSTERD_SHD_SVC_H_
+
+ #include "glusterd-svc-mgmt.h"
++#include "glusterd.h"
++
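++/* Per-volume shd svc; 'attached' records whether the svc is currently
++ * multiplexed into a running shd process. */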
++typedef struct glusterd_shdsvc_ glusterd_shdsvc_t;
++struct glusterd_shdsvc_ {
++ glusterd_svc_t svc;
++ gf_boolean_t attached;
++};
+
+ void
+ glusterd_shdsvc_build(glusterd_svc_t *svc);
+
+ int
+-glusterd_shdsvc_init(glusterd_svc_t *svc);
++glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
++ glusterd_svc_proc_t *svc_proc);
+
+ int
+ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags);
+@@ -27,4 +35,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags);
+
+ int
+ glusterd_shdsvc_reconfigure();
++
++int
++glusterd_shdsvc_restart();
++
++int
++glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig);
++
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
+index 54a7bd1..943b1c6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
+@@ -748,6 +748,16 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv)
+ }
+ }
+
++ if (glusterd_is_shd_compatible_volume(volinfo)) {
++ svc = &(volinfo->shd.svc);
++ ret = svc->stop(svc, SIGTERM);
++ if (ret) {
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
++ "Failed "
++ "to stop shd daemon service");
++ }
++ }
++
+ if (glusterd_is_gfproxyd_enabled(volinfo)) {
+ svc = &(volinfo->gfproxyd.svc);
+ ret = svc->stop(svc, SIGTERM);
+@@ -775,7 +785,7 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv)
+ }
+
+ /*Reconfigure all daemon services upon peer detach*/
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(NULL);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "Failed to reconfigure all daemon services.");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
+index 56bab07..1da4076 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
+@@ -366,6 +366,7 @@ int
+ glusterd_snapdsvc_restart()
+ {
+ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_volinfo_t *tmp = NULL;
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+@@ -376,7 +377,7 @@ glusterd_snapdsvc_restart()
+ conf = this->private;
+ GF_ASSERT(conf);
+
+- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
+ {
+ /* Start per volume snapd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c
+index f5ecde7..69d4cf4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-statedump.c
++++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c
+@@ -202,9 +202,6 @@ glusterd_dump_priv(xlator_t *this)
+ gf_proc_dump_build_key(key, "glusterd", "ping-timeout");
+ gf_proc_dump_write(key, "%d", priv->ping_timeout);
+
+- gf_proc_dump_build_key(key, "glusterd", "shd.online");
+- gf_proc_dump_write(key, "%d", priv->shd_svc.online);
+-
+ gf_proc_dump_build_key(key, "glusterd", "nfs.online");
+ gf_proc_dump_write(key, "%d", priv->nfs_svc.online);
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index ca19a75..e42703c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -7,6 +7,7 @@
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
++#include <signal.h>
+
+ #include <glusterfs/globals.h>
+ #include <glusterfs/run.h>
+@@ -20,12 +21,14 @@
+ #include "glusterd-bitd-svc.h"
+ #include "glusterd-tierd-svc.h"
+ #include "glusterd-tierd-svc-helper.h"
++#include "glusterd-shd-svc-helper.h"
+ #include "glusterd-scrub-svc.h"
+ #include "glusterd-svc-helper.h"
+ #include <glusterfs/syscall.h>
++#include "glusterd-snapshot-utils.h"
+
+ int
+-glusterd_svcs_reconfigure()
++glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo)
+ {
+ int ret = 0;
+ xlator_t *this = THIS;
+@@ -43,9 +46,11 @@ glusterd_svcs_reconfigure()
+ goto out;
+
+ svc_name = "self-heald";
+- ret = glusterd_shdsvc_reconfigure();
+- if (ret)
+- goto out;
++ if (volinfo) {
++ ret = glusterd_shdsvc_reconfigure(volinfo);
++ if (ret)
++ goto out;
++ }
+
+ if (conf->op_version == GD_OP_VERSION_MIN)
+ goto out;
+@@ -69,7 +74,7 @@ out:
+ }
+
+ int
+-glusterd_svcs_stop()
++glusterd_svcs_stop(glusterd_volinfo_t *volinfo)
+ {
+ int ret = 0;
+ xlator_t *this = NULL;
+@@ -85,14 +90,16 @@ glusterd_svcs_stop()
+ if (ret)
+ goto out;
+
+- ret = glusterd_svc_stop(&(priv->shd_svc), SIGTERM);
+- if (ret)
+- goto out;
+-
+ ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM);
+ if (ret)
+ goto out;
+
++ if (volinfo) {
++ ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT);
++ if (ret)
++ goto out;
++ }
++
+ ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM);
+ if (ret)
+ goto out;
+@@ -121,12 +128,6 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo)
+ if (ret)
+ goto out;
+
+- ret = conf->shd_svc.manager(&(conf->shd_svc), volinfo, PROC_START_NO_WAIT);
+- if (ret == -EINVAL)
+- ret = 0;
+- if (ret)
+- goto out;
+-
+ if (conf->op_version == GD_OP_VERSION_MIN)
+ goto out;
+
+@@ -143,6 +144,15 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo)
+ if (ret)
+ goto out;
+
++ if (volinfo) {
++ ret = volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
++ PROC_START_NO_WAIT);
++ if (ret == -EINVAL)
++ ret = 0;
++ if (ret)
++ goto out;
++ }
++
+ ret = conf->scrub_svc.manager(&(conf->scrub_svc), NULL, PROC_START_NO_WAIT);
+ if (ret == -EINVAL)
+ ret = 0;
+@@ -269,3 +279,678 @@ out:
+ GF_FREE(tmpvol);
+ return ret;
+ }
++
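++/* Render the candidate volfile of a volume-level daemon into a
++ * mkstemp() temp file and compare it byte-for-byte with the one on
++ * disk, so callers can skip regeneration when nothing has changed. */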
++int
++glusterd_volume_svc_check_volfile_identical(
++ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo,
++ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical)
++{
++ char orgvol[PATH_MAX] = {
++ 0,
++ };
++ char *tmpvol = NULL;
++ xlator_t *this = NULL;
++ int ret = -1;
++ int need_unlink = 0;
++ int tmp_fd = -1;
++
++ this = THIS;
++
++ GF_VALIDATE_OR_GOTO(this->name, this, out);
++ GF_VALIDATE_OR_GOTO(this->name, identical, out);
++
++ /* This builds the volfile for volume-level daemons */
++ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol,
++ sizeof(orgvol));
++
++ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
++ if (ret < 0) {
++ goto out;
++ }
++
++ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
++ tmp_fd = mkstemp(tmpvol);
++ if (tmp_fd < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
++ "Unable to create temp file"
++ " %s:(%s)",
++ tmpvol, strerror(errno));
++ ret = -1;
++ goto out;
++ }
++
++ need_unlink = 1;
++
++ ret = builder(volinfo, tmpvol, mode_dict);
++ if (ret)
++ goto out;
++
++ ret = glusterd_check_files_identical(orgvol, tmpvol, identical);
++out:
++ if (need_unlink)
++ sys_unlink(tmpvol);
++
++ if (tmpvol != NULL)
++ GF_FREE(tmpvol);
++
++ if (tmp_fd >= 0)
++ sys_close(tmp_fd);
++
++ return ret;
++}
++
++int
++glusterd_volume_svc_check_topology_identical(
++ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo,
++ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical)
++{
++ char orgvol[PATH_MAX] = {
++ 0,
++ };
++ char *tmpvol = NULL;
++ glusterd_conf_t *conf = NULL;
++ xlator_t *this = THIS;
++ int ret = -1;
++ int tmpclean = 0;
++ int tmpfd = -1;
++
++ if ((!identical) || (!this) || (!this->private))
++ goto out;
++
++ conf = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, conf, out);
++
++ /* This builds the volfile for volume-level daemons */
++ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol,
++ sizeof(orgvol));
++ /* Create the temporary volfile */
++ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
++ if (ret < 0) {
++ goto out;
++ }
++
++ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
++ tmpfd = mkstemp(tmpvol);
++ if (tmpfd < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
++ "Unable to create temp file"
++ " %s:(%s)",
++ tmpvol, strerror(errno));
++ ret = -1;
++ goto out;
++ }
++
++ tmpclean = 1; /* SET the flag to unlink() tmpfile */
++
++ ret = builder(volinfo, tmpvol, mode_dict);
++ if (ret)
++ goto out;
++
++ /* Compare the topology of volfiles */
++ ret = glusterd_check_topology_identical(orgvol, tmpvol, identical);
++out:
++ if (tmpfd >= 0)
++ sys_close(tmpfd);
++ if (tmpclean)
++ sys_unlink(tmpvol);
++ if (tmpvol != NULL)
++ GF_FREE(tmpvol);
++ return ret;
++}
++
++void *
++__gf_find_compatible_svc(gd_node_type daemon)
++{
++ glusterd_svc_proc_t *svc_proc = NULL;
++ glusterd_svc_proc_t *return_proc = NULL;
++ glusterd_svc_t *parent_svc = NULL;
++ struct cds_list_head *svc_procs = NULL;
++ glusterd_conf_t *conf = NULL;
++ int pid = -1;
++
++ conf = THIS->private;
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++
++ if (daemon == GD_NODE_SHD) {
++ svc_procs = &conf->shd_procs;
++ if (!svc_procs)
++ goto out;
++ }
++
++ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
++ {
++ parent_svc = cds_list_entry(svc_proc->svcs.next, glusterd_svc_t,
++ mux_svc);
++ if (!return_proc)
++ return_proc = svc_proc;
++
++ /* If there is an already running shd daemon, select it. Otherwise
++ * select the first one.
++ */
++ if (parent_svc && gf_is_service_running(parent_svc->proc.pidfile, &pid))
++ return (void *)svc_proc;
++ /*
++ * Logic to select one process goes here. Currently there is only
++ * one shd_proc, so we select the first one.
++ */
++ }
++out:
++ return return_proc;
++}
++
++glusterd_svc_proc_t *
++glusterd_svcprocess_new()
++{
++ glusterd_svc_proc_t *new_svcprocess = NULL;
++
++ new_svcprocess = GF_CALLOC(1, sizeof(*new_svcprocess),
++ gf_gld_mt_glusterd_svc_proc_t);
++
++ if (!new_svcprocess)
++ return NULL;
++
++ CDS_INIT_LIST_HEAD(&new_svcprocess->svc_proc_list);
++ CDS_INIT_LIST_HEAD(&new_svcprocess->svcs);
++ new_svcprocess->notify = glusterd_muxsvc_common_rpc_notify;
++ return new_svcprocess;
++}
++
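++/* Decide where this volume's shd should run: reattach to the process
++ * recorded in the pidfile if it is still alive, otherwise pick any
++ * compatible running shd process, and only as a last resort create a
++ * new svc process entry. All of this happens under attach_lock. */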
++int
++glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
++{
++ int ret = -1;
++ glusterd_svc_proc_t *mux_proc = NULL;
++ glusterd_conn_t *mux_conn = NULL;
++ glusterd_conf_t *conf = NULL;
++ glusterd_svc_t *parent_svc = NULL;
++ int pid = -1;
++
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
++ conf = THIS->private;
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ if (!svc->inited) {
++ if (gf_is_service_running(svc->proc.pidfile, &pid)) {
++ /* Just connect is required, but we don't know what happens
++ * during the disconnect. So better to reattach.
++ */
++ mux_proc = __gf_find_compatible_svc_from_pid(GD_NODE_SHD, pid);
++ }
++
++ if (!mux_proc) {
++ if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) {
++ /* Stale pid file: kill the stray process and unlink it. */
++ kill(pid, SIGTERM);
++ sys_unlink(svc->proc.pidfile);
++ }
++ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD);
++ }
++ if (mux_proc) {
++ /* Take first entry from the process */
++ parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
++ mux_svc);
++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
++ mux_conn = &parent_svc->conn;
++ if (volinfo)
++ volinfo->shd.attached = _gf_true;
++ } else {
++ mux_proc = glusterd_svcprocess_new();
++ if (!mux_proc) {
++ ret = -1;
++ goto unlock;
++ }
++ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs);
++ }
++ svc->svc_proc = mux_proc;
++ cds_list_del_init(&svc->mux_svc);
++ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs);
++ ret = glusterd_shdsvc_init(volinfo, mux_conn, mux_proc);
++ if (ret) {
++ pthread_mutex_unlock(&conf->attach_lock);
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
++ "Failed to init shd "
++ "service");
++ goto out;
++ }
++ gf_msg_debug(THIS->name, 0, "shd service initialized");
++ svc->inited = _gf_true;
++ }
++ ret = 0;
++ }
++unlock:
++ pthread_mutex_unlock(&conf->attach_lock);
++out:
++ return ret;
++}
++
++void *
++__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid)
++{
++ glusterd_svc_proc_t *svc_proc = NULL;
++ struct cds_list_head *svc_procs = NULL;
++ glusterd_svc_t *svc = NULL;
++ pid_t mux_pid = -1;
++ glusterd_conf_t *conf = NULL;
++
++ conf = THIS->private;
++ if (!conf)
++ return NULL;
++
++ if (daemon == GD_NODE_SHD) {
++ svc_procs = &conf->shd_procs;
++ if (!svc_procs)
++ return NULL;
++ } /* Can be moved to switch when mux is implemented for other daemon; */
++
++ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
++ {
++ cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc)
++ {
++ if (gf_is_service_running(svc->proc.pidfile, &mux_pid)) {
++ if (mux_pid == pid) {
++ /*TODO
++ * inefficient loop, but at the moment, there is only
++ * one shd.
++ */
++ return svc_proc;
++ }
++ }
++ }
++ }
++ return NULL;
++}
++
++static int32_t
++my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
++{
++ call_frame_t *frame = v_frame;
++ xlator_t *this = NULL;
++ glusterd_conf_t *conf = NULL;
++
++ GF_VALIDATE_OR_GOTO("glusterd", frame, out);
++ this = frame->this;
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++ conf = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, conf, out);
++
++ GF_ATOMIC_DEC(conf->blockers);
++
++ STACK_DESTROY(frame->root);
++out:
++ return 0;
++}
++
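++/* Completion callback for an attach request: on success the svc is
++ * marked online (re-copying the pidfile if a restart raced with the
++ * attach); on failure we fall back to starting a dedicated shd
++ * process via glusterd_recover_shd_attach_failure. */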
++static int32_t
++glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
++ void *v_frame)
++{
++ call_frame_t *frame = v_frame;
++ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_shdsvc_t *shd = NULL;
++ glusterd_svc_t *svc = frame->cookie;
++ glusterd_svc_t *parent_svc = NULL;
++ glusterd_svc_proc_t *mux_proc = NULL;
++ glusterd_conf_t *conf = NULL;
++ int *flag = (int *)frame->local;
++ xlator_t *this = THIS;
++ int pid = -1;
++ int ret = -1;
++ gf_getspec_rsp rsp = {
++ 0,
++ };
++
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++ conf = this->private;
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++ GF_VALIDATE_OR_GOTO("glusterd", frame, out);
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++
++ frame->local = NULL;
++ frame->cookie = NULL;
++
++ if (!strcmp(svc->name, "glustershd")) {
++ /* Get volinfo->shd from svc object */
++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
++ if (!shd) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
++ "Failed to get shd object "
++ "from shd service");
++ goto out;
++ }
++
++ /* Get volinfo from shd */
++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
++ if (!volinfo) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++ "Failed to get volinfo from "
++ "from shd");
++ goto out;
++ }
++ }
++
++ if (!iov) {
++ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
++ "iov is NULL");
++ ret = -1;
++ goto out;
++ }
++
++ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
++ if (ret < 0) {
++ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
++ "XDR decoding error");
++ ret = -1;
++ goto out;
++ }
++
++ if (rsp.op_ret == 0) {
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ if (!strcmp(svc->name, "glustershd")) {
++ mux_proc = svc->svc_proc;
++ if (mux_proc &&
++ !gf_is_service_running(svc->proc.pidfile, &pid)) {
++ /*
++ * When svcs are restarting, there is a chance that the
++ * attached svc has not yet updated its pid because it was
++ * still at the connection stage. In that case we need to
++ * retry the pid file copy.
++ */
++ parent_svc = cds_list_entry(mux_proc->svcs.next,
++ glusterd_svc_t, mux_svc);
++ if (parent_svc)
++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
++ }
++ }
++ svc->online = _gf_true;
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
++ "svc %s of volume %s attached successfully to pid %d", svc->name,
++ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++ } else {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
++ "svc %s of volume %s failed to "
++ "attach to pid %d. Starting a new process",
++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++ if (!strcmp(svc->name, "glustershd")) {
++ glusterd_recover_shd_attach_failure(volinfo, svc, *flag);
++ }
++ }
++out:
++ if (flag) {
++ GF_FREE(flag);
++ }
++ GF_ATOMIC_DEC(conf->blockers);
++ STACK_DESTROY(frame->root);
++ return 0;
++}
++
++extern size_t
++build_volfile_path(char *volume_id, char *path, size_t path_len,
++ char *trusted_str);
++
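++/* Send a GLUSTERD_SVC_ATTACH or GLUSTERD_SVC_DETACH brick-op request
++ * over the daemon's rpc connection. For attach, the volume's volfile
++ * is read from disk and shipped inline as the request payload. */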
++int
++__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
++ struct rpc_clnt *rpc, char *volfile_id,
++ int op)
++{
++ int ret = -1;
++ struct iobuf *iobuf = NULL;
++ struct iobref *iobref = NULL;
++ struct iovec iov = {
++ 0,
++ };
++ char path[PATH_MAX] = {
++ '\0',
++ };
++ struct stat stbuf = {
++ 0,
++ };
++ int32_t spec_fd = -1;
++ size_t file_len = -1;
++ char *volfile_content = NULL;
++ ssize_t req_size = 0;
++ call_frame_t *frame = NULL;
++ gd1_mgmt_brick_op_req brick_req;
++ void *req = &brick_req;
++ void *errlbl = &&err;
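++ /* errlbl uses GCC's labels-as-values extension: it is advanced as
++ * each resource is acquired, so a single "goto *errlbl" releases
++ * exactly what has been taken so far. */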
++ struct rpc_clnt_connection *conn;
++ xlator_t *this = THIS;
++ glusterd_conf_t *conf = THIS->private;
++ extern struct rpc_clnt_program gd_brick_prog;
++ fop_cbk_fn_t cbkfn = my_callback;
++
++ if (!rpc) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PARAM_NULL,
++ "called with null rpc");
++ return -1;
++ }
++
++ conn = &rpc->conn;
++ if (!conn->connected || conn->disconnected) {
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED,
++ "not connected yet");
++ return -1;
++ }
++
++ brick_req.op = op;
++ brick_req.name = volfile_id;
++ brick_req.input.input_val = NULL;
++ brick_req.input.input_len = 0;
++
++ frame = create_frame(this, this->ctx->pool);
++ if (!frame) {
++ goto *errlbl;
++ }
++
++ if (op == GLUSTERD_SVC_ATTACH) {
++ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL);
++
++ ret = sys_stat(path, &stbuf);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
++ "Unable to stat %s (%s)", path, strerror(errno));
++ ret = -EINVAL;
++ goto *errlbl;
++ }
++
++ file_len = stbuf.st_size;
++ volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char);
++ if (!volfile_content) {
++ ret = -ENOMEM;
++ goto *errlbl;
++ }
++ spec_fd = open(path, O_RDONLY);
++ if (spec_fd < 0) {
++ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL,
++ "failed to read volfile %s", path);
++ ret = -EIO;
++ goto *errlbl;
++ }
++ ret = sys_read(spec_fd, volfile_content, file_len);
++ if (ret == file_len) {
++ brick_req.input.input_val = volfile_content;
++ brick_req.input.input_len = file_len;
++ } else {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
++ "read failed on path %s. File size=%" GF_PRI_SIZET
++ "read size=%d",
++ path, file_len, ret);
++ ret = -EIO;
++ goto *errlbl;
++ }
++
++ frame->cookie = svc;
++ frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int);
++ *((int *)frame->local) = flags;
++ cbkfn = glusterd_svc_attach_cbk;
++ }
++
++ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
++ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
++ if (!iobuf) {
++ goto *errlbl;
++ }
++ errlbl = &&maybe_free_iobuf;
++
++ iov.iov_base = iobuf->ptr;
++ iov.iov_len = iobuf_pagesize(iobuf);
++
++ iobref = iobref_new();
++ if (!iobref) {
++ goto *errlbl;
++ }
++ errlbl = &&free_iobref;
++
++ iobref_add(iobref, iobuf);
++ /*
++ * Drop our reference to the iobuf. The iobref should already have
++ * one after iobref_add, so when we unref that we'll free the iobuf as
++ * well. This allows us to pass just the iobref as frame->local.
++ */
++ iobuf_unref(iobuf);
++ /* Set the pointer to null so we don't free it on a later error. */
++ iobuf = NULL;
++
++ /* Create the xdr payload */
++ ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
++ if (ret == -1) {
++ goto *errlbl;
++ }
++ iov.iov_len = ret;
++
++ /* Send the msg */
++ GF_ATOMIC_INC(conf->blockers);
++ ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
++ iobref, frame, NULL, 0, NULL, 0, NULL);
++ GF_FREE(volfile_content);
++ if (spec_fd >= 0)
++ sys_close(spec_fd);
++ return ret;
++
++free_iobref:
++ iobref_unref(iobref);
++maybe_free_iobuf:
++ if (iobuf) {
++ iobuf_unref(iobuf);
++ }
++err:
++ GF_FREE(volfile_content);
++ if (spec_fd >= 0)
++ sys_close(spec_fd);
++ if (frame)
++ STACK_DESTROY(frame->root);
++ return -1;
++}
++
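++/* Ask a running shd process to adopt this volume's graph. The request
++ * is retried roughly once a second for up to 15 seconds, releasing
++ * big_lock while sleeping, because the rpc connection to the daemon
++ * may still be coming up when we get here. */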
++int
++glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags)
++{
++ glusterd_conf_t *conf = THIS->private;
++ int ret = -1;
++ int tries;
++ rpc_clnt_t *rpc = NULL;
++
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
++
++ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_ATTACH_INFO,
++ "adding svc %s (volume=%s) to existing "
++ "process with pid %d",
++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++
++ rpc = rpc_clnt_ref(svc->conn.rpc);
++ for (tries = 15; tries > 0; --tries) {
++ if (rpc) {
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ ret = __glusterd_send_svc_configure_req(
++ svc, flags, rpc, svc->proc.volfileid, GLUSTERD_SVC_ATTACH);
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
++ if (!ret) {
++ volinfo->shd.attached = _gf_true;
++ goto out;
++ }
++ }
++ /*
++ * It might not actually be safe to manipulate the lock
++ * like this, but if we don't then the connection can
++ * never actually complete and retries are useless.
++ * Unfortunately, all of the alternatives (e.g. doing
++ * all of this in a separate thread) are much more
++ * complicated and risky.
++ * TBD: see if there's a better way
++ */
++ synclock_unlock(&conf->big_lock);
++ sleep(1);
++ synclock_lock(&conf->big_lock);
++ }
++ ret = -1;
++ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL,
++ "attach failed for %s(volume=%s)", svc->name, volinfo->volname);
++out:
++ if (rpc)
++ rpc_clnt_unref(rpc);
++ return ret;
++}
++
++int
++glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig)
++{
++ glusterd_conf_t *conf = THIS->private;
++ int ret = -1;
++ int tries;
++ rpc_clnt_t *rpc = NULL;
++
++ GF_VALIDATE_OR_GOTO(THIS->name, conf, out);
++ GF_VALIDATE_OR_GOTO(THIS->name, svc, out);
++ GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
++
++ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DETACH_INFO,
++ "removing svc %s (volume=%s) from existing "
++ "process with pid %d",
++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++
++ rpc = rpc_clnt_ref(svc->conn.rpc);
++ for (tries = 15; tries > 0; --tries) {
++ if (rpc) {
++ /*For detach there is no flags, and we are not using sig.*/
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ ret = __glusterd_send_svc_configure_req(svc, 0, rpc,
++ svc->proc.volfileid,
++ GLUSTERD_SVC_DETACH);
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
++ if (!ret) {
++ goto out;
++ }
++ }
++ /*
++ * It might not actually be safe to manipulate the lock
++ * like this, but if we don't then the connection can
++ * never actually complete and retries are useless.
++ * Unfortunately, all of the alternatives (e.g. doing
++ * all of this in a separate thread) are much more
++ * complicated and risky.
++ * TBD: see if there's a better way
++ */
++ synclock_unlock(&conf->big_lock);
++ sleep(1);
++ synclock_lock(&conf->big_lock);
++ }
++ ret = -1;
++ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_DETACH_FAIL,
++ "detach failed for %s(volume=%s)", svc->name, volinfo->volname);
++out:
++ if (rpc)
++ rpc_clnt_unref(rpc);
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
+index cc98e78..5def246 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
+@@ -16,10 +16,10 @@
+ #include "glusterd-volgen.h"
+
+ int
+-glusterd_svcs_reconfigure();
++glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo);
+
+ int
+-glusterd_svcs_stop();
++glusterd_svcs_stop(glusterd_volinfo_t *vol);
+
+ int
+ glusterd_svcs_manager(glusterd_volinfo_t *volinfo);
+@@ -41,5 +41,41 @@ int
+ glusterd_svc_check_tier_topology_identical(char *svc_name,
+ glusterd_volinfo_t *volinfo,
+ gf_boolean_t *identical);
++int
++glusterd_volume_svc_check_volfile_identical(char *svc_name, dict_t *mode_dict,
++ glusterd_volinfo_t *volinfo,
++ glusterd_vol_graph_builder_t,
++ gf_boolean_t *identical);
++int
++glusterd_volume_svc_check_topology_identical(char *svc_name, dict_t *mode_dict,
++ glusterd_volinfo_t *volinfo,
++ glusterd_vol_graph_builder_t,
++ gf_boolean_t *identical);
++void
++glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
++ char *volfile, size_t len);
++void *
++__gf_find_compatible_svc(gd_node_type daemon);
++
++glusterd_svc_proc_t *
++glusterd_svcprocess_new();
++
++int
++glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc);
++
++void *
++__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid);
++
++int
++glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo,
++ int flags);
++
++int
++glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig);
++
++int
++__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flag,
++ struct rpc_clnt *rpc, char *volfile_id,
++ int op);
+
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+index 4cd4cea..f32dafc 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+@@ -18,6 +18,7 @@
+ #include "glusterd-conn-mgmt.h"
+ #include "glusterd-messages.h"
+ #include <glusterfs/syscall.h>
++#include "glusterd-shd-svc-helper.h"
+
+ int
+ glusterd_svc_create_rundir(char *rundir)
+@@ -167,68 +168,75 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
+ GF_ASSERT(this);
+
+ priv = this->private;
+- GF_ASSERT(priv);
++ GF_VALIDATE_OR_GOTO("glusterd", priv, out);
++ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++
++ pthread_mutex_lock(&priv->attach_lock);
++ {
++ if (glusterd_proc_is_running(&(svc->proc))) {
++ ret = 0;
++ goto unlock;
++ }
+
+- if (glusterd_proc_is_running(&(svc->proc))) {
+- ret = 0;
+- goto out;
+- }
++ ret = sys_access(svc->proc.volfile, F_OK);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND,
++ "Volfile %s is not present", svc->proc.volfile);
++ goto unlock;
++ }
+
+- ret = sys_access(svc->proc.volfile, F_OK);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND,
+- "Volfile %s is not present", svc->proc.volfile);
+- goto out;
+- }
++ runinit(&runner);
+
+- runinit(&runner);
++ if (this->ctx->cmd_args.valgrind) {
++ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
++ svc->proc.logfile, svc->name);
++ if ((len < 0) || (len >= PATH_MAX)) {
++ ret = -1;
++ goto unlock;
++ }
+
+- if (this->ctx->cmd_args.valgrind) {
+- len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
+- svc->proc.logfile, svc->name);
+- if ((len < 0) || (len >= PATH_MAX)) {
+- ret = -1;
+- goto out;
++ runner_add_args(&runner, "valgrind", "--leak-check=full",
++ "--trace-children=yes", "--track-origins=yes",
++ NULL);
++ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+- runner_add_args(&runner, "valgrind", "--leak-check=full",
+- "--trace-children=yes", "--track-origins=yes", NULL);
+- runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
+- }
+-
+- runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s",
+- svc->proc.volfileserver, "--volfile-id",
+- svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
+- svc->proc.logfile, "-S", svc->conn.sockpath, NULL);
++ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s",
++ svc->proc.volfileserver, "--volfile-id",
++ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
++ svc->proc.logfile, "-S", svc->conn.sockpath, NULL);
+
+- if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
+- SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
+- &localtime_logging) == 0) {
+- if (strcmp(localtime_logging, "enable") == 0)
+- runner_add_arg(&runner, "--localtime-logging");
+- }
+- if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY,
+- SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), &log_level) == 0) {
+- snprintf(daemon_log_level, 30, "--log-level=%s", log_level);
+- runner_add_arg(&runner, daemon_log_level);
+- }
++ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
++ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
++ &localtime_logging) == 0) {
++ if (strcmp(localtime_logging, "enable") == 0)
++ runner_add_arg(&runner, "--localtime-logging");
++ }
++ if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY,
++ SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY),
++ &log_level) == 0) {
++ snprintf(daemon_log_level, 30, "--log-level=%s", log_level);
++ runner_add_arg(&runner, daemon_log_level);
++ }
+
+- if (cmdline)
+- dict_foreach(cmdline, svc_add_args, (void *)&runner);
++ if (cmdline)
++ dict_foreach(cmdline, svc_add_args, (void *)&runner);
+
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS,
+- "Starting %s service", svc->name);
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS,
++ "Starting %s service", svc->name);
+
+- if (flags == PROC_START_NO_WAIT) {
+- ret = runner_run_nowait(&runner);
+- } else {
+- synclock_unlock(&priv->big_lock);
+- {
+- ret = runner_run(&runner);
++ if (flags == PROC_START_NO_WAIT) {
++ ret = runner_run_nowait(&runner);
++ } else {
++ synclock_unlock(&priv->big_lock);
++ {
++ ret = runner_run(&runner);
++ }
++ synclock_lock(&priv->big_lock);
+ }
+- synclock_lock(&priv->big_lock);
+ }
+-
++unlock:
++ pthread_mutex_unlock(&priv->attach_lock);
+ out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+@@ -281,7 +289,8 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile,
+
+ glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir));
+
+- if (!strcmp(server, "quotad")) /*quotad has different volfile name*/
++ if (!strcmp(server, "quotad"))
++ /*quotad has different volfile name*/
+ snprintf(volfile, len, "%s/%s.vol", dir, server);
+ else
+ snprintf(volfile, len, "%s/%s-server.vol", dir, server);
+@@ -366,3 +375,138 @@ glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event)
+
+ return ret;
+ }
++
++void
++glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
++ char *volfile, size_t len)
++{
++ GF_ASSERT(len == PATH_MAX);
++
++ if (!strcmp(server, "glustershd")) {
++ glusterd_svc_build_shd_volfile_path(vol, volfile, len);
++ }
++}
++
++int
++glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *mux_proc,
++ rpc_clnt_event_t event)
++{
++ int ret = 0;
++ glusterd_svc_t *svc = NULL;
++ glusterd_svc_t *tmp = NULL;
++ xlator_t *this = NULL;
++ gf_boolean_t need_logging = _gf_false;
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ if (!mux_proc) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
++ "Failed to get the svc proc data");
++ return -1;
++ }
++
++ /* Currently this function is used only for the shd svc. If it is
++ * reused for another svc, change the glustershd reference. We can
++ * get the svc name from any of the attached svcs.
++ */
++ switch (event) {
++ case RPC_CLNT_CONNECT:
++ gf_msg_debug(this->name, 0,
++ "glustershd has connected with glusterd.");
++ gf_event(EVENT_SVC_CONNECTED, "svc_name=glustershd");
++ cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
++ {
++ if (svc->online)
++ continue;
++ svc->online = _gf_true;
++ }
++ break;
++
++ case RPC_CLNT_DISCONNECT:
++ cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
++ {
++ if (svc->online) {
++ if (!need_logging)
++ need_logging = _gf_true;
++ svc->online = _gf_false;
++ }
++ }
++ if (need_logging) {
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED,
++ "glustershd has disconnected from glusterd.");
++ gf_event(EVENT_SVC_DISCONNECTED, "svc_name=glustershd");
++ }
++ break;
++
++ default:
++ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
++ break;
++ }
++
++ return ret;
++}
++
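++/* Build the unix-socket rpc connection for a muxed svc process. The
++ * notify callback is registered with the glusterd_svc_proc_t as
++ * context, so one rpc event can update every svc attached to the
++ * process. */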
++int
++glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
++ char *sockpath, int frame_timeout,
++ glusterd_muxsvc_conn_notify_t notify)
++{
++ int ret = -1;
++ dict_t *options = NULL;
++ struct rpc_clnt *rpc = NULL;
++ xlator_t *this = THIS;
++ glusterd_svc_t *svc = NULL;
++
++ options = dict_new();
++ if (!this || !options)
++ goto out;
++
++ svc = cds_list_entry(conn, glusterd_svc_t, conn);
++ if (!svc) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
++ "Failed to get the service");
++ goto out;
++ }
++
++ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout);
++ if (ret)
++ goto out;
++
++ ret = dict_set_int32n(options, "transport.socket.ignore-enoent",
++ SLEN("transport.socket.ignore-enoent"), 1);
++ if (ret)
++ goto out;
++
++ /* @options is free'd by rpc_transport when destroyed */
++ rpc = rpc_clnt_new(options, this, (char *)svc->name, 16);
++ if (!rpc) {
++ ret = -1;
++ goto out;
++ }
++
++ ret = rpc_clnt_register_notify(rpc, glusterd_muxsvc_conn_common_notify,
++ mux_proc);
++ if (ret)
++ goto out;
++
++ ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
++ if (ret < 0)
++ goto out;
++ else
++ ret = 0;
++
++ conn->frame_timeout = frame_timeout;
++ conn->rpc = rpc;
++ mux_proc->notify = notify;
++out:
++ if (options)
++ dict_unref(options);
++ if (ret) {
++ if (rpc) {
++ rpc_clnt_unref(rpc);
++ rpc = NULL;
++ }
++ }
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+index c850bfd..fbc5225 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+@@ -13,9 +13,12 @@
+
+ #include "glusterd-proc-mgmt.h"
+ #include "glusterd-conn-mgmt.h"
++#include "glusterd-rcu.h"
+
+ struct glusterd_svc_;
++
+ typedef struct glusterd_svc_ glusterd_svc_t;
++typedef struct glusterd_svc_proc_ glusterd_svc_proc_t;
+
+ typedef void (*glusterd_svc_build_t)(glusterd_svc_t *svc);
+
+@@ -25,6 +28,17 @@ typedef int (*glusterd_svc_start_t)(glusterd_svc_t *svc, int flags);
+ typedef int (*glusterd_svc_stop_t)(glusterd_svc_t *svc, int sig);
+ typedef int (*glusterd_svc_reconfigure_t)(void *data);
+
++typedef int (*glusterd_muxsvc_conn_notify_t)(glusterd_svc_proc_t *mux_proc,
++ rpc_clnt_event_t event);
++
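++/* One muxed daemon process: 'svcs' lists every glusterd_svc_t attached
++ * to it, while 'svc_proc_list' links the process into the per-daemon
++ * list (e.g. conf->shd_procs). */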
++struct glusterd_svc_proc_ {
++ struct cds_list_head svc_proc_list;
++ struct cds_list_head svcs;
++ glusterd_muxsvc_conn_notify_t notify;
++ rpc_clnt_t *rpc;
++ void *data;
++};
++
+ struct glusterd_svc_ {
+ char name[NAME_MAX];
+ glusterd_conn_t conn;
+@@ -35,6 +49,8 @@ struct glusterd_svc_ {
+ gf_boolean_t online;
+ gf_boolean_t inited;
+ glusterd_svc_reconfigure_t reconfigure;
++ glusterd_svc_proc_t *svc_proc;
++ struct cds_list_head mux_svc;
+ };
+
+ int
+@@ -69,4 +85,15 @@ glusterd_svc_reconfigure(int (*create_volfile)());
+ int
+ glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event);
+
++int
++glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *conn,
++ rpc_clnt_event_t event);
++
++int
++glusterd_proc_get_pid(glusterd_proc_t *proc);
++
++int
++glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
++ char *sockpath, int frame_timeout,
++ glusterd_muxsvc_conn_notify_t notify);
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c
+index 4dc0d44..23a9592 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-tier.c
++++ b/xlators/mgmt/glusterd/src/glusterd-tier.c
+@@ -27,6 +27,7 @@
+ #include "glusterd-messages.h"
+ #include "glusterd-mgmt.h"
+ #include "glusterd-syncop.h"
++#include "glusterd-shd-svc-helper.h"
+
+ #include <sys/wait.h>
+ #include <dlfcn.h>
+@@ -615,7 +616,7 @@ glusterd_op_remove_tier_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+
+ if (cmd == GF_DEFRAG_CMD_DETACH_START &&
+ volinfo->status == GLUSTERD_STATUS_STARTED) {
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL,
+ "Unable to reconfigure NFS-Server");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
+index 04ceec5..ab463f1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
+@@ -83,7 +83,6 @@ glusterd_tierdsvc_init(void *data)
+ goto out;
+
+ notify = glusterd_svc_common_rpc_notify;
+- glusterd_store_perform_node_state_store(volinfo);
+
+ volinfo->type = GF_CLUSTER_TYPE_TIER;
+
+@@ -395,6 +394,7 @@ int
+ glusterd_tierdsvc_restart()
+ {
+ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_volinfo_t *tmp = NULL;
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+@@ -405,7 +405,7 @@ glusterd_tierdsvc_restart()
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
+ {
+ /* Start per volume tierd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED &&
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 52b83ec..ef664c2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -61,6 +61,7 @@
+ #include "glusterd-server-quorum.h"
+ #include <glusterfs/quota-common-utils.h>
+ #include <glusterfs/common-utils.h>
++#include "glusterd-shd-svc-helper.h"
+
+ #include "xdr-generic.h"
+ #include <sys/resource.h>
+@@ -583,13 +584,17 @@ glusterd_volinfo_t *
+ glusterd_volinfo_unref(glusterd_volinfo_t *volinfo)
+ {
+ int refcnt = -1;
++ glusterd_conf_t *conf = THIS->private;
+
+- pthread_mutex_lock(&volinfo->reflock);
++ pthread_mutex_lock(&conf->volume_lock);
+ {
+- refcnt = --volinfo->refcnt;
++ pthread_mutex_lock(&volinfo->reflock);
++ {
++ refcnt = --volinfo->refcnt;
++ }
++ pthread_mutex_unlock(&volinfo->reflock);
+ }
+- pthread_mutex_unlock(&volinfo->reflock);
+-
++ pthread_mutex_unlock(&conf->volume_lock);
+ if (!refcnt) {
+ glusterd_volinfo_delete(volinfo);
+ return NULL;
+@@ -661,6 +666,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo)
+ glusterd_snapdsvc_build(&new_volinfo->snapd.svc);
+ glusterd_tierdsvc_build(&new_volinfo->tierd.svc);
+ glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc);
++ glusterd_shdsvc_build(&new_volinfo->shd.svc);
+
+ pthread_mutex_init(&new_volinfo->reflock, NULL);
+ *volinfo = glusterd_volinfo_ref(new_volinfo);
+@@ -1026,11 +1032,11 @@ glusterd_volinfo_delete(glusterd_volinfo_t *volinfo)
+ gf_store_handle_destroy(volinfo->snapd.handle);
+
+ glusterd_auth_cleanup(volinfo);
++ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+
+ pthread_mutex_destroy(&volinfo->reflock);
+ GF_FREE(volinfo);
+ ret = 0;
+-
+ out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
+@@ -3619,6 +3625,7 @@ glusterd_spawn_daemons(void *opaque)
+ ret = glusterd_snapdsvc_restart();
+ ret = glusterd_tierdsvc_restart();
+ ret = glusterd_gfproxydsvc_restart();
++ ret = glusterd_shdsvc_restart();
+ return ret;
+ }
+
+@@ -4569,6 +4576,9 @@ glusterd_delete_stale_volume(glusterd_volinfo_t *stale_volinfo,
+ svc = &(stale_volinfo->snapd.svc);
+ (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT);
+ }
++ svc = &(stale_volinfo->shd.svc);
++ (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT);
++
+ (void)glusterd_volinfo_remove(stale_volinfo);
+
+ return 0;
+@@ -4683,6 +4693,15 @@ glusterd_import_friend_volume(dict_t *peer_data, int count)
+ glusterd_volinfo_unref(old_volinfo);
+ }
+
++ ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
++ "Failed to store "
++ "volinfo for volume %s",
++ new_volinfo->volname);
++ goto out;
++ }
++
+ if (glusterd_is_volume_started(new_volinfo)) {
+ (void)glusterd_start_bricks(new_volinfo);
+ if (glusterd_is_snapd_enabled(new_volinfo)) {
+@@ -4691,15 +4710,10 @@ glusterd_import_friend_volume(dict_t *peer_data, int count)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+ }
+ }
+- }
+-
+- ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
+- "Failed to store "
+- "volinfo for volume %s",
+- new_volinfo->volname);
+- goto out;
++ svc = &(new_volinfo->shd.svc);
++ if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) {
++ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
++ }
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services(new_volinfo);
+@@ -5174,9 +5188,7 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count,
+ glusterd_svc_build_pidfile_path(server, priv->rundir, pidfile,
+ sizeof(pidfile));
+
+- if (strcmp(server, priv->shd_svc.name) == 0)
+- svc = &(priv->shd_svc);
+- else if (strcmp(server, priv->nfs_svc.name) == 0)
++ if (strcmp(server, priv->nfs_svc.name) == 0)
+ svc = &(priv->nfs_svc);
+ else if (strcmp(server, priv->quotad_svc.name) == 0)
+ svc = &(priv->quotad_svc);
+@@ -5207,9 +5219,6 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count,
+ if (!strcmp(server, priv->nfs_svc.name))
+ ret = dict_set_nstrn(dict, key, keylen, "NFS Server",
+ SLEN("NFS Server"));
+- else if (!strcmp(server, priv->shd_svc.name))
+- ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon",
+- SLEN("Self-heal Daemon"));
+ else if (!strcmp(server, priv->quotad_svc.name))
+ ret = dict_set_nstrn(dict, key, keylen, "Quota Daemon",
+ SLEN("Quota Daemon"));
+@@ -8773,6 +8782,21 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid)
+ "to stop snapd daemon service");
+ }
+ }
++
++ if (glusterd_is_shd_compatible_volume(volinfo)) {
++ /*
++ * Sending stop request for all volumes. So it is fine
++ * to send stop for mux shd
++ */
++ svc = &(volinfo->shd.svc);
++ ret = svc->stop(svc, SIGTERM);
++ if (ret) {
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
++ "Failed "
++ "to stop shd daemon service");
++ }
++ }
++
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ svc = &(volinfo->tierd.svc);
+ ret = svc->stop(svc, SIGTERM);
+@@ -8798,7 +8822,7 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid)
+ }
+
+ /* Reconfigure all daemon services upon peer detach */
+- ret = glusterd_svcs_reconfigure();
++ ret = glusterd_svcs_reconfigure(NULL);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "Failed to reconfigure all daemon services.");
+@@ -14350,3 +14374,74 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo)
+ return _gf_true;
+ return _gf_false;
+ }
++
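++/* Publish the local shd as a pseudo-brick entry in a status dict: the
++ * cli expects hostname/path/port keys, so "Self-heal Daemon" and the
++ * node uuid stand in for hostname and path, and the port is always 0. */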
++int32_t
++glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
++ int32_t count)
++{
++ int ret = -1;
++ int32_t pid = -1;
++ int32_t brick_online = -1;
++ char key[64] = {0};
++ int keylen;
++ char *pidfile = NULL;
++ xlator_t *this = NULL;
++ char *uuid_str = NULL;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
++
++ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
++ GF_VALIDATE_OR_GOTO(this->name, dict, out);
++
++ keylen = snprintf(key, sizeof(key), "brick%d.hostname", count);
++ ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon",
++ SLEN("Self-heal Daemon"));
++ if (ret)
++ goto out;
++
++ keylen = snprintf(key, sizeof(key), "brick%d.path", count);
++ uuid_str = gf_strdup(uuid_utoa(MY_UUID));
++ if (!uuid_str) {
++ ret = -1;
++ goto out;
++ }
++ ret = dict_set_dynstrn(dict, key, keylen, uuid_str);
++ if (ret)
++ goto out;
++ uuid_str = NULL;
++
++ /* shd doesn't have a port. but the cli needs a port key with
++ * a zero value to parse.
++ * */
++
++ keylen = snprintf(key, sizeof(key), "brick%d.port", count);
++ ret = dict_set_int32n(dict, key, keylen, 0);
++ if (ret)
++ goto out;
++
++ pidfile = volinfo->shd.svc.proc.pidfile;
++
++ brick_online = gf_is_service_running(pidfile, &pid);
++
++ /* If shd is not running, then don't print the pid */
++ if (!brick_online)
++ pid = -1;
++ keylen = snprintf(key, sizeof(key), "brick%d.pid", count);
++ ret = dict_set_int32n(dict, key, keylen, pid);
++ if (ret)
++ goto out;
++
++ keylen = snprintf(key, sizeof(key), "brick%d.status", count);
++ ret = dict_set_int32n(dict, key, keylen, brick_online);
++
++out:
++ if (uuid_str)
++ GF_FREE(uuid_str);
++ if (ret)
++ gf_msg(this ? this->name : "glusterd", GF_LOG_ERROR, 0,
++ GD_MSG_DICT_SET_FAILED,
++ "Returning %d. adding values to dict failed", ret);
++
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index 9bf19a6..3647c34 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -876,4 +876,8 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo);
+
+ char *
+ search_brick_path_from_proc(pid_t brick_pid, char *brickpath);
++
++int32_t
++glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
++ int32_t count);
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 1f53beb..324ec2f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -36,6 +36,7 @@
+ #include "glusterd-svc-mgmt.h"
+ #include "glusterd-svc-helper.h"
+ #include "glusterd-snapd-svc-helper.h"
++#include "glusterd-shd-svc-helper.h"
+ #include "glusterd-gfproxyd-svc-helper.h"
+
+ struct gd_validate_reconf_opts {
+@@ -4845,7 +4846,7 @@ volgen_get_shd_key(int type)
+ static int
+ volgen_set_shd_key_enable(dict_t *set_dict, const int type)
+ {
+- int ret = -1;
++ int ret = 0;
+
+ switch (type) {
+ case GF_CLUSTER_TYPE_REPLICATE:
+@@ -5136,24 +5137,15 @@ out:
+ static int
+ build_shd_volume_graph(xlator_t *this, volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, dict_t *mod_dict,
+- dict_t *set_dict, gf_boolean_t graph_check,
+- gf_boolean_t *valid_config)
++ dict_t *set_dict, gf_boolean_t graph_check)
+ {
+ volgen_graph_t cgraph = {0};
+ int ret = 0;
+ int clusters = -1;
+
+- if (!graph_check && (volinfo->status != GLUSTERD_STATUS_STARTED))
+- goto out;
+-
+ if (!glusterd_is_shd_compatible_volume(volinfo))
+ goto out;
+
+- /* Shd graph is valid only when there is at least one
+- * replica/disperse volume is present
+- */
+- *valid_config = _gf_true;
+-
+ ret = prepare_shd_volume_options(volinfo, mod_dict, set_dict);
+ if (ret)
+ goto out;
+@@ -5183,19 +5175,16 @@ out:
+ }
+
+ int
+-build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict)
++build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph,
++ dict_t *mod_dict)
+ {
+- glusterd_volinfo_t *voliter = NULL;
+ xlator_t *this = NULL;
+- glusterd_conf_t *priv = NULL;
+ dict_t *set_dict = NULL;
+ int ret = 0;
+- gf_boolean_t valid_config = _gf_false;
+ xlator_t *iostxl = NULL;
+ gf_boolean_t graph_check = _gf_false;
+
+ this = THIS;
+- priv = this->private;
+
+ set_dict = dict_new();
+ if (!set_dict) {
+@@ -5205,26 +5194,18 @@ build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict)
+
+ if (mod_dict)
+ graph_check = dict_get_str_boolean(mod_dict, "graph-check", 0);
+- iostxl = volgen_graph_add_as(graph, "debug/io-stats", "glustershd");
++ iostxl = volgen_graph_add_as(graph, "debug/io-stats", volinfo->volname);
+ if (!iostxl) {
+ ret = -1;
+ goto out;
+ }
+
+- cds_list_for_each_entry(voliter, &priv->volumes, vol_list)
+- {
+- ret = build_shd_volume_graph(this, graph, voliter, mod_dict, set_dict,
+- graph_check, &valid_config);
+- ret = dict_reset(set_dict);
+- if (ret)
+- goto out;
+- }
++ ret = build_shd_volume_graph(this, graph, volinfo, mod_dict, set_dict,
++ graph_check);
+
+ out:
+ if (set_dict)
+ dict_unref(set_dict);
+- if (!valid_config)
+- ret = -EINVAL;
+ return ret;
+ }
+
+@@ -6541,6 +6522,10 @@ glusterd_create_volfiles(glusterd_volinfo_t *volinfo)
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "Could not generate gfproxy volfiles");
+
++ ret = glusterd_shdsvc_create_volfile(volinfo);
++ if (ret)
++ gf_log(this->name, GF_LOG_ERROR, "Could not generate shd volfiles");
++
+ dict_del_sizen(volinfo->dict, "skip-CLIOT");
+
+ out:
+@@ -6621,7 +6606,7 @@ validate_shdopts(glusterd_volinfo_t *volinfo, dict_t *val_dict,
+ ret = dict_set_int32_sizen(val_dict, "graph-check", 1);
+ if (ret)
+ goto out;
+- ret = build_shd_graph(&graph, val_dict);
++ ret = build_shd_graph(volinfo, &graph, val_dict);
+ if (!ret)
+ ret = graph_reconf_validateopt(&graph.graph, op_errstr);
+
+@@ -6998,3 +6983,22 @@ gd_is_boolean_option(char *key)
+
+ return _gf_false;
+ }
++
++int
++glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename,
++ dict_t *mode_dict)
++{
++ int ret = -1;
++ volgen_graph_t graph = {
++ 0,
++ };
++
++ graph.type = GF_SHD;
++ ret = build_shd_graph(volinfo, &graph, mode_dict);
++ if (!ret)
++ ret = volgen_write_volfile(&graph, filename);
++
++ volgen_graph_free(&graph);
++
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h
+index f9fc068..897d8fa 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.h
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h
+@@ -66,6 +66,7 @@ typedef enum {
+ GF_REBALANCED = 1,
+ GF_QUOTAD,
+ GF_SNAPD,
++ GF_SHD,
+ } glusterd_graph_type_t;
+
+ struct volgen_graph {
+@@ -77,6 +78,8 @@ typedef struct volgen_graph volgen_graph_t;
+
+ typedef int (*glusterd_graph_builder_t)(volgen_graph_t *graph,
+ dict_t *mod_dict);
++typedef int (*glusterd_vol_graph_builder_t)(glusterd_volinfo_t *,
++ char *filename, dict_t *mod_dict);
+
+ #define COMPLETE_OPTION(key, completion, ret) \
+ do { \
+@@ -201,7 +204,8 @@ void
+ glusterd_get_shd_filepath(char *filename);
+
+ int
+-build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict);
++build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph,
++ dict_t *mod_dict);
+
+ int
+ build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict);
+@@ -313,4 +317,9 @@ glusterd_generate_gfproxyd_volfile(glusterd_volinfo_t *volinfo);
+
+ int
+ glusterd_build_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *filename);
++
++int
++glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename,
++ dict_t *mode_dict);
++
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 1ea8ba6..4c3ad50 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -1940,7 +1940,7 @@ static int
+ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ dict_t *dict, char **op_errstr)
+ {
+- glusterd_conf_t *priv = NULL;
++ glusterd_svc_t *svc = NULL;
+ gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ char msg[2408] = {
+@@ -1950,7 +1950,6 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ "Self-heal daemon is not running. "
+ "Check self-heal daemon log file.";
+
+- priv = this->private;
+ ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"),
+ (int32_t *)&heal_op);
+ if (ret) {
+@@ -1959,6 +1958,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ goto out;
+ }
+
++ svc = &(volinfo->shd.svc);
+ switch (heal_op) {
+ case GF_SHD_OP_INVALID:
+ case GF_SHD_OP_HEAL_ENABLE: /* This op should be handled in volume-set*/
+@@ -1988,7 +1988,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ goto out;
+ }
+
+- if (!priv->shd_svc.online) {
++ if (!svc->online) {
+ ret = -1;
+ *op_errstr = gf_strdup(offline_msg);
+ goto out;
+@@ -2009,7 +2009,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ goto out;
+ }
+
+- if (!priv->shd_svc.online) {
++ if (!svc->online) {
+ ret = -1;
+ *op_errstr = gf_strdup(offline_msg);
+ goto out;
+diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
+index ff5af42..89afb9c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.c
++++ b/xlators/mgmt/glusterd/src/glusterd.c
+@@ -1533,14 +1533,6 @@ init(xlator_t *this)
+ exit(1);
+ }
+
+- ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_GLUSTERSHD_RUN_DIR);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED,
+- "Unable to create "
+- "glustershd running directory");
+- exit(1);
+- }
+-
+ ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_NFS_RUN_DIR);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED,
+@@ -1815,6 +1807,9 @@ init(xlator_t *this)
+ CDS_INIT_LIST_HEAD(&conf->snapshots);
+ CDS_INIT_LIST_HEAD(&conf->missed_snaps_list);
+ CDS_INIT_LIST_HEAD(&conf->brick_procs);
++ CDS_INIT_LIST_HEAD(&conf->shd_procs);
++ pthread_mutex_init(&conf->attach_lock, NULL);
++ pthread_mutex_init(&conf->volume_lock, NULL);
+
+ pthread_mutex_init(&conf->mutex, NULL);
+ conf->rpc = rpc;
+@@ -1895,7 +1890,6 @@ init(xlator_t *this)
+ glusterd_mgmt_v3_lock_timer_init();
+ glusterd_txn_opinfo_dict_init();
+
+- glusterd_shdsvc_build(&conf->shd_svc);
+ glusterd_nfssvc_build(&conf->nfs_svc);
+ glusterd_quotadsvc_build(&conf->quotad_svc);
+ glusterd_bitdsvc_build(&conf->bitd_svc);
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index e858ce4..0ac6e63 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -28,6 +28,7 @@
+ #include "glusterd-sm.h"
+ #include "glusterd-snapd-svc.h"
+ #include "glusterd-tierd-svc.h"
++#include "glusterd-shd-svc.h"
+ #include "glusterd-bitd-svc.h"
+ #include "glusterd1-xdr.h"
+ #include "protocol-common.h"
+@@ -170,7 +171,6 @@ typedef struct {
+ char workdir[VALID_GLUSTERD_PATHMAX];
+ char rundir[VALID_GLUSTERD_PATHMAX];
+ rpcsvc_t *rpc;
+- glusterd_svc_t shd_svc;
+ glusterd_svc_t nfs_svc;
+ glusterd_svc_t bitd_svc;
+ glusterd_svc_t scrub_svc;
+@@ -179,6 +179,7 @@ typedef struct {
+ struct cds_list_head volumes;
+ struct cds_list_head snapshots; /*List of snap volumes */
+ struct cds_list_head brick_procs; /* List of brick processes */
++ struct cds_list_head shd_procs; /* List of shd processes */
+ pthread_mutex_t xprt_lock;
+ struct list_head xprt_list;
+ pthread_mutex_t import_volumes;
+@@ -219,6 +220,11 @@ typedef struct {
+ gf_atomic_t blockers;
+ uint32_t mgmt_v3_lock_timeout;
+ gf_boolean_t restart_bricks;
++ pthread_mutex_t attach_lock; /* Lock can be per process or a common one */
++ pthread_mutex_t volume_lock; /* We release the big_lock from a lot of
++ places, which might lead to modification
++ of the volinfo list.
++ */
+ } glusterd_conf_t;
+
+ typedef enum gf_brick_status {
+@@ -498,6 +504,7 @@ struct glusterd_volinfo_ {
+
+ glusterd_snapdsvc_t snapd;
+ glusterd_tierdsvc_t tierd;
++ glusterd_shdsvc_t shd;
+ glusterd_gfproxydsvc_t gfproxyd;
+ int32_t quota_xattr_version;
+ gf_boolean_t stage_deleted; /* volume has passed staging
+@@ -624,7 +631,6 @@ typedef enum {
+ #define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps"
+ #define GLUSTERD_BITD_RUN_DIR "/bitd"
+ #define GLUSTERD_SCRUB_RUN_DIR "/scrub"
+-#define GLUSTERD_GLUSTERSHD_RUN_DIR "/glustershd"
+ #define GLUSTERD_NFS_RUN_DIR "/nfs"
+ #define GLUSTERD_QUOTAD_RUN_DIR "/quotad"
+ #define GLUSTER_SHARED_STORAGE_BRICK_DIR GLUSTERD_DEFAULT_WORKDIR "/ss_brick"
+@@ -680,6 +686,26 @@ typedef ssize_t (*gd_serialize_t)(struct iovec outmsg, void *args);
+ } \
+ } while (0)
+
++#define GLUSTERD_GET_SHD_RUNDIR(path, volinfo, priv) \
++ do { \
++ int32_t _shd_dir_len; \
++ _shd_dir_len = snprintf(path, PATH_MAX, "%s/shd/%s", priv->rundir, \
++ volinfo->volname); \
++ if ((_shd_dir_len < 0) || (_shd_dir_len >= PATH_MAX)) { \
++ path[0] = 0; \
++ } \
++ } while (0)
++
++#define GLUSTERD_GET_SHD_PID_FILE(path, volinfo, priv) \
++ do { \
++ int32_t _shd_pid_len; \
++ _shd_pid_len = snprintf(path, PATH_MAX, "%s/shd/%s-shd.pid", \
++ priv->rundir, volinfo->volname); \
++ if ((_shd_pid_len < 0) || (_shd_pid_len >= PATH_MAX)) { \
++ path[0] = 0; \
++ } \
++ } while (0)
++
+ #define GLUSTERD_GET_VOLUME_PID_DIR(path, volinfo, priv) \
+ do { \
+ int32_t _vol_pid_len; \
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index 2d75714..19f5175 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -46,7 +46,6 @@ client_fini_complete(xlator_t *this)
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ clnt_conf_t *conf = this->private;
+-
+ if (!conf->destroy)
+ return 0;
+
+@@ -69,6 +68,11 @@ client_notify_dispatch_uniq(xlator_t *this, int32_t event, void *data, ...)
+ return 0;
+
+ return client_notify_dispatch(this, event, data);
++
++ /* Please avoid any code that accesses the xlator object here,
++ * because for a child-down event, once we do the signal
++ * we will start cleanup.
++ */
+ }
+
+ int
+@@ -105,6 +109,11 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...)
+ }
+ pthread_mutex_unlock(&ctx->notify_lock);
+
++ /* Please avoid any code that accesses the xlator object here,
++ * because for a child-down event, once we do the signal
++ * we will start cleanup.
++ */
++
+ return ret;
+ }
+
+@@ -2272,6 +2281,7 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ {
+ xlator_t *this = NULL;
+ clnt_conf_t *conf = NULL;
++ gf_boolean_t is_parent_down = _gf_false;
+ int ret = 0;
+
+ this = mydata;
+@@ -2333,6 +2343,19 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ if (conf->portmap_err_logged)
+ conf->disconnect_err_logged = 1;
+ }
++ /*
++ * Once we complete the child-down notification,
++ * there is a chance that the graph might get freed,
++ * so it is not safe to access any xlator contents.
++ * Hence we check here whether the parent is down
++ * or not.
++ */
++ pthread_mutex_lock(&conf->lock);
++ {
++ is_parent_down = conf->parent_down;
++ }
++ pthread_mutex_unlock(&conf->lock);
++
+ /* If the CHILD_DOWN event goes to parent xlator
+ multiple times, the logic of parent xlator notify
+ may get screwed up.. (eg. CHILD_MODIFIED event in
+@@ -2340,6 +2363,12 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ to parent are genuine */
+ ret = client_notify_dispatch_uniq(this, GF_EVENT_CHILD_DOWN,
+ NULL);
++ if (is_parent_down) {
++ /* If parent is down, then there should not be any
++ * operation after a child down.
++ */
++ goto out;
++ }
+ if (ret)
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ PC_MSG_CHILD_DOWN_NOTIFY_FAILED,
+--
+1.8.3.1
+
diff --git a/0099-client-fini-return-fini-after-rpc-cleanup.patch b/0099-client-fini-return-fini-after-rpc-cleanup.patch
new file mode 100644
index 0000000..5cff104
--- /dev/null
+++ b/0099-client-fini-return-fini-after-rpc-cleanup.patch
@@ -0,0 +1,119 @@
+From d79cb2cdff6fe8d962c9ac095a7541ddf500302b Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Mon, 1 Apr 2019 14:44:20 +0530
+Subject: [PATCH 099/124] client/fini: return fini after rpc cleanup
+
+There is a race condition between the rpc_transport layer
+and client fini.
+
+Sequence of events that leads to the race condition:
+1) When we want to destroy a graph, we send a parent-down
+ event first
+2) Once parent-down is received on a client xlator, we
+ initiate an rpc disconnect
+3) This in turn generates a child-down event.
+4) When we process child-down, we first run fini for
+ every xlator
+5) On successful return of fini, we delete the graph
+
+After step 5 there is a chance that the fini on the client
+has not actually finished, because an rpc_transport ref can
+race with the above sequence.
+
+So we have to wait until all rpc refs are successfully freed
+before returning from the client's fini.
+
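+In outline, the fix is a condition-variable handshake: fini blocks
+until client_fini_complete signals that the last rpc ref is gone,
+and only then destroys the locks and frees conf. A condensed sketch
+(field names taken from the diff below):
+
+    pthread_mutex_lock(&conf->lock);
+    {
+        while (!conf->fini_completed)
+            pthread_cond_wait(&conf->fini_complete_cond, &conf->lock);
+    }
+    pthread_mutex_unlock(&conf->lock);
+
+    pthread_spin_destroy(&conf->fd_lock);
+    pthread_mutex_destroy(&conf->lock);
+    pthread_cond_destroy(&conf->fini_complete_cond);
+    GF_FREE(conf);
+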
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22468/
+
+>Change-Id: I20145662d71fb837e448a4d3210d1fcb2855f2d4
+>fixes: bz#1659708
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I848bcfb9443467caed32bae0717244ab01b407fc
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167831
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/protocol/client/src/client.c | 25 ++++++++++++++++++++-----
+ xlators/protocol/client/src/client.h | 6 ++++++
+ 2 files changed, 26 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index 19f5175..a372807 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -49,11 +49,12 @@ client_fini_complete(xlator_t *this)
+ if (!conf->destroy)
+ return 0;
+
+- this->private = NULL;
+-
+- pthread_spin_destroy(&conf->fd_lock);
+- pthread_mutex_destroy(&conf->lock);
+- GF_FREE(conf);
++ pthread_mutex_lock(&conf->lock);
++ {
++ conf->fini_completed = _gf_true;
++ pthread_cond_broadcast(&conf->fini_complete_cond);
++ }
++ pthread_mutex_unlock(&conf->lock);
+
+ out:
+ return 0;
+@@ -2721,6 +2722,7 @@ init(xlator_t *this)
+ goto out;
+
+ pthread_mutex_init(&conf->lock, NULL);
++ pthread_cond_init(&conf->fini_complete_cond, NULL);
+ pthread_spin_init(&conf->fd_lock, 0);
+ INIT_LIST_HEAD(&conf->saved_fds);
+
+@@ -2779,6 +2781,7 @@ fini(xlator_t *this)
+ if (!conf)
+ return;
+
++ conf->fini_completed = _gf_false;
+ conf->destroy = 1;
+ if (conf->rpc) {
+ /* cleanup the saved-frames before last unref */
+@@ -2786,6 +2789,18 @@ fini(xlator_t *this)
+ rpc_clnt_unref(conf->rpc);
+ }
+
++ pthread_mutex_lock(&conf->lock);
++ {
++ while (!conf->fini_completed)
++ pthread_cond_wait(&conf->fini_complete_cond, &conf->lock);
++ }
++ pthread_mutex_unlock(&conf->lock);
++
++ pthread_spin_destroy(&conf->fd_lock);
++ pthread_mutex_destroy(&conf->lock);
++ pthread_cond_destroy(&conf->fini_complete_cond);
++ GF_FREE(conf);
++
+ /* Saved Fds */
+ /* TODO: */
+
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index f12fa61..8dcd72f 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -235,6 +235,12 @@ typedef struct clnt_conf {
+ * up, disconnects can be
+ * logged
+ */
++
++ gf_boolean_t old_protocol; /* used only for old-protocol testing */
++ pthread_cond_t fini_complete_cond; /* Used to wait till we finish the fini
++ completely, i.e. for client_fini_complete
++ to return */
++ gf_boolean_t fini_completed;
+ } clnt_conf_t;
+
+ typedef struct _client_fd_ctx {
+--
+1.8.3.1
+
diff --git a/0100-clnt-rpc-ref-leak-during-disconnect.patch b/0100-clnt-rpc-ref-leak-during-disconnect.patch
new file mode 100644
index 0000000..0eb4b02
--- /dev/null
+++ b/0100-clnt-rpc-ref-leak-during-disconnect.patch
@@ -0,0 +1,179 @@
+From 4d95e271a9042bf2d789a4d900ad263b6ea47681 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Wed, 23 Jan 2019 21:55:01 +0530
+Subject: [PATCH 100/124] clnt/rpc: ref leak during disconnect.
+
+During disconnect cleanup, we are not cancelling the reconnect
+timer, which causes a ref leak each time a disconnect
+happens.
+
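+In outline, the fix cancels any armed reconnect timer during
+connection cleanup and drops the extra ref that was taken when the
+timer was armed. A condensed sketch (taken from the diff below):
+
+    if (conn->reconnect) {
+        ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect);
+        if (!ret)
+            reconnect_unref = _gf_true;
+        conn->reconnect = NULL;
+    }
+    /* later, once the connection lock is released */
+    if (reconnect_unref)
+        rpc_clnt_unref(clnt);
+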
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22087/
+
+>Change-Id: I9d05d1f368d080e04836bf6a0bb018bf8f7b5b8a
+>updates: bz#1659708
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I5a2dbb17e663a4809bb4c435cacadbf0ab694a76
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167844
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/timer.c | 16 +++++++----
+ rpc/rpc-lib/src/rpc-clnt.c | 11 +++++++-
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 32 ++++++++++++++++++----
+ 3 files changed, 47 insertions(+), 12 deletions(-)
+
+diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
+index d882543..2643c07 100644
+--- a/libglusterfs/src/timer.c
++++ b/libglusterfs/src/timer.c
+@@ -75,13 +75,13 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
+ if (ctx == NULL || event == NULL) {
+ gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+- return 0;
++ return -1;
+ }
+
+ if (ctx->cleanup_started) {
+ gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED,
+ "ctx cleanup started");
+- return 0;
++ return -1;
+ }
+
+ LOCK(&ctx->lock);
+@@ -93,10 +93,9 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
+ if (!reg) {
+ /* This can happen when cleanup may have just started and
+ * gf_timer_registry_destroy() sets ctx->timer to NULL.
+- * Just bail out as success as gf_timer_proc() takes
+- * care of cleaning up the events.
++ * gf_timer_proc() takes care of cleaning up the events.
+ */
+- return 0;
++ return -1;
+ }
+
+ LOCK(&reg->lock);
+@@ -203,6 +202,13 @@ gf_timer_proc(void *data)
+ list_for_each_entry_safe(event, tmp, &reg->active, list)
+ {
+ list_del(&event->list);
++ /* TODO: Possible resource leak.
++ * Before freeing the event, we need to call the respective
++ * event functions and free any resources.
++ * For example, in the case of rpc_clnt_reconnect, we need to
++ * unref the rpc object whose ref was taken when it was added
++ * to the timer wheel.
++ */
+ GF_FREE(event);
+ }
+ }
+diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
+index 3f7bb3c..6f47515 100644
+--- a/rpc/rpc-lib/src/rpc-clnt.c
++++ b/rpc/rpc-lib/src/rpc-clnt.c
+@@ -495,6 +495,7 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
+ int unref = 0;
+ int ret = 0;
+ gf_boolean_t timer_unref = _gf_false;
++ gf_boolean_t reconnect_unref = _gf_false;
+
+ if (!conn) {
+ goto out;
+@@ -514,6 +515,12 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
+ timer_unref = _gf_true;
+ conn->timer = NULL;
+ }
++ if (conn->reconnect) {
++ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect);
++ if (!ret)
++ reconnect_unref = _gf_true;
++ conn->reconnect = NULL;
++ }
+
+ conn->connected = 0;
+ conn->disconnected = 1;
+@@ -533,6 +540,8 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
+ if (timer_unref)
+ rpc_clnt_unref(clnt);
+
++ if (reconnect_unref)
++ rpc_clnt_unref(clnt);
+ out:
+ return 0;
+ }
+@@ -830,7 +839,7 @@ rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
+ pthread_mutex_lock(&conn->lock);
+ {
+ if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) {
+- ts.tv_sec = 10;
++ ts.tv_sec = 3;
+ ts.tv_nsec = 0;
+
+ rpc_clnt_ref(clnt);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+index 041946d..b3c4158 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+@@ -3364,6 +3364,25 @@ out:
+ return ret;
+ }
+
++int
++glusterd_is_path_mounted(const char *path)
++{
++ FILE *mtab = NULL;
++ struct mntent *part = NULL;
++ int is_mounted = 0;
++
++ if ((mtab = setmntent("/etc/mtab", "r")) != NULL) {
++ while ((part = getmntent(mtab)) != NULL) {
++ if ((part->mnt_fsname != NULL) &&
++ (strcmp(part->mnt_dir, path)) == 0) {
++ is_mounted = 1;
++ break;
++ }
++ }
++ endmntent(mtab);
++ }
++ return is_mounted;
++}
+ /* This function will do unmount for snaps.
+ */
+ int32_t
+@@ -3388,14 +3407,11 @@ glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo)
+ continue;
+ }
+
+- /* Fetch the brick mount path from the brickinfo->path */
+- ret = glusterd_get_brick_root(brickinfo->path, &brick_mount_path);
++ ret = glusterd_find_brick_mount_path(brickinfo->path,
++ &brick_mount_path);
+ if (ret) {
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BRICK_PATH_UNMOUNTED,
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL,
+ "Failed to find brick_mount_path for %s", brickinfo->path);
+- /* There is chance that brick path is already
+- * unmounted. */
+- ret = 0;
+ goto out;
+ }
+ /* unmount cannot be done when the brick process is still in
+@@ -3440,6 +3456,10 @@ glusterd_umount(const char *path)
+ GF_ASSERT(this);
+ GF_ASSERT(path);
+
++ if (!glusterd_is_path_mounted(path)) {
++ return 0;
++ }
++
+ runinit(&runner);
+ snprintf(msg, sizeof(msg), "umount path %s", path);
+ runner_add_args(&runner, _PATH_UMOUNT, "-f", path, NULL);
+--
+1.8.3.1
+
diff --git a/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch b/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch
new file mode 100644
index 0000000..f8d0763
--- /dev/null
+++ b/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch
@@ -0,0 +1,162 @@
+From 0021a4bbc9af2bfe28d4a79f76c3cd33f23dd118 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Fri, 5 Apr 2019 12:33:55 +0530
+Subject: [PATCH 101/124] shd/mux: Fix coverity issues introduced by shd mux
+ patch
+
+CID 1400475: Null pointer dereferences (FORWARD_NULL)
+CID 1400474: Null pointer dereferences (FORWARD_NULL)
+CID 1400471: Code maintainability issues (UNUSED_VALUE)
+CID 1400470: Null pointer dereferences (FORWARD_NULL)
+CID 1400469: Memory - illegal accesses (USE_AFTER_FREE)
+CID 1400467: Code maintainability issues (UNUSED_VALUE)
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22514/
+
+>Change-Id: I0ca1c733be335c6e5844f44850f8066626ac40d4
+>updates: bz#789278
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I0425efca9ab5a95801eff9e99259219449a16380
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167832
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/graph.c | 21 +++++++++++++--------
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 6 ++++++
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 24 +++++++++++++++++-------
+ 3 files changed, 36 insertions(+), 15 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index a492dd8..4c8b02d 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1470,7 +1470,9 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+ goto out;
+ parent_graph = ctx->active;
+ graph = volfile_obj->graph;
+- if (graph && graph->first)
++ if (!graph)
++ goto out;
++ if (graph->first)
+ xl = graph->first;
+
+ last_xl = graph->last_xl;
+@@ -1591,12 +1593,10 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ parent_graph->leaf_count += graph->leaf_count;
+ parent_graph->id++;
+
++ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+ if (!volfile_obj) {
+- volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+- if (!volfile_obj) {
+- ret = -1;
+- goto out;
+- }
++ ret = -1;
++ goto out;
+ }
+
+ graph->used = 1;
+@@ -1641,6 +1641,7 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ {
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
++ char vol_id[NAME_MAX + 1];
+
+ int ret = -1;
+
+@@ -1672,6 +1673,9 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
++ ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id);
++ if (ret < 0)
++ goto out;
+ ret = glusterfs_process_svc_detach(ctx, volfile_obj);
+ if (ret) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL,
+@@ -1680,8 +1684,9 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ "old graph. Aborting the reconfiguration operation");
+ goto out;
+ }
+- ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp,
+- volfile_obj->vol_id, checksum);
++ volfile_obj = NULL;
++ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id,
++ checksum);
+ goto out;
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 937ea30..04a4b2e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -101,6 +101,8 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc);
+ ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s",
+ mux_conn->sockpath);
++ if (ret < 0)
++ goto out;
+ } else {
+ ret = mkdir_p(logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+@@ -663,6 +665,10 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+ glusterd_volinfo_ref(volinfo);
+ svc_proc->data = volinfo;
+ ret = glusterd_svc_stop(svc, sig);
++ if (ret) {
++ glusterd_volinfo_unref(volinfo);
++ goto out;
++ }
+ }
+ if (!empty && pid != -1) {
+ ret = glusterd_detach_svc(svc, volinfo, sig);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index e42703c..02945b1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -411,9 +411,14 @@ __gf_find_compatible_svc(gd_node_type daemon)
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+
+- if (daemon == GD_NODE_SHD) {
+- svc_procs = &conf->shd_procs;
+- if (!svc_procs)
++ switch (daemon) {
++ case GD_NODE_SHD: {
++ svc_procs = &conf->shd_procs;
++ if (!svc_procs)
++ goto out;
++ } break;
++ default:
++ /* Add support for other client daemons here */
+ goto out;
+ }
+
+@@ -540,11 +545,16 @@ __gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid)
+ if (!conf)
+ return NULL;
+
+- if (daemon == GD_NODE_SHD) {
+- svc_procs = &conf->shd_procs;
+- if (!svc_proc)
++ switch (daemon) {
++ case GD_NODE_SHD: {
++ svc_procs = &conf->shd_procs;
++ if (!svc_procs)
++ return NULL;
++ } break;
++ default:
++ /* Add support for other client daemons here */
+ return NULL;
+- } /* Can be moved to switch when mux is implemented for other daemon; */
++ }
+
+ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+ {
+--
+1.8.3.1
+
diff --git a/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch b/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch
new file mode 100644
index 0000000..39fe021
--- /dev/null
+++ b/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch
@@ -0,0 +1,737 @@
+From df6523ed3c5267624197b52edcb553fc2d8a08f2 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Tue, 26 Feb 2019 18:04:18 +0530
+Subject: [PATCH 102/124] rpc/transport: Missing a ref on dict while creating
+ transport object
+
+While creating the rpc_transport object, we store a dictionary without
+taking a ref on it, yet we unref it while cleaning up the
+transport object.
+
+So the rpc layer expects the caller to take a ref on the dictionary
+before passing it in. This leads to a lot of confusion
+across the code base and results in ref leaks.
+
+Semantically, this is not correct. It is the rpc layer's responsibility
+to take a ref when storing the dict and to release it during cleanup.
+
+Listed below are the issues and leaks across the code base caused
+by this confusion. These issues are currently present in the upstream
+master.
+
+1) changelog_rpc_client_init
+
+2) quota_enforcer_init
+
+3) rpcsvc_create_listeners : when there are two transport, like tcp,rdma.
+
+4) quotad_aggregator_init
+
+5) glusterd: init
+
+6) nfs3_init_state
+
+7) server: init
+
+8) client: init
+
+This patch does the cleanup according to the semantics.
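+Under the corrected semantics a caller now looks roughly like this
+(condensed; error handling elided, and the rpc_clnt_new() arguments
+are illustrative):
+
+    options = dict_new();
+    if (!options)
+        goto out;
+
+    ret = rpc_transport_unix_options_build(options, sockfile, 0);
+    if (ret)
+        goto out;
+
+    rpc = rpc_clnt_new(options, this, this->name, 16);
+out:
+    if (options)
+        dict_unref(options); /* rpc took its own ref; drop the caller's */
+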
+
+Backport of : https://review.gluster.org/#/c/glusterfs/+/22266/
+
+>Change-Id: I46373af9630373eb375ee6de0e6f2bbe2a677425
+>updates: bz#1659708
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: Iff978497e11592fbebfa4b683fdc56698b782859
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167847
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ api/src/glfs-mgmt.c | 10 ++++--
+ cli/src/cli.c | 20 +++++++-----
+ glusterfsd/src/glusterfsd-mgmt.c | 18 ++++++++--
+ rpc/rpc-lib/src/rpc-clnt.c | 2 --
+ rpc/rpc-lib/src/rpc-transport.c | 38 +++++++---------------
+ rpc/rpc-lib/src/rpc-transport.h | 4 +--
+ rpc/rpc-lib/src/rpcsvc.c | 13 ++------
+ rpc/rpc-lib/src/rpcsvc.h | 2 +-
+ .../features/changelog/src/changelog-rpc-common.c | 9 +++--
+ .../snapview-server/src/snapview-server-mgmt.c | 8 ++++-
+ xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 8 ++++-
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 18 ++++++----
+ xlators/mgmt/glusterd/src/glusterd-rebalance.c | 8 ++++-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 9 +++--
+ xlators/mgmt/glusterd/src/glusterd.c | 6 +++-
+ xlators/nfs/server/src/acl3.c | 5 +++
+ xlators/nfs/server/src/mount3.c | 5 +++
+ xlators/nfs/server/src/nlm4.c | 7 ++++
+ 18 files changed, 119 insertions(+), 71 deletions(-)
+
+diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
+index d502b4f..7476d5b 100644
+--- a/api/src/glfs-mgmt.c
++++ b/api/src/glfs-mgmt.c
+@@ -1015,6 +1015,10 @@ glfs_mgmt_init(struct glfs *fs)
+ if (ctx->mgmt)
+ return 0;
+
++ options = dict_new();
++ if (!options)
++ goto out;
++
+ if (cmd_args->volfile_server_port)
+ port = cmd_args->volfile_server_port;
+
+@@ -1029,11 +1033,11 @@ glfs_mgmt_init(struct glfs *fs)
+
+ if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "unix")) {
+- ret = rpc_transport_unix_options_build(&options, host, 0);
++ ret = rpc_transport_unix_options_build(options, host, 0);
+ } else {
+ xlator_cmdline_option_t *opt = find_xlator_option_in_cmd_args_t(
+ "address-family", cmd_args);
+- ret = rpc_transport_inet_options_build(&options, host, port,
++ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt ? opt->value : NULL));
+ }
+
+@@ -1075,5 +1079,7 @@ glfs_mgmt_init(struct glfs *fs)
+
+ ret = rpc_clnt_start(rpc);
+ out:
++ if (options)
++ dict_unref(options);
+ return ret;
+ }
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index c33d152..ff39a98 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -661,9 +661,8 @@ cli_quotad_clnt_rpc_init(void)
+
+ global_quotad_rpc = rpc;
+ out:
+- if (ret) {
+- if (rpc_opts)
+- dict_unref(rpc_opts);
++ if (rpc_opts) {
++ dict_unref(rpc_opts);
+ }
+ return rpc;
+ }
+@@ -685,6 +684,10 @@ cli_rpc_init(struct cli_state *state)
+ this = THIS;
+ cli_rpc_prog = &cli_prog;
+
++ options = dict_new();
++ if (!options)
++ goto out;
++
+ /* If address family specified in CLI */
+ if (state->address_family) {
+ addr_family = state->address_family;
+@@ -699,7 +702,7 @@ cli_rpc_init(struct cli_state *state)
+ "Connecting to glusterd using "
+ "sockfile %s",
+ state->glusterd_sock);
+- ret = rpc_transport_unix_options_build(&options, state->glusterd_sock,
++ ret = rpc_transport_unix_options_build(options, state->glusterd_sock,
+ 0);
+ if (ret)
+ goto out;
+@@ -709,10 +712,6 @@ cli_rpc_init(struct cli_state *state)
+ "%s",
+ state->remote_host);
+
+- options = dict_new();
+- if (!options)
+- goto out;
+-
+ ret = dict_set_str(options, "remote-host", state->remote_host);
+ if (ret)
+ goto out;
+@@ -731,7 +730,7 @@ cli_rpc_init(struct cli_state *state)
+ gf_log("cli", GF_LOG_DEBUG,
+ "Connecting to glusterd using "
+ "default socket");
+- ret = rpc_transport_unix_options_build(&options,
++ ret = rpc_transport_unix_options_build(options,
+ DEFAULT_GLUSTERD_SOCKFILE, 0);
+ if (ret)
+ goto out;
+@@ -749,6 +748,9 @@ cli_rpc_init(struct cli_state *state)
+
+ ret = rpc_clnt_start(rpc);
+ out:
++ if (options)
++ dict_unref(options);
++
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref(rpc);
+diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
+index a89c980..1d2cd1a 100644
+--- a/glusterfsd/src/glusterfsd-mgmt.c
++++ b/glusterfsd/src/glusterfsd-mgmt.c
+@@ -2781,7 +2781,11 @@ glusterfs_listener_init(glusterfs_ctx_t *ctx)
+ if (!cmd_args->sock_file)
+ return 0;
+
+- ret = rpcsvc_transport_unix_options_build(&options, cmd_args->sock_file);
++ options = dict_new();
++ if (!options)
++ goto out;
++
++ ret = rpcsvc_transport_unix_options_build(options, cmd_args->sock_file);
+ if (ret)
+ goto out;
+
+@@ -2808,6 +2812,8 @@ glusterfs_listener_init(glusterfs_ctx_t *ctx)
+ ctx->listener = rpc;
+
+ out:
++ if (options)
++ dict_unref(options);
+ return ret;
+ }
+
+@@ -2889,6 +2895,10 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
+ if (ctx->mgmt)
+ return 0;
+
++ options = dict_new();
++ if (!options)
++ goto out;
++
+ LOCK_INIT(&ctx->volfile_lock);
+
+ if (cmd_args->volfile_server_port)
+@@ -2898,10 +2908,10 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
+
+ if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "unix")) {
+- ret = rpc_transport_unix_options_build(&options, host, 0);
++ ret = rpc_transport_unix_options_build(options, host, 0);
+ } else {
+ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args);
+- ret = rpc_transport_inet_options_build(&options, host, port,
++ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt ? opt->value : NULL));
+ }
+ if (ret)
+@@ -2950,6 +2960,8 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
+
+ ret = rpc_clnt_start(rpc);
+ out:
++ if (options)
++ dict_unref(options);
+ return ret;
+ }
+
+diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
+index 6f47515..b04eaed 100644
+--- a/rpc/rpc-lib/src/rpc-clnt.c
++++ b/rpc/rpc-lib/src/rpc-clnt.c
+@@ -1125,8 +1125,6 @@ rpc_clnt_new(dict_t *options, xlator_t *owner, char *name,
+ mem_pool_destroy(rpc->saved_frames_pool);
+ GF_FREE(rpc);
+ rpc = NULL;
+- if (options)
+- dict_unref(options);
+ goto out;
+ }
+
+diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
+index 4beaaf9..bed1f8c 100644
+--- a/rpc/rpc-lib/src/rpc-transport.c
++++ b/rpc/rpc-lib/src/rpc-transport.c
+@@ -168,6 +168,11 @@ rpc_transport_cleanup(rpc_transport_t *trans)
+ if (trans->fini)
+ trans->fini(trans);
+
++ if (trans->options) {
++ dict_unref(trans->options);
++ trans->options = NULL;
++ }
++
+ GF_FREE(trans->name);
+
+ if (trans->xl)
+@@ -352,7 +357,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
+ }
+ }
+
+- trans->options = options;
++ trans->options = dict_ref(options);
+
+ pthread_mutex_init(&trans->lock, NULL);
+ trans->xl = this;
+@@ -591,19 +596,14 @@ out:
+ }
+
+ int
+-rpc_transport_unix_options_build(dict_t **options, char *filepath,
++rpc_transport_unix_options_build(dict_t *dict, char *filepath,
+ int frame_timeout)
+ {
+- dict_t *dict = NULL;
+ char *fpath = NULL;
+ int ret = -1;
+
+ GF_ASSERT(filepath);
+- GF_ASSERT(options);
+-
+- dict = dict_new();
+- if (!dict)
+- goto out;
++ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out);
+
+ fpath = gf_strdup(filepath);
+ if (!fpath) {
+@@ -638,20 +638,14 @@ rpc_transport_unix_options_build(dict_t **options, char *filepath,
+ if (ret)
+ goto out;
+ }
+-
+- *options = dict;
+ out:
+- if (ret && dict) {
+- dict_unref(dict);
+- }
+ return ret;
+ }
+
+ int
+-rpc_transport_inet_options_build(dict_t **options, const char *hostname,
+- int port, char *af)
++rpc_transport_inet_options_build(dict_t *dict, const char *hostname, int port,
++ char *af)
+ {
+- dict_t *dict = NULL;
+ char *host = NULL;
+ int ret = -1;
+ #ifdef IPV6_DEFAULT
+@@ -660,13 +654,9 @@ rpc_transport_inet_options_build(dict_t **options, const char *hostname,
+ char *addr_family = "inet";
+ #endif
+
+- GF_ASSERT(options);
+ GF_ASSERT(hostname);
+ GF_ASSERT(port >= 1024);
+-
+- dict = dict_new();
+- if (!dict)
+- goto out;
++ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out);
+
+ host = gf_strdup((char *)hostname);
+ if (!host) {
+@@ -702,12 +692,6 @@ rpc_transport_inet_options_build(dict_t **options, const char *hostname,
+ "failed to set trans-type with socket");
+ goto out;
+ }
+-
+- *options = dict;
+ out:
+- if (ret && dict) {
+- dict_unref(dict);
+- }
+-
+ return ret;
+ }
+diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
+index 9e75d1a..64b7e9b 100644
+--- a/rpc/rpc-lib/src/rpc-transport.h
++++ b/rpc/rpc-lib/src/rpc-transport.h
+@@ -303,11 +303,11 @@ rpc_transport_keepalive_options_set(dict_t *options, int32_t interval,
+ int32_t time, int32_t timeout);
+
+ int
+-rpc_transport_unix_options_build(dict_t **options, char *filepath,
++rpc_transport_unix_options_build(dict_t *options, char *filepath,
+ int frame_timeout);
+
+ int
+-rpc_transport_inet_options_build(dict_t **options, const char *hostname,
++rpc_transport_inet_options_build(dict_t *options, const char *hostname,
+ int port, char *af);
+
+ void
+diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
+index 74373c4..5a35139 100644
+--- a/rpc/rpc-lib/src/rpcsvc.c
++++ b/rpc/rpc-lib/src/rpcsvc.c
+@@ -2615,18 +2615,13 @@ rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options)
+ }
+
+ int
+-rpcsvc_transport_unix_options_build(dict_t **options, char *filepath)
++rpcsvc_transport_unix_options_build(dict_t *dict, char *filepath)
+ {
+- dict_t *dict = NULL;
+ char *fpath = NULL;
+ int ret = -1;
+
+ GF_ASSERT(filepath);
+- GF_ASSERT(options);
+-
+- dict = dict_new();
+- if (!dict)
+- goto out;
++ GF_VALIDATE_OR_GOTO("rpcsvc", dict, out);
+
+ fpath = gf_strdup(filepath);
+ if (!fpath) {
+@@ -2649,13 +2644,9 @@ rpcsvc_transport_unix_options_build(dict_t **options, char *filepath)
+ ret = dict_set_str(dict, "transport-type", "socket");
+ if (ret)
+ goto out;
+-
+- *options = dict;
+ out:
+ if (ret) {
+ GF_FREE(fpath);
+- if (dict)
+- dict_unref(dict);
+ }
+ return ret;
+ }
+diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
+index 34045ce..a51edc7 100644
+--- a/rpc/rpc-lib/src/rpcsvc.h
++++ b/rpc/rpc-lib/src/rpcsvc.h
+@@ -665,7 +665,7 @@ rpcsvc_actor_t *
+ rpcsvc_program_actor(rpcsvc_request_t *req);
+
+ int
+-rpcsvc_transport_unix_options_build(dict_t **options, char *filepath);
++rpcsvc_transport_unix_options_build(dict_t *options, char *filepath);
+ int
+ rpcsvc_set_allow_insecure(rpcsvc_t *svc, dict_t *options);
+ int
+diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c
+index cf35175..dcdcfb1 100644
+--- a/xlators/features/changelog/src/changelog-rpc-common.c
++++ b/xlators/features/changelog/src/changelog-rpc-common.c
+@@ -47,7 +47,7 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile,
+ if (!options)
+ goto error_return;
+
+- ret = rpc_transport_unix_options_build(&options, sockfile, 0);
++ ret = rpc_transport_unix_options_build(options, sockfile, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR,
+ "failed to build rpc options");
+@@ -73,6 +73,7 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile,
+ goto dealloc_rpc_clnt;
+ }
+
++ dict_unref(options);
+ return rpc;
+
+ dealloc_rpc_clnt:
+@@ -303,7 +304,11 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata,
+ if (!cbkdata)
+ cbkdata = this;
+
+- ret = rpcsvc_transport_unix_options_build(&options, sockfile);
++ options = dict_new();
++ if (!options)
++ return NULL;
++
++ ret = rpcsvc_transport_unix_options_build(options, sockfile);
+ if (ret)
+ goto dealloc_dict;
+
+diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c
+index b608cdf..bc415ef 100644
+--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c
++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c
+@@ -101,8 +101,12 @@ svs_mgmt_init(xlator_t *this)
+ if (cmd_args->volfile_server)
+ host = cmd_args->volfile_server;
+
++ options = dict_new();
++ if (!options)
++ goto out;
++
+ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args);
+- ret = rpc_transport_inet_options_build(&options, host, port,
++ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt != NULL ? opt->value : NULL));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_BUILD_TRNSPRT_OPT_FAILED,
+@@ -145,6 +149,8 @@ svs_mgmt_init(xlator_t *this)
+ gf_msg_debug(this->name, 0, "svs mgmt init successful");
+
+ out:
++ if (options)
++ dict_unref(options);
+ if (ret)
+ if (priv) {
+ rpc_clnt_connection_cleanup(&priv->rpc->conn);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+index 052438c..16eefa1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+@@ -29,6 +29,10 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
+ if (!this)
+ goto out;
+
++ options = dict_new();
++ if (!options)
++ goto out;
++
+ svc = glusterd_conn_get_svc_object(conn);
+ if (!svc) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+@@ -36,7 +40,7 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
+ goto out;
+ }
+
+- ret = rpc_transport_unix_options_build(&options, sockpath, frame_timeout);
++ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout);
+ if (ret)
+ goto out;
+
+@@ -66,6 +70,8 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
+ conn->rpc = rpc;
+ conn->notify = notify;
+ out:
++ if (options)
++ dict_unref(options);
+ if (ret) {
+ if (rpc) {
+ rpc_clnt_unref(rpc);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 1cb9013..6147995 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -3493,11 +3493,10 @@ out:
+ }
+
+ int
+-glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
++glusterd_transport_inet_options_build(dict_t *dict, const char *hostname,
+ int port, char *af)
+ {
+ xlator_t *this = NULL;
+- dict_t *dict = NULL;
+ int32_t interval = -1;
+ int32_t time = -1;
+ int32_t timeout = -1;
+@@ -3505,14 +3504,14 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
+
+ this = THIS;
+ GF_ASSERT(this);
+- GF_ASSERT(options);
++ GF_ASSERT(dict);
+ GF_ASSERT(hostname);
+
+ if (!port)
+ port = GLUSTERD_DEFAULT_PORT;
+
+ /* Build default transport options */
+- ret = rpc_transport_inet_options_build(&dict, hostname, port, af);
++ ret = rpc_transport_inet_options_build(dict, hostname, port, af);
+ if (ret)
+ goto out;
+
+@@ -3552,7 +3551,6 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
+ if ((interval > 0) || (time > 0))
+ ret = rpc_transport_keepalive_options_set(dict, interval, time,
+ timeout);
+- *options = dict;
+ out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+@@ -3572,6 +3570,10 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ if (!peerctx)
+ goto out;
+
++ options = dict_new();
++ if (!options)
++ goto out;
++
+ if (args)
+ peerctx->args = *args;
+
+@@ -3586,7 +3588,7 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ if (ret)
+ gf_log(this->name, GF_LOG_TRACE,
+ "option transport.address-family is not set in xlator options");
+- ret = glusterd_transport_inet_options_build(&options, peerinfo->hostname,
++ ret = glusterd_transport_inet_options_build(options, peerinfo->hostname,
+ peerinfo->port, af);
+ if (ret)
+ goto out;
+@@ -3596,6 +3598,7 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ * create our RPC endpoint with the same address that the peer would
+ * use to reach us.
+ */
++
+ if (this->options) {
+ data = dict_getn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"));
+@@ -3637,6 +3640,9 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ peerctx = NULL;
+ ret = 0;
+ out:
++ if (options)
++ dict_unref(options);
++
+ GF_FREE(peerctx);
+ return ret;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+index ed5ded5..cbed9a9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+@@ -391,6 +391,10 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
+ if (!defrag)
+ goto out;
+
++ options = dict_new();
++ if (!options)
++ goto out;
++
+ GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
+ /* Check if defrag sockfile exists in the new location
+ * in /var/run/ , if it does not try the old location
+@@ -420,7 +424,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
+ * default timeout of 30mins used for unreliable network connections is
+ * too long for unix domain socket connections.
+ */
+- ret = rpc_transport_unix_options_build(&options, sockfile, 600);
++ ret = rpc_transport_unix_options_build(options, sockfile, 600);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL,
+ "Unix options build failed");
+@@ -437,6 +441,8 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
+ }
+ ret = 0;
+ out:
++ if (options)
++ dict_unref(options);
+ return ret;
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index ef664c2..2dd5f91 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -1980,7 +1980,11 @@ glusterd_brick_connect(glusterd_volinfo_t *volinfo,
+ * The default timeout of 30mins used for unreliable network
+ * connections is too long for unix domain socket connections.
+ */
+- ret = rpc_transport_unix_options_build(&options, socketpath, 600);
++ options = dict_new();
++ if (!options)
++ goto out;
++
++ ret = rpc_transport_unix_options_build(options, socketpath, 600);
+ if (ret)
+ goto out;
+
+@@ -1999,7 +2003,8 @@ glusterd_brick_connect(glusterd_volinfo_t *volinfo,
+ brickinfo->rpc = rpc;
+ }
+ out:
+-
++ if (options)
++ dict_unref(options);
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
+index 89afb9c..d4ab630 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.c
++++ b/xlators/mgmt/glusterd/src/glusterd.c
+@@ -1111,11 +1111,15 @@ glusterd_init_uds_listener(xlator_t *this)
+
+ GF_ASSERT(this);
+
++ options = dict_new();
++ if (!options)
++ goto out;
++
+ sock_data = dict_get(this->options, "glusterd-sockfile");
+ (void)snprintf(sockfile, sizeof(sockfile), "%s",
+ sock_data ? sock_data->data : DEFAULT_GLUSTERD_SOCKFILE);
+
+- ret = rpcsvc_transport_unix_options_build(&options, sockfile);
++ ret = rpcsvc_transport_unix_options_build(options, sockfile);
+ if (ret)
+ goto out;
+
+diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c
+index 0eca45d..2ede24b 100644
+--- a/xlators/nfs/server/src/acl3.c
++++ b/xlators/nfs/server/src/acl3.c
+@@ -787,9 +787,14 @@ acl3svc_init(xlator_t *nfsx)
+ goto err;
+ }
+
++ if (options)
++ dict_unref(options);
++
+ acl3_inited = _gf_true;
+ return &acl3prog;
+ err:
++ if (options)
++ dict_unref(options);
+ return NULL;
+ }
+
+diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
+index 726dc29..396809c 100644
+--- a/xlators/nfs/server/src/mount3.c
++++ b/xlators/nfs/server/src/mount3.c
+@@ -4102,8 +4102,13 @@ mnt3svc_init(xlator_t *nfsx)
+ gf_msg_debug(GF_MNT, GF_LOG_DEBUG, "Thread creation failed");
+ }
+ }
++ if (options)
++ dict_unref(options);
++
+ return &mnt3prog;
+ err:
++ if (options)
++ dict_unref(options);
+ return NULL;
+ }
+
+diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c
+index a341ebd..c3c1453 100644
+--- a/xlators/nfs/server/src/nlm4.c
++++ b/xlators/nfs/server/src/nlm4.c
+@@ -1121,6 +1121,8 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame)
+ ret = 0;
+
+ err:
++ if (options)
++ dict_unref(options);
+ if (ret == -1) {
+ if (rpc_clnt)
+ rpc_clnt_unref(rpc_clnt);
+@@ -2708,8 +2710,13 @@ nlm4svc_init(xlator_t *nfsx)
+
+ gf_timer_call_after(nfsx->ctx, timeout, nlm_grace_period_over, NULL);
+ nlm4_inited = _gf_true;
++
++ if (options)
++ dict_unref(options);
+ return &nlm4prog;
+ err:
++ if (options)
++ dict_unref(options);
+ return NULL;
+ }
+
+--
+1.8.3.1
+
diff --git a/0103-dht-NULL-check-before-setting-error-flag.patch b/0103-dht-NULL-check-before-setting-error-flag.patch
new file mode 100644
index 0000000..addd4f7
--- /dev/null
+++ b/0103-dht-NULL-check-before-setting-error-flag.patch
@@ -0,0 +1,43 @@
+From 45c9eeb5544738d4d1d0aefb8a7f61e5d8859ad8 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Tue, 12 Mar 2019 18:00:37 +0530
+Subject: [PATCH 103/124] dht: NULL check before setting error flag
+
+The function dht_common_mark_mdsxattr blindly sets a value through
+an integer pointer without validating it. In fact, there are
+two callers of this function that pass NULL for the
+same pointer, which leads to a crash.
+
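+The guard itself is small: the pointer is now dereferenced only when
+the caller actually passed one (condensed from the diff below):
+
+    if (errst)
+        (*errst) = 1;
+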
+Backport of : https://review.gluster.org/#/c/22345/
+
+>Change-Id: Id94ffe216f6a21f007b3291bff0b1e1c1989075c
+>fixes: bz#1687811
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: Id9785c16184fd80e8184e5ae135fb63bf44692cd
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167846
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 367548f..2a68193 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -852,7 +852,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
+ "Failed to get hashed subvol for path %s"
+ "gfid is %s ",
+ local->loc.path, gfid_local);
+- (*errst) = 1;
++ if (errst)
++ (*errst) = 1;
+ ret = -1;
+ goto out;
+ }
+--
+1.8.3.1
+
diff --git a/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch b/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch
new file mode 100644
index 0000000..214ccb4
--- /dev/null
+++ b/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch
@@ -0,0 +1,151 @@
+From faaaa3452ceec6afcc18cffc9beca3fe19841cce Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 3 Jan 2019 17:44:18 +0530
+Subject: [PATCH 104/124] afr/shd: Cleanup self heal daemon resources during
+ afr fini
+
+We were not properly cleaning up self-heal daemon resources
+during afr fini. This patch cleans them up.
+
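+In outline, each healer thread is stopped before its synchronization
+primitives are destroyed. A condensed sketch (taken from the diff
+below):
+
+    if (healer->running) {
+        ret = gf_thread_cleanup_xint(healer->thread);
+        if (ret)
+            gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+                   "Failed to clean up healer threads.");
+        healer->thread = 0;
+    }
+    pthread_cond_destroy(&healer->cond);
+    pthread_mutex_destroy(&healer->mutex);
+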
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22151/
+
+>Change-Id: I597860be6f781b195449e695d871b8667a418d5a
+>updates: bz#1659708
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I7be981b9c2476c8cacadea6b14d74234f67b714f
+BUG: 1471742
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167845
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/syncop-utils.c | 8 +++++
+ xlators/cluster/afr/src/afr-self-heald.c | 2 ++
+ xlators/cluster/afr/src/afr.c | 57 ++++++++++++++++++++++++++++++++
+ 3 files changed, 67 insertions(+)
+
+diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
+index be03527..b842142 100644
+--- a/libglusterfs/src/syncop-utils.c
++++ b/libglusterfs/src/syncop-utils.c
+@@ -350,6 +350,11 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+ gf_boolean_t cond_init = _gf_false;
+ gf_boolean_t mut_init = _gf_false;
+ gf_dirent_t entries;
++ xlator_t *this = NULL;
++
++ if (frame) {
++ this = frame->this;
++ }
+
+ /*For this functionality to be implemented in general, we need
+ * synccond_t infra which doesn't block the executing thread. Until then
+@@ -397,6 +402,9 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+
+ list_for_each_entry_safe(entry, tmp, &entries.list, list)
+ {
++ if (this && this->cleanup_starting)
++ goto out;
++
+ list_del_init(&entry->list);
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
+ gf_dirent_entry_free(entry);
+diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
+index 7eb1207..8bc4720 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.c
++++ b/xlators/cluster/afr/src/afr-self-heald.c
+@@ -373,6 +373,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer)
+
+ time(&event->start_time);
+ event->end_time = 0;
++ _mask_cancellation();
+ }
+
+ void
+@@ -394,6 +395,7 @@ afr_shd_sweep_done(struct subvol_healer *healer)
+
+ if (eh_save_history(shd->statistics[healer->subvol], history) < 0)
+ GF_FREE(history);
++ _unmask_cancellation();
+ }
+
+ int
+diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
+index 33258a0..a0a7551 100644
+--- a/xlators/cluster/afr/src/afr.c
++++ b/xlators/cluster/afr/src/afr.c
+@@ -611,13 +611,70 @@ init(xlator_t *this)
+ out:
+ return ret;
+ }
++void
++afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
++{
++ int ret = -1;
++
++ if (!healer)
++ return;
++
++ if (healer->running) {
++ /*
++ * If there are any resources to cleanup, We need
++ * to do that gracefully using pthread_cleanup_push
++ */
++ ret = gf_thread_cleanup_xint(healer->thread);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
++ "Failed to clean up healer threads.");
++ healer->thread = 0;
++ }
++ pthread_cond_destroy(&healer->cond);
++ pthread_mutex_destroy(&healer->mutex);
++}
++
++void
++afr_selfheal_daemon_fini(xlator_t *this)
++{
++ struct subvol_healer *healer = NULL;
++ afr_self_heald_t *shd = NULL;
++ afr_private_t *priv = NULL;
++ int i = 0;
++
++ priv = this->private;
++ if (!priv)
++ return;
++
++ shd = &priv->shd;
++ if (!shd->iamshd)
++ return;
++
++ for (i = 0; i < priv->child_count; i++) {
++ healer = &shd->index_healers[i];
++ afr_destroy_healer_object(this, healer);
+
++ healer = &shd->full_healers[i];
++ afr_destroy_healer_object(this, healer);
++
++ if (shd->statistics[i])
++ eh_destroy(shd->statistics[i]);
++ }
++ GF_FREE(shd->index_healers);
++ GF_FREE(shd->full_healers);
++ GF_FREE(shd->statistics);
++ if (shd->split_brain)
++ eh_destroy(shd->split_brain);
++}
+ void
+ fini(xlator_t *this)
+ {
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
++
++ afr_selfheal_daemon_fini(this);
++
+ LOCK(&priv->lock);
+ if (priv->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+--
+1.8.3.1
+
diff --git a/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch b/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch
new file mode 100644
index 0000000..a735794
--- /dev/null
+++ b/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch
@@ -0,0 +1,336 @@
+From 023854d5573211d4737eb0ebe7ec954a7b7bb4ee Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Mon, 15 Apr 2019 10:34:34 +0530
+Subject: [PATCH 105/124] core: Log level changes do not effect on running
+ client process
+
+Problem: commit c34e4161f3cb6539ec83a9020f3d27eb4759a975 set the log
+ level per xlator during reconfigure only for a brick process, not
+ for the client process.
+
+Solution: Change the per-xlator log level only if brick_mux is
+ enabled. To detect brick multiplexing, introduce a brick_mux flag
+ in ctx->cmd_args (see the sketch below).
+
+Note: There are two other changes in this patch:
+ 1) Ignore the client-log-level option when attaching a brick to an
+ already running brick process if brick_mux is enabled
+ 2) Add a log message that prints the pid of the running process to
+ make debugging easier
+
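+The sketch below shows the server-only gating for the new flag,
+condensed from the glusterfsd.c hunk in this patch:
+
+    /* set brick_mux mode only for server process */
+    if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) {
+        gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43,
+               "command line argument --brick-mux is valid only for "
+               "brick process");
+        goto out;
+    }
+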
+> Change-Id: I39e85de778e150d0685cd9a79425ce8b4783f9c9
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> Fixes: bz#1696046
+> (Cherry picked from commit 798aadbe51a9a02dd98a0f861cc239ecf7c8ed57)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22495/)
+
+Change-Id: If82cc8e51cf00bd50d3321d31ec420f89786ea02
+Fixes: bz#1695081
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167828
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfsd/src/glusterfsd-messages.h | 2 +-
+ glusterfsd/src/glusterfsd.c | 20 ++++-
+ glusterfsd/src/glusterfsd.h | 1 +
+ libglusterfs/src/glusterfs/glusterfs.h | 1 +
+ tests/bugs/glusterd/bug-1696046.t | 113 +++++++++++++++++++++++++++++
+ xlators/debug/io-stats/src/io-stats.c | 22 +++---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 7 ++
+ 7 files changed, 152 insertions(+), 14 deletions(-)
+ create mode 100644 tests/bugs/glusterd/bug-1696046.t
+
+diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h
+index 94312a5..280624c 100644
+--- a/glusterfsd/src/glusterfsd-messages.h
++++ b/glusterfsd/src/glusterfsd-messages.h
+@@ -36,6 +36,6 @@ GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3,
+ glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33,
+ glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36,
+ glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39,
+- glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42);
++ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42, glusterfsd_msg_43);
+
+ #endif /* !_GLUSTERFSD_MESSAGES_H_ */
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 3aa89ca..6aee4c1 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -85,8 +85,7 @@ static char gf_doc[] = "";
+ static char argp_doc[] =
+ "--volfile-server=SERVER [MOUNT-POINT]\n"
+ "--volfile=VOLFILE [MOUNT-POINT]";
+-const char *argp_program_version =
+- PACKAGE_NAME" "PACKAGE_VERSION;
++const char *argp_program_version = PACKAGE_NAME " " PACKAGE_VERSION;
+ const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
+
+ static error_t
+@@ -266,6 +265,7 @@ static struct argp_option gf_options[] = {
+ "attribute, dentry and page-cache. "
+ "Disable this only if same files/directories are not accessed across "
+ "two different mounts concurrently [default: \"on\"]"},
++ {"brick-mux", ARGP_BRICK_MUX_KEY, 0, 0, "Enable brick mux. "},
+ {0, 0, 0, 0, "Miscellaneous Options:"},
+ {
+ 0,
+@@ -702,7 +702,6 @@ create_fuse_mount(glusterfs_ctx_t *ctx)
+ xlator_t *master = NULL;
+
+ cmd_args = &ctx->cmd_args;
+-
+ if (!cmd_args->mount_point) {
+ gf_msg_trace("glusterfsd", 0,
+ "mount point not found, not a client process");
+@@ -1090,6 +1089,10 @@ parse_opts(int key, char *arg, struct argp_state *state)
+ cmd_args->thin_client = _gf_true;
+ break;
+
++ case ARGP_BRICK_MUX_KEY:
++ cmd_args->brick_mux = _gf_true;
++ break;
++
+ case ARGP_PID_FILE_KEY:
+ cmd_args->pid_file = gf_strdup(arg);
+ break;
+@@ -1207,7 +1210,6 @@ parse_opts(int key, char *arg, struct argp_state *state)
+ case ARGP_KEY_ARG:
+ if (state->arg_num >= 1)
+ argp_usage(state);
+-
+ cmd_args->mount_point = gf_strdup(arg);
+ break;
+
+@@ -2540,6 +2542,8 @@ postfork:
+ if (ret)
+ goto out;
+ }
++ gf_log("glusterfs", GF_LOG_INFO, "Pid of current running process is %d",
++ getpid());
+ ret = gf_log_inject_timer_event(ctx);
+
+ glusterfs_signals_setup(ctx);
+@@ -2787,6 +2791,14 @@ main(int argc, char *argv[])
+ if (ret)
+ goto out;
+
++ /* set brick_mux mode only for server process */
++ if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) {
++ gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43,
++ "command line argument --brick-mux is valid only for brick "
++ "process");
++ goto out;
++ }
++
+ /* log the version of glusterfs running here along with the actual
+ command line options. */
+ {
+diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
+index 35cf6d8..fa55789 100644
+--- a/glusterfsd/src/glusterfsd.h
++++ b/glusterfsd/src/glusterfsd.h
+@@ -111,6 +111,7 @@ enum argp_option_keys {
+ ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
+ ARGP_FUSE_LRU_LIMIT_KEY = 190,
+ ARGP_FUSE_AUTO_INVAL_KEY = 191,
++ ARGP_BRICK_MUX_KEY = 192
+ };
+
+ struct _gfd_vol_top_priv {
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index deec5ba..fb727fc 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -575,6 +575,7 @@ struct _cmd_args {
+
+ int fuse_flush_handle_interrupt;
+ int fuse_auto_inval;
++ bool brick_mux;
+ };
+ typedef struct _cmd_args cmd_args_t;
+
+diff --git a/tests/bugs/glusterd/bug-1696046.t b/tests/bugs/glusterd/bug-1696046.t
+new file mode 100644
+index 0000000..e1c1eb2
+--- /dev/null
++++ b/tests/bugs/glusterd/bug-1696046.t
+@@ -0,0 +1,113 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup;
++
++function count_up_bricks {
++ $CLI --xml volume status $1 | grep '<status>1' | wc -l
++}
++
++function count_brick_processes {
++ pgrep glusterfsd | wc -l
++}
++
++logdir=`gluster --print-logdir`
++
++## Start and create a volume
++TEST glusterd;
++TEST pidof glusterd;
++
++TEST $CLI volume set all cluster.brick-multiplex on
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
++TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{1,2,3};
++
++## Start volume and verify
++TEST $CLI volume start $V0;
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST $CLI volume start $V1;
++EXPECT 'Started' volinfo_field $V1 'Status';
++
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V1
++
++EXPECT 1 count_brick_processes
++
++# Mount V0
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
++
++function client-log-file-name()
++{
++ logfilename=$M0".log"
++ echo ${logfilename:1} | tr / -
++}
++
++function brick-log-file-name()
++{
++ logfilename=$B0"/"$V0"1.log"
++ echo ${logfilename:1} | tr / -
++}
++
++log_file=$logdir"/"`client-log-file-name`
++nofdlog=$(cat $log_file | grep " D " | wc -l)
++TEST [ $((nofdlog)) -eq 0 ]
++
++brick_log_file=$logdir"/bricks/"`brick-log-file-name`
++nofdlog=$(cat $brick_log_file | grep " D " | wc -l)
++TEST [ $((nofdlog)) -eq 0 ]
++
++## Set brick-log-level to DEBUG
++TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG
++
++# Do some operation
++touch $M0/file1
++
++# Check that debug messages exist only for V0. The server xlator is
++# shared in brick-mux mode, so after enabling DEBUG logging, debug
++# messages should be present for other xlators of V0, such as posix
++
++brick_log_file=$logdir"/bricks/"`brick-log-file-name`
++nofdlog=$(cat $brick_log_file | grep file1 | grep -v server | wc -l)
++TEST [ $((nofdlog)) -ne 0 ]
++
++#Check if any debug log exist in client-log file
++nofdlog=$(cat $log_file | grep " D " | wc -l)
++TEST [ $((nofdlog)) -eq 0 ]
++
++## Set brick-log-level to INFO
++TEST $CLI volume set $V0 diagnostics.brick-log-level INFO
++
++## Set client-log-level to DEBUG
++TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
++
++# Do some operation
++touch $M0/file2
++
++nofdlog=$(cat $brick_log_file | grep " D " | grep file2 | wc -l)
++TEST [ $((nofdlog)) -eq 0 ]
++
++nofdlog=$(cat $log_file | grep " D " | wc -l)
++TEST [ $((nofdlog)) -ne 0 ]
++
++# Unmount V0
++TEST umount $M0
++
++#Mount V1
++TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M0;
++
++#do some operation
++touch $M0/file3
++
++
++# DEBUG log level is enabled only for V0, so no debug message should appear
++# in the log for the file3 creation except from the server xlator, which is
++# the common xlator in brick multiplex
++nofdlog=$(cat $brick_log_file | grep file3 | grep -v server | wc -l)
++TEST [ $((nofdlog)) -eq 0 ]
++
++# Unmount V1
++TEST umount $M0
++
++cleanup;
+diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
+index 41b57c5..aa91a0a 100644
+--- a/xlators/debug/io-stats/src/io-stats.c
++++ b/xlators/debug/io-stats/src/io-stats.c
+@@ -3704,19 +3704,23 @@ xlator_set_loglevel(xlator_t *this, int log_level)
+ active = ctx->active;
+ top = active->first;
+
+- if (strcmp(top->type, "protocol/server") || (log_level == -1))
++ if (log_level == -1)
+ return;
+
+- /* Set log-level for server xlator */
+- top->loglevel = log_level;
++ if (ctx->cmd_args.brick_mux) {
++ /* Set log-level for all brick xlators */
++ top->loglevel = log_level;
+
+- /* Set log-level for parent xlator */
+- if (this->parents)
+- this->parents->xlator->loglevel = log_level;
++ /* Set log-level for parent xlator */
++ if (this->parents)
++ this->parents->xlator->loglevel = log_level;
+
+- while (trav) {
+- trav->loglevel = log_level;
+- trav = trav->next;
++ while (trav) {
++ trav->loglevel = log_level;
++ trav = trav->next;
++ }
++ } else {
++ gf_log_set_loglevel(this->ctx, log_level);
+ }
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 2dd5f91..fdd7d91 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -2240,6 +2240,9 @@ retry:
+ if (volinfo->memory_accounting)
+ runner_add_arg(&runner, "--mem-accounting");
+
++ if (is_brick_mx_enabled())
++ runner_add_arg(&runner, "--brick-mux");
++
+ runner_log(&runner, "", 0, "Starting GlusterFS");
+
+ brickinfo->port = port;
+@@ -2378,6 +2381,10 @@ unsafe_option(dict_t *this, char *key, data_t *value, void *arg)
+ return _gf_false;
+ }
+
++ if (fnmatch("*diagnostics.client-log*", key, 0) == 0) {
++ return _gf_false;
++ }
++
+ return _gf_true;
+ }
+
+--
+1.8.3.1
+
diff --git a/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch b/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch
new file mode 100644
index 0000000..6788ba8
--- /dev/null
+++ b/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch
@@ -0,0 +1,111 @@
+From 55d945603bb52f0787c5200118673d6206ec3492 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Fri, 12 Apr 2019 19:55:10 +0530
+Subject: [PATCH 106/124] libgfchangelog : use find_library to locate shared
+ library
+
+Issue:
+
+libgfchangelog.so: cannot open shared object file
+
+Due to hardcoded shared library name runtime loader looks for particular version of
+a shared library.
+
+Solution:
+
+Using find_library to locate shared library at runtime solves this issue.
+
+Traceback (most recent call last):
+ File "/usr/libexec/glusterfs/python/syncdaemon/gsyncd.py", line 323, in main
+ func(args)
+ File "/usr/libexec/glusterfs/python/syncdaemon/subcmds.py", line 82, in subcmd_worker
+ local.service_loop(remote)
+ File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", line 1261, in service_loop
+ changelog_agent.init()
+ File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 233, in __call__
+ return self.ins(self.meth, *a)
+ File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 215, in __call__
+ raise res
+OSError: libgfchangelog.so: cannot open shared object file: No such file or directory
+
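+At the C level, the same failure mode looks like the sketch below (purely
+illustrative, not part of this patch; it assumes the installed soname is
+libgfchangelog.so.0). ctypes' find_library() resolves the versioned file
+name for Python callers in a similar spirit:
+
+    #include <dlfcn.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        /* The unversioned name typically exists only when the -devel
+         * symlink is installed; a runtime install ships just the soname. */
+        void *h = dlopen("libgfchangelog.so", RTLD_LAZY | RTLD_GLOBAL);
+        if (!h)
+            h = dlopen("libgfchangelog.so.0", RTLD_LAZY | RTLD_GLOBAL);
+        if (!h)
+            fprintf(stderr, "dlopen: %s\n", dlerror());
+        return h ? 0 : 1;
+    }
+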
+>Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22557/
+>Change-Id: I3dd013d701ed1cd99ba7ef20d1898f343e1db8f5
+>fixes: bz#1699394
+>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+fixes: bz#1699271
+Change-Id: If8b5827cdac658eb3a211109bd397db9a6fee8e6
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167907
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/syncdaemon/libgfchangelog.py | 3 ++-
+ tools/glusterfind/src/libgfchangelog.py | 7 +++----
+ xlators/features/changelog/lib/examples/python/libgfchangelog.py | 4 +++-
+ 3 files changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py
+index fff9d24..8d12956 100644
+--- a/geo-replication/syncdaemon/libgfchangelog.py
++++ b/geo-replication/syncdaemon/libgfchangelog.py
+@@ -10,13 +10,14 @@
+
+ import os
+ from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong
++from ctypes.util import find_library
+ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
+ from py2py3 import gr_cl_history_changelog, gr_cl_done, gr_create_string_buffer
+ from py2py3 import gr_cl_register, gr_cl_history_done, bytearray_to_str
+
+
+ class Changes(object):
+- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL,
++ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL,
+ use_errno=True)
+
+ @classmethod
+diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
+index 1ef177a..513bb10 100644
+--- a/tools/glusterfind/src/libgfchangelog.py
++++ b/tools/glusterfind/src/libgfchangelog.py
+@@ -9,8 +9,8 @@
+ # cases as published by the Free Software Foundation.
+
+ import os
+-from ctypes import CDLL, get_errno, create_string_buffer, c_ulong, byref
+-from ctypes import RTLD_GLOBAL
++from ctypes import CDLL, RTLD_GLOBAL, get_errno, create_string_buffer, c_ulong, byref
++from ctypes.util import find_library
+ from gfind_py2py3 import bytearray_to_str, gf_create_string_buffer
+ from gfind_py2py3 import gfind_history_changelog, gfind_changelog_register
+ from gfind_py2py3 import gfind_history_changelog_done
+@@ -19,8 +19,7 @@ from gfind_py2py3 import gfind_history_changelog_done
+ class ChangelogException(OSError):
+ pass
+
+-
+-libgfc = CDLL("libgfchangelog.so", use_errno=True, mode=RTLD_GLOBAL)
++libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True)
+
+
+ def raise_oserr(prefix=None):
+diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
+index 2cdbf11..2da9f2d 100644
+--- a/xlators/features/changelog/lib/examples/python/libgfchangelog.py
++++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
+@@ -1,8 +1,10 @@
+ import os
+ from ctypes import *
++from ctypes.util import find_library
+
+ class Changes(object):
+- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL, use_errno=True)
++ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL,
++ use_errno=True)
+
+ @classmethod
+ def geterrno(cls):
+--
+1.8.3.1
+
diff --git a/0107-gfapi-add-function-to-set-client-pid.patch b/0107-gfapi-add-function-to-set-client-pid.patch
new file mode 100644
index 0000000..741f2f3
--- /dev/null
+++ b/0107-gfapi-add-function-to-set-client-pid.patch
@@ -0,0 +1,93 @@
+From 799a74e5e8123cd2e67e9ed5c0f986630a8e0547 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Thu, 14 Mar 2019 18:41:11 +0530
+Subject: [PATCH 107/124] gfapi: add function to set client-pid
+
+This API offers the ability to set the pid of a client to a particular
+value, identical to how gluster fuse clients provide the --client-pid
+option. This is an internal API to be used by gluster processes only. See
+https://lists.gluster.org/pipermail/gluster-devel/2019-March/055925.html
+for more details. Currently glfsheal is the only proposed consumer.
+
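+A minimal consumer sketch (hypothetical volume name and server; the real
+consumer, glfsheal, does the same thing in the hunk further below):
+
+    #include <glusterfs/api/glfs.h>
+    #include "glfs-internal.h" /* private header declaring glfs_setfspid */
+
+    static int
+    heal_connect(void)
+    {
+        glfs_t *fs = glfs_new("myvol"); /* hypothetical volume */
+        if (!fs)
+            return -1;
+        glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+        /* set the client-pid before glfs_init(), as glfsheal does */
+        if (glfs_setfspid(fs, GF_CLIENT_PID_GLFS_HEAL) != 0)
+            return -1;
+        return glfs_init(fs);
+    }
+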
+Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22368/
+Change-Id: I0620be2127d79d69cdd57cffb29bba44e6e5da1f
+BUG 1676495
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166459
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ api/src/gfapi.aliases | 1 +
+ api/src/gfapi.map | 4 ++++
+ api/src/glfs-internal.h | 6 ++++++
+ api/src/glfs.c | 15 +++++++++++++++
+ 4 files changed, 26 insertions(+)
+
+diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
+index 25e2d74..09c0fd8 100644
+--- a/api/src/gfapi.aliases
++++ b/api/src/gfapi.aliases
+@@ -172,6 +172,7 @@ _pub_glfs_upcall_lease_get_lease_type _glfs_upcall_lease_get_lease_type$GFAPI_4.
+
+ _priv_glfs_statx _glfs_statx$GFAPI_6.0
+ _priv_glfs_iatt_from_statx _glfs_iatt_from_statx$GFAPI_6.0
++_priv_glfs_setfspid _glfs_setfspid$GFAPI_6.1
+
+ _pub_glfs_read_async _glfs_read_async$GFAPI_6.0
+ _pub_glfs_write_async _glfs_write_async$GFAPI_6.0
+diff --git a/api/src/gfapi.map b/api/src/gfapi.map
+index bb201c7..b97a614 100644
+--- a/api/src/gfapi.map
++++ b/api/src/gfapi.map
+@@ -267,3 +267,7 @@ GFAPI_6.0 {
+ glfs_fsetattr;
+ } GFAPI_PRIVATE_6.0;
+
++GFAPI_PRIVATE_6.1 {
++ global:
++ glfs_setfspid;
++} GFAPI_6.0;
+diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
+index 40bbb8a..55401b2 100644
+--- a/api/src/glfs-internal.h
++++ b/api/src/glfs-internal.h
+@@ -702,4 +702,10 @@ void
+ glfs_iatt_from_statx(struct iatt *, const struct glfs_stat *)
+ GFAPI_PRIVATE(glfs_iatt_from_statx, 6.0);
+
++/*
++ * This API is a per thread setting, similar to glfs_setfs{u/g}id, because of
++ * the call to syncopctx_setfspid.
++ */
++int
++glfs_setfspid(struct glfs *, pid_t) GFAPI_PRIVATE(glfs_setfspid, 6.1);
+ #endif /* !_GLFS_INTERNAL_H */
+diff --git a/api/src/glfs.c b/api/src/glfs.c
+index b741f6e..f4a8e08 100644
+--- a/api/src/glfs.c
++++ b/api/src/glfs.c
+@@ -1461,6 +1461,21 @@ invalid_fs:
+
+ GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0);
+
++int
++priv_glfs_setfspid(struct glfs *fs, pid_t pid)
++{
++ cmd_args_t *cmd_args = NULL;
++ int ret = 0;
++
++ cmd_args = &fs->ctx->cmd_args;
++ cmd_args->client_pid = pid;
++ cmd_args->client_pid_set = 1;
++ ret = syncopctx_setfspid(&pid);
++
++ return ret;
++}
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1);
++
+ void
+ pub_glfs_free(void *ptr)
+ {
+--
+1.8.3.1
+
diff --git a/0108-afr-add-client-pid-to-all-gf_event-calls.patch b/0108-afr-add-client-pid-to-all-gf_event-calls.patch
new file mode 100644
index 0000000..eda9dd9
--- /dev/null
+++ b/0108-afr-add-client-pid-to-all-gf_event-calls.patch
@@ -0,0 +1,225 @@
+From ba1460a4fee0c41c7d7f7a2043bae37f7e751259 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Fri, 15 Mar 2019 19:31:03 +0530
+Subject: [PATCH 108/124] afr: add client-pid to all gf_event() calls
+
+client-pid for glustershd is GF_CLIENT_PID_SELF_HEALD
+client-pid for glfsheal is GF_CLIENT_PID_GLFS_HEAL
+
+Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22369/
+BUG: 1676495
+Change-Id: Ib3a863af160ff48c822a5e6b0c27c575c9887470
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166460
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ heal/src/glfs-heal.c | 6 ++++++
+ xlators/cluster/afr/src/afr-common.c | 12 ++++++++----
+ xlators/cluster/afr/src/afr-self-heal-common.c | 11 +++++++----
+ xlators/cluster/afr/src/afr-self-heal-data.c | 4 +++-
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 5 +++--
+ xlators/cluster/afr/src/afr-self-heal-metadata.c | 4 +++-
+ xlators/cluster/afr/src/afr-self-heal-name.c | 7 ++++---
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 10 ++++++++++
+ 8 files changed, 44 insertions(+), 15 deletions(-)
+
+diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
+index 6030de3..7e37e47 100644
+--- a/heal/src/glfs-heal.c
++++ b/heal/src/glfs-heal.c
+@@ -1688,6 +1688,12 @@ main(int argc, char **argv)
+ goto out;
+ }
+
++ ret = glfs_setfspid(fs, GF_CLIENT_PID_GLFS_HEAL);
++ if (ret) {
++ printf("Setting client pid failed, %s\n", strerror(errno));
++ goto out;
++ }
++
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ ret = -errno;
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 47a5d3a..3690b84 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -5233,7 +5233,8 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
+ "Subvolume '%s' came back up; "
+ "going online.",
+ child_xlator->name);
+- gf_event(EVENT_AFR_SUBVOL_UP, "subvol=%s", this->name);
++ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s",
++ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_UP;
+ }
+@@ -5310,7 +5311,8 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
+ "All subvolumes are down. Going "
+ "offline until at least one of them "
+ "comes back up.");
+- gf_event(EVENT_AFR_SUBVOLS_DOWN, "subvol=%s", this->name);
++ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s",
++ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ }
+@@ -5585,12 +5587,14 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ if (!had_quorum && has_quorum) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
+ "Client-quorum is met");
+- gf_event(EVENT_AFR_QUORUM_MET, "subvol=%s", this->name);
++ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s",
++ this->ctx->cmd_args.client_pid, this->name);
+ }
+ if (had_quorum && !has_quorum) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+- gf_event(EVENT_AFR_QUORUM_FAIL, "subvol=%s", this->name);
++ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s",
++ this->ctx->cmd_args.client_pid, this->name);
+ }
+ }
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 2268761..595bed4 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -383,11 +383,12 @@ out:
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2),
+ priv->children[src_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
++ "client-pid=%d;"
+ "subvol=%s;type=gfid;file="
+ "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;"
+ "child-%d=%s;gfid-%d=%s",
+- this->name, uuid_utoa(pargfid), bname, child_idx,
+- priv->children[child_idx]->name, child_idx,
++ this->ctx->cmd_args.client_pid, this->name, uuid_utoa(pargfid),
++ bname, child_idx, priv->children[child_idx]->name, child_idx,
+ uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2));
+@@ -2296,11 +2297,13 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
++ "client-pid=%d;"
+ "subvol=%s;"
+ "type=file;gfid=%s;"
+ "ia_type-%d=%s;ia_type-%d=%s",
+- this->name, uuid_utoa(replies[i].poststat.ia_gfid),
+- first_idx, gf_inode_type_to_str(first.ia_type), i,
++ this->ctx->cmd_args.client_pid, this->name,
++ uuid_utoa(replies[i].poststat.ia_gfid), first_idx,
++ gf_inode_type_to_str(first.ia_type), i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type));
+ ret = -EIO;
+ goto out;
+diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
+index d9a0ee3..18a0334 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-data.c
++++ b/xlators/cluster/afr/src/afr-self-heal-data.c
+@@ -537,9 +537,11 @@ __afr_selfheal_data_finalize_source(
+ replies, AFR_DATA_TRANSACTION);
+ if (source < 0) {
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
++ "client-pid=%d;"
+ "subvol=%s;type=data;"
+ "file=%s",
+- this->name, uuid_utoa(inode->gfid));
++ this->ctx->cmd_args.client_pid, this->name,
++ uuid_utoa(inode->gfid));
+ return -EIO;
+ }
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index b23ed6a..fc09b4c 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -269,11 +269,12 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type),
+ priv->children[src_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
++ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s>;count=2;child-%d=%s;type-"
+ "%d=%s;child-%d=%s;type-%d=%s",
+- this->name, uuid_utoa(pargfid), bname, i,
+- priv->children[i]->name, i,
++ this->ctx->cmd_args.client_pid, this->name,
++ uuid_utoa(pargfid), bname, i, priv->children[i]->name, i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
+diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
+index a661fcb..ba43341 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
+@@ -242,9 +242,11 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
+
+ if (!priv->metadata_splitbrain_forced_heal) {
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
++ "client-pid=%d;"
+ "subvol=%s;"
+ "type=metadata;file=%s",
+- this->name, uuid_utoa(inode->gfid));
++ this->ctx->cmd_args.client_pid, this->name,
++ uuid_utoa(inode->gfid));
+ return -EIO;
+ }
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
+index c4df5d4..36640b5 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-name.c
++++ b/xlators/cluster/afr/src/afr-self-heal-name.c
+@@ -222,13 +222,14 @@ afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ gf_inode_type_to_str(inode_type),
+ priv->children[type_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
++ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s;count=2;"
+ "child-%d=%s;type-%d=%s;child-%d=%s;"
+ "type-%d=%s",
+- this->name, uuid_utoa(pargfid), bname, i,
+- priv->children[i]->name, i,
+- gf_inode_type_to_str(inode_type1), type_idx,
++ this->ctx->cmd_args.client_pid, this->name,
++ uuid_utoa(pargfid), bname, i, priv->children[i]->name,
++ i, gf_inode_type_to_str(inode_type1), type_idx,
+ priv->children[type_idx]->name, type_idx,
+ gf_inode_type_to_str(inode_type));
+ return -EIO;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 04a4b2e..19eca9f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -324,6 +324,7 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
+ {
+ int ret = -1;
+ char glusterd_uuid_option[PATH_MAX] = {0};
++ char client_pid[32] = {0};
+ dict_t *cmdline = NULL;
+
+ cmdline = dict_new();
+@@ -335,6 +336,15 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
+ if (ret < 0)
+ goto out;
+
++ ret = snprintf(client_pid, sizeof(client_pid), "--client-pid=%d",
++ GF_CLIENT_PID_SELF_HEALD);
++ if (ret < 0)
++ goto out;
++
++ ret = dict_set_str(cmdline, "arg", client_pid);
++ if (ret < 0)
++ goto out;
++
+ /* Pass cmdline arguments as key-value pair. The key is merely
+ * a carrier and is not used. Since dictionary follows LIFO the value
+ * should be put in reverse order*/
+--
+1.8.3.1
+
diff --git a/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch b/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch
new file mode 100644
index 0000000..ed912ea
--- /dev/null
+++ b/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch
@@ -0,0 +1,613 @@
+From aff18f761ef64d55635daa9a1d2140fe35632820 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Fri, 29 Mar 2019 11:48:32 +0530
+Subject: [PATCH 109/124] glusterd: Optimize glusterd handshaking code path
+
+Problem: During handshaking, glusterd populates volume
+         data in a dictionary. When more than 1500 volumes are
+         configured, glusterd takes more than 10 minutes to generate
+         the data. Because of this delay, RPC requests time out and
+         RPC starts bailing out call frames.
+
+Solution: To optimize the code, the changes below were made
+         1) Spawn multiple threads to populate volume data in bulk
+            into separate dictionaries, and introduce an option,
+            glusterd.vol_count_per_thread, to configure the number of
+            volumes handled per thread (see the sketch after this list).
+         2) Populate tier data only when the volume type is tier.
+         3) Compare snap data only when snap_count is non-zero.
+
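+A standalone sketch of the intended volume-to-thread split (illustrative
+names; the patch below implements the same arithmetic with glusterd's own
+types and a detached-thread helper):
+
+    int nthreads = volcnt / limit + (volcnt % limit ? 1 : 0);
+    for (int i = 0; i < nthreads; i++) {
+        int start = i * limit + 1;          /* volumes are 1-indexed */
+        int end = (i + 1) * limit;
+        if (end > volcnt)
+            end = volcnt;                   /* last thread takes the tail */
+        /* spawn a detached worker that serializes volumes [start, end]
+         * into its own dictionary; merge all dictionaries afterwards */
+    }
+
+For example, 23 volumes with a limit of 5 yield 5 threads covering 1-5,
+6-10, 11-15, 16-20 and 21-23.
+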
+> Fixes: bz#1699339
+> Change-Id: I38dc71970c049217f9d1a06fc0aaf4c26eab18f5
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit 26a19d9da3ab5604db02d4ca02ce868fb57193a4)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22556/)
+
+Bug: 1652461
+Change-Id: Ia81671a7e1f173bcb32da9dc439be9e61c18bde1
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167981
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h | 4 +-
+ tests/bugs/glusterd/bug-1699339.t | 69 ++++++
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 +
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 3 +
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 269 +++++++++++++++++----
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 55 +++++
+ xlators/mgmt/glusterd/src/glusterd.h | 10 +
+ 7 files changed, 362 insertions(+), 49 deletions(-)
+ create mode 100644 tests/bugs/glusterd/bug-1699339.t
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 6642ba0..e45db14 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -50,7 +50,7 @@
+ 1 /* MIN is the fresh start op-version, mostly \
+ should not change */
+ #define GD_OP_VERSION_MAX \
+- GD_OP_VERSION_6_0 /* MAX VERSION is the maximum \
++ GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \
+ count in VME table, should \
+ keep changing with \
+ introduction of newer \
+@@ -134,6 +134,8 @@
+
+ #define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */
+
++#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
++
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+
+diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t
+new file mode 100644
+index 0000000..3e950f4
+--- /dev/null
++++ b/tests/bugs/glusterd/bug-1699339.t
+@@ -0,0 +1,69 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../cluster.rc
++
++cleanup;
++
++NUM_VOLS=15
++
++
++get_brick_base () {
++ printf "%s/vol%02d" $B0 $1
++}
++
++function count_up_bricks {
++ vol=$1;
++ $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l
++}
++
++create_volume () {
++
++ local vol_name=$(printf "%s-vol%02d" $V0 $1)
++
++ TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name}
++ TEST $CLI_1 volume start $vol_name
++}
++
++TEST launch_cluster 3
++TEST $CLI_1 volume set all cluster.brick-multiplex on
++
++# The option accepts the value in the range from 5 to 200
++TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210
++TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4
++
++TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5
++
++TEST $CLI_1 peer probe $H2;
++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
++
++TEST $CLI_1 peer probe $H3;
++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
++
++# Our infrastructure can't handle an arithmetic expression here. The formula
++# is (NUM_VOLS-1)*2 because it sees each TEST/EXPECT once but needs the other
++# NUM_VOLS-1, and there are 2 such statements in each iteration.
++TESTS_EXPECTED_IN_LOOP=28
++for i in $(seq 1 $NUM_VOLS); do
++ starttime="$(date +%s)";
++ create_volume $i
++done
++
++TEST kill_glusterd 1
++
++vol1=$(printf "%s-vol%02d" $V0 1)
++TEST $CLI_2 volume set $vol1 performance.readdir-ahead on
++vol2=$(printf "%s-vol%02d" $V0 2)
++TEST $CLI_2 volume set $vol2 performance.readdir-ahead on
++
++# Bring back 1st glusterd
++TEST $glusterd_1
++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
++
++EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead
++
++vol_name=$(printf "%s-vol%02d" $V0 2)
++EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead
++
++cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 95f9707..94a5e1f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -87,6 +87,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {
+ * TBD: Discuss the default value for this. Maybe this should be a
+ * dynamic value depending on the memory specifications per node */
+ {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE},
++ {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE},
+ /*{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/
+ {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"},
+ {NULL},
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+index b3c4158..d225854 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+@@ -2099,6 +2099,9 @@ glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername,
+ goto out;
+ }
+
++ if (!snap_count)
++ goto out;
++
+ for (i = 1; i <= snap_count; i++) {
+ /* Compare one snapshot from peer_data at a time */
+ ret = glusterd_compare_snap(peer_data, i, peername, peerid);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index fdd7d91..ff6102b 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -155,6 +155,47 @@ out:
+ return ret;
+ }
+
++int
++get_gd_vol_thread_limit(int *thread_limit)
++{
++ char *value = NULL;
++ int ret = -1;
++ int vol_per_thread_limit = 0;
++ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++ if (!is_brick_mx_enabled()) {
++ vol_per_thread_limit = 1;
++ ret = 0;
++ goto out;
++ }
++
++ ret = dict_get_strn(priv->opts, GLUSTERD_VOL_CNT_PER_THRD,
++ SLEN(GLUSTERD_VOL_CNT_PER_THRD), &value);
++ if (ret) {
++ value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE;
++ }
++ ret = gf_string2int(value, &vol_per_thread_limit);
++ if (ret)
++ goto out;
++
++out:
++ *thread_limit = vol_per_thread_limit;
++
++ gf_msg_debug("glusterd", 0,
++ "Per Thread volume limit set to %d glusterd to populate dict "
++ "data parallel",
++ *thread_limit);
++
++ return ret;
++}
++
+ extern struct volopt_map_entry glusterd_volopt_map[];
+ extern glusterd_all_vol_opts valid_all_vol_opts[];
+
+@@ -3070,50 +3111,55 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
+
+ /* tiering related variables */
+
+- snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count);
+- if (ret)
+- goto out;
++ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
++ snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count);
++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count);
++ if (ret)
++ goto out;
+
+- snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type);
+- if (ret)
+- goto out;
++ snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count);
++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type);
++ if (ret)
++ goto out;
+
+- snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count);
+- if (ret)
+- goto out;
++ snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count);
++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count);
++ if (ret)
++ goto out;
+
+- snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_disperse_count);
+- if (ret)
+- goto out;
++ snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count);
++ ret = dict_set_uint32(dict, key,
++ volinfo->tier_info.cold_disperse_count);
++ if (ret)
++ goto out;
+
+- snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_redundancy_count);
+- if (ret)
+- goto out;
++ snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count);
++ ret = dict_set_uint32(dict, key,
++ volinfo->tier_info.cold_redundancy_count);
++ if (ret)
++ goto out;
+
+- snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_dist_leaf_count);
+- if (ret)
+- goto out;
++ snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count);
++ ret = dict_set_uint32(dict, key,
++ volinfo->tier_info.cold_dist_leaf_count);
++ if (ret)
++ goto out;
+
+- snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count);
+- if (ret)
+- goto out;
++ snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count);
++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count);
++ if (ret)
++ goto out;
+
+- snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type);
+- if (ret)
+- goto out;
++ snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count);
++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type);
++ if (ret)
++ goto out;
+
+- snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count);
+- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count);
+- if (ret)
+- goto out;
++ snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count);
++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count);
++ if (ret)
++ goto out;
++ }
+
+ snprintf(key, sizeof(key), "%s%d", prefix, count);
+ ret = gd_add_vol_snap_details_to_dict(dict, key, volinfo);
+@@ -3363,33 +3409,40 @@ out:
+ return ret;
+ }
+
+-int32_t
+-glusterd_add_volumes_to_export_dict(dict_t **peer_data)
++void *
++glusterd_add_bulk_volumes_create_thread(void *data)
+ {
+ int32_t ret = -1;
+- dict_t *dict = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t count = 0;
+- glusterd_dict_ctx_t ctx = {0};
+ xlator_t *this = NULL;
++ glusterd_add_dict_args_t *arg = NULL;
++ dict_t *dict = NULL;
++ int start = 0;
++ int end = 0;
+
+- this = THIS;
+- GF_ASSERT(this);
++ GF_ASSERT(data);
++
++ arg = data;
++ dict = arg->voldict;
++ start = arg->start;
++ end = arg->end;
++ this = arg->this;
++ THIS = arg->this;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+- dict = dict_new();
+- if (!dict)
+- goto out;
+-
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ count++;
++ if ((count < start) || (count > end))
++ continue;
++
+ ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
+ if (ret)
+ goto out;
+- if (!glusterd_is_volume_quota_enabled(volinfo))
++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
+ continue;
+ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
+ "volume");
+@@ -3397,7 +3450,122 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+ goto out;
+ }
+
+- ret = dict_set_int32n(dict, "count", SLEN("count"), count);
++out:
++ GF_ATOMIC_DEC(priv->thread_count);
++ free(arg);
++ return NULL;
++}
++
++int32_t
++glusterd_add_volumes_to_export_dict(dict_t **peer_data)
++{
++ int32_t ret = -1;
++ dict_t *dict = NULL;
++ dict_t *dict_arr[128] = {
++ 0,
++ };
++ glusterd_conf_t *priv = NULL;
++ glusterd_volinfo_t *volinfo = NULL;
++ int32_t count = 0;
++ glusterd_dict_ctx_t ctx = {0};
++ xlator_t *this = NULL;
++ int totthread = 0;
++ int volcnt = 0;
++ int start = 1;
++ int endindex = 0;
++ int vol_per_thread_limit = 0;
++ glusterd_add_dict_args_t *arg = NULL;
++ pthread_t th_id = {
++ 0,
++ };
++ int th_ret = 0;
++ int i = 0;
++
++ this = THIS;
++ GF_ASSERT(this);
++ priv = this->private;
++ GF_ASSERT(priv);
++
++ dict = dict_new();
++ if (!dict)
++ goto out;
++
++ /* Count the total number of volumes */
++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++;
++
++ get_gd_vol_thread_limit(&vol_per_thread_limit);
++
++ if ((vol_per_thread_limit == 1) || (vol_per_thread_limit > 100)) {
++ totthread = 0;
++ } else {
++ totthread = volcnt / vol_per_thread_limit;
++ endindex = volcnt % vol_per_thread_limit;
++ if (endindex)
++ totthread++;
++ }
++
++ if (totthread == 0) {
++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
++ {
++ count++;
++ ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
++ if (ret)
++ goto out;
++
++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
++ continue;
++
++ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
++ "volume");
++ if (ret)
++ goto out;
++ }
++ } else {
++ for (i = 0; i < totthread; i++) {
++ arg = calloc(1, sizeof(*arg));
++ dict_arr[i] = dict_new();
++ arg->this = this;
++ arg->voldict = dict_arr[i];
++ arg->start = start;
++ if (!endindex) {
++ arg->end = ((i + 1) * vol_per_thread_limit);
++ } else {
++ arg->end = (start + endindex);
++ }
++ th_ret = gf_thread_create_detached(
++ &th_id, glusterd_add_bulk_volumes_create_thread, arg,
++ "bulkvoldict");
++ if (th_ret) {
++ gf_log(this->name, GF_LOG_ERROR,
++ "glusterd_add_bulk_volume %s"
++ " thread creation failed",
++ "bulkvoldict");
++ free(arg);
++ goto out;
++ }
++
++ start = start + vol_per_thread_limit;
++ GF_ATOMIC_INC(priv->thread_count);
++ gf_log(this->name, GF_LOG_INFO,
++ "Create thread %d to populate dict data for volume"
++ " start index is %d end index is %d",
++ (i + 1), arg->start, arg->end);
++ }
++ while (GF_ATOMIC_GET(priv->thread_count)) {
++ sleep(1);
++ }
++
++ gf_log(this->name, GF_LOG_INFO,
++ "Finished dictionary popluation in all threads");
++ for (i = 0; i < totthread; i++) {
++ dict_copy_with_ref(dict_arr[i], dict);
++ dict_unref(dict_arr[i]);
++ }
++ gf_log(this->name, GF_LOG_INFO,
++ "Finished merger of all dictionraies into single one");
++ }
++
++ ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt);
+ if (ret)
+ goto out;
+
+@@ -3499,6 +3667,9 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+ goto out;
+ }
+
++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
++ goto skip_quota;
++
+ snprintf(key, sizeof(key), "volume%d.quota-version", count);
+ ret = dict_get_uint32(peer_data, key, &quota_version);
+ if (ret) {
+@@ -3550,6 +3721,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+ goto out;
+ }
+ }
++
++skip_quota:
+ *status = GLUSTERD_VOL_COMP_SCS;
+
+ out:
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 42ca9bb..10aa2ae 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -1058,6 +1058,51 @@ out:
+ }
+
+ static int
++validate_volume_per_thread_limit(glusterd_volinfo_t *volinfo, dict_t *dict,
++ char *key, char *value, char **op_errstr)
++{
++ xlator_t *this = NULL;
++ uint val = 0;
++ int ret = -1;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++
++ if (!is_brick_mx_enabled()) {
++ gf_asprintf(op_errstr,
++ "Brick-multiplexing is not enabled. "
++ "Please enable brick multiplexing before trying "
++ "to set this option.");
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s",
++ *op_errstr);
++ goto out;
++ }
++
++ ret = gf_string2uint(value, &val);
++ if (ret) {
++ gf_asprintf(op_errstr,
++ "%s is not a valid count. "
++ "%s expects an unsigned integer.",
++ value, key);
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s",
++ *op_errstr);
++ }
++
++ if ((val < 5) || (val > 200)) {
++ gf_asprintf(
++ op_errstr,
++ "Please set this option to a greater than 5 or less than 200 "
++ "to optimize dict generated while no. of volumes are more");
++ ret = -1;
++ goto out;
++ }
++out:
++ gf_msg_debug("glusterd", 0, "Returning %d", ret);
++
++ return ret;
++}
++
++static int
+ validate_boolean(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+ char *value, char **op_errstr)
+ {
+@@ -3520,6 +3565,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ "brick multiplexing. Brick multiplexing ensures that "
+ "compatible brick instances can share one single "
+ "brick process."},
++ {.key = GLUSTERD_VOL_CNT_PER_THRD,
++ .voltype = "mgmt/glusterd",
++ .value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE,
++ .op_version = GD_OP_VERSION_7_0,
++ .validate_fn = validate_volume_per_thread_limit,
++ .type = GLOBAL_NO_DOC,
++ .description =
++ "This option can be used to limit the number of volumes "
++ "handled by per thread to populate peer data.The option accepts "
++ " the value in the range of 5 to 200"},
+ {.key = GLUSTERD_BRICKMUX_LIMIT_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE,
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 0ac6e63..bd9f509 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -57,8 +57,10 @@
+ #define GLUSTER_SHARED_STORAGE "gluster_shared_storage"
+ #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage"
+ #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex"
++#define GLUSTERD_VOL_CNT_PER_THRD "glusterd.vol_count_per_thread"
+ #define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process"
+ #define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250"
++#define GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE "100"
+ #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging"
+ #define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level"
+
+@@ -225,8 +227,16 @@ typedef struct {
+ which might lead the modification of volinfo
+ list.
+ */
++ gf_atomic_t thread_count;
+ } glusterd_conf_t;
+
++typedef struct glusterd_add_dict_args {
++ xlator_t *this;
++ dict_t *voldict;
++ int start;
++ int end;
++} glusterd_add_dict_args_t;
++
+ typedef enum gf_brick_status {
+ GF_BRICK_STOPPED,
+ GF_BRICK_STARTED,
+--
+1.8.3.1
+
diff --git a/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch b/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch
new file mode 100644
index 0000000..eedac5e
--- /dev/null
+++ b/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch
@@ -0,0 +1,108 @@
+From 6e7d333625ecd9f7402c2e839338350fa86eaf45 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Tue, 16 Apr 2019 17:07:37 +0530
+Subject: [PATCH 110/124] tier/shd/glusterd: with shd mux, the shd volfile path
+ has to be updated for tier-heald.t
+
+With shd mux, the volfile path for glustershd has been changed from
+node-based to volume-based. This patch makes the corresponding changes
+in the tier-heald.t test case.
+
+label: DOWNSTREAM ONLY
+
+Change-Id: I0137f7e02c2bf3721dd51c6dfb215cd81b31d6ef
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168038
+Reviewed-by: Rafi Kavungal Chundattu Parambil <rkavunga@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/tier/tier-heald.t | 35 ++++++++++++++++++++---------------
+ 1 file changed, 20 insertions(+), 15 deletions(-)
+
+diff --git a/tests/basic/tier/tier-heald.t b/tests/basic/tier/tier-heald.t
+index a8e634f..0ec9e43 100644
+--- a/tests/basic/tier/tier-heald.t
++++ b/tests/basic/tier/tier-heald.t
+@@ -11,7 +11,7 @@ cleanup;
+ TEST glusterd
+ TEST pidof glusterd
+
+-volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol"
++r2_volfile=$(gluster system:: getwd)"/vols/r2/r2-shd.vol"
+
+ # Commands should fail when both tiers are not of distribute type.
+ # Glustershd shouldn't be running as long as there are no replicate/disperse
+@@ -34,51 +34,56 @@ TEST $CLI volume tier r2 attach $H0:$B0/r2_hot
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal r2 enable
+ EXPECT "enable" volume_option r2 "cluster.self-heal-daemon"
+-EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT "enable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal r2 disable
+ EXPECT "disable" volume_option r2 "cluster.self-heal-daemon"
+-EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT "disable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ # Commands should work on disperse volume.
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2
+ TEST $CLI volume start ec2
+
++ec2_volfile=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol"
++
+ TEST $CLI volume tier ec2 attach replica 2 $H0:$B0/ec2_hot{1..4}
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal ec2 enable
+ EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+-EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT "enable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal ec2 disable
+ EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+-EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT "disable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+
+ #Check that shd graph is rewritten correctly on volume stop/start
+-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
+-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate
+ TEST $CLI volume stop r2
+-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
+-EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
++
++# Commented out because validations against the volfile after a stop do not hold true.
++#EXPECT "N" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate
+ TEST $CLI volume stop ec2
+ # When both the volumes are stopped glustershd volfile is not modified just the
+ # process is stopped
+ TEST "[ -z $(get_shd_process_pid) ]"
+
+ TEST $CLI volume start r2
+-EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
+-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
++# Commented out because validations against the volfile after a stop do not hold true.
++#EXPECT "N" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate
+
+ TEST $CLI volume start ec2
+
+-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
+-EXPECT "Y" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate
+
+ TEST $CLI volume tier ec2 detach force
+
+-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
+-EXPECT "N" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
++EXPECT "N" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate
+
+ TEST $CLI volume set r2 self-heal-daemon on
+ TEST $CLI volume set r2 cluster.self-heal-daemon off
+--
+1.8.3.1
+
diff --git a/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch b/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch
new file mode 100644
index 0000000..90a25b3
--- /dev/null
+++ b/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch
@@ -0,0 +1,49 @@
+From 310e09d46cdb293e4af2df0085b8ac45d5c17933 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 16 Apr 2019 17:20:34 +0530
+Subject: [PATCH 111/124] glusterd: fix loading ctime in client graph logic
+
+Commit efbf8ab wasn't handling all the scenarios of toggling the ctime
+option correctly, and moreover a stray '!' had completely inverted the logic.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22578/
+
+>Fixes: bz#1697907
+>Change-Id: If12e2f69045e59878992ee2cd0518cc0eabcce0d
+>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1697820
+Change-Id: If12e2f69045e59878992ee2cd0518cc0eabcce0d
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168048
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 324ec2f..da877aa 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4358,9 +4358,15 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ goto out;
+ }
+ }
+-
+- if (conf->op_version >= GD_OP_VERSION_5_0 &&
+- !dict_get_str_boolean(set_dict, "features.ctime", _gf_false)) {
++ /* a. ret will be -1 if features.ctime is not set in the volinfo->dict which
++ * means ctime should be loaded into the graph.
++ * b. ret will be 1 if features.ctime is explicitly turned on through
++ * volume set and in that case ctime should be loaded into the graph.
++ * c. ret will be 0 if features.ctime is explicitly turned off and in that
++ * case ctime shouldn't be loaded into the graph.
++ */
++ ret = dict_get_str_boolean(set_dict, "features.ctime", -1);
++ if (conf->op_version >= GD_OP_VERSION_5_0 && ret) {
+ xl = volgen_graph_add(graph, "features/utime", volname);
+ if (!xl) {
+ ret = -1;
+--
+1.8.3.1
+
diff --git a/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch b/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
new file mode 100644
index 0000000..ddcb82c
--- /dev/null
+++ b/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
@@ -0,0 +1,45 @@
+From 1df830953b9a09404f9ca6a0539172e9f23ecbf4 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Wed, 17 Apr 2019 15:13:12 +0530
+Subject: [PATCH 112/124] geo-rep : fix incorrectly formatted authorized_keys
+
+Problem : While Geo-rep setup when creating an ssh authorized_keys
+ the geo-rep setup inserts an extra space before the "ssh-rsa" label.
+ This gets flagged by an enterprise customer's security scan as a
+ security violation.
+
+Solution: Remove extra space in GSYNCD_CMD & TAR_CMD.
+
+>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22246/
+>Change-Id: I956f938faef0e0883703bbc337b1dc2770e4a921
+>fixes: bz#1679401
+>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1671862
+Change-Id: I194a2bddcf2ee9b8286b204f8c4da5c480a528b3
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168144
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/src/peer_georep-sshkey.py.in | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/geo-replication/src/peer_georep-sshkey.py.in b/geo-replication/src/peer_georep-sshkey.py.in
+index 2196fd7..58696e9 100644
+--- a/geo-replication/src/peer_georep-sshkey.py.in
++++ b/geo-replication/src/peer_georep-sshkey.py.in
+@@ -30,8 +30,8 @@ from prettytable import PrettyTable
+
+ SECRET_PEM = "@GLUSTERD_WORKDIR@/geo-replication/secret.pem"
+ TAR_SSH_PEM = "@GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem"
+-GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" '
+-TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" '
++GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" '
++TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" '
+ COMMON_SECRET_FILE = "@GLUSTERD_WORKDIR@/geo-replication/common_secret.pem.pub"
+
+
+--
+1.8.3.1
+
diff --git a/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch b/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch
new file mode 100644
index 0000000..9f53a1e
--- /dev/null
+++ b/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch
@@ -0,0 +1,71 @@
+From 850d5418fb48417d94ab17e565b2184ba951ccbe Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Wed, 17 Apr 2019 18:04:44 +0530
+Subject: [PATCH 113/124] spec: Glusterd did not start by default after node
+ reboot
+
+Problem: After the gluster RPMs are installed, the glusterd service is
+         not enabled, so systemctl status shows "disabled"
+
+Solution: Update glusterfs.spec.in to enable glusterd after the gluster
+         RPMs are installed
+
+label: DOWNSTREAM ONLY
+BUG: 1699835
+
+Change-Id: Ied9be5dfb1bf3bda24868722b1fbd77cb1c1d18c
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168168
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index cb17eaa..ba095b7 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -168,6 +168,8 @@
+ %endif
+
+ %if ( 0%{?_with_systemd:1} )
++%global service_enable() /bin/systemctl --quiet enable %1.service || : \
++%{nil}
+ %global service_start() /bin/systemctl --quiet start %1.service || : \
+ %{nil}
+ %global service_stop() /bin/systemctl --quiet stop %1.service || :\
+@@ -181,7 +183,7 @@
+ %global glustereventsd_svcfile %{_unitdir}/glustereventsd.service
+ %global glusterfssharedstorage_svcfile %{_unitdir}/glusterfssharedstorage.service
+ %else
+-%global systemd_post() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \
++%global service_enable() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \
+ %{nil}
+ %global systemd_preun() /sbin/chkconfig --del %1 >/dev/null 2>&1 || : \
+ %{nil}
+@@ -926,7 +928,7 @@ exit 0
+
+ %if ( 0%{!?_without_events:1} )
+ %post events
+-%systemd_post glustereventsd
++%service_enable glustereventsd
+ %endif
+
+ %if ( 0%{!?_without_server:1} )
+@@ -951,9 +953,9 @@ exit 0
+ %if ( 0%{!?_without_server:1} )
+ %post server
+ # Legacy server
+-%systemd_post glusterd
++%service_enable glusterd
+ %if ( 0%{_for_fedora_koji_builds} )
+-%systemd_post glusterfsd
++%service_enable glusterfsd
+ %endif
+ # ".cmd_log_history" is renamed to "cmd_history.log" in GlusterFS-3.7 .
+ # While upgrading glusterfs-server package form GlusterFS version <= 3.6 to
+--
+1.8.3.1
+
diff --git a/0114-core-fix-hang-issue-in-__gf_free.patch b/0114-core-fix-hang-issue-in-__gf_free.patch
new file mode 100644
index 0000000..7e26642
--- /dev/null
+++ b/0114-core-fix-hang-issue-in-__gf_free.patch
@@ -0,0 +1,46 @@
+From da53d9027d9426c0023176a42e0550d6ccccc941 Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Mon, 22 Apr 2019 21:18:30 +0530
+Subject: [PATCH 114/124] core: fix hang issue in __gf_free
+
+Currently GF_ASSERT is done under the mem_accounting lock in some places.
+On a GF_ASSERT failure, gf_msg_callingfn is called, which calls gf_malloc
+internally; gf_malloc takes the same mem_accounting lock, leading to deadlock.
+
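+Reduced to a standalone sketch (illustrative names, not the gluster code
+itself), the deadlock pattern looks like this:
+
+    #include <pthread.h>
+
+    static pthread_mutex_t acct_lock = PTHREAD_MUTEX_INITIALIZER;
+
+    static void log_assert_failure(void)
+    {
+        pthread_mutex_lock(&acct_lock); /* nested acquisition: blocks forever */
+        /* ... account the allocation performed while logging ... */
+        pthread_mutex_unlock(&acct_lock);
+    }
+
+    void update_accounting(int assertion_holds)
+    {
+        pthread_mutex_lock(&acct_lock);     /* first acquisition */
+        if (!assertion_holds)
+            log_assert_failure();           /* non-recursive mutex: deadlock */
+        pthread_mutex_unlock(&acct_lock);
+    }
+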
+This is a temporary fix to avoid any hang issue in master.
+https://review.gluster.org/#/c/glusterfs/+/22589/ is being worked on
+in the mean while so that GF_ASSERT can be used under mem_accounting
+lock.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22600/
+
+>Change-Id: I6d67f23979e7edd2695bdc6aab2997dae4a4060a
+>updates: bz#1700865
+>Signed-off-by: Susant Palai <spalai@redhat.com>
+
+Change-Id: I6d67f23979e7edd2695bdc6aab2997dae4a4060a
+BUG: 1698728
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168474
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/mem-pool.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index 81badc0..34cb87a 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -348,7 +348,6 @@ __gf_free(void *free_ptr)
+
+ LOCK(&mem_acct->rec[header->type].lock);
+ {
+- GF_ASSERT(mem_acct->rec[header->type].size >= header->size);
+ mem_acct->rec[header->type].size -= header->size;
+ mem_acct->rec[header->type].num_allocs--;
+ /* If all the instances are freed up then ensure typestr is set
+--
+1.8.3.1
+
diff --git a/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch b/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch
new file mode 100644
index 0000000..b53ff91
--- /dev/null
+++ b/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch
@@ -0,0 +1,56 @@
+From 4901fcc0cc507accf30e1a4bdd020a5676488751 Mon Sep 17 00:00:00 2001
+From: Niels de Vos <ndevos@redhat.com>
+Date: Mon, 8 Apr 2019 12:14:34 +0200
+Subject: [PATCH 115/124] core: only log seek errors if SEEK_HOLE/SEEK_DATA is
+ available
+
+On RHEL-6 there is no support for SEEK_HOLE/SEEK_DATA and this causes
+the POSIX xlator to return errno=EINVAL. Because of this, the rpc-server
+xlator will log all 'failed' seek attempts. When applications call
+seek() often, the brick logs can grow very quickly and fill up the
+disks.
+
+Messages that get logged look like:
+[server-rpc-fops.c:2091:server_seek_cbk] 0-vol01-server: 4947: SEEK-2 (53920aee-062c-4598-aa50-2b4d7821b204), client: worker.example.com-7808-2019/02/08-18:04:57:903430-vol01-client-0-0-0, error-xlator: vol01-posix [Invalid argument]
+
+The problem can be reproduced by running a Gluster Server on RHEL-6,
+with a client running on RHEL-7. The client should execute an
+application that calls lseek() with SEEK_HOLE/SEEK_DATA.
+
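+A minimal client-side reproducer sketch (the mount path is
+hypothetical; assumes _GNU_SOURCE for SEEK_DATA):
+
+    #define _GNU_SOURCE
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        int fd = open("/mnt/vol01/somefile", O_RDONLY);
+        if (fd < 0)
+            return 1;
+        /* Against a RHEL-6 brick this fails with EINVAL and, before
+         * this fix, logged an ERROR on the brick for every call. */
+        if (lseek(fd, 0, SEEK_DATA) == (off_t)-1)
+            perror("lseek(SEEK_DATA)");
+        close(fd);
+        return 0;
+    }
+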
+>Change-Id: I7b6c16f8e0ba1a183e845cfdb8d5a3f8caeab138
+>Fixes: bz#1697316
+>Signed-off-by: Niels de Vos <ndevos@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22526/
+
+BUG: 1696903
+Change-Id: I7b6c16f8e0ba1a183e845cfdb8d5a3f8caeab138
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168527
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ libglusterfs/src/common-utils.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
+index a0c83c0..70d5d21 100644
+--- a/libglusterfs/src/common-utils.c
++++ b/libglusterfs/src/common-utils.c
+@@ -4500,9 +4500,13 @@ fop_log_level(glusterfs_fop_t fop, int op_errno)
+ return GF_LOG_DEBUG;
+
+ if (fop == GF_FOP_SEEK) {
++#ifdef HAVE_SEEK_HOLE
+ if (op_errno == ENXIO) {
+ return GF_LOG_DEBUG;
+ }
++#else
++ return GF_LOG_DEBUG;
++#endif
+ }
+
+ return GF_LOG_ERROR;
+--
+1.8.3.1
+
diff --git a/0116-cluster-ec-fix-fd-reopen.patch b/0116-cluster-ec-fix-fd-reopen.patch
new file mode 100644
index 0000000..5426c70
--- /dev/null
+++ b/0116-cluster-ec-fix-fd-reopen.patch
@@ -0,0 +1,1931 @@
+From e33b3e0a443d4a54634a664f2d499a3fce9e7fb4 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Tue, 16 Apr 2019 14:19:47 +0530
+Subject: [PATCH 116/124] cluster/ec: fix fd reopen
+
+Currently EC tries to reopen fd's that have been opened while a brick
+was down. This is done as part of regular write operations, just after
+having acquired the locks, and it's sent as a sub-fop of the main write
+fop.
+
+There were two problems:
+
+1. The reopen was attempted on all UP bricks, even if a previous lock
+didn't succeed. This is incorrect because the open will most probably
+fail.
+
+2. If reopen is sent and fails, the error is propagated to the main
+operation, causing it to fail when it shouldn't.
+
+To fix this, we only attempt reopens on bricks where the current fop
+owns a lock, and we prevent any error from being propagated to the main
+fop.
+
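+A condensed excerpt of the resulting eligibility test, from the
+ec_fd_ctx_need_open() hunk below: a brick only qualifies for reopen
+when its fd was never opened, the brick is up, and the fop's lock mask
+covers it.
+
+    for (i = 0; i < ec->nodes; i++) {
+        if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+            ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) {
+            fd_ctx->fd_status[i] = EC_FD_OPENING;
+            need_open |= (1 << i);
+        }
+    }
+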
+To implement this behaviour, the argument used to indicate the minimum
+number of required answers has been overloaded to also carry some
+flags. To make the change consistent, it has been necessary to rename
+the argument, which means that a lot of files have been changed.
+However, there are no functional changes.
+
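+A small standalone illustration of the overloaded argument, using the
+macro definitions from the ec-common.h hunk below: the low 8 bits carry
+the minimum-answers code and the higher bits carry behaviour flags.
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    #define EC_MINIMUM_ONE (1 << 6)
+    #define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
+    #define EC_FOP_MINIMUM(_flags) ((_flags)&255)
+    #define EC_FOP_FLAGS(_flags) ((_flags) & ~255)
+
+    int main(void)
+    {
+        uint32_t fop_flags = EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR;
+
+        /* low byte: minimum-answers code; higher bits: flags */
+        printf("minimum=%u flags=%u\n", EC_FOP_MINIMUM(fop_flags),
+               EC_FOP_FLAGS(fop_flags)); /* prints: minimum=64 flags=256 */
+        return 0;
+    }
+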
+This change has also uncovered a problem in the discard code, which
+didn't correctly process requests of small sizes because no real discard
+fop was being processed, only a write of 0's on some region. In this
+case some fields of the fop remained uninitialized or held incorrect
+values. To fix this, a new function (ec_succeed_all() in the hunk below)
+has been created to simulate success on a fop, and it's used in the
+discard case.
+
+Thanks to Pranith for providing a test script that also detected an
+issue in this patch. This patch includes a small modification of that
+script to force data to be written to the bricks before stopping them.
+
+Upstream patch: https://review.gluster.org/22574
+> Change-Id: I7ccd1fc5fc134eeb6d443c755962a20819320d48
+> BUG: bz#1699866
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+Upstream patch: https://review.gluster.org/22558
+> Change-Id: If272343873369186c2fb8f43c1d9c52c3ea304ec
+> BUG: bz#1699866
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: If272343873369186c2fb8f43c1d9c52c3ea304ec
+Fixes: bz#1663375
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168522
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
+---
+ tests/basic/ec/self-heal-read-write-fail.t | 69 +++++++++++++
+ tests/bugs/ec/bug-1699866-check-reopen-fd.t | 34 +++++++
+ xlators/cluster/ec/src/ec-common.c | 73 ++++++++++----
+ xlators/cluster/ec/src/ec-common.h | 14 ++-
+ xlators/cluster/ec/src/ec-data.c | 7 +-
+ xlators/cluster/ec/src/ec-data.h | 2 +-
+ xlators/cluster/ec/src/ec-dir-read.c | 12 +--
+ xlators/cluster/ec/src/ec-dir-write.c | 52 +++++-----
+ xlators/cluster/ec/src/ec-fops.h | 144 ++++++++++++++--------------
+ xlators/cluster/ec/src/ec-generic.c | 54 ++++++-----
+ xlators/cluster/ec/src/ec-heal.c | 20 ++--
+ xlators/cluster/ec/src/ec-inode-read.c | 58 +++++------
+ xlators/cluster/ec/src/ec-inode-write.c | 74 +++++++-------
+ xlators/cluster/ec/src/ec-locks.c | 36 +++----
+ xlators/cluster/ec/src/ec-types.h | 11 ++-
+ xlators/cluster/ec/src/ec.c | 45 +++++----
+ 16 files changed, 431 insertions(+), 274 deletions(-)
+ create mode 100644 tests/basic/ec/self-heal-read-write-fail.t
+ create mode 100644 tests/bugs/ec/bug-1699866-check-reopen-fd.t
+
+diff --git a/tests/basic/ec/self-heal-read-write-fail.t b/tests/basic/ec/self-heal-read-write-fail.t
+new file mode 100644
+index 0000000..0ba591b
+--- /dev/null
++++ b/tests/basic/ec/self-heal-read-write-fail.t
+@@ -0,0 +1,69 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++#This test verifies that self-heal fails when read/write fails as part of heal
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume info
++
++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
++TEST touch $M0/a
++TEST kill_brick $V0 $H0 $B0/${V0}0
++echo abc >> $M0/a
++
++# Umount the volume to force all pending writes to reach the bricks
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++
++#Load error-gen and fail read fop and test that heal fails
++TEST $CLI volume stop $V0 #Stop volume so that error-gen can be loaded
++TEST $CLI volume set $V0 debug.error-gen posix
++TEST $CLI volume set $V0 debug.error-fops read
++TEST $CLI volume set $V0 debug.error-number EBADF
++TEST $CLI volume set $V0 debug.error-failure 100
++
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++TEST ! getfattr -n trusted.ec.heal $M0/a
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++
++#fail write fop and test that heal fails
++TEST $CLI volume stop $V0
++TEST $CLI volume set $V0 debug.error-fops write
++
++TEST $CLI volume start $V0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++TEST ! getfattr -n trusted.ec.heal $M0/a
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++
++TEST $CLI volume stop $V0 #Stop volume so that error-gen can be disabled
++TEST $CLI volume reset $V0 debug.error-gen
++TEST $CLI volume reset $V0 debug.error-fops
++TEST $CLI volume reset $V0 debug.error-number
++TEST $CLI volume reset $V0 debug.error-failure
++
++TEST $CLI volume start $V0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++TEST getfattr -n trusted.ec.heal $M0/a
++EXPECT "^0$" get_pending_heal_count $V0
++
++#Test that heal worked as expected by forcing read from brick0
++#remount to make sure data is not served from any cache
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++TEST kill_brick $V0 $H0 $B0/${V0}2
++EXPECT "abc" cat $M0/a
++
++cleanup
+diff --git a/tests/bugs/ec/bug-1699866-check-reopen-fd.t b/tests/bugs/ec/bug-1699866-check-reopen-fd.t
+new file mode 100644
+index 0000000..4386d01
+--- /dev/null
++++ b/tests/bugs/ec/bug-1699866-check-reopen-fd.t
+@@ -0,0 +1,34 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fileio.rc
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume set $V0 disperse.background-heals 0
++TEST $CLI volume set $V0 write-behind off
++TEST $CLI volume set $V0 open-behind off
++TEST $CLI volume start $V0
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++
++TEST mkdir -p $M0/dir
++
++fd="$(fd_available)"
++
++TEST kill_brick $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
++
++TEST fd_open ${fd} rw $M0/dir/test
++TEST fd_write ${fd} "test1"
++TEST $CLI volume replace-brick ${V0} $H0:$B0/${V0}0 $H0:$B0/${V0}0_1 commit force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++TEST fd_write ${fd} "test2"
++TEST fd_close ${fd}
++
++cleanup
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 5183680..1454ae2 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -44,16 +44,16 @@ ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status)
+ UNLOCK(&fd->lock);
+ }
+
+-static int
+-ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
++static uintptr_t
++ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t mask)
+ {
+ int i = 0;
+ int count = 0;
+ ec_t *ec = NULL;
+ ec_fd_t *fd_ctx = NULL;
++ uintptr_t need_open = 0;
+
+ ec = this->private;
+- *need_open = 0;
+
+ fd_ctx = ec_fd_get(fd, this);
+ if (!fd_ctx)
+@@ -63,9 +63,9 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
+ {
+ for (i = 0; i < ec->nodes; i++) {
+ if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+- (ec->xl_up & (1 << i))) {
++ ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) {
+ fd_ctx->fd_status[i] = EC_FD_OPENING;
+- *need_open |= (1 << i);
++ need_open |= (1 << i);
+ count++;
+ }
+ }
+@@ -76,10 +76,11 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
+ * then ignore fixing the fd as it has been
+ * requested from heal operation.
+ */
+- if (count >= ec->fragments)
+- count = 0;
++ if (count >= ec->fragments) {
++ need_open = 0;
++ }
+
+- return count;
++ return need_open;
+ }
+
+ static gf_boolean_t
+@@ -96,9 +97,8 @@ ec_is_fd_fixable(fd_t *fd)
+ }
+
+ static void
+-ec_fix_open(ec_fop_data_t *fop)
++ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
+ {
+- int call_count = 0;
+ uintptr_t need_open = 0;
+ int ret = 0;
+ loc_t loc = {
+@@ -109,9 +109,10 @@ ec_fix_open(ec_fop_data_t *fop)
+ goto out;
+
+ /* Evaluate how many remote fd's to be opened */
+- call_count = ec_fd_ctx_need_open(fop->fd, fop->xl, &need_open);
+- if (!call_count)
++ need_open = ec_fd_ctx_need_open(fop->fd, fop->xl, mask);
++ if (need_open == 0) {
+ goto out;
++ }
+
+ loc.inode = inode_ref(fop->fd->inode);
+ gf_uuid_copy(loc.gfid, fop->fd->inode->gfid);
+@@ -121,11 +122,13 @@ ec_fix_open(ec_fop_data_t *fop)
+ }
+
+ if (IA_IFDIR == fop->fd->inode->ia_type) {
+- ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
++ ec_opendir(fop->frame, fop->xl, need_open,
++ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL,
+ &fop->loc[0], fop->fd, NULL);
+ } else {
+- ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
+- &loc, fop->fd->flags, fop->fd, NULL);
++ ec_open(fop->frame, fop->xl, need_open,
++ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
++ fop->fd->flags, fop->fd, NULL);
+ }
+
+ out:
+@@ -495,12 +498,16 @@ ec_resume(ec_fop_data_t *fop, int32_t error)
+ }
+
+ void
+-ec_resume_parent(ec_fop_data_t *fop, int32_t error)
++ec_resume_parent(ec_fop_data_t *fop)
+ {
+ ec_fop_data_t *parent;
++ int32_t error = 0;
+
+ parent = fop->parent;
+ if (parent != NULL) {
++ if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0) {
++ error = fop->error;
++ }
+ ec_trace("RESUME_PARENT", fop, "error=%u", error);
+ fop->parent = NULL;
+ ec_resume(parent, error);
+@@ -593,6 +600,8 @@ ec_internal_op(ec_fop_data_t *fop)
+ return _gf_true;
+ if (fop->id == GF_FOP_FXATTROP)
+ return _gf_true;
++ if (fop->id == GF_FOP_OPEN)
++ return _gf_true;
+ return _gf_false;
+ }
+
+@@ -631,7 +640,7 @@ ec_msg_str(ec_fop_data_t *fop)
+ return fop->errstr;
+ }
+
+-int32_t
++static int32_t
+ ec_child_select(ec_fop_data_t *fop)
+ {
+ ec_t *ec = fop->xl->private;
+@@ -693,8 +702,6 @@ ec_child_select(ec_fop_data_t *fop)
+ return 0;
+ }
+
+- ec_sleep(fop);
+-
+ return 1;
+ }
+
+@@ -773,6 +780,8 @@ ec_dispatch_one(ec_fop_data_t *fop)
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
++ ec_sleep(fop);
++
+ fop->expected = 1;
+ fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
+
+@@ -807,6 +816,8 @@ ec_dispatch_inc(ec_fop_data_t *fop)
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
++ ec_sleep(fop);
++
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+@@ -820,6 +831,8 @@ ec_dispatch_all(ec_fop_data_t *fop)
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
++ ec_sleep(fop);
++
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+@@ -838,6 +851,8 @@ ec_dispatch_min(ec_fop_data_t *fop)
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
++ ec_sleep(fop);
++
+ fop->expected = count = ec->fragments;
+ fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
+ idx = fop->first - 1;
+@@ -852,6 +867,23 @@ ec_dispatch_min(ec_fop_data_t *fop)
+ }
+ }
+
++void
++ec_succeed_all(ec_fop_data_t *fop)
++{
++ ec_dispatch_start(fop);
++
++ if (ec_child_select(fop)) {
++ fop->expected = gf_bits_count(fop->remaining);
++ fop->first = 0;
++
++ /* Simulate a successful execution on all bricks */
++ ec_trace("SUCCEED", fop, "");
++
++ fop->good = fop->remaining;
++ fop->remaining = 0;
++ }
++}
++
+ ec_lock_t *
+ ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
+ {
+@@ -1825,7 +1857,8 @@ ec_lock_acquired(ec_lock_link_t *link)
+
+ if (fop->use_fd &&
+ (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
+- ec_fix_open(fop);
++ /* Try to reopen closed fd's only if lock has succeeded. */
++ ec_fix_open(fop, lock->mask);
+ }
+
+ ec_lock_resume_shared(&list);
+diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
+index 54aaa77..e948342 100644
+--- a/xlators/cluster/ec/src/ec-common.h
++++ b/xlators/cluster/ec/src/ec-common.h
+@@ -54,9 +54,12 @@ enum _ec_xattrop_flags {
+
+ #define EC_SELFHEAL_BIT 62
+
+-#define EC_MINIMUM_ONE -1
+-#define EC_MINIMUM_MIN -2
+-#define EC_MINIMUM_ALL -3
++#define EC_MINIMUM_ONE (1 << 6)
++#define EC_MINIMUM_MIN (2 << 6)
++#define EC_MINIMUM_ALL (3 << 6)
++#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
++#define EC_FOP_MINIMUM(_flags) ((_flags)&255)
++#define EC_FOP_FLAGS(_flags) ((_flags) & ~255)
+
+ #define EC_UPDATE_DATA 1
+ #define EC_UPDATE_META 2
+@@ -163,11 +166,14 @@ void
+ ec_dispatch_one(ec_fop_data_t *fop);
+
+ void
++ec_succeed_all(ec_fop_data_t *fop);
++
++void
+ ec_sleep(ec_fop_data_t *fop);
+ void
+ ec_resume(ec_fop_data_t *fop, int32_t error);
+ void
+-ec_resume_parent(ec_fop_data_t *fop, int32_t error);
++ec_resume_parent(ec_fop_data_t *fop);
+
+ void
+ ec_manager(ec_fop_data_t *fop, int32_t error);
+diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
+index fae8843..6ef9340 100644
+--- a/xlators/cluster/ec/src/ec-data.c
++++ b/xlators/cluster/ec/src/ec-data.c
+@@ -98,7 +98,7 @@ ec_cbk_data_destroy(ec_cbk_data_t *cbk)
+
+ ec_fop_data_t *
+ ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
+- uint32_t flags, uintptr_t target, int32_t minimum,
++ uint32_t flags, uintptr_t target, uint32_t fop_flags,
+ ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
+ void *data)
+ {
+@@ -151,7 +151,8 @@ ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
+ fop->refs = 1;
+
+ fop->flags = flags;
+- fop->minimum = minimum;
++ fop->minimum = EC_FOP_MINIMUM(fop_flags);
++ fop->fop_flags = EC_FOP_FLAGS(fop_flags);
+ fop->mask = target;
+
+ fop->wind = wind;
+@@ -271,7 +272,7 @@ ec_fop_data_release(ec_fop_data_t *fop)
+ loc_wipe(&fop->loc[1]);
+ GF_FREE(fop->errstr);
+
+- ec_resume_parent(fop, fop->error);
++ ec_resume_parent(fop);
+
+ ec_fop_cleanup(fop);
+
+diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
+index 112536d..c8a74ff 100644
+--- a/xlators/cluster/ec/src/ec-data.h
++++ b/xlators/cluster/ec/src/ec-data.h
+@@ -18,7 +18,7 @@ ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop,
+ int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno);
+ ec_fop_data_t *
+ ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
+- uint32_t flags, uintptr_t target, int32_t minimum,
++ uint32_t flags, uintptr_t target, uint32_t fop_flags,
+ ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
+ void *data);
+ void
+diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
+index c9db701..8310d4a 100644
+--- a/xlators/cluster/ec/src/ec-dir-read.c
++++ b/xlators/cluster/ec/src/ec-dir-read.c
+@@ -219,7 +219,7 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.opendir = func};
+@@ -233,7 +233,7 @@ ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_opendir,
++ target, fop_flags, ec_wind_opendir,
+ ec_manager_opendir, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -515,7 +515,7 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.readdir = func};
+@@ -529,7 +529,7 @@ ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_readdir,
++ target, fop_flags, ec_wind_readdir,
+ ec_manager_readdir, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -585,7 +585,7 @@ ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.readdirp = func};
+@@ -599,7 +599,7 @@ ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(
+- frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, minimum,
++ frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_readdirp, ec_manager_readdir, callback, data);
+ if (fop == NULL) {
+ goto out;
+diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
+index e24667f..0b8ee21 100644
+--- a/xlators/cluster/ec/src/ec-dir-write.c
++++ b/xlators/cluster/ec/src/ec-dir-write.c
+@@ -262,7 +262,7 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.create = func};
+@@ -275,7 +275,7 @@ ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, fop_flags,
+ ec_wind_create, ec_manager_create, callback,
+ data);
+ if (fop == NULL) {
+@@ -432,9 +432,9 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc,
+- dict_t *xdata)
++ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
++ loc_t *newloc, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.link = func};
+ ec_fop_data_t *fop = NULL;
+@@ -446,7 +446,7 @@ ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, fop_flags,
+ ec_wind_link, ec_manager_link, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -613,9 +613,9 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode,
+- mode_t umask, dict_t *xdata)
++ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
++ mode_t mode, mode_t umask, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.mkdir = func};
+ ec_fop_data_t *fop = NULL;
+@@ -627,7 +627,7 @@ ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, fop_flags,
+ ec_wind_mkdir, ec_manager_mkdir, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -815,9 +815,9 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev,
+- mode_t umask, dict_t *xdata)
++ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
++ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.mknod = func};
+ ec_fop_data_t *fop = NULL;
+@@ -829,7 +829,7 @@ ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, fop_flags,
+ ec_wind_mknod, ec_manager_mknod, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -975,7 +975,7 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc,
++ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.rename = func};
+@@ -988,7 +988,7 @@ ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, fop_flags,
+ ec_wind_rename, ec_manager_rename, callback,
+ data);
+ if (fop == NULL) {
+@@ -1125,9 +1125,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags,
+- dict_t *xdata)
++ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
++ int xflags, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.rmdir = func};
+ ec_fop_data_t *fop = NULL;
+@@ -1139,7 +1139,7 @@ ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, fop_flags,
+ ec_wind_rmdir, ec_manager_rmdir, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -1281,7 +1281,7 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_symlink_cbk_t func, void *data,
++ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.symlink = func};
+@@ -1294,9 +1294,9 @@ ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, minimum,
+- ec_wind_symlink, ec_manager_symlink, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target,
++ fop_flags, ec_wind_symlink, ec_manager_symlink,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -1435,7 +1435,7 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.unlink = func};
+@@ -1448,7 +1448,7 @@ ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, fop_flags,
+ ec_wind_unlink, ec_manager_unlink, callback,
+ data);
+ if (fop == NULL) {
+diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
+index 2abef0d..07edf8a 100644
+--- a/xlators/cluster/ec/src/ec-fops.h
++++ b/xlators/cluster/ec/src/ec-fops.h
+@@ -18,233 +18,237 @@
+
+ void
+ ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+ void
+ ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+ void
+ ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_entrylk_cbk_t func, void *data,
++ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+ void
+ ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
++ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
+ const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+ void
+-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata);
++ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
++ dict_t *xdata);
+
+ void
+-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync,
+- dict_t *xdata);
++ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
++ int32_t datasync, dict_t *xdata);
+
+ void
+ ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+ void
+ ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+ void
+ ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+ void
+-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
+- dict_t *xdata);
++ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
++ int32_t partial, dict_t *xdata);
+
+ void
+-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
+- dict_t *xdata);
++ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
++ int32_t partial, dict_t *xdata);
+
+ void
+ ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func,
++ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
+ void *data, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+ void
+ ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func,
++ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
+ void *data, const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+ void
+-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc,
+- dict_t *xdata);
++ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
++ loc_t *newloc, dict_t *xdata);
+
+ void
+-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
++ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
+ fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+ void
+ ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
+
+ void
+-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode,
+- mode_t umask, dict_t *xdata);
++ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
++ mode_t mode, mode_t umask, dict_t *xdata);
+
+ void
+-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev,
+- mode_t umask, dict_t *xdata);
++ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
++ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+ void
+-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd,
+- dict_t *xdata);
++ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
++ int32_t flags, fd_t *fd, dict_t *xdata);
+
+ void
+ ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ fd_t *fd, dict_t *xdata);
+
+ void
+ ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+ void
+ ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+ void
+ ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ size_t size, dict_t *xdata);
+
+ void
+-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset,
+- uint32_t flags, dict_t *xdata);
++ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
++ size_t size, off_t offset, uint32_t flags, dict_t *xdata);
+
+ void
+ ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_removexattr_cbk_t func, void *data,
++ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
+ loc_t *loc, const char *name, dict_t *xdata);
+
+ void
+ ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
++ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+ void
+ ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc,
++ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+ void
+-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags,
+- dict_t *xdata);
++ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
++ int xflags, dict_t *xdata);
+
+ void
+ ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+ void
+ ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+ void
+ ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+ void
+ ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+ void
+-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata);
++ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
++ dict_t *xdata);
+
+ void
+-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata);
++ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
++ dict_t *xdata);
+
+ void
+ ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
+
+ void
+ ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_symlink_cbk_t func, void *data,
++ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
+
+ void
+ ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+
+ void
+ ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+ void
+ ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+ void
+ ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+ void
+ ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+ void
+ ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+ void
+ ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+ void
+ ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+ void
+-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset,
+- gf_seek_what_t what, dict_t *xdata);
++ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
++ off_t offset, gf_seek_what_t what, dict_t *xdata);
+
+ void
+-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata);
++ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
++ dict_t *xdata);
+
+ #endif /* __EC_FOPS_H__ */
+diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
+index 175e88a..acc16b5 100644
+--- a/xlators/cluster/ec/src/ec-generic.c
++++ b/xlators/cluster/ec/src/ec-generic.c
+@@ -151,8 +151,9 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata)
++ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
++ dict_t *xdata)
+ {
+ ec_cbk_t callback = {.flush = func};
+ ec_fop_data_t *fop = NULL;
+@@ -164,7 +165,7 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
+ ec_wind_flush, ec_manager_flush, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -366,9 +367,9 @@ ec_manager_fsync(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync,
+- dict_t *xdata)
++ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
++ int32_t datasync, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fsync = func};
+ ec_fop_data_t *fop = NULL;
+@@ -380,7 +381,7 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
+ ec_wind_fsync, ec_manager_fsync, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -553,7 +554,7 @@ ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fsyncdir = func};
+@@ -566,9 +567,9 @@ ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, minimum,
+- ec_wind_fsyncdir, ec_manager_fsyncdir, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target,
++ fop_flags, ec_wind_fsyncdir, ec_manager_fsyncdir,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -848,7 +849,7 @@ ec_manager_lookup(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
+ {
+ ec_cbk_t callback = {.lookup = func};
+@@ -862,7 +863,7 @@ ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_lookup,
++ target, fop_flags, ec_wind_lookup,
+ ec_manager_lookup, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -1033,7 +1034,7 @@ ec_manager_statfs(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
+ {
+ ec_cbk_t callback = {.statfs = func};
+@@ -1047,7 +1048,7 @@ ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_statfs,
++ target, fop_flags, ec_wind_statfs,
+ ec_manager_statfs, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -1270,7 +1271,7 @@ ec_manager_xattrop(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.xattrop = func};
+@@ -1283,9 +1284,9 @@ ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, minimum,
+- ec_wind_xattrop, ec_manager_xattrop, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target,
++ fop_flags, ec_wind_xattrop, ec_manager_xattrop,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -1343,7 +1344,7 @@ ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fxattrop = func};
+@@ -1356,9 +1357,9 @@ ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, minimum,
+- ec_wind_fxattrop, ec_manager_xattrop, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target,
++ fop_flags, ec_wind_fxattrop, ec_manager_xattrop,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -1507,8 +1508,9 @@ ec_manager_ipc(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata)
++ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
++ dict_t *xdata)
+ {
+ ec_cbk_t callback = {.ipc = func};
+ ec_fop_data_t *fop = NULL;
+@@ -1520,7 +1522,7 @@ ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, fop_flags,
+ ec_wind_ipc, ec_manager_ipc, callback, data);
+ if (fop == NULL) {
+ goto out;
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 1ca12c1..3aa04fb 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -367,16 +367,16 @@ ec_heal_data_block(ec_heal_t *heal)
+ /* FOP: fheal */
+
+ void
+-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
+- dict_t *xdata)
++ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
++ int32_t partial, dict_t *xdata)
+ {
+ ec_fd_t *ctx = ec_fd_get(fd, this);
+
+ if (ctx != NULL) {
+ gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags,
+ ctx->open);
+- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
++ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial,
+ xdata);
+ }
+ }
+@@ -1975,7 +1975,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
+ /*Takes lock */
+ void
+ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal)
++ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal)
+ {
+ ec_cbk_t callback = {.heal = func};
+ ec_fop_data_t *fop = NULL;
+@@ -1986,7 +1986,7 @@ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
+ NULL, ec_manager_heal_block, callback, heal);
+ if (fop == NULL)
+ goto out;
+@@ -2761,9 +2761,9 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
+ }
+
+ void
+-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
+- dict_t *xdata)
++ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
++ int32_t partial, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.heal = func};
+ ec_fop_data_t *fop = NULL;
+@@ -2779,7 +2779,7 @@ ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+
+ if (frame && frame->local)
+ goto fail;
+- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
+ NULL, NULL, callback, data);
+
+ err = ENOMEM;
+diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
+index 55e5934..f87a94a 100644
+--- a/xlators/cluster/ec/src/ec-inode-read.c
++++ b/xlators/cluster/ec/src/ec-inode-read.c
+@@ -135,7 +135,7 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
+ int32_t mask, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.access = func};
+@@ -149,7 +149,7 @@ ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_access,
++ target, fop_flags, ec_wind_access,
+ ec_manager_access, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -446,7 +446,7 @@ out:
+
+ void
+ ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ const char *name, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.getxattr = func};
+@@ -468,7 +468,7 @@ ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ }
+
+ fop = ec_fop_data_allocate(
+- frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, minimum,
++ frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_getxattr, ec_manager_getxattr, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -588,7 +588,7 @@ ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ const char *name, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fgetxattr = func};
+@@ -602,7 +602,7 @@ ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(
+- frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, minimum,
++ frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_fgetxattr, ec_manager_getxattr, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -869,9 +869,9 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd,
+- dict_t *xdata)
++ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
++ int32_t flags, fd_t *fd, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.open = func};
+ ec_fop_data_t *fop = NULL;
+@@ -884,7 +884,7 @@ ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_open, ec_manager_open,
++ target, fop_flags, ec_wind_open, ec_manager_open,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -1071,7 +1071,7 @@ ec_manager_readlink(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ size_t size, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.readlink = func};
+@@ -1085,7 +1085,7 @@ ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(
+- frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, minimum,
++ frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_readlink, ec_manager_readlink, callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -1417,9 +1417,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset,
+- uint32_t flags, dict_t *xdata)
++ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
++ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.readv = func};
+ ec_fop_data_t *fop = NULL;
+@@ -1432,8 +1432,8 @@ ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_readv, ec_manager_readv,
+- callback, data);
++ target, fop_flags, ec_wind_readv,
++ ec_manager_readv, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -1637,9 +1637,9 @@ ec_manager_seek(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset,
+- gf_seek_what_t what, dict_t *xdata)
++ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
++ off_t offset, gf_seek_what_t what, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.seek = func};
+ ec_fop_data_t *fop = NULL;
+@@ -1652,7 +1652,7 @@ ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_seek, ec_manager_seek,
++ target, fop_flags, ec_wind_seek, ec_manager_seek,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -1855,8 +1855,9 @@ ec_manager_stat(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata)
++ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
++ dict_t *xdata)
+ {
+ ec_cbk_t callback = {.stat = func};
+ ec_fop_data_t *fop = NULL;
+@@ -1869,7 +1870,7 @@ ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_stat, ec_manager_stat,
++ target, fop_flags, ec_wind_stat, ec_manager_stat,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -1965,8 +1966,9 @@ ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+ }
+
+ void
+-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata)
++ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
++ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
++ dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fstat = func};
+ ec_fop_data_t *fop = NULL;
+@@ -1979,8 +1981,8 @@ ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED,
+- target, minimum, ec_wind_fstat, ec_manager_stat,
+- callback, data);
++ target, fop_flags, ec_wind_fstat,
++ ec_manager_stat, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
+index e7b34e6..a903664 100644
+--- a/xlators/cluster/ec/src/ec-inode-write.c
++++ b/xlators/cluster/ec/src/ec-inode-write.c
+@@ -281,7 +281,7 @@ ec_manager_xattr(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_removexattr_cbk_t func, void *data,
++ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
+ loc_t *loc, const char *name, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.removexattr = func};
+@@ -295,7 +295,7 @@ ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target,
+- minimum, ec_wind_removexattr, ec_manager_xattr,
++ fop_flags, ec_wind_removexattr, ec_manager_xattr,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -361,7 +361,7 @@ ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
++ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
+ fd_t *fd, const char *name, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fremovexattr = func};
+@@ -375,8 +375,8 @@ ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target,
+- minimum, ec_wind_fremovexattr, ec_manager_xattr,
+- callback, data);
++ fop_flags, ec_wind_fremovexattr,
++ ec_manager_xattr, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -550,7 +550,7 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.setattr = func};
+@@ -563,9 +563,9 @@ ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, minimum,
+- ec_wind_setattr, ec_manager_setattr, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target,
++ fop_flags, ec_wind_setattr, ec_manager_setattr,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -627,7 +627,7 @@ ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fsetattr = func};
+@@ -640,9 +640,9 @@ ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, minimum,
+- ec_wind_fsetattr, ec_manager_setattr, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target,
++ fop_flags, ec_wind_fsetattr, ec_manager_setattr,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -707,7 +707,7 @@ ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.setxattr = func};
+@@ -720,9 +720,9 @@ ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, minimum,
+- ec_wind_setxattr, ec_manager_xattr, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target,
++ fop_flags, ec_wind_setxattr, ec_manager_xattr,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -825,7 +825,7 @@ ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fsetxattr = func};
+@@ -839,7 +839,7 @@ ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target,
+- minimum, ec_wind_fsetxattr, ec_manager_xattr,
++ fop_flags, ec_wind_fsetxattr, ec_manager_xattr,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+@@ -1035,7 +1035,7 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.fallocate = func};
+@@ -1049,8 +1049,8 @@ ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target,
+- minimum, ec_wind_fallocate, ec_manager_fallocate,
+- callback, data);
++ fop_flags, ec_wind_fallocate,
++ ec_manager_fallocate, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -1209,8 +1209,8 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+ ec_dispatch_all(fop);
+ return EC_STATE_DELAYED_START;
+ } else {
+- /*Assume discard to have succeeded on mask*/
+- fop->good = fop->mask;
++ /* Assume discard to have succeeded on all bricks */
++ ec_succeed_all(fop);
+ }
+
+ /* Fall through */
+@@ -1289,7 +1289,7 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.discard = func};
+@@ -1302,9 +1302,9 @@ ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, minimum,
+- ec_wind_discard, ec_manager_discard, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
++ fop_flags, ec_wind_discard, ec_manager_discard,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -1530,7 +1530,7 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc,
++ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ off_t offset, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.truncate = func};
+@@ -1543,9 +1543,9 @@ ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, minimum,
+- ec_wind_truncate, ec_manager_truncate, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target,
++ fop_flags, ec_wind_truncate, ec_manager_truncate,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -1604,7 +1604,7 @@ ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ off_t offset, dict_t *xdata)
+ {
+ ec_cbk_t callback = {.ftruncate = func};
+@@ -1618,8 +1618,8 @@ ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target,
+- minimum, ec_wind_ftruncate, ec_manager_truncate,
+- callback, data);
++ fop_flags, ec_wind_ftruncate,
++ ec_manager_truncate, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -2262,7 +2262,7 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd,
++ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+ {
+@@ -2276,7 +2276,7 @@ ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags,
+ ec_wind_writev, ec_manager_writev, callback,
+ data);
+ if (fop == NULL) {
+diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
+index f978af0..ffcac07 100644
+--- a/xlators/cluster/ec/src/ec-locks.c
++++ b/xlators/cluster/ec/src/ec-locks.c
+@@ -275,7 +275,7 @@ ec_manager_entrylk(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_entrylk_cbk_t func, void *data,
++ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+ {
+@@ -288,9 +288,9 @@ ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, minimum,
+- ec_wind_entrylk, ec_manager_entrylk, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target,
++ fop_flags, ec_wind_entrylk, ec_manager_entrylk,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -403,7 +403,7 @@ ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
++ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
+ const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+ {
+@@ -416,9 +416,9 @@ ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, minimum,
+- ec_wind_fentrylk, ec_manager_entrylk, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target,
++ fop_flags, ec_wind_fentrylk, ec_manager_entrylk,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -650,7 +650,7 @@ ec_manager_inodelk(ec_fop_data_t *fop, int32_t state)
+
+ void
+ ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func,
++ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
+ void *data, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+ {
+@@ -664,9 +664,9 @@ ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, minimum,
+- ec_wind_inodelk, ec_manager_inodelk, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target,
++ fop_flags, ec_wind_inodelk, ec_manager_inodelk,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -782,7 +782,7 @@ ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+
+ void
+ ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func,
++ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
+ void *data, const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+ {
+@@ -796,9 +796,9 @@ ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, minimum,
+- ec_wind_finodelk, ec_manager_inodelk, callback,
+- data);
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target,
++ fop_flags, ec_wind_finodelk, ec_manager_inodelk,
++ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+@@ -1032,7 +1032,7 @@ ec_manager_lk(ec_fop_data_t *fop, int32_t state)
+ }
+
+ void
+-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
++ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
+ fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+ {
+@@ -1045,7 +1045,7 @@ ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, minimum,
++ fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, fop_flags,
+ ec_wind_lk, ec_manager_lk, callback, data);
+ if (fop == NULL) {
+ goto out;
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index 6ae4a2b..1c295c0 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -308,9 +308,9 @@ struct _ec_fop_data {
+ int32_t id; /* ID of the file operation */
+ int32_t refs;
+ int32_t state;
+- int32_t minimum; /* Minimum number of successful
+- operation required to conclude a
+- fop as successful */
++ uint32_t minimum; /* Minimum number of successful
++ operation required to conclude a
++ fop as successful */
+ int32_t expected;
+ int32_t winds;
+ int32_t jobs;
+@@ -325,11 +325,12 @@ struct _ec_fop_data {
+ ec_cbk_data_t *answer; /* accepted answer */
+ int32_t lock_count;
+ int32_t locked;
++ gf_lock_t lock;
+ ec_lock_link_t locks[2];
+ int32_t first_lock;
+- gf_lock_t lock;
+
+- uint32_t flags;
++ uint32_t fop_flags; /* Flags passed by the caller. */
++ uint32_t flags; /* Internal flags. */
+ uint32_t first;
+ uintptr_t mask;
+ uintptr_t healing; /*Dispatch is done but call is successful only
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 13ffeb9..3c8013e 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -797,11 +797,12 @@ ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+ {
+- int32_t minimum = EC_MINIMUM_ALL;
++ uint32_t fop_flags = EC_MINIMUM_ALL;
++
+ if (cmd == ENTRYLK_UNLOCK)
+- minimum = EC_MINIMUM_ONE;
+- ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL, volume, loc,
+- basename, cmd, type, xdata);
++ fop_flags = EC_MINIMUM_ONE;
++ ec_entrylk(frame, this, -1, fop_flags, default_entrylk_cbk, NULL, volume,
++ loc, basename, cmd, type, xdata);
+
+ return 0;
+ }
+@@ -811,10 +812,11 @@ ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+ {
+- int32_t minimum = EC_MINIMUM_ALL;
++ uint32_t fop_flags = EC_MINIMUM_ALL;
++
+ if (cmd == ENTRYLK_UNLOCK)
+- minimum = EC_MINIMUM_ONE;
+- ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL, volume,
++ fop_flags = EC_MINIMUM_ONE;
++ ec_fentrylk(frame, this, -1, fop_flags, default_fentrylk_cbk, NULL, volume,
+ fd, basename, cmd, type, xdata);
+
+ return 0;
+@@ -905,7 +907,7 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ {
+ int error = 0;
+ ec_t *ec = this->private;
+- int32_t minimum = EC_MINIMUM_ONE;
++ int32_t fop_flags = EC_MINIMUM_ONE;
+
+ if (name && strcmp(name, EC_XATTR_HEAL) != 0) {
+ EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
+@@ -920,11 +922,11 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+
+ if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) ||
+ XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) {
+- minimum = EC_MINIMUM_ALL;
++ fop_flags = EC_MINIMUM_ALL;
+ }
+
+- ec_getxattr(frame, this, -1, minimum, default_getxattr_cbk, NULL, loc, name,
+- xdata);
++ ec_getxattr(frame, this, -1, fop_flags, default_getxattr_cbk, NULL, loc,
++ name, xdata);
+
+ return 0;
+ out:
+@@ -954,11 +956,12 @@ int32_t
+ ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+ {
+- int32_t minimum = EC_MINIMUM_ALL;
++ int32_t fop_flags = EC_MINIMUM_ALL;
++
+ if (flock->l_type == F_UNLCK)
+- minimum = EC_MINIMUM_ONE;
++ fop_flags = EC_MINIMUM_ONE;
+
+- ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum,
++ ec_inodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
+ default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata);
+
+ return 0;
+@@ -968,10 +971,11 @@ int32_t
+ ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+ {
+- int32_t minimum = EC_MINIMUM_ALL;
++ int32_t fop_flags = EC_MINIMUM_ALL;
++
+ if (flock->l_type == F_UNLCK)
+- minimum = EC_MINIMUM_ONE;
+- ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum,
++ fop_flags = EC_MINIMUM_ONE;
++ ec_finodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
+ default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata);
+
+ return 0;
+@@ -991,10 +995,11 @@ int32_t
+ ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+ {
+- int32_t minimum = EC_MINIMUM_ALL;
++ int32_t fop_flags = EC_MINIMUM_ALL;
++
+ if (flock->l_type == F_UNLCK)
+- minimum = EC_MINIMUM_ONE;
+- ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd, flock,
++ fop_flags = EC_MINIMUM_ONE;
++ ec_lk(frame, this, -1, fop_flags, default_lk_cbk, NULL, fd, cmd, flock,
+ xdata);
+
+ return 0;
+--
+1.8.3.1
+
diff --git a/0117-spec-Remove-thin-arbiter-package.patch b/0117-spec-Remove-thin-arbiter-package.patch
new file mode 100644
index 0000000..47fbffc
--- /dev/null
+++ b/0117-spec-Remove-thin-arbiter-package.patch
@@ -0,0 +1,184 @@
+From 70842c77735a655a053ed4a7cb77fec01028355a Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <sheggodu@redhat.com>
+Date: Mon, 22 Apr 2019 12:48:13 +0530
+Subject: [PATCH 117/124] spec: Remove thin-arbiter package
+
+Thin-arbiter is not supported in downstream. Updated the
+code to avoid RPMdiff warnings. Marked thin-arbiter
+test cases as bad so that nightly runs do not report
+expected failures.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1698436
+Change-Id: Ic36bccdfe1c7039fb7e5ce078a8b64cf71056970
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168406
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ configure.ac | 2 --
+ extras/systemd/Makefile.am | 6 ++---
+ glusterfs.spec.in | 39 +++++----------------------------
+ tests/basic/afr/ta-check-locks.t | 2 ++
+ tests/basic/afr/ta-read.t | 2 ++
+ tests/basic/afr/ta-shd.t | 2 ++
+ tests/basic/afr/ta-write-on-bad-brick.t | 2 ++
+ xlators/features/Makefile.am | 2 +-
+ 8 files changed, 18 insertions(+), 39 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index 521671b..3065077 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -116,8 +116,6 @@ AC_CONFIG_FILES([Makefile
+ xlators/features/Makefile
+ xlators/features/arbiter/Makefile
+ xlators/features/arbiter/src/Makefile
+- xlators/features/thin-arbiter/Makefile
+- xlators/features/thin-arbiter/src/Makefile
+ xlators/features/changelog/Makefile
+ xlators/features/changelog/src/Makefile
+ xlators/features/changelog/lib/Makefile
+diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am
+index 61446a9..b849775 100644
+--- a/extras/systemd/Makefile.am
++++ b/extras/systemd/Makefile.am
+@@ -1,8 +1,8 @@
+-CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service gluster-ta-volume.service
+-EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in gluster-ta-volume.service.in
++CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service
++EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in
+
+ if USE_SYSTEMD
+-systemd_DATA = gluster-ta-volume.service
++systemd_DATA =
+ endif
+
+ if WITH_SERVER
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index ba095b7..bf72a55 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -682,18 +682,6 @@ is in user space and easily manageable.
+ This package provides the glusterfs server daemon.
+ %endif
+
+-%package thin-arbiter
+-Summary: GlusterFS thin-arbiter module
+-Requires: %{name}%{?_isa} = %{version}-%{release}
+-Requires: %{name}-server%{?_isa} = %{version}-%{release}
+-
+-%description thin-arbiter
+-This package provides a tie-breaker functionality to GlusterFS
+-replicate volume. It includes translators required to provide the
+-functionality, and also few other scripts required for getting the setup done.
+-
+-This package provides the glusterfs thin-arbiter translator.
+-
+ %package client-xlators
+ Summary: GlusterFS client-side translators
+ Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+@@ -1045,14 +1033,6 @@ fi
+ exit 0
+ %endif
+
+-%preun thin-arbiter
+-if [ $1 -eq 0 ]; then
+- if [ -f %glusterta_svcfile ]; then
+- %service_stop gluster-ta-volume
+- %systemd_preun gluster-ta-volume
+- fi
+-fi
+-
+ ##-----------------------------------------------------------------------------
+ ## All %%postun should be placed here and keep them sorted
+ ##
+@@ -1188,6 +1168,12 @@ exit 0
+ %exclude %{_prefix}/lib/ocf/resource.d/heartbeat/*
+ %endif
+
++%exclude %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh
++
++%if ( 0%{?_without_server:1} )
++%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
++%endif
++
+ %files api
+ %exclude %{_libdir}/*.so
+ # libgfapi files
+@@ -1263,19 +1249,6 @@ exit 0
+ %{_bindir}/fusermount-glusterfs
+ %endif
+
+-%files thin-arbiter
+-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so
+-%dir %{_datadir}/glusterfs/scripts
+- %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh
+-%config %{_sysconfdir}/glusterfs/thin-arbiter.vol
+-
+-%if ( 0%{?_with_systemd:1} )
+-%{_unitdir}/gluster-ta-volume.service
+-%endif
+-
+-
+ %if ( 0%{!?_without_georeplication:1} )
+ %files geo-replication
+ %config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-georep
+diff --git a/tests/basic/afr/ta-check-locks.t b/tests/basic/afr/ta-check-locks.t
+index c0102c3..c51aa39 100644
+--- a/tests/basic/afr/ta-check-locks.t
++++ b/tests/basic/afr/ta-check-locks.t
+@@ -66,3 +66,5 @@ TEST ta_start_brick_process brick0
+ EXPECT_WITHIN $HEAL_TIMEOUT "0" get_lock_count_on_ta
+
+ cleanup;
++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000
+diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t
+index f2b3c38..1b36dba 100644
+--- a/tests/basic/afr/ta-read.t
++++ b/tests/basic/afr/ta-read.t
+@@ -58,3 +58,5 @@ TEST [ -z $TA_PID ]
+ # Read must now succeed.
+ TEST cat $M0/FILE
+ cleanup;
++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000
+diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t
+index bb2e58b..4b1ea85 100644
+--- a/tests/basic/afr/ta-shd.t
++++ b/tests/basic/afr/ta-shd.t
+@@ -47,3 +47,5 @@ TEST ta_start_mount_process $M0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+ EXPECT "Hello" cat $M0/a.txt
+ cleanup;
++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000
+diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t
+index 18cb65b..77cbf5f 100644
+--- a/tests/basic/afr/ta-write-on-bad-brick.t
++++ b/tests/basic/afr/ta-write-on-bad-brick.t
+@@ -49,3 +49,5 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate
+ TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+ cleanup;
++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000
+diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
+index 545c02b..537c148 100644
+--- a/xlators/features/Makefile.am
++++ b/xlators/features/Makefile.am
+@@ -4,7 +4,7 @@ endif
+
+ SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
+ compress changelog gfid-access snapview-client snapview-server trash \
+- shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
++ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) \
+ utime changetimerecorder
+
+ CLEANFILES =
+--
+1.8.3.1
+
diff --git a/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch b/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch
new file mode 100644
index 0000000..328116b
--- /dev/null
+++ b/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch
@@ -0,0 +1,31 @@
+From 24c2430e3cd629665851fdb2921d754e3ecef3b4 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Wed, 24 Apr 2019 07:47:32 +0530
+Subject: [PATCH 118/124] tests: mark thin-arbiter test ta.t as bad
+
+As thin-arbiter isn't packaged or tested in RHGS 3.5.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1698436
+Change-Id: Ideab570f58fc0b2daecb2be4ed4b6740362d0b35
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168613
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ tests/basic/afr/ta.t | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tests/basic/afr/ta.t b/tests/basic/afr/ta.t
+index 05d4843..cbd1b56 100644
+--- a/tests/basic/afr/ta.t
++++ b/tests/basic/afr/ta.t
+@@ -52,3 +52,5 @@ TEST ! ls $B0/brick0/c.txt
+ TEST ! ls $B0/brick1/c.txt
+
+ cleanup;
++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000
+--
+1.8.3.1
+
diff --git a/0119-glusterd-provide-a-way-to-detach-failed-node.patch b/0119-glusterd-provide-a-way-to-detach-failed-node.patch
new file mode 100644
index 0000000..fa9198b
--- /dev/null
+++ b/0119-glusterd-provide-a-way-to-detach-failed-node.patch
@@ -0,0 +1,53 @@
+From a325e7b3bbe5c1f67b999f375b83d2e2f1b2c1c6 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Tue, 9 Apr 2019 13:56:24 +0530
+Subject: [PATCH 119/124] glusterd: provide a way to detach failed node
+
+When a gluster node in a trusted storage pool has failed
+due to hardware issues, the volume delete operation fails
+saying "Not all peers are up", and peer detach for the failed
+node fails saying "Brick(s) with peer <peer_ip> exists
+in cluster".
+
+The idea here is to use either the replace-brick or the
+remove-brick command to remove all the bricks hosted by the
+failed node and then re-attempt the peer detach. This change
+adds this hint to the peer detach error message.
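+
+As a sketch of the suggested recovery flow (the volume, host, and
+brick names below are hypothetical placeholders):
+
+    # Drop the bricks hosted by the unrecoverable peer ...
+    gluster volume remove-brick testvol failed-host:/bricks/b1 force
+    # ... or migrate each of them to a healthy node instead:
+    gluster volume replace-brick testvol failed-host:/bricks/b1 \
+        new-host:/bricks/b1 commit force
+    # The peer detach should now succeed:
+    gluster peer detach failed-host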
+
+> upstream patch : https://review.gluster.org/22534
+
+>fixes: bz#1697866
+>Change-Id: I0c58887479d31db603ad8d6535ea9d547880ccc8
+>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1696334
+Change-Id: I0c58887479d31db603ad8d6535ea9d547880ccc8
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168614
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 6147995..af8a8a4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -4134,8 +4134,11 @@ set_deprobe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr,
+
+ case GF_DEPROBE_BRICK_EXIST:
+ snprintf(errstr, len,
+- "Brick(s) with the peer "
+- "%s exist in cluster",
++ "Peer %s hosts one or more bricks. If the peer is in "
++ "not recoverable state then use either replace-brick "
++ "or remove-brick command with force to remove all "
++ "bricks from the peer and attempt the peer detach "
++ "again.",
+ hostname);
+ break;
+
+--
+1.8.3.1
+
diff --git a/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch b/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch
new file mode 100644
index 0000000..58b86d7
--- /dev/null
+++ b/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch
@@ -0,0 +1,62 @@
+From c429d3c63601e6ea15af76aa684c30bbeb746467 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Tue, 23 Apr 2019 13:03:28 +0530
+Subject: [PATCH 120/124] glusterd/shd: Keep a ref on volinfo until attach rpc
+ execute cbk
+
+When svc attach executes to multiplex a daemon, we have to keep
+a ref on volinfo until it finishes execution, because if the
+attach is an async call, a parallel volume delete can otherwise
+free the volinfo.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22606/
+
+>Change-Id: Ibc02b89557baaed2f63db63d7fb1a7480444ae0d
+>fixes: bz#1702185
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: Ibc02b89557baaed2f63db63d7fb1a7480444ae0d
+BUG: 1702240
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168616
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 +++
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 4 ++++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 19eca9f..a9eab42 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -452,8 +452,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+ }
+
+ if (shd->attached) {
++ glusterd_volinfo_ref(volinfo);
++ /* Unref will happen from glusterd_svc_attach_cbk */
+ ret = glusterd_attach_svc(svc, volinfo, flags);
+ if (ret) {
++ glusterd_volinfo_unref(volinfo);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to attach shd svc(volume=%s) to pid=%d. Starting"
+ "a new process",
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index 02945b1..f7be394 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -695,6 +695,10 @@ out:
+ if (flag) {
+ GF_FREE(flag);
+ }
++
++ if (volinfo)
++ glusterd_volinfo_unref(volinfo);
++
+ GF_ATOMIC_DEC(conf->blockers);
+ STACK_DESTROY(frame->root);
+ return 0;
+--
+1.8.3.1
+
diff --git a/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch b/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch
new file mode 100644
index 0000000..00aa910
--- /dev/null
+++ b/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch
@@ -0,0 +1,42 @@
+From e4209dfb27faeca5544a09474ac524546e5d11e0 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Wed, 24 Apr 2019 18:14:33 +0530
+Subject: [PATCH 121/124] spec: glusterfs-devel for client-build should not
+ depend on server
+
+Found that libgfdb.pc was included in the client package.
+It was earlier removed from the glusterfs-devel client package
+as a part of:
+40eb62a8872ce061416e899fb6c0784b6253ab16
+
+It made it back into downstream when tier was introduced again.
+This patch removes it again.
+
+label: DOWNSTREAM ONLY
+
+Change-Id: I5fd5f5b0a6b06c677f8ea3693eb0392af51abaf1
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168670
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Milind Changire <mchangir@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index bf72a55..d20b062 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1210,7 +1210,7 @@ exit 0
+ %exclude %{_libdir}/pkgconfig/libgfchangelog.pc
+ %exclude %{_libdir}/libgfchangelog.so
+ %if ( 0%{!?_without_tiering:1} )
+-%{_libdir}/pkgconfig/libgfdb.pc
++%exclude %{_libdir}/pkgconfig/libgfdb.pc
+ %endif
+ %else
+ %{_libdir}/pkgconfig/libgfchangelog.pc
+--
+1.8.3.1
+
diff --git a/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch b/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch
new file mode 100644
index 0000000..5d256e2
--- /dev/null
+++ b/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch
@@ -0,0 +1,312 @@
+From 2f07d12f902e371d8cb8c76007d558e3a727b56a Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Tue, 9 Apr 2019 18:23:05 +0530
+Subject: [PATCH 122/124] posix/ctime: Fix stat(time attributes) inconsistency
+ during readdirp
+
+Problem:
+ Creating a tar file on a gluster volume throws the warning
+'file changed as we read it'.
+
+Cause:
+ During readdirp, for a few of the files whose inode is not
+present, time attributes were served from the backend. This caused
+the ctime of those files to differ between the stat tar does before
+the readdir and the one it does after.
+
+Solution:
+ If the ctime feature is enabled and the inode is not present, don't
+serve the time attributes from the backend file; serve them from xattr.
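+
+A minimal reproducer sketch (the mount path is illustrative), run
+against a FUSE mount of a volume with ctime enabled:
+
+    cd /mnt/glustervol
+    mkdir dir0 && echo data > dir0/FILE
+    # Before the fix, a fresh readdirp could surface backend ctimes:
+    tar -cf /tmp/backup.tar dir0
+    # tar: dir0/FILE: file changed as we read it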
+
+Backport of:
+ > Patch: https://review.gluster.org/22540
+ > fixes: bz#1698078
+ > Change-Id: I427ef865f97399475faf5aa6ca495f7e317603ae
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+BUG: 1699709
+Change-Id: I427ef865f97399475faf5aa6ca495f7e317603ae
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168687
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/ctime/ctime-readdir.c | 29 +++++++++++++++++
+ tests/basic/ctime/ctime-readdir.t | 50 ++++++++++++++++++++++++++++++
+ xlators/storage/posix/src/posix-helpers.c | 29 +++++++++++------
+ xlators/storage/posix/src/posix-metadata.c | 41 ++++++++++++++----------
+ 4 files changed, 123 insertions(+), 26 deletions(-)
+ create mode 100644 tests/basic/ctime/ctime-readdir.c
+ create mode 100644 tests/basic/ctime/ctime-readdir.t
+
+diff --git a/tests/basic/ctime/ctime-readdir.c b/tests/basic/ctime/ctime-readdir.c
+new file mode 100644
+index 0000000..8760db2
+--- /dev/null
++++ b/tests/basic/ctime/ctime-readdir.c
+@@ -0,0 +1,29 @@
++#include <stdio.h>
++#include <dirent.h>
++#include <string.h>
++#include <assert.h>
++
++int
++main(int argc, char **argv)
++{
++ DIR *dir = NULL;
++ struct dirent *entry = NULL;
++ int ret = 0;
++ char *path = NULL;
++
++ assert(argc == 2);
++ path = argv[1];
++
++ dir = opendir(path);
++ if (!dir) {
++ printf("opendir(%s) failed.\n", path);
++ return -1;
++ }
++
++ while ((entry = readdir(dir)) != NULL) {
++ }
++ if (dir)
++ closedir(dir);
++
++ return ret;
++}
+diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t
+new file mode 100644
+index 0000000..4564fc1
+--- /dev/null
++++ b/tests/basic/ctime/ctime-readdir.t
+@@ -0,0 +1,50 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup;
++
++TEST glusterd
++
++TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
++TEST $CLI volume set $V0 performance.stat-prefetch on
++TEST $CLI volume set $V0 performance.readdir-ahead off
++TEST $CLI volume start $V0;
++
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
++
++TEST mkdir $M0/dir0
++TEST "echo hello_world > $M0/dir0/FILE"
++
++ctime1=$(stat -c %Z $M0/dir0/FILE)
++echo "Mount change time: $ctime1"
++
++sleep 2
++
++#Write to back end directly to modify ctime of backend file
++TEST "echo write_from_backend >> $B0/brick1/dir0/FILE"
++TEST "echo write_from_backend >> $B0/brick2/dir0/FILE"
++TEST "echo write_from_backend >> $B0/brick3/dir0/FILE"
++echo "Backend change time"
++echo "brick1: $(stat -c %Z $B0/brick1/dir0/FILE)"
++echo "brick2: $(stat -c %Z $B0/brick2/dir0/FILE)"
++echo "brick3: $(stat -c %Z $B0/brick3/dir0/FILE)"
++
++#Stop and start to hit the case of no inode for readdir
++TEST umount $M0
++TEST $CLI volume stop $V0
++TEST $CLI volume start $V0
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
++
++TEST build_tester $(dirname $0)/ctime-readdir.c
++
++#Do readdir
++TEST ./$(dirname $0)/ctime-readdir $M0/dir0
++
++EXPECT "$ctime1" stat -c %Z $M0/dir0/FILE
++echo "Mount change time after readdir $(stat -c %Z $M0/dir0/FILE)"
++
++cleanup_tester $(dirname $0)/ctime-readdir
++
++cleanup;
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 193afc5..37e33a9 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -832,17 +832,26 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path,
+
+ iatt_from_stat(&stbuf, &lstatbuf);
+
+- if (inode && priv->ctime) {
+- if (!inode_locked) {
+- ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
++ if (priv->ctime) {
++ if (inode) {
++ if (!inode_locked) {
++ ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
++ } else {
++ ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
++ }
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
++ "posix get mdata failed on gfid: %s",
++ uuid_utoa(inode->gfid));
++ goto out;
++ }
+ } else {
+- ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
+- }
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
+- "posix get mdata failed on gfid: %s",
+- uuid_utoa(inode->gfid));
+- goto out;
++ ret = __posix_get_mdata_xattr(this, path, -1, NULL, &stbuf);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
++ "posix get mdata failed on path: %s", path);
++ goto out;
++ }
+ }
+ }
+
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 0ea9099..7ff5225 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -79,6 +79,7 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
+ fd_based_fop = _gf_true;
+ }
+ if (!(fd_based_fop || real_path_arg)) {
++ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL);
+ if (!real_path) {
+ uuid_utoa_r(inode->gfid, gfid_str);
+@@ -114,14 +115,14 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
+ key,
+ real_path ? real_path
+ : (real_path_arg ? real_path_arg : "null"),
+- uuid_utoa(inode->gfid));
++ inode ? uuid_utoa(inode->gfid) : "null");
+ } else {
+ gf_msg(this->name, GF_LOG_DEBUG, *op_errno, P_MSG_XATTR_FAILED,
+ "getxattr failed"
+ " on %s gfid: %s key: %s ",
+ real_path ? real_path
+ : (real_path_arg ? real_path_arg : "null"),
+- uuid_utoa(inode->gfid), key);
++ inode ? uuid_utoa(inode->gfid) : "null", key);
+ }
+ op_ret = -1;
+ goto out;
+@@ -148,7 +149,7 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
+ "getxattr failed on "
+ " on %s gfid: %s key: %s ",
+ real_path ? real_path : (real_path_arg ? real_path_arg : "null"),
+- uuid_utoa(inode->gfid), key);
++ inode ? uuid_utoa(inode->gfid) : "null", key);
+ goto out;
+ }
+
+@@ -233,9 +234,14 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ int ret = -1;
+ int op_errno = 0;
+
+- GF_VALIDATE_OR_GOTO(this->name, inode, out);
++ /* Handle readdirp: inode might be null, time attributes should be served
++ * from xattr not from backend's file attributes */
++ if (inode) {
++ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
++ } else {
++ ret = -1;
++ }
+
+- ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
+ if (ret == -1 || !mdata) {
+ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
+ if (!mdata) {
+@@ -251,7 +257,9 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ * is hit when in-memory status is lost due to brick
+ * down scenario
+ */
+- __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++ if (inode) {
++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++ }
+ } else {
+ /* Failed to get mdata from disk, xattr missing.
+ * This happens on two cases.
+@@ -278,7 +286,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ */
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ",
+- real_path ? real_path : "null", uuid_utoa(inode->gfid),
++ real_path ? real_path : "null",
++ inode ? uuid_utoa(inode->gfid) : "null",
+ GF_XATTR_MDATA_KEY);
+ GF_FREE(mdata);
+ ret = 0;
+@@ -297,6 +306,10 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ stbuf->ia_atime = mdata->atime.tv_sec;
+ stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+ }
++ /* Not set in inode context, hence free mdata */
++ if (!inode) {
++ GF_FREE(mdata);
++ }
+
+ out:
+ return ret;
+@@ -416,6 +429,11 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ }
+ }
+
++ if ((flag->ctime == 0) && (flag->mtime == 0) && (flag->atime == 0)) {
++ ret = 0;
++ goto unlock;
++ }
++
+ /* Earlier, mdata was updated only if the existing time is less
+ * than the time to be updated. This would fail the scenarios
+ * where mtime can be set to any time using the syscall. Hence
+@@ -486,7 +504,6 @@ out:
+ stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+ }
+
+-
+ return ret;
+ }
+
+@@ -604,10 +621,6 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+
+ if (priv->ctime) {
+ (void)posix_get_mdata_flag(frame->root->flags, &flag);
+- if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
+- goto out;
+- }
+-
+ if (frame->root->ctime.tv_sec == 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+ "posix set mdata failed, No ctime : %s gfid:%s", real_path,
+@@ -643,9 +656,6 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
+
+ if (inode && priv->ctime) {
+ (void)posix_get_parent_mdata_flag(frame->root->flags, &flag);
+- if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
+- goto out;
+- }
+ ret = posix_set_mdata_xattr(this, real_path, fd, inode,
+ &frame->root->ctime, stbuf, &flag,
+ _gf_false);
+@@ -655,7 +665,6 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
+ uuid_utoa(inode->gfid));
+ }
+ }
+-out:
+ return;
+ }
+
+--
+1.8.3.1
+
diff --git a/0123-ctime-Fix-log-repeated-logging-during-open.patch b/0123-ctime-Fix-log-repeated-logging-during-open.patch
new file mode 100644
index 0000000..b51c436
--- /dev/null
+++ b/0123-ctime-Fix-log-repeated-logging-during-open.patch
@@ -0,0 +1,79 @@
+From 03c0395a1ead769167046713a99662bc5c5233fa Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Fri, 19 Apr 2019 11:34:37 +0530
+Subject: [PATCH 123/124] ctime: Fix log repeated logging during open
+
+The log "posix set mdata failed, No ctime" logged repeatedly
+after the fix [1]. Those could be internal fops. This patch
+fixes the same.
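+
+To check whether a brick is hitting this (the log path is the usual
+default and may differ per install):
+
+    grep -c "posix set mdata failed, No ctime" \
+        /var/log/glusterfs/bricks/*.log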
+
+[1] https://review.gluster.org/22540
+
+Backport of:
+ > Patch: https://review.gluster.org/22591
+ > fixes: bz#1701457
+ > Change-Id: I42799a90b976982cedb0ca11fa224d555eb05650
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+BUG: 1699709
+Change-Id: I42799a90b976982cedb0ca11fa224d555eb05650
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168688
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/storage/posix/src/posix-metadata.c | 15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 7ff5225..e96f222 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -429,11 +429,6 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ }
+ }
+
+- if ((flag->ctime == 0) && (flag->mtime == 0) && (flag->atime == 0)) {
+- ret = 0;
+- goto unlock;
+- }
+-
+ /* Earlier, mdata was updated only if the existing time is less
+ * than the time to be updated. This would fail the scenarios
+ * where mtime can be set to any time using the syscall. Hence
+@@ -621,13 +616,9 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+
+ if (priv->ctime) {
+ (void)posix_get_mdata_flag(frame->root->flags, &flag);
+- if (frame->root->ctime.tv_sec == 0) {
+- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+- "posix set mdata failed, No ctime : %s gfid:%s", real_path,
+- inode ? uuid_utoa(inode->gfid) : "No inode");
++ if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
+ goto out;
+ }
+-
+ ret = posix_set_mdata_xattr(this, real_path, fd, inode,
+ &frame->root->ctime, stbuf, &flag,
+ _gf_false);
+@@ -656,6 +647,9 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
+
+ if (inode && priv->ctime) {
+ (void)posix_get_parent_mdata_flag(frame->root->flags, &flag);
++ if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
++ goto out;
++ }
+ ret = posix_set_mdata_xattr(this, real_path, fd, inode,
+ &frame->root->ctime, stbuf, &flag,
+ _gf_false);
+@@ -665,6 +659,7 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
+ uuid_utoa(inode->gfid));
+ }
+ }
++out:
+ return;
+ }
+
+--
+1.8.3.1
+
diff --git a/0124-spec-remove-duplicate-references-to-files.patch b/0124-spec-remove-duplicate-references-to-files.patch
new file mode 100644
index 0000000..b8a8c8b
--- /dev/null
+++ b/0124-spec-remove-duplicate-references-to-files.patch
@@ -0,0 +1,39 @@
+From e7112224eebaa91c529397a944e94254e482f48f Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Thu, 25 Apr 2019 13:07:19 +0530
+Subject: [PATCH 124/124] spec: remove duplicate references to files
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I446fbeadaaab96aa215f4fd784d951f825486008
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168735
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index d20b062..86a1527 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1214,7 +1214,6 @@ exit 0
+ %endif
+ %else
+ %{_libdir}/pkgconfig/libgfchangelog.pc
+-%{_libdir}/libgfchangelog.so
+ %if ( 0%{!?_without_tiering:1} )
+ %{_libdir}/pkgconfig/libgfdb.pc
+ %endif
+@@ -1469,7 +1468,6 @@ exit 0
+ %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
+ %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub
+--
+1.8.3.1
+
diff --git a/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch b/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch
new file mode 100644
index 0000000..c1c49a3
--- /dev/null
+++ b/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch
@@ -0,0 +1,75 @@
+From 0cd08d9e89f5ee86d5f4f90f0ca5c07bd290636c Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Fri, 26 Apr 2019 22:28:53 +0530
+Subject: [PATCH 125/141] glusterd: define dumpops in the xlator_api of
+ glusterd
+
+Problem: statedump is not capturing information related to glusterd.
+
+Solution: statedump is not capturing glusterd info because
+trav->dumpops is null in gf_proc_dump_single_xlator_info(),
+where trav is the glusterd xlator object. trav->dumpops is null
+because we missed defining dumpops in the xlator_api of glusterd.
+Defining dumpops in the xlator_api of glusterd fixes the issue.
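+
+For reference, a statedump can be triggered by hand (a sketch; the
+dump directory shown is the common default and may vary per build):
+
+    kill -SIGUSR1 $(pidof glusterd)
+    # With this fix the dump should contain the glusterd section:
+    grep "xlator.glusterd.priv" /var/run/gluster/*dump*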
+
+> fixes: bz#1703629
+> Change-Id: If85429ecb1ef580aced8d5b88d09fc15258bfc4c
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22640/
+
+BUG: 1703753
+Change-Id: If85429ecb1ef580aced8d5b88d09fc15258bfc4c
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169207
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/bugs/glusterd/optimized-basic-testcases.t | 13 +++++++++++++
+ xlators/mgmt/glusterd/src/glusterd.c | 1 +
+ 2 files changed, 14 insertions(+)
+
+diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
+index dd98a65..d700b5e 100644
+--- a/tests/bugs/glusterd/optimized-basic-testcases.t
++++ b/tests/bugs/glusterd/optimized-basic-testcases.t
+@@ -32,6 +32,16 @@ function get_brick_host_uuid()
+ echo $host_uuid_list | awk '{print $1}'
+ }
+
++function generate_statedump_and_check_for_glusterd_info {
++ pid=`pidof glusterd`
++ #remove old stale statedumps
++ cleanup_statedump $pid
++ kill -USR1 $pid
++ #Wait till the statedump is generated
++ sleep 1
++ fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.")
++ cat $statedumpdir/$fname | grep "xlator.glusterd.priv" | wc -l
++}
+
+ cleanup;
+
+@@ -279,4 +289,7 @@ mkdir -p /xyz/var/lib/glusterd/abc
+ TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc
+ EXPECT 'Created' volinfo_field "test" 'Status';
+
++EXPECT "1" generate_statedump_and_check_for_glusterd_info
++
++cleanup_statedump `pidof glusterd`
+ cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
+index d4ab630..c0973cb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.c
++++ b/xlators/mgmt/glusterd/src/glusterd.c
+@@ -2231,6 +2231,7 @@ xlator_api_t xlator_api = {
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
++ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+--
+1.8.3.1
+
diff --git a/0126-cluster-dht-refactor-dht-lookup-functions.patch b/0126-cluster-dht-refactor-dht-lookup-functions.patch
new file mode 100644
index 0000000..25c43a0
--- /dev/null
+++ b/0126-cluster-dht-refactor-dht-lookup-functions.patch
@@ -0,0 +1,663 @@
+From 6565749c95e90f360a994bde1416cffd22cd8ce9 Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Mon, 25 Mar 2019 15:56:56 +0530
+Subject: [PATCH 126/141] cluster/dht: refactor dht lookup functions
+
+Part 1: refactor the dht_lookup_dir_cbk
+and dht_selfheal_directory functions.
+Added a simple dht selfheal directory test.
+
+upstream: https://review.gluster.org/#/c/glusterfs/+/22407/
+> Change-Id: I1410c26359e3c14b396adbe751937a52bd2fcff9
+> updates: bz#1590385
+
+Change-Id: Idd0a7df7122d634c371ecf30c0dbb94dc6063416
+BUG: 1703897
+Signed-off-by: N Balachandran <nbalacha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169037
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Susant Palai <spalai@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/distribute/dir-heal.t | 145 +++++++++++++++++++++++++++
+ xlators/cluster/dht/src/dht-common.c | 178 +++++++++++++++------------------
+ xlators/cluster/dht/src/dht-selfheal.c | 65 +++++++-----
+ 3 files changed, 264 insertions(+), 124 deletions(-)
+ create mode 100644 tests/basic/distribute/dir-heal.t
+
+diff --git a/tests/basic/distribute/dir-heal.t b/tests/basic/distribute/dir-heal.t
+new file mode 100644
+index 0000000..851f765
+--- /dev/null
++++ b/tests/basic/distribute/dir-heal.t
+@@ -0,0 +1,145 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../nfs.rc
++. $(dirname $0)/../../common-utils.rc
++
++# Test 1 overview:
++# ----------------
++#
++# 1. Kill one brick of the volume.
++# 2. Create directories and change directory properties.
++# 3. Bring up the brick and access the directory
++# 4. Check the permissions and xattrs on the backend
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
++TEST $CLI volume start $V0
++
++# We want the lookup to reach DHT
++TEST $CLI volume set $V0 performance.stat-prefetch off
++
++# Mount using FUSE , kill a brick and create directories
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++
++ls $M0/
++cd $M0
++
++TEST kill_brick $V0 $H0 $B0/$V0-1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
++
++TEST mkdir dir{1..4}
++
++# No change for dir1
++# Change permissions for dir2
++# Set xattr on dir3
++# Change permissions and set xattr on dir4
++
++TEST chmod 777 $M0/dir2
++
++TEST setfattr -n "user.test" -v "test" $M0/dir3
++
++TEST chmod 777 $M0/dir4
++TEST setfattr -n "user.test" -v "test" $M0/dir4
++
++
++# Start all bricks
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
++
++#$CLI volume status
++
++# It takes a while for the client to reconnect to the brick
++sleep 5
++
++stat $M0/dir* > /dev/null
++
++# Check that directories have been created on the brick that was killed
++
++TEST ls $B0/$V0-1/dir1
++
++TEST ls $B0/$V0-1/dir2
++EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
++
++TEST ls $B0/$V0-1/dir3
++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
++
++
++TEST ls $B0/$V0-1/dir4
++EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
++
++
++TEST rm -rf $M0/*
++
++cd
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++
++
++# Test 2 overview:
++# ----------------
++# 1. Create directories with all bricks up.
++# 2. Kill a brick and change directory properties and set user xattr.
++# 3. Bring up the brick and access the directory.
++# 4. Check the permissions and xattrs on the backend.
++
++
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++
++ls $M0/
++cd $M0
++TEST mkdir dir{1..4}
++
++TEST kill_brick $V0 $H0 $B0/$V0-1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
++
++# No change for dir1
++# Change permissions for dir2
++# Set xattr on dir3
++# Change permissions and set xattr on dir4
++
++TEST chmod 777 $M0/dir2
++
++TEST setfattr -n "user.test" -v "test" $M0/dir3
++
++TEST chmod 777 $M0/dir4
++TEST setfattr -n "user.test" -v "test" $M0/dir4
++
++
++# Start all bricks
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
++
++#$CLI volume status
++
++# It takes a while for the client to reconnect to the brick
++sleep 5
++
++stat $M0/dir* > /dev/null
++
++# Check directories on the brick that was killed
++
++TEST ls $B0/$V0-1/dir2
++EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
++
++TEST ls $B0/$V0-1/dir3
++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
++
++
++TEST ls $B0/$V0-1/dir4
++EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
++cd
++
++
++# Cleanup
++cleanup
++
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 2a68193..d3e900c 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -801,9 +801,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
+ call_frame_t *xattr_frame = NULL;
+ gf_boolean_t vol_down = _gf_false;
+
+- this = frame->this;
+-
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
++ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+@@ -812,6 +811,7 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
+ conf = this->private;
+ layout = local->selfheal.layout;
+ local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
++
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ /* Code to update hashed subvol consider as a mds subvol
+@@ -1240,6 +1240,31 @@ out:
+ }
+
+ int
++dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
++{
++ dht_local_t *local = NULL;
++ dht_layout_t *layout = NULL;
++ int needs_selfheal = 0;
++ int ret = 0;
++
++ local = frame->local;
++ layout = local->layout;
++
++ if (local->need_attrheal || local->need_xattr_heal ||
++ local->need_selfheal) {
++ needs_selfheal = 1;
++ }
++
++ ret = dht_layout_normalize(this, &local->loc, layout);
++
++ if (ret != 0) {
++ gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path);
++ needs_selfheal = 1;
++ }
++ return needs_selfheal;
++}
++
++int
+ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+@@ -1256,8 +1281,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
+- call_frame_t *copy = NULL;
+- dht_local_t *copy_local = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+@@ -1270,7 +1293,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ conf = this->private;
+
+ layout = local->layout;
++ gf_msg_debug(this->name, op_errno,
++ "%s: lookup on %s returned with op_ret = %d, op_errno = %d",
++ local->loc.path, prev->name, op_ret, op_errno);
+
++ /* The first successful lookup */
+ if (!op_ret && gf_uuid_is_null(local->gfid)) {
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+@@ -1298,13 +1325,10 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+- gf_msg_debug(this->name, op_errno,
+- "%s: lookup on %s returned error", local->loc.path,
+- prev->name);
+
+ /* The GFID is missing on this subvol. Force a heal. */
+ if (op_errno == ENODATA) {
+- local->need_selfheal = 1;
++ local->need_lookup_everywhere = 1;
+ }
+ goto unlock;
+ }
+@@ -1312,12 +1336,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (!is_dir) {
+ gf_msg_debug(this->name, 0,
+- "lookup of %s on %s returned non"
+- "dir 0%o"
++ "%s: lookup on %s returned non dir 0%o"
+ "calling lookup_everywhere",
+ local->loc.path, prev->name, stbuf->ia_type);
+
+- local->need_selfheal = 1;
++ local->need_lookup_everywhere = 1;
+ goto unlock;
+ }
+
+@@ -1328,14 +1351,8 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ dht_aggregate_xattr(local->xattr, xattr);
+ }
+
+- if (dict_get(xattr, conf->mds_xattr_key)) {
+- local->mds_subvol = prev;
+- local->mds_stbuf.ia_gid = stbuf->ia_gid;
+- local->mds_stbuf.ia_uid = stbuf->ia_uid;
+- local->mds_stbuf.ia_prot = stbuf->ia_prot;
+- }
+-
+ if (local->stbuf.ia_type != IA_INVAL) {
++ /* This is not the first subvol to respond */
+ if (!__is_root_gfid(stbuf->ia_gfid) &&
+ ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+@@ -1348,65 +1365,64 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ if (local->inode == NULL)
+ local->inode = inode_ref(inode);
+
++ /* This could be a problem */
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ gf_msg_debug(this->name, 0,
+- "Internal xattr %s is not present "
+- " on path %s gfid is %s ",
+- conf->mds_xattr_key, local->loc.path, gfid_local);
++ "%s: mds xattr %s is not present "
++ "on %s(gfid = %s)",
++ local->loc.path, conf->mds_xattr_key, prev->name,
++ gfid_local);
+ goto unlock;
+- } else {
+- /* Save mds subvol on inode ctx */
+- ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0,
+- DHT_MSG_SET_INODE_CTX_FAILED,
+- "Failed to set hashed subvol for %s vol is %s",
+- local->loc.path, prev->name);
+- }
++ }
++
++ local->mds_subvol = prev;
++ local->mds_stbuf = *stbuf;
++
++ /* Save mds subvol on inode ctx */
++
++ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
++ "%s: Failed to set mds (%s)", local->loc.path, prev->name);
+ }
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ if ((check_mds < 0) && !errst) {
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+- "Value of %s is not zero on hashed subvol "
+- "so xattr needs to be heal on non hashed"
+- " path is %s and vol name is %s "
+- " gfid is %s",
+- conf->mds_xattr_key, local->loc.path, prev->name,
++ "%s: %s is not zero on %s. Xattrs need to be healed."
++ "(gfid = %s)",
++ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
+ local->need_xattr_heal = 1;
+- local->mds_subvol = prev;
+ }
+ }
++
+ unlock:
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
++ /* If the mds subvol is not set correctly*/
++ if (!__is_root_gfid(local->gfid) &&
++ (!dict_get(local->xattr, conf->mds_xattr_key))) {
++ local->need_selfheal = 1;
++ }
++
+ /* No need to call xattr heal code if volume count is 1
+ */
+- if (conf->subvolume_cnt == 1)
++ if (conf->subvolume_cnt == 1) {
+ local->need_xattr_heal = 0;
+-
+- /* Code to update all extended attributed from hashed subvol
+- to local->xattr
+- */
+- if (local->need_xattr_heal && (local->mds_xattr)) {
+- dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+- NULL, NULL);
+- dict_unref(local->mds_xattr);
+- local->mds_xattr = NULL;
+ }
+
+- if (local->need_selfheal) {
+- local->need_selfheal = 0;
++ if (local->need_selfheal || local->need_lookup_everywhere) {
+ /* Set the gfid-req so posix will set the GFID*/
+ if (!gf_uuid_is_null(local->gfid)) {
++ /* Ok, this should _never_ happen */
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ } else {
+@@ -1414,73 +1430,36 @@ unlock:
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid_req, 16);
+ }
++ }
++
++ if (local->need_lookup_everywhere) {
++ local->need_lookup_everywhere = 0;
+ dht_lookup_everywhere(frame, this, &local->loc);
+ return 0;
+ }
+
+ if (local->op_ret == 0) {
+- ret = dht_layout_normalize(this, &local->loc, layout);
+-
+- if (ret != 0) {
+- gf_msg_debug(this->name, 0, "fixing assignment on %s",
+- local->loc.path);
++ if (dht_needs_selfheal(frame, this)) {
+ goto selfheal;
+ }
+
+ dht_layout_set(this, local->inode, layout);
+- if (!dict_get(local->xattr, conf->mds_xattr_key) ||
+- local->need_xattr_heal)
+- goto selfheal;
+- }
+-
+- if (local->inode) {
+- dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+- }
+-
+- if (local->loc.parent) {
+- dht_inode_ctx_time_update(local->loc.parent, this,
+- &local->postparent, 1);
+- }
+-
+- if (local->need_attrheal) {
+- local->need_attrheal = 0;
+- if (!__is_root_gfid(inode->gfid)) {
+- local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+- local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+- local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
++ if (local->inode) {
++ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ }
+- copy = create_frame(this, this->ctx->pool);
+- if (copy) {
+- copy_local = dht_local_init(copy, &local->loc, NULL, 0);
+- if (!copy_local) {
+- DHT_STACK_DESTROY(copy);
+- goto skip_attr_heal;
+- }
+- copy_local->stbuf = local->stbuf;
+- gf_uuid_copy(copy_local->loc.gfid, local->stbuf.ia_gfid);
+- copy_local->mds_stbuf = local->mds_stbuf;
+- copy_local->mds_subvol = local->mds_subvol;
+- copy->local = copy_local;
+- FRAME_SU_DO(copy, dht_local_t);
+- ret = synctask_new(this->ctx->env, dht_dir_attr_heal,
+- dht_dir_attr_heal_done, copy, copy);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+- DHT_MSG_DIR_ATTR_HEAL_FAILED,
+- "Synctask creation failed to heal attr "
+- "for path %s gfid %s ",
+- local->loc.path, local->gfid);
+- DHT_STACK_DESTROY(copy);
+- }
++
++ if (local->loc.parent) {
++ dht_inode_ctx_time_update(local->loc.parent, this,
++ &local->postparent, 1);
+ }
+ }
+
+- skip_attr_heal:
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
++
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+@@ -5444,9 +5423,8 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+- "Failed to get mds subvol for path %s"
+- "gfid is %s ",
+- loc->path, gfid_local);
++ "%s: Failed to get mds subvol. (gfid is %s)", loc->path,
++ gfid_local);
+ }
+ (*op_errno) = ENOENT;
+ goto err;
+diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
+index bd1b7ea..5420fca 100644
+--- a/xlators/cluster/dht/src/dht-selfheal.c
++++ b/xlators/cluster/dht/src/dht-selfheal.c
+@@ -1033,18 +1033,27 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int missing_attr = 0;
+ int i = 0, ret = -1;
+ dht_local_t *local = NULL;
++ dht_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int cnt = 0;
+
+ local = frame->local;
+ this = frame->this;
++ conf = this->private;
++
++ /* We need to heal the attrs if:
++ * 1. Any directories were missing - the newly created dirs will need
++ * to have the correct attrs set
++ * 2. An existing dir does not have the correct permissions - they may
++ * have been changed when a brick was down.
++ */
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1)
+ missing_attr++;
+ }
+
+- if (missing_attr == 0) {
++ if ((missing_attr == 0) && (local->need_attrheal == 0)) {
+ if (!local->heal_layout) {
+ gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
+ loc->path, uuid_utoa(loc->gfid));
+@@ -1062,19 +1071,12 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ return 0;
+ }
+
+- local->call_cnt = missing_attr;
+- cnt = layout->cnt;
++ cnt = local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < cnt; i++) {
+- if (layout->list[i].err == -1) {
+- gf_msg_trace(this->name, 0, "%s: setattr on subvol %s, gfid = %s",
+- loc->path, layout->list[i].xlator->name,
+- uuid_utoa(loc->gfid));
+-
+- STACK_WIND(
+- frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
+- layout->list[i].xlator->fops->setattr, loc, stbuf, valid, NULL);
+- }
++ STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
++ layout->list[i].xlator->fops->setattr, loc, stbuf, valid,
++ NULL);
+ }
+
+ return 0;
+@@ -1492,6 +1494,9 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
+ }
+
+ if (missing_dirs == 0) {
++ /* We don't need to create any directories. Proceed to heal the
++ * attrs and xattrs
++ */
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ if (local->need_xattr_heal) {
+ local->need_xattr_heal = 0;
+@@ -1499,8 +1504,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+- "xattr heal failed for "
+- "directory %s gfid %s ",
++ "%s:xattr heal failed for "
++ "directory (gfid = %s)",
+ local->loc.path, local->gfid);
+ } else {
+ if (!gf_uuid_is_null(local->gfid))
+@@ -1512,8 +1517,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
+
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+- "Failed to set mds xattr "
+- "for directory %s gfid %s ",
++ "%s: Failed to set mds xattr "
++ "for directory (gfid = %s)",
+ local->loc.path, local->gfid);
+ }
+ }
+@@ -2085,10 +2090,10 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
+ {
+ dht_local_t *local = NULL;
++ xlator_t *this = NULL;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+- xlator_t *this = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ inode_t *linked_inode = NULL, *inode = NULL;
+@@ -2099,6 +2104,11 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(this, layout);
+
++ if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) {
++ /*Use the one in the mds_stbuf*/
++ local->stbuf = local->mds_stbuf;
++ }
++
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+@@ -2118,6 +2128,13 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ inode_unref(inode);
+ }
+
++ if (local->need_xattr_heal && (local->mds_xattr)) {
++ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
++ NULL, NULL);
++ dict_unref(local->mds_xattr);
++ local->mds_xattr = NULL;
++ }
++
+ dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ &local->selfheal.missing_cnt, &local->selfheal.down,
+@@ -2128,18 +2145,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+
+ if (down) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
+- "Directory selfheal failed: %d subvolumes down."
+- "Not fixing. path = %s, gfid = %s",
+- down, loc->path, gfid);
++ "%s: Directory selfheal failed: %d subvolumes down."
++ "Not fixing. gfid = %s",
++ loc->path, down, gfid);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ if (misc) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
+- "Directory selfheal failed : %d subvolumes "
+- "have unrecoverable errors. path = %s, gfid = %s",
+- misc, loc->path, gfid);
++ "%s: Directory selfheal failed : %d subvolumes "
++ "have unrecoverable errors. gfid = %s",
++ loc->path, misc, gfid);
+
+ ret = 0;
+ goto sorry_no_fix;
+@@ -2369,13 +2386,13 @@ dht_dir_attr_heal(void *data)
+
+ frame = data;
+ local = frame->local;
+- mds_subvol = local->mds_subvol;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO("dht", conf, out);
+
++ mds_subvol = local->mds_subvol;
+ call_cnt = conf->subvolume_cnt;
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {
+--
+1.8.3.1
+
diff --git a/0127-cluster-dht-Refactor-dht-lookup-functions.patch b/0127-cluster-dht-Refactor-dht-lookup-functions.patch
new file mode 100644
index 0000000..0d0fdb3
--- /dev/null
+++ b/0127-cluster-dht-Refactor-dht-lookup-functions.patch
@@ -0,0 +1,200 @@
+From 884ba13ee47888b5de9b6d6acaf051e895f55053 Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Wed, 10 Apr 2019 14:28:55 +0530
+Subject: [PATCH 127/141] cluster/dht: Refactor dht lookup functions
+
+Part 2: Modify dht_revalidate_cbk to call
+dht_selfheal_directory instead of separate calls
+to heal attrs and xattrs.
+
+upstream: https://review.gluster.org/#/c/glusterfs/+/22542/
+
+> Change-Id: Id41ac6c4220c2c35484812bbfc6157fc3c86b142
+> updates: bz#1590385
+
+Change-Id: Id53962306dd142efc741de838b585fa5c78f9b1f
+BUG: 1703897
+Signed-off-by: N Balachandran <nbalacha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169038
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Susant Palai <spalai@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 104 ++++++++++-------------------------
+ 1 file changed, 30 insertions(+), 74 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index d3e900c..183872f 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -1365,7 +1365,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ if (local->inode == NULL)
+ local->inode = inode_ref(inode);
+
+- /* This could be a problem */
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+@@ -1509,8 +1508,6 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int follow_link = 0;
+- call_frame_t *copy = NULL;
+- dht_local_t *copy_local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ uint32_t vol_commit_hash = 0;
+ xlator_t *subvol = NULL;
+@@ -1538,17 +1535,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
++ gf_msg_debug(this->name, op_errno,
++ "%s: revalidate lookup on %s returned op_ret %d",
++ local->loc.path, prev->name, op_ret);
++
+ LOCK(&frame->lock);
+ {
+ if (gf_uuid_is_null(local->gfid)) {
+ memcpy(local->gfid, local->loc.gfid, 16);
+ }
+
+- gf_msg_debug(this->name, op_errno,
+- "revalidate lookup of %s "
+- "returned with op_ret %d",
+- local->loc.path, op_ret);
+-
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+@@ -1580,6 +1576,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ local->loc.path);
+
+ local->need_lookup_everywhere = 1;
++ } else if (IA_ISDIR(local->loc.inode->ia_type)) {
++ local->need_selfheal = 1;
+ }
+ }
+
+@@ -1638,15 +1636,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot)) {
+- local->need_selfheal = 1;
++ local->need_attrheal = 1;
+ }
+ }
+
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ gf_msg_debug(this->name, 0,
+- "internal xattr %s is not present"
+- " on path %s gfid is %s ",
+- conf->mds_xattr_key, local->loc.path, gfid);
++ "%s: internal xattr %s is not present"
++ " on subvol %s(gfid is %s)",
++ local->loc.path, conf->mds_xattr_key, prev->name,
++ gfid);
+ } else {
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+@@ -1734,71 +1733,28 @@ unlock:
+ local->need_xattr_heal = 0;
+
+ if (IA_ISDIR(local->stbuf.ia_type)) {
+- /* Code to update all extended attributed from hashed
+- subvol to local->xattr and call heal code to heal
+- custom xattr from hashed subvol to non-hashed subvol
+- */
+- if (local->need_xattr_heal && (local->mds_xattr)) {
+- dht_dir_set_heal_xattr(this, local, local->xattr,
+- local->mds_xattr, NULL, NULL);
+- dict_unref(local->mds_xattr);
+- local->mds_xattr = NULL;
+- local->need_xattr_heal = 0;
+- ret = dht_dir_xattr_heal(this, local);
+- if (ret)
+- gf_msg(this->name, GF_LOG_ERROR, ret,
+- DHT_MSG_DIR_XATTR_HEAL_FAILED,
+- "xattr heal failed for directory %s "
+- " gfid %s ",
+- local->loc.path, gfid);
+- } else {
+- /* Call function to save hashed subvol on inode
+- ctx if internal mds xattr is not present and
+- all subvols are up
+- */
+- if (inode && !__is_root_gfid(inode->gfid) && (!local->op_ret))
+- (void)dht_common_mark_mdsxattr(frame, NULL, 1);
+- }
+- }
+- if (local->need_selfheal) {
+- local->need_selfheal = 0;
+- if (!__is_root_gfid(inode->gfid)) {
+- gf_uuid_copy(local->gfid, local->mds_stbuf.ia_gfid);
+- local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+- local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+- local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
+- } else {
+- gf_uuid_copy(local->gfid, local->stbuf.ia_gfid);
+- local->stbuf.ia_gid = local->prebuf.ia_gid;
+- local->stbuf.ia_uid = local->prebuf.ia_uid;
+- local->stbuf.ia_prot = local->prebuf.ia_prot;
+- }
++ if (!__is_root_gfid(local->loc.inode->gfid) &&
++ (!dict_get(local->xattr, conf->mds_xattr_key)))
++ local->need_selfheal = 1;
+
+- copy = create_frame(this, this->ctx->pool);
+- if (copy) {
+- copy_local = dht_local_init(copy, &local->loc, NULL, 0);
+- if (!copy_local) {
+- DHT_STACK_DESTROY(copy);
+- goto cont;
+- }
+- copy_local->stbuf = local->stbuf;
+- copy_local->mds_stbuf = local->mds_stbuf;
+- copy_local->mds_subvol = local->mds_subvol;
+- copy->local = copy_local;
+- FRAME_SU_DO(copy, dht_local_t);
+- ret = synctask_new(this->ctx->env, dht_dir_attr_heal,
+- dht_dir_attr_heal_done, copy, copy);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+- DHT_MSG_DIR_ATTR_HEAL_FAILED,
+- "Synctask creation failed to heal attr "
+- "for path %s gfid %s ",
+- local->loc.path, local->gfid);
+- DHT_STACK_DESTROY(copy);
++ if (dht_needs_selfheal(frame, this)) {
++ if (!__is_root_gfid(local->loc.inode->gfid)) {
++ local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
++ local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
++ local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
++ } else {
++ local->stbuf.ia_gid = local->prebuf.ia_gid;
++ local->stbuf.ia_uid = local->prebuf.ia_uid;
++ local->stbuf.ia_prot = local->prebuf.ia_prot;
+ }
++
++ layout = local->layout;
++ dht_selfheal_directory(frame, dht_lookup_selfheal_cbk,
++ &local->loc, layout);
++ return 0;
+ }
+ }
+- cont:
++
+ if (local->layout_mismatch) {
+ /* Found layout mismatch in the directory, need to
+ fix this in the inode context */
+@@ -1814,7 +1770,7 @@ unlock:
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+
+- /* We know that current cached subvol is no more
++ /* We know that current cached subvol is no longer
+ valid, get the new one */
+ local->cached_subvol = NULL;
+ if (local->xattr_req) {
+--
+1.8.3.1
+
diff --git a/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch b/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch
new file mode 100644
index 0000000..862b828
--- /dev/null
+++ b/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch
@@ -0,0 +1,86 @@
+From bb39abc1dab3c7b7b725f9eefe119218e94f610b Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Mon, 29 Apr 2019 18:48:36 +0530
+Subject: [PATCH 128/141] glusterd: Fix bulkvoldict thread logic in brick
+ multiplexing
+
+Problem: Currently glusterd spawns a bulkvoldict thread in a brick_mux
+ environment even when the number of volumes is less than the
+ configured glusterd.vol_count_per_thread.
+
+Solution: Correct the logic that spawns the bulkvoldict threads:
+ 1) Calculate endindex only when the total thread count is non-zero.
+ 2) Update the end index correctly so that each bulkvoldict thread
+ receives the right volume range.
+
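+For illustration, a minimal standalone sketch of the intended range
+assignment (hedged: the variable names mirror the hunk below, but the
+helper itself is hypothetical and the range handling is simplified):
+
+    /* sketch: split volcnt volumes into per-thread [start, end] ranges */
+    static void
+    assign_volume_ranges(int volcnt, int vol_per_thread_limit)
+    {
+        int totthread = volcnt / vol_per_thread_limit;
+        int endindex = 0;
+
+        /* compute the remainder only when at least one full thread is
+         * needed; a non-zero remainder adds one extra thread */
+        if (totthread) {
+            endindex = volcnt % vol_per_thread_limit;
+            if (endindex)
+                totthread++;
+        }
+
+        for (int i = 0; i < totthread; i++) {
+            int start = i * vol_per_thread_limit + 1;
+            int end = (i + 1) * vol_per_thread_limit;
+            if (end > volcnt)
+                end = volcnt;   /* the last thread takes the remainder */
+            /* thread i walks the volume list, skipping indices below
+             * start and stopping past end (see the hunk below) */
+        }
+    }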
+> Fixes: bz#1704252
+> Change-Id: I1def847fbdd6a605e7687bfc4e42b706bf0eb70b
+> (Cherry picked from commit ac70f66c5805e10b3a1072bd467918730c0aeeb4)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22647/)
+
+BUG: 1704769
+Change-Id: I1def847fbdd6a605e7687bfc4e42b706bf0eb70b
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169091
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index ff6102b..efa5a86 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -3436,9 +3436,19 @@ glusterd_add_bulk_volumes_create_thread(void *data)
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ count++;
+- if ((count < start) || (count > end))
++
++ /* Skip volumes whose index is below the start
++ index assigned to this thread
++ */
++ if (count < start)
+ continue;
+
++ /* Stop processing once the index is greater
++ than this thread's end index
++ */
++ if (count > end)
++ break;
++
+ ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
+ if (ret)
+ goto out;
+@@ -3499,9 +3509,11 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+ totthread = 0;
+ } else {
+ totthread = volcnt / vol_per_thread_limit;
+- endindex = volcnt % vol_per_thread_limit;
+- if (endindex)
+- totthread++;
++ if (totthread) {
++ endindex = volcnt % vol_per_thread_limit;
++ if (endindex)
++ totthread++;
++ }
+ }
+
+ if (totthread == 0) {
+@@ -3527,10 +3539,10 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+ arg->this = this;
+ arg->voldict = dict_arr[i];
+ arg->start = start;
+- if (!endindex) {
++ if ((i + 1) != totthread) {
+ arg->end = ((i + 1) * vol_per_thread_limit);
+ } else {
+- arg->end = (start + endindex);
++ arg->end = ((i * vol_per_thread_limit) + endindex);
+ }
+ th_ret = gf_thread_create_detached(
+ &th_id, glusterd_add_bulk_volumes_create_thread, arg,
+--
+1.8.3.1
+
diff --git a/0129-core-handle-memory-accounting-correctly.patch b/0129-core-handle-memory-accounting-correctly.patch
new file mode 100644
index 0000000..1281d04
--- /dev/null
+++ b/0129-core-handle-memory-accounting-correctly.patch
@@ -0,0 +1,401 @@
+From f305ee93ec9dbbd679e1eb58c7c0bf8d9b5659d5 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 12 Apr 2019 13:40:59 +0200
+Subject: [PATCH 129/141] core: handle memory accounting correctly
+
+When a translator stops, memory accounting for that translator is not
+destroyed (because there could remain memory allocated that references
+it), but mutexes that coordinate updates of memory accounting were
+destroyed. This caused incorrect memory accounting and even crashes in
+debug mode.
+
+This patch also fixes some other things:
+
+* Reduce the number of atomic operations needed to manage memory
+ accounting.
+* Correctly account memory when realloc() is used.
+* Merge two critical sections into one.
+* Cleaned the code a bit.
+
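+As a hedged illustration of the reference scheme (simplified
+structures and hypothetical helper names; the real code is in the
+mem-pool.c and xlator.c hunks below), a reference is taken only when a
+type records its first live allocation and dropped when its last one
+is freed, so the locks survive until the accounting is truly unused:
+
+    /* sketch: pair per-type references with the mem_acct lifetime */
+    static void
+    acct_alloc(struct mem_acct *acct, uint32_t type)
+    {
+        bool new_ref;
+
+        LOCK(&acct->rec[type].lock);
+        new_ref = (acct->rec[type].num_allocs++ == 0);
+        UNLOCK(&acct->rec[type].lock);
+
+        if (new_ref)
+            GF_ATOMIC_INC(acct->refcnt);  /* one ref per active type */
+    }
+
+    static void
+    acct_free(struct mem_acct *acct, uint32_t type)
+    {
+        bool last_ref;
+
+        LOCK(&acct->rec[type].lock);
+        last_ref = (--acct->rec[type].num_allocs == 0);
+        UNLOCK(&acct->rec[type].lock);
+
+        if (last_ref)
+            xlator_mem_acct_unref(acct);  /* destroys locks at zero */
+    }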
+Upstream patch:
+> Change-Id: Id5eaee7338729b9bc52c931815ca3ff1e5a7dcc8
+> Upstream patch link : https://review.gluster.org/#/c/glusterfs/+/22554/
+> BUG: 1659334
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: Id5eaee7338729b9bc52c931815ca3ff1e5a7dcc8
+Fixes: bz#1702270
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169325
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ libglusterfs/src/glusterfs/xlator.h | 2 +
+ libglusterfs/src/libglusterfs.sym | 1 +
+ libglusterfs/src/mem-pool.c | 193 ++++++++++++++++--------------------
+ libglusterfs/src/xlator.c | 23 +++--
+ 4 files changed, 105 insertions(+), 114 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index 06152ec..8998976 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -1035,6 +1035,8 @@ gf_boolean_t
+ loc_is_nameless(loc_t *loc);
+ int
+ xlator_mem_acct_init(xlator_t *xl, int num_types);
++void
++xlator_mem_acct_unref(struct mem_acct *mem_acct);
+ int
+ is_gf_log_command(xlator_t *trans, const char *name, char *value);
+ int
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index fa2025e..cf5757c 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1093,6 +1093,7 @@ xlator_foreach
+ xlator_foreach_depth_first
+ xlator_init
+ xlator_mem_acct_init
++xlator_mem_acct_unref
+ xlator_notify
+ xlator_option_info_list
+ xlator_option_init_bool
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index 34cb87a..3934a78 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -35,61 +35,92 @@ gf_mem_acct_enable_set(void *data)
+ return;
+ }
+
+-int
+-gf_mem_set_acct_info(xlator_t *xl, char **alloc_ptr, size_t size, uint32_t type,
+- const char *typestr)
++static void *
++gf_mem_header_prepare(struct mem_header *header, size_t size)
+ {
+- void *ptr = NULL;
+- struct mem_header *header = NULL;
++ void *ptr;
+
+- if (!alloc_ptr)
+- return -1;
++ header->size = size;
+
+- ptr = *alloc_ptr;
++ ptr = header + 1;
+
+- GF_ASSERT(xl != NULL);
++ /* data follows in this gap of 'size' bytes */
++ *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
+
+- GF_ASSERT(xl->mem_acct != NULL);
++ return ptr;
++}
+
+- GF_ASSERT(type <= xl->mem_acct->num_types);
++static void *
++gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
++ size_t size, uint32_t type, const char *typestr)
++{
++ struct mem_acct_rec *rec = NULL;
++ bool new_ref = false;
+
+- LOCK(&xl->mem_acct->rec[type].lock);
+- {
+- if (!xl->mem_acct->rec[type].typestr)
+- xl->mem_acct->rec[type].typestr = typestr;
+- xl->mem_acct->rec[type].size += size;
+- xl->mem_acct->rec[type].num_allocs++;
+- xl->mem_acct->rec[type].total_allocs++;
+- xl->mem_acct->rec[type].max_size = max(xl->mem_acct->rec[type].max_size,
+- xl->mem_acct->rec[type].size);
+- xl->mem_acct->rec[type].max_num_allocs = max(
+- xl->mem_acct->rec[type].max_num_allocs,
+- xl->mem_acct->rec[type].num_allocs);
+- }
+- UNLOCK(&xl->mem_acct->rec[type].lock);
++ if (mem_acct != NULL) {
++ GF_ASSERT(type <= mem_acct->num_types);
+
+- GF_ATOMIC_INC(xl->mem_acct->refcnt);
++ rec = &mem_acct->rec[type];
++ LOCK(&rec->lock);
++ {
++ if (!rec->typestr) {
++ rec->typestr = typestr;
++ }
++ rec->size += size;
++ new_ref = (rec->num_allocs == 0);
++ rec->num_allocs++;
++ rec->total_allocs++;
++ rec->max_size = max(rec->max_size, rec->size);
++ rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs);
++
++#ifdef DEBUG
++ list_add(&header->acct_list, &rec->obj_list);
++#endif
++ }
++ UNLOCK(&rec->lock);
++
++ /* We only take a reference for each memory type used, not for each
++ * allocation. This minimizes the use of atomic operations. */
++ if (new_ref) {
++ GF_ATOMIC_INC(mem_acct->refcnt);
++ }
++ }
+
+- header = (struct mem_header *)ptr;
+ header->type = type;
+- header->size = size;
+- header->mem_acct = xl->mem_acct;
++ header->mem_acct = mem_acct;
+ header->magic = GF_MEM_HEADER_MAGIC;
+
++ return gf_mem_header_prepare(header, size);
++}
++
++static void *
++gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
++ size_t size)
++{
++ struct mem_acct_rec *rec = NULL;
++
++ if (mem_acct != NULL) {
++ rec = &mem_acct->rec[header->type];
++ LOCK(&rec->lock);
++ {
++ rec->size += size - header->size;
++ rec->total_allocs++;
++ rec->max_size = max(rec->max_size, rec->size);
++
+ #ifdef DEBUG
+- INIT_LIST_HEAD(&header->acct_list);
+- LOCK(&xl->mem_acct->rec[type].lock);
+- {
+- list_add(&header->acct_list, &(xl->mem_acct->rec[type].obj_list));
+- }
+- UNLOCK(&xl->mem_acct->rec[type].lock);
++ /* The old 'header' already was present in 'obj_list', but
++ * realloc() could have changed its address. We need to remove
++ * the old item from the list and add the new one. This can be
++ * done this way because list_move() doesn't use the pointers
++ * to the old location (which are not valid anymore) already
++ * present in the list, it simply overwrites them. */
++ list_move(&header->acct_list, &rec->obj_list);
+ #endif
+- ptr += sizeof(struct mem_header);
+- /* data follows in this gap of 'size' bytes */
+- *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
++ }
++ UNLOCK(&rec->lock);
++ }
+
+- *alloc_ptr = ptr;
+- return 0;
++ return gf_mem_header_prepare(header, size);
+ }
+
+ void *
+@@ -97,7 +128,7 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
+ {
+ size_t tot_size = 0;
+ size_t req_size = 0;
+- char *ptr = NULL;
++ void *ptr = NULL;
+ xlator_t *xl = NULL;
+
+ if (!THIS->ctx->mem_acct_enable)
+@@ -114,16 +145,15 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
+- gf_mem_set_acct_info(xl, &ptr, req_size, type, typestr);
+
+- return (void *)ptr;
++ return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr);
+ }
+
+ void *
+ __gf_malloc(size_t size, uint32_t type, const char *typestr)
+ {
+ size_t tot_size = 0;
+- char *ptr = NULL;
++ void *ptr = NULL;
+ xlator_t *xl = NULL;
+
+ if (!THIS->ctx->mem_acct_enable)
+@@ -138,84 +168,32 @@ __gf_malloc(size_t size, uint32_t type, const char *typestr)
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
+- gf_mem_set_acct_info(xl, &ptr, size, type, typestr);
+
+- return (void *)ptr;
++ return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr);
+ }
+
+ void *
+ __gf_realloc(void *ptr, size_t size)
+ {
+ size_t tot_size = 0;
+- char *new_ptr;
+- struct mem_header *old_header = NULL;
+- struct mem_header *new_header = NULL;
+- struct mem_header tmp_header;
++ struct mem_header *header = NULL;
+
+ if (!THIS->ctx->mem_acct_enable)
+ return REALLOC(ptr, size);
+
+ REQUIRE(NULL != ptr);
+
+- old_header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
+- GF_ASSERT(old_header->magic == GF_MEM_HEADER_MAGIC);
+- tmp_header = *old_header;
+-
+-#ifdef DEBUG
+- int type = 0;
+- size_t copy_size = 0;
+-
+- /* Making these changes for realloc is not straightforward. So
+- * I am simulating realloc using calloc and free
+- */
+-
+- type = tmp_header.type;
+- new_ptr = __gf_calloc(1, size, type,
+- tmp_header.mem_acct->rec[type].typestr);
+- if (new_ptr) {
+- copy_size = (size > tmp_header.size) ? tmp_header.size : size;
+- memcpy(new_ptr, ptr, copy_size);
+- __gf_free(ptr);
+- }
+-
+- /* This is not quite what the man page says should happen */
+- return new_ptr;
+-#endif
++ header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
++ GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC);
+
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+- new_ptr = realloc(old_header, tot_size);
+- if (!new_ptr) {
++ header = realloc(header, tot_size);
++ if (!header) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
+
+- /*
+- * We used to pass (char **)&ptr as the second
+- * argument after the value of realloc was saved
+- * in ptr, but the compiler warnings complained
+- * about the casting to and forth from void ** to
+- * char **.
+- * TBD: it would be nice to adjust the memory accounting info here,
+- * but calling gf_mem_set_acct_info here is wrong because it bumps
+- * up counts as though this is a new allocation - which it's not.
+- * The consequence of doing nothing here is only that the sizes will be
+- * wrong, but at least the counts won't be.
+- uint32_t type = 0;
+- xlator_t *xl = NULL;
+- type = header->type;
+- xl = (xlator_t *) header->xlator;
+- gf_mem_set_acct_info (xl, &new_ptr, size, type, NULL);
+- */
+-
+- new_header = (struct mem_header *)new_ptr;
+- *new_header = tmp_header;
+- new_header->size = size;
+-
+- new_ptr += sizeof(struct mem_header);
+- /* data follows in this gap of 'size' bytes */
+- *(uint32_t *)(new_ptr + size) = GF_MEM_TRAILER_MAGIC;
+-
+- return (void *)new_ptr;
++ return gf_mem_update_acct_info(header->mem_acct, header, size);
+ }
+
+ int
+@@ -321,6 +299,7 @@ __gf_free(void *free_ptr)
+ void *ptr = NULL;
+ struct mem_acct *mem_acct;
+ struct mem_header *header = NULL;
++ bool last_ref = false;
+
+ if (!THIS->ctx->mem_acct_enable) {
+ FREE(free_ptr);
+@@ -352,16 +331,18 @@ __gf_free(void *free_ptr)
+ mem_acct->rec[header->type].num_allocs--;
+ /* If all the instances are freed up then ensure typestr is set
+ * to NULL */
+- if (!mem_acct->rec[header->type].num_allocs)
++ if (!mem_acct->rec[header->type].num_allocs) {
++ last_ref = true;
+ mem_acct->rec[header->type].typestr = NULL;
++ }
+ #ifdef DEBUG
+ list_del(&header->acct_list);
+ #endif
+ }
+ UNLOCK(&mem_acct->rec[header->type].lock);
+
+- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
+- FREE(mem_acct);
++ if (last_ref) {
++ xlator_mem_acct_unref(mem_acct);
+ }
+
+ free:
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index 5d6f8d2..022c3ed 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -736,6 +736,19 @@ xlator_mem_acct_init(xlator_t *xl, int num_types)
+ }
+
+ void
++xlator_mem_acct_unref(struct mem_acct *mem_acct)
++{
++ uint32_t i;
++
++ if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
++ for (i = 0; i < mem_acct->num_types; i++) {
++ LOCK_DESTROY(&(mem_acct->rec[i].lock));
++ }
++ FREE(mem_acct);
++ }
++}
++
++void
+ xlator_tree_fini(xlator_t *xl)
+ {
+ xlator_t *top = NULL;
+@@ -766,7 +779,6 @@ xlator_list_destroy(xlator_list_t *list)
+ int
+ xlator_memrec_free(xlator_t *xl)
+ {
+- uint32_t i = 0;
+ struct mem_acct *mem_acct = NULL;
+
+ if (!xl) {
+@@ -775,13 +787,8 @@ xlator_memrec_free(xlator_t *xl)
+ mem_acct = xl->mem_acct;
+
+ if (mem_acct) {
+- for (i = 0; i < mem_acct->num_types; i++) {
+- LOCK_DESTROY(&(mem_acct->rec[i].lock));
+- }
+- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
+- FREE(mem_acct);
+- xl->mem_acct = NULL;
+- }
++ xlator_mem_acct_unref(mem_acct);
++ xl->mem_acct = NULL;
+ }
+
+ return 0;
+--
+1.8.3.1
+
diff --git a/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch b/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch
new file mode 100644
index 0000000..2bd360f
--- /dev/null
+++ b/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch
@@ -0,0 +1,117 @@
+From 01bb17a0910a638e89a44a6da4b1359123940498 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Wed, 17 Apr 2019 12:17:27 +0530
+Subject: [PATCH 130/141] tier/test: new-tier-cmds.t fails after a glusterd
+ restart
+
+Problem: new-tier-cmds.t does a restart of gluster processes and
+after the restart the bricks and the tier process take more
+time than before to come online. This causes the detach start to
+fail.
+
+Fix: Give it enough time to come online after the restart.
+
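+As a hedged aside (a generic sketch, not gluster code), the
+EXPECT_WITHIN checks added below follow the usual retry-until-timeout
+pattern instead of a fixed sleep, so the test waits only as long as
+actually needed and fails deterministically otherwise:
+
+    #include <unistd.h> /* sleep */
+
+    /* sketch: poll a condition until it holds or the timeout expires */
+    static int
+    wait_for(int (*cond)(void), int timeout_sec)
+    {
+        for (int waited = 0; waited < timeout_sec; waited++) {
+            if (cond())
+                return 0;   /* condition met: stop early */
+            sleep(1);
+        }
+        return -1;          /* timed out: fail the check */
+    }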
+label: DOWNSTREAM ONLY
+
+Change-Id: I0f50b0bb77fe49ebd3a0292e190d0350d7994cfe
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/168130
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/tier/new-tier-cmds.t | 45 ++++++++++++++++++++++++++--------------
+ tests/volume.rc | 8 +++++++
+ 2 files changed, 37 insertions(+), 16 deletions(-)
+
+diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t
+index b9c9390..92881ac 100644
+--- a/tests/basic/tier/new-tier-cmds.t
++++ b/tests/basic/tier/new-tier-cmds.t
+@@ -19,14 +19,6 @@ function create_dist_tier_vol () {
+ TEST $CLI_1 volume tier $V0 attach replica 2 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3 $H1:$B1/${V0}_h4 $H2:$B2/${V0}_h5 $H3:$B3/${V0}_h6
+ }
+
+-function tier_daemon_status {
+- local _VAR=CLI_$1
+- local xpath_sel='//node[hostname="Tier Daemon"][path="localhost"]/status'
+- ${!_VAR} --xml volume status $V0 \
+- | xmllint --xpath "$xpath_sel" - \
+- | sed -n '/.*<status>\([0-9]*\).*/s//\1/p'
+-}
+-
+ function detach_xml_status {
+ $CLI_1 volume tier $V0 detach status --xml | sed -n \
+ '/.*<opErrstr>Detach tier status successful/p' | wc -l
+@@ -70,7 +62,20 @@ TEST $glusterd_2;
+ EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
+
+ #after starting detach tier the detach tier status should display the status
+-sleep 2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b4
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h4
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b5
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h5
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b3
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b6
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h3
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h6
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_shd_count
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_tierd_count
+ $CLI_1 volume status
+ TEST $CLI_1 volume tier $V0 detach start
+
+@@ -91,13 +96,21 @@ EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_b2
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_h2
+
+-# Parsing normal output doesn't work because of line-wrap issues on our
+-# regression machines, and the version of xmllint there doesn't support --xpath
+-# so we can't do it that way either. In short, there's no way for us to detect
+-# when we can stop waiting, so we just have to wait the maximum time every time
+-# and hope any failures will show up later in the script.
+-sleep $PROCESS_UP_TIMEOUT
+-#XPECT_WITHIN $PROCESS_UP_TIMEOUT 1 tier_daemon_status 2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b4
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h4
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b5
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h5
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b3
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b6
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h3
++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h6
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_shd_count
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_tierd_count
++$CLI_1 volume status
+
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status
+
+diff --git a/tests/volume.rc b/tests/volume.rc
+index 289b197..b326098 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -719,6 +719,14 @@ function get_snapd_count {
+ ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l
+ }
+
++function get_tierd_count {
++ ps auxww | grep glusterfs | grep tierd.pid | grep -v grep | wc -l
++}
++
++function get_shd_count {
++ ps auxww | grep glusterfs | grep shd.pid | grep -v grep | wc -l
++}
++
+ function drop_cache() {
+ case $OSTYPE in
+ Linux)
+--
+1.8.3.1
+
diff --git a/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch b/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch
new file mode 100644
index 0000000..6238fb1
--- /dev/null
+++ b/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch
@@ -0,0 +1,113 @@
+From a0949929282529e0e866e074721c1bdfe3928c8c Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Thu, 11 Apr 2019 12:12:12 +0530
+Subject: [PATCH 131/141] tests/dht: Test that lookups are sent post brick up
+
+upstream: https://review.gluster.org/#/c/glusterfs/+/22545/
+
+>Change-Id: I3556793c5e9d58cc6a08644b41dc5740fab2610b
+>updates: bz#1628194
+
+BUG: 1704562
+Change-Id: Ie45331298902bd5268c56cb29a966d8246abfd6d
+Signed-off-by: N Balachandran <nbalacha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169592
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/distribute/brick-down.t | 83 +++++++++++++++++++++++++++++++++++++
+ 1 file changed, 83 insertions(+)
+ create mode 100644 tests/basic/distribute/brick-down.t
+
+diff --git a/tests/basic/distribute/brick-down.t b/tests/basic/distribute/brick-down.t
+new file mode 100644
+index 0000000..522ccc0
+--- /dev/null
++++ b/tests/basic/distribute/brick-down.t
+@@ -0,0 +1,83 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../common-utils.rc
++. $(dirname $0)/../../dht.rc
++
++# Test 1 overview:
++# ----------------
++# Test whether lookups are sent after a brick comes up again
++#
++# 1. Create a 3 brick pure distribute volume
++# 2. Fuse mount the volume so the layout is set on the root
++# 3. Kill one brick and try to create a directory which hashes to that brick.
++# It should fail with EIO.
++# 4. Restart the brick that was killed.
++# 5. Do not remount the volume. Try to create the same directory as in step 3.
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
++TEST $CLI volume start $V0
++
++# We want the lookup to reach DHT
++TEST $CLI volume set $V0 performance.stat-prefetch off
++
++# Mount using FUSE and lookup the mount so a layout is set on the brick root
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++
++ls $M0/
++
++TEST mkdir $M0/level1
++
++# Find a dirname that will hash to the brick we are going to kill
++hashed=$V0-client-1
++TEST dht_first_filename_with_hashsubvol "$hashed" $M0 "dir-"
++roottestdir=$fn_return_val
++
++hashed=$V0-client-1
++TEST dht_first_filename_with_hashsubvol "$hashed" $M0/level1 "dir-"
++level1testdir=$fn_return_val
++
++
++TEST kill_brick $V0 $H0 $B0/$V0-2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-2
++
++TEST $CLI volume status $V0
++
++
++# Unmount and mount the volume again so dht has an incomplete in-memory layout
++
++umount -f $M0
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++
++
++mkdir $M0/$roottestdir
++TEST [ $? -ne 0 ]
++
++mkdir $M0/level1/$level1testdir
++TEST [ $? -ne 0 ]
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-2
++
++#$CLI volume status
++
++# It takes a while for the client to reconnect to the brick
++sleep 5
++
++
++mkdir $M0/$roottestdir
++TEST [ $? -eq 0 ]
++
++mkdir $M0/level1/$level1testdir
++TEST [ $? -eq 0 ]
++
++# Cleanup
++cleanup
++
++
+--
+1.8.3.1
+
diff --git a/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch b/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch
new file mode 100644
index 0000000..2c7888d
--- /dev/null
+++ b/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch
@@ -0,0 +1,41 @@
+From 83d5ebd6ca68e319db86e310cf072888d0f0f1d1 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 8 May 2019 10:07:29 +0530
+Subject: [PATCH 132/141] glusterd: remove duplicate occurrence of
+ features.selinux from volume option table
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I0a49fece7a1fcbb9f3bbfe5806ec470aeb33ad70
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169664
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 10aa2ae..e52de20 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3242,16 +3242,6 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ "pages."
+ "The max value is 262144 pages i.e 1 GB and "
+ "the min value is 1000 pages i.e ~4 MB."},
+- {.key = VKEY_FEATURES_SELINUX,
+- .voltype = "features/selinux",
+- .type = NO_DOC,
+- .value = "on",
+- .op_version = GD_OP_VERSION_3_11_0,
+- .description = "Convert security.selinux xattrs to "
+- "trusted.gluster.selinux on the bricks. Recommended "
+- "to have enabled when clients and/or bricks support "
+- "SELinux."},
+-
+ #endif /* USE_GFDB */
+ {
+ .key = "locks.trace",
+--
+1.8.3.1
+
diff --git a/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch b/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch
new file mode 100644
index 0000000..88f4bd0
--- /dev/null
+++ b/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch
@@ -0,0 +1,62 @@
+From f1f27e5839dd99389bef65f79ea491e98e6935d2 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Tue, 23 Apr 2019 18:05:36 +0530
+Subject: [PATCH 133/141] glusterd: enable fips-mode-rchecksum for new volumes
+
+...during volume create if the cluster op-version is >= GD_OP_VERSION_7_0.
+
+This option itself was introduced in GD_OP_VERSION_4_0_0 via commit 6daa65356.
+We missed enabling it by default for new volume creates in that commit.
+If we are to do it now safely, we need to use op version
+GD_OP_VERSION_7_0 and target it for release-7.
+
+Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/22609/
+BUG: 1706683
+Change-Id: I7c6d4a8abe0816367e7069cb5cad01744f04858f
+fixes: bz#1706683
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169443
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index da877aa..77aa705 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -1614,10 +1614,17 @@ brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ gf_boolean_t pgfid_feat = _gf_false;
+ char *value = NULL;
+ xlator_t *xl = NULL;
++ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
+
+ if (!graph || !volinfo || !set_dict || !brickinfo)
+ goto out;
+
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO("glusterd", priv, out);
++
+ ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_QUOTA, &value);
+ if (value) {
+ ret = gf_string2boolean(value, &quota_enabled);
+@@ -1661,6 +1668,12 @@ brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ }
+ }
+
++ if (priv->op_version >= GD_OP_VERSION_7_0) {
++ ret = xlator_set_fixed_option(xl, "fips-mode-rchecksum", "on");
++ if (ret) {
++ goto out;
++ }
++ }
+ snprintf(tmpstr, sizeof(tmpstr), "%d", brickinfo->fs_share_count);
+ ret = xlator_set_fixed_option(xl, "shared-brick-count", tmpstr);
+ out:
+--
+1.8.3.1
+
diff --git a/0134-performance-write-behind-remove-request-from-wip-lis.patch b/0134-performance-write-behind-remove-request-from-wip-lis.patch
new file mode 100644
index 0000000..d20ca09
--- /dev/null
+++ b/0134-performance-write-behind-remove-request-from-wip-lis.patch
@@ -0,0 +1,79 @@
+From 76127f4f8f3c2bf415f66a335e7b37670cb9bd84 Mon Sep 17 00:00:00 2001
+From: Raghavendra G <rgowdapp@redhat.com>
+Date: Fri, 3 May 2019 10:14:48 +0530
+Subject: [PATCH 134/141] performance/write-behind: remove request from wip
+ list in wb_writev_cbk
+
+There is a race in the way O_DIRECT writes are handled. Assume two
+overlapping write requests w1 and w2.
+
+* w1 is issued and is in wb_inode->wip queue as the response is still
+ pending from bricks. Also wb_request_unref in wb_do_winds is not yet
+ invoked.
+
+ list_for_each_entry_safe (req, tmp, tasks, winds) {
+ list_del_init (&req->winds);
+
+ if (req->op_ret == -1) {
+ call_unwind_error_keep_stub (req->stub, req->op_ret,
+ req->op_errno);
+ } else {
+ call_resume_keep_stub (req->stub);
+ }
+
+ wb_request_unref (req);
+ }
+
+* w2 is issued and wb_process_queue is invoked. w2 is not picked up
+ for winding as w1 is still in wb_inode->wip. w2 is added to the todo
+ list and wb_writev for w2 returns.
+
+* response to w1 is received and invokes wb_request_unref. Assume
+ wb_request_unref in wb_do_winds (see point 1) is not invoked
+ yet. Since there is one more refcount, wb_request_unref in
+ wb_writev_cbk of w1 doesn't remove w1 from wip.
+
+* wb_process_queue is invoked as part of wb_writev_cbk of w1. But, it
+ fails to wind w2 as w1 is still in wip.
+
+* wb_request_unref is invoked on w1 as part of wb_do_winds. w1 is
+ removed from all queues including wip.
+
+* After this point there is no invocation of wb_process_queue unless
+ new request is issued from application causing w2 to be hung till
+ the next request.
+
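+In short, the fix is to make the completion callback itself unlink the
+request from the wip queue under the inode lock, instead of leaving
+that to whichever wb_request_unref drops the last reference. A hedged
+sketch of that shape (simplified names and signature; the actual
+one-hunk change is below):
+
+    /* sketch: w1's callback clears its wip entry before unref, so the
+     * following wb_process_queue is free to wind w2 */
+    static void
+    writev_done(wb_request_t *req)
+    {
+        wb_inode_t *wb_inode = req->wb_inode;
+
+        LOCK(&wb_inode->lock);
+        {
+            list_del_init(&req->wip);
+        }
+        UNLOCK(&wb_inode->lock);
+
+        wb_request_unref(req);       /* may or may not be the last ref */
+        wb_process_queue(wb_inode);  /* overlapping requests proceed */
+    }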
+This bug is similar to bz 1626780 and bz 1379655.
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22654/
+BUG: 1702686
+Change-Id: Iaa47437613591699d4c8ad18bc0b32de6affcc31
+fixes: bz#1702686
+Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169552
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/write-behind/src/write-behind.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
+index cf302bd..70e281a 100644
+--- a/xlators/performance/write-behind/src/write-behind.c
++++ b/xlators/performance/write-behind/src/write-behind.c
+@@ -1813,6 +1813,12 @@ wb_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ frame->local = NULL;
+ wb_inode = req->wb_inode;
+
++ LOCK(&req->wb_inode->lock);
++ {
++ list_del_init(&req->wip);
++ }
++ UNLOCK(&req->wb_inode->lock);
++
+ wb_request_unref(req);
+
+ /* requests could be pending while this was in progress */
+--
+1.8.3.1
+
diff --git a/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch b/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
new file mode 100644
index 0000000..e6d7889
--- /dev/null
+++ b/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
@@ -0,0 +1,56 @@
+From 677f575d2289285d2e553ddd610944856cb947db Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Fri, 10 May 2019 11:21:03 +0530
+Subject: [PATCH 135/141] geo-rep: fix incorrectly formatted authorized_keys
+
+There are two ways for creating secret pem pub file during geo-rep
+setup.
+1. gluster-georep-sshkey generate
+2. gluster system:: execute gsec_create
+
+The patch linked below solves this problem for the
+`gluster-georep-sshkey generate` method.
+Patch link: https://review.gluster.org/#/c/glusterfs/+/22246/
+
+This patch is added to support the old way of creating the secret pem
+pub file, `gluster system:: execute gsec_create`.
+
+Problem: During geo-rep setup, while creating an ssh authorized_keys
+         entry, the setup inserts an extra space before the "ssh-rsa"
+         label. This gets flagged by an enterprise customer's security
+         scan as a security violation.
+Solution: Remove the extra space while creating the secret key.
+
+Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22673/
+
+>fixes: bz#1679401
+>Change-Id: I92ba7e25aaa5123dae9ebe2f3c68d14315aa5f0e
+>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1671862
+Change-Id: I11e90c00a14a301a5d95e14b5e8984867e6ff893
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169870
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/src/peer_gsec_create.in | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in
+index 05c1638..6d4a484 100755
+--- a/geo-replication/src/peer_gsec_create.in
++++ b/geo-replication/src/peer_gsec_create.in
+@@ -18,7 +18,7 @@ if [ "Xcontainer" = "X$1" ]; then
+ output1=`cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
+ output2=`cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
+ else
+- output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
+- output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" " "``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
++ output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
++ output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
+ fi
+ echo -e "$output1\n$output2"
+--
+1.8.3.1
+
diff --git a/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch b/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch
new file mode 100644
index 0000000..403dcb3
--- /dev/null
+++ b/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch
@@ -0,0 +1,51 @@
+From c63346dab3e5da0605bf4ddaa314253f42892c9d Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Wed, 8 May 2019 12:13:59 +0530
+Subject: [PATCH 136/141] glusterd: fix inconsistent global option output in
+ volume get
+
+`volume get all all | grep <key>` and `volume get <volname> all | grep <key>`
+dump two different output values for cluster.brick-multiplex and
+cluster.server-quorum-ratio.
+
+>upstream patch : https://review.gluster.org/#/c/glusterfs/+/22680/
+
+>Fixes: bz#1707700
+>Change-Id: Id131734e0502aa514b84768cf67fce3c22364eae
+>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1706776
+Change-Id: Id131734e0502aa514b84768cf67fce3c22364eae
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169948
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index e52de20..4b32fb6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2906,7 +2906,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .op_version = 1},
+ {.key = GLUSTERD_QUORUM_RATIO_KEY,
+ .voltype = "mgmt/glusterd",
+- .value = "0",
++ .value = "51",
+ .op_version = 1},
+ /* changelog translator - global tunables */
+ {.key = "changelog.changelog",
+@@ -3547,7 +3547,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ /* Brick multiplexing options */
+ {.key = GLUSTERD_BRICK_MULTIPLEX_KEY,
+ .voltype = "mgmt/glusterd",
+- .value = "off",
++ .value = "disable",
+ .op_version = GD_OP_VERSION_3_10_0,
+ .validate_fn = validate_boolean,
+ .type = GLOBAL_DOC,
+--
+1.8.3.1
+
diff --git a/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch b/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch
new file mode 100644
index 0000000..24dd1db
--- /dev/null
+++ b/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch
@@ -0,0 +1,160 @@
+From 646292b4f73bf1b506d034b85787f794963d7196 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Mon, 6 May 2019 23:35:08 +0530
+Subject: [PATCH 137/141] shd/glusterd: Serialize shd manager to prevent race
+ condition
+
+At the time of a glusterd restart, while doing a handshake,
+there is a possibility that multiple shd manager instances might
+get executed. Because of this, there is a chance that multiple
+shd daemons get spawned during a glusterd restart.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22667/
+
+>Change-Id: Ie20798441e07d7d7a93b7d38dfb924cea178a920
+>fixes: bz#1707081
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+BUG: 1704851
+Change-Id: Ie20798441e07d7d7a93b7d38dfb924cea178a920
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169947
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../serialize-shd-manager-glusterd-restart.t | 54 ++++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ++++++
+ xlators/mgmt/glusterd/src/glusterd.c | 1 +
+ xlators/mgmt/glusterd/src/glusterd.h | 3 ++
+ 4 files changed, 72 insertions(+)
+ create mode 100644 tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
+
+diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
+new file mode 100644
+index 0000000..3a27c2a
+--- /dev/null
++++ b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
+@@ -0,0 +1,54 @@
++#! /bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../cluster.rc
++
++function check_peers {
++count=`$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l`
++echo $count
++}
++
++function check_shd {
++ps aux | grep $1 | grep glustershd | wc -l
++}
++
++cleanup
++
++
++TEST launch_cluster 6
++
++TESTS_EXPECTED_IN_LOOP=25
++for i in $(seq 2 6); do
++ hostname="H$i"
++ TEST $CLI_1 peer probe ${!hostname}
++done
++
++
++EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers;
++for i in $(seq 1 5); do
++
++ TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i
++ TEST $CLI_1 volume start ${V0}_$i force
++
++done
++
++#kill a node
++TEST kill_node 3
++
++TEST $glusterd_3;
++EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3
++
++for i in $(seq 1 5); do
++
++ TEST $CLI_1 volume stop ${V0}_$i
++ TEST $CLI_1 volume delete ${V0}_$i
++
++done
++
++for i in $(seq 1 6); do
++ hostname="H$i"
++ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname}
++done
++cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index a9eab42..75f9a07 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -254,14 +254,26 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ {
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
++ glusterd_conf_t *conf = NULL;
++ gf_boolean_t shd_restart = _gf_false;
+
++ conf = THIS->private;
+ volinfo = data;
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ if (volinfo)
+ glusterd_volinfo_ref(volinfo);
+
++ while (conf->restart_shd) {
++ synclock_unlock(&conf->big_lock);
++ sleep(2);
++ synclock_lock(&conf->big_lock);
++ }
++ conf->restart_shd = _gf_true;
++ shd_restart = _gf_true;
++
+ ret = glusterd_shdsvc_create_volfile(volinfo);
+ if (ret)
+ goto out;
+@@ -310,6 +322,8 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ }
+ }
+ out:
++ if (shd_restart)
++ conf->restart_shd = _gf_false;
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+ if (ret)
+diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
+index c0973cb..6d7dd4a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.c
++++ b/xlators/mgmt/glusterd/src/glusterd.c
+@@ -1819,6 +1819,7 @@ init(xlator_t *this)
+ conf->rpc = rpc;
+ conf->uds_rpc = uds_rpc;
+ conf->gfs_mgmt = &gd_brick_prog;
++ conf->restart_shd = _gf_false;
+ this->private = conf;
+ /* conf->workdir and conf->rundir are smaller than PATH_MAX; gcc's
+ * snprintf checking will throw an error here if sprintf is used.
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index bd9f509..2ea8560 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -222,6 +222,9 @@ typedef struct {
+ gf_atomic_t blockers;
+ uint32_t mgmt_v3_lock_timeout;
+ gf_boolean_t restart_bricks;
++ gf_boolean_t restart_shd; /* This flag prevents running two shd manager
++ simultaneously
++ */
+ pthread_mutex_t attach_lock; /* Lock can be per process or a common one */
+ pthread_mutex_t volume_lock; /* We release the big_lock from lot of places
+ which might lead the modification of volinfo
+--
+1.8.3.1
+
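The serialization above is a simple flag gate guarded by glusterd's big lock,
which is dropped while waiting so the current holder can make progress. A
standalone sketch of the same gate with plain pthreads; big_lock, restart_shd
and the printed "work" are illustrative stand-ins, not glusterd symbols.

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool restart_shd = false;   /* mirrors conf->restart_shd */

    static void *shd_manager(void *arg)
    {
        pthread_mutex_lock(&big_lock);
        while (restart_shd) {          /* another manager is mid-restart */
            pthread_mutex_unlock(&big_lock);
            sleep(1);                  /* coarse backoff, as in the patch */
            pthread_mutex_lock(&big_lock);
        }
        restart_shd = true;            /* claim the gate */
        pthread_mutex_unlock(&big_lock);

        printf("manager %ld: regenerate volfile, restart shd\n",
               (long)(intptr_t)arg);
        sleep(1);                      /* stand-in for the real work */

        pthread_mutex_lock(&big_lock);
        restart_shd = false;           /* release the gate */
        pthread_mutex_unlock(&big_lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t1, t2;
        pthread_create(&t1, NULL, shd_manager, (void *)(intptr_t)1);
        pthread_create(&t2, NULL, shd_manager, (void *)(intptr_t)2);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        return 0;
    }
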
diff --git a/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch b/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch
new file mode 100644
index 0000000..9b8bb86
--- /dev/null
+++ b/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch
@@ -0,0 +1,64 @@
+From d08083d057d6cc7136128cad6ecefba43b886c4c Mon Sep 17 00:00:00 2001
+From: Vishal Pandey <vpandey@redhat.com>
+Date: Thu, 9 May 2019 14:37:22 +0530
+Subject: [PATCH 138/141] glusterd: Add gluster volume stop operation to
+ glusterd_validate_quorum()
+
+ISSUE: gluster volume stop succeeds even if quorum is not met.
+
+Fix: Add GD_OP_STOP_VOLUME to the glusterd_validate_quorum() check in
+glusterd_mgmt_v3_pre_validate().
+
+Since the volume stop command has been ported from synctask to mgmt_v3,
+the quorum check was missed out.
+
+>upstream patch : https://review.gluster.org/#/c/glusterfs/+/22692/
+
+>Change-Id: I7a634ad89ec2e286ea262d7952061efad5360042
+>fixes: bz#1690753
+>Signed-off-by: Vishal Pandey <vpandey@redhat.com>
+
+BUG: 1706893
+Change-Id: I7a634ad89ec2e286ea262d7952061efad5360042
+Signed-off-by: Vishal Pandey <vpandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169949
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/quorum-validation.t | 4 +++-
+ xlators/mgmt/glusterd/src/glusterd-mgmt.c | 2 +-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t
+index 05aef4e..ff46729 100644
+--- a/tests/bugs/glusterd/quorum-validation.t
++++ b/tests/bugs/glusterd/quorum-validation.t
+@@ -34,9 +34,11 @@ TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2
+ TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start
+ TEST ! $CLI_1 volume set $V0 barrier enable
+
+-# Now execute a command which goes through op state machine and it should fail
+ TEST ! $CLI_1 volume profile $V0 start
+
++#bug-1690753 - Volume stop when quorum not met is successful
++TEST ! $CLI_1 volume stop $V0
++
+ #Bring back the 2nd glusterd
+ TEST $glusterd_2
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+index 61ad66e..ec78913 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+@@ -1059,7 +1059,7 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
+ goto out;
+ }
+
+- if (op == GD_OP_PROFILE_VOLUME) {
++ if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME) {
+ ret = glusterd_validate_quorum(this, op, req_dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+--
+1.8.3.1
+
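The code change itself is one condition: volume stop joins the set of ops that
must pass quorum validation in the pre-validate phase. A tiny sketch of that
whitelist pattern; the op constants here are hypothetical stand-ins for
glusterd's glusterd_op_t values.

    #include <stdbool.h>
    #include <stdio.h>

    typedef enum { OP_PROFILE_VOLUME, OP_STOP_VOLUME, OP_START_VOLUME } op_t;

    /* Ops that may not proceed when server quorum is lost. Volume stop was
     * missing from this set after the synctask-to-mgmt_v3 port. */
    static bool needs_quorum_check(op_t op)
    {
        return op == OP_PROFILE_VOLUME || op == OP_STOP_VOLUME;
    }

    int main(void)
    {
        printf("stop checked: %d\n", needs_quorum_check(OP_STOP_VOLUME));
        return 0;
    }
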
diff --git a/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch b/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch
new file mode 100644
index 0000000..4f8ec9c
--- /dev/null
+++ b/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch
@@ -0,0 +1,300 @@
+From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Mon, 29 Apr 2019 13:22:32 +0530
+Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
+ fini
+
+We were not properly cleaning self-heal daemon resources
+during ec fini. With shd multiplexing, it is absolutely
+necessary to clean up all the resources during ec fini.
+
+Backport of
+ upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/
+ >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
+ >fixes: bz#1703948
+ >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+BUG: 1703434
+Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/syncop-utils.c | 2 +
+ xlators/cluster/afr/src/afr-self-heald.c | 5 +++
+ xlators/cluster/ec/src/ec-heald.c | 77 +++++++++++++++++++++++++++-----
+ xlators/cluster/ec/src/ec-heald.h | 3 ++
+ xlators/cluster/ec/src/ec-messages.h | 3 +-
+ xlators/cluster/ec/src/ec.c | 47 +++++++++++++++++++
+ 6 files changed, 124 insertions(+), 13 deletions(-)
+
+diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
+index b842142..4167db4 100644
+--- a/libglusterfs/src/syncop-utils.c
++++ b/libglusterfs/src/syncop-utils.c
+@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+
+ if (frame) {
+ this = frame->this;
++ } else {
++ this = THIS;
+ }
+
+ /*For this functionality to be implemented in general, we need
+diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
+index 8bc4720..522fe5d 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.c
++++ b/xlators/cluster/afr/src/afr-self-heald.c
+@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
++
++ if (this->cleanup_starting) {
++ return -ENOTCONN;
++ }
++
+ if (!priv->shd.enabled)
+ return -EBUSY;
+
+diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
+index cba111a..edf5e11 100644
+--- a/xlators/cluster/ec/src/ec-heald.c
++++ b/xlators/cluster/ec/src/ec-heald.c
+@@ -71,6 +71,11 @@ disabled_loop:
+ break;
+ }
+
++ if (ec->shutdown) {
++ healer->running = _gf_false;
++ return -1;
++ }
++
+ ret = healer->rerun;
+ healer->rerun = 0;
+
+@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
+ goto out;
+ }
+
++ _mask_cancellation();
+ ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ healer, ec_shd_index_heal, xdata,
+ ec->shd.max_threads, ec->shd.wait_qlength);
++ _unmask_cancellation();
+ out:
+ if (xdata)
+ dict_unref(xdata);
+@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ int ret = 0;
+
+ ec = this->private;
++
++ if (this->cleanup_starting) {
++ return -ENOTCONN;
++ }
++
+ if (ec->xl_up_count <= ec->fragments) {
+ return -ENOTCONN;
+ }
+@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
+ {
+ ec_t *ec = NULL;
+ loc_t loc = {0};
++ int ret = -1;
+
+ ec = healer->this->private;
+ loc.inode = inode;
+- return syncop_ftw(ec->xl_list[healer->subvol], &loc,
+- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
++ _mask_cancellation();
++ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
++ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
++ _unmask_cancellation();
++ return ret;
+ }
+
+ void *
+@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
+ {
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
++ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+ ec_t *ec = this->private;
+
+ for (;;) {
+- ec_shd_healer_wait(healer);
++ run = ec_shd_healer_wait(healer);
++ if (run == -1)
++ break;
+
+ if (ec->xl_up_count > ec->fragments) {
+ gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
+@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
+
+ rootloc.inode = this->itable->root;
+ for (;;) {
+- pthread_mutex_lock(&healer->mutex);
+- {
+- run = __ec_shd_healer_wait(healer);
+- if (!run)
+- healer->running = _gf_false;
+- }
+- pthread_mutex_unlock(&healer->mutex);
+-
+- if (!run)
++ run = ec_shd_healer_wait(healer);
++ if (run < 0) {
+ break;
++ } else if (run == 0) {
++ continue;
++ }
+
+ if (ec->xl_up_count > ec->fragments) {
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
+@@ -562,3 +577,41 @@ out:
+ dict_del(output, this->name);
+ return ret;
+ }
++
++void
++ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
++{
++ if (!healer)
++ return;
++
++ pthread_cond_destroy(&healer->cond);
++ pthread_mutex_destroy(&healer->mutex);
++}
++
++void
++ec_selfheal_daemon_fini(xlator_t *this)
++{
++ struct subvol_healer *healer = NULL;
++ ec_self_heald_t *shd = NULL;
++ ec_t *priv = NULL;
++ int i = 0;
++
++ priv = this->private;
++ if (!priv)
++ return;
++
++ shd = &priv->shd;
++ if (!shd->iamshd)
++ return;
++
++ for (i = 0; i < priv->nodes; i++) {
++ healer = &shd->index_healers[i];
++ ec_destroy_healer_object(this, healer);
++
++ healer = &shd->full_healers[i];
++ ec_destroy_healer_object(this, healer);
++ }
++
++ GF_FREE(shd->index_healers);
++ GF_FREE(shd->full_healers);
++}
+diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
+index 2eda2a7..8184cf4 100644
+--- a/xlators/cluster/ec/src/ec-heald.h
++++ b/xlators/cluster/ec/src/ec-heald.h
+@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
+ void
+ ec_shd_index_healer_wake(ec_t *ec);
+
++void
++ec_selfheal_daemon_fini(xlator_t *this);
++
+ #endif /* __EC_HEALD_H__ */
+diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
+index 7c28808..ce299bb 100644
+--- a/xlators/cluster/ec/src/ec-messages.h
++++ b/xlators/cluster/ec/src/ec-messages.h
+@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
+ EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
+ EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
+ EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
+- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
++ EC_MSG_THREAD_CLEANUP_FAILED);
+
+ #endif /* !_EC_MESSAGES_H_ */
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 3c8013e..264582a 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
+ }
+
+ void
++ec_cleanup_healer_object(ec_t *ec)
++{
++ struct subvol_healer *healer = NULL;
++ ec_self_heald_t *shd = NULL;
++ void *res = NULL;
++ int i = 0;
++ gf_boolean_t is_join = _gf_false;
++
++ shd = &ec->shd;
++ if (!shd->iamshd)
++ return;
++
++ for (i = 0; i < ec->nodes; i++) {
++ healer = &shd->index_healers[i];
++ pthread_mutex_lock(&healer->mutex);
++ {
++ healer->rerun = 1;
++ if (healer->running) {
++ pthread_cond_signal(&healer->cond);
++ is_join = _gf_true;
++ }
++ }
++ pthread_mutex_unlock(&healer->mutex);
++ if (is_join) {
++ pthread_join(healer->thread, &res);
++ is_join = _gf_false;
++ }
++
++ healer = &shd->full_healers[i];
++ pthread_mutex_lock(&healer->mutex);
++ {
++ healer->rerun = 1;
++ if (healer->running) {
++ pthread_cond_signal(&healer->cond);
++ is_join = _gf_true;
++ }
++ }
++ pthread_mutex_unlock(&healer->mutex);
++ if (is_join) {
++ pthread_join(healer->thread, &res);
++ is_join = _gf_false;
++ }
++ }
++}
++void
+ ec_pending_fops_completed(ec_t *ec)
+ {
+ if (ec->shutdown) {
+@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ /* If there aren't pending fops running after we have waken up
+ * them, we immediately propagate the notification. */
+ propagate = ec_disable_delays(ec);
++ ec_cleanup_healer_object(ec);
+ goto unlock;
+ }
+
+@@ -759,6 +805,7 @@ failed:
+ void
+ fini(xlator_t *this)
+ {
++ ec_selfheal_daemon_fini(this);
+ __ec_destroy_private(this);
+ }
+
+--
+1.8.3.1
+
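The cleanup added above must wake each healer thread, join it, and only then
destroy its mutex and condition variable, since destroying primitives that a
thread still uses is undefined behaviour. A standalone sketch of that
signal-join-destroy ordering; healer_t and its fields are illustrative, not
the real ec structures.

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        pthread_mutex_t mutex;
        pthread_cond_t cond;
        pthread_t thread;
        bool running;
        bool shutdown;
    } healer_t;

    static void *healer_loop(void *data)
    {
        healer_t *h = data;

        pthread_mutex_lock(&h->mutex);
        while (!h->shutdown)               /* sleep until fini wakes us */
            pthread_cond_wait(&h->cond, &h->mutex);
        h->running = false;                /* like healer->running = _gf_false */
        pthread_mutex_unlock(&h->mutex);
        return NULL;
    }

    static void healer_fini(healer_t *h)
    {
        bool join = false;

        pthread_mutex_lock(&h->mutex);
        h->shutdown = true;
        if (h->running) {
            pthread_cond_signal(&h->cond);
            join = true;
        }
        pthread_mutex_unlock(&h->mutex);

        if (join)
            pthread_join(h->thread, NULL); /* wait for the loop to exit */

        pthread_cond_destroy(&h->cond);    /* only safe after the join */
        pthread_mutex_destroy(&h->mutex);
    }

    int main(void)
    {
        healer_t h = { .running = true };

        pthread_mutex_init(&h.mutex, NULL);
        pthread_cond_init(&h.cond, NULL);
        pthread_create(&h.thread, NULL, healer_loop, &h);
        healer_fini(&h);
        printf("healer cleaned up\n");
        return 0;
    }
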
diff --git a/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch b/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch
new file mode 100644
index 0000000..b4b44e8
--- /dev/null
+++ b/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch
@@ -0,0 +1,40 @@
+From 40bd6e9c186adb427e136a84eaab631e6a6f5263 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Sun, 5 May 2019 21:17:24 +0530
+Subject: [PATCH 140/141] cluster/ec: Reopen shouldn't happen with O_TRUNC
+
+Problem:
+Doing re-open with O_TRUNC will truncate the fragment even when it is not
+needed, causing extra heals.
+
+Fix:
+At the time of re-open, don't use O_TRUNC.
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/22660/
+
+fixes: bz#1706549
+Change-Id: Idc6408968efaad897b95a5a52481c66e843d3fb8
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169982
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 1454ae2..b1ba5e9 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -128,7 +128,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
+ } else {
+ ec_open(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
+- fop->fd->flags, fop->fd, NULL);
++ fop->fd->flags & (~O_TRUNC), fop->fd, NULL);
+ }
+
+ out:
+--
+1.8.3.1
+
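The fix is a single bitmask: strip O_TRUNC from the saved fd flags before the
re-open, so a reopen can never shrink the fragment. A tiny standalone
illustration of the mask (the path is arbitrary):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
        const char *path = "/tmp/reopen-demo";
        struct stat st;

        /* original open truncates, then writes 5 bytes */
        int fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
        if (fd < 0 || write(fd, "hello", 5) != 5)
            return 1;
        close(fd);

        /* re-open with O_TRUNC masked out: the data must survive */
        fd = open(path, (O_WRONLY | O_TRUNC) & ~O_TRUNC);
        fstat(fd, &st);
        printf("size after reopen: %lld\n", (long long)st.st_size); /* 5 */
        close(fd);
        unlink(path);
        return 0;
    }
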
diff --git a/0141-socket-ssl-fix-crl-handling.patch b/0141-socket-ssl-fix-crl-handling.patch
new file mode 100644
index 0000000..4c51ad0
--- /dev/null
+++ b/0141-socket-ssl-fix-crl-handling.patch
@@ -0,0 +1,295 @@
+From e3020e43344ddbc32e62e06bbbf88a4f5d7cdc82 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Fri, 10 May 2019 11:13:45 +0530
+Subject: [PATCH 141/141] socket/ssl: fix crl handling
+
+Problem:
+Just setting the path to the CRL directory in socket_init() wasn't working.
+
+Solution:
+Use the X509_VERIFY_PARAM API to retrieve the verification parameters
+and set the CRL checking flags explicitly.
+Also, setting the CRL checking flags is tricky, since the connection
+is declared as failed if a suitable CRL isn't found in the designated
+file or directory. A comment has been added to the code appropriately.
+
+> Change-Id: I8a8ed2ddaf4b5eb974387d2f7b1a85c1ca39fe79
+> fixes: bz#1687326
+> Signed-off-by: Milind Changire <mchangir@redhat.com>
+> (Cherry pick from commit 06fa261207f0f0625c52fa977b96e5875e9a91e0)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22334/)
+
+Change-Id: I0958e9890035fd376f1e1eafc1452caf3edd184b
+BUG: 1583585
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166458
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac | 2 +
+ rpc/rpc-transport/socket/src/socket.c | 110 ++++++++++++++++++++++++++++------
+ rpc/rpc-transport/socket/src/socket.h | 2 +
+ tests/features/ssl-ciphers.t | 13 +++-
+ 4 files changed, 107 insertions(+), 20 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index 3065077..0e11d4c 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -491,6 +491,8 @@ AC_CHECK_HEADERS([openssl/dh.h])
+
+ AC_CHECK_HEADERS([openssl/ecdh.h])
+
++AC_CHECK_LIB([ssl], [SSL_CTX_get0_param], [AC_DEFINE([HAVE_SSL_CTX_GET0_PARAM], [1], [define if found OpenSSL SSL_CTX_get0_param])])
++
+ dnl Math library
+ AC_CHECK_LIB([m], [pow], [MATH_LIB='-lm'], [MATH_LIB=''])
+ AC_SUBST(MATH_LIB)
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index f6de1d3..bf2fa71 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -308,8 +308,65 @@ out:
+ #define ssl_write_one(t, b, l) \
+ ssl_do((t), (b), (l), (SSL_trinary_func *)SSL_write)
+
++/* set crl verify flags only for server */
++/* see man X509_VERIFY_PARAM_SET_FLAGS(3)
++ * X509_V_FLAG_CRL_CHECK enables CRL checking for the certificate chain
++ * leaf certificate. An error occurs if a suitable CRL cannot be found.
++ * Since we're never going to revoke a gluster node cert, we better disable
++ * CRL check for server certs to avoid getting error and failed connection
++ * attempts.
++ */
++static void
++ssl_clear_crl_verify_flags(SSL_CTX *ssl_ctx)
++{
++#ifdef X509_V_FLAG_CRL_CHECK_ALL
++#ifdef HAVE_SSL_CTX_GET0_PARAM
++ X509_VERIFY_PARAM *vpm;
++
++ vpm = SSL_CTX_get0_param(ssl_ctx);
++ if (vpm) {
++ X509_VERIFY_PARAM_clear_flags(
++ vpm, (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL));
++ }
++#else
++ /* CRL verify flag need not be cleared for rhel6 kind of clients */
++#endif
++#else
++ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL");
++#endif
++ return;
++}
++
++/* set crl verify flags only for server */
++static void
++ssl_set_crl_verify_flags(SSL_CTX *ssl_ctx)
++{
++#ifdef X509_V_FLAG_CRL_CHECK_ALL
++#ifdef HAVE_SSL_CTX_GET0_PARAM
++ X509_VERIFY_PARAM *vpm;
++
++ vpm = SSL_CTX_get0_param(ssl_ctx);
++ if (vpm) {
++ unsigned long flags;
++
++ flags = X509_VERIFY_PARAM_get_flags(vpm);
++ flags |= (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
++ X509_VERIFY_PARAM_set_flags(vpm, flags);
++ }
++#else
++ X509_STORE *x509store;
++
++ x509store = SSL_CTX_get_cert_store(ssl_ctx);
++ X509_STORE_set_flags(x509store,
++ X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
++#endif
++#else
++ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL");
++#endif
++}
++
+ int
+-ssl_setup_connection_prefix(rpc_transport_t *this)
++ssl_setup_connection_prefix(rpc_transport_t *this, gf_boolean_t server)
+ {
+ int ret = -1;
+ socket_private_t *priv = NULL;
+@@ -332,6 +389,9 @@ ssl_setup_connection_prefix(rpc_transport_t *this)
+ priv->ssl_accepted = _gf_false;
+ priv->ssl_context_created = _gf_false;
+
++ if (!server && priv->crl_path)
++ ssl_clear_crl_verify_flags(priv->ssl_ctx);
++
+ priv->ssl_ssl = SSL_new(priv->ssl_ctx);
+ if (!priv->ssl_ssl) {
+ gf_log(this->name, GF_LOG_ERROR, "SSL_new failed");
+@@ -2664,7 +2724,7 @@ ssl_handle_server_connection_attempt(rpc_transport_t *this)
+ fd = priv->sock;
+
+ if (!priv->ssl_context_created) {
+- ret = ssl_setup_connection_prefix(this);
++ ret = ssl_setup_connection_prefix(this, _gf_true);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "> ssl_setup_connection_prefix() failed!");
+@@ -2718,7 +2778,7 @@ ssl_handle_client_connection_attempt(rpc_transport_t *this)
+ ret = -1;
+ } else {
+ if (!priv->ssl_context_created) {
+- ret = ssl_setup_connection_prefix(this);
++ ret = ssl_setup_connection_prefix(this, _gf_false);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "> ssl_setup_connection_prefix() "
+@@ -3085,7 +3145,30 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ gf_log(this->name, GF_LOG_TRACE, "XXX server:%s, client:%s",
+ new_trans->myinfo.identifier, new_trans->peerinfo.identifier);
+
++ /* Make options available to local socket_init() to create new
++ * SSL_CTX per transport. A separate SSL_CTX per transport is
++ * required to avoid setting crl checking options for client
++ * connections. The verification options eventually get copied
++ * to the SSL object. Unfortunately, there's no way to identify
++ * whether socket_init() is being called after a client-side
++ * connect() or a server-side accept(). Although, we could pass
++ * a flag from the transport init() to the socket_init() and
++ * from this place, this doesn't identify the case where the
++ * server-side transport loading is done for the first time.
++ * Also, SSL doesn't apply for UNIX sockets.
++ */
++ if (new_sockaddr.ss_family != AF_UNIX)
++ new_trans->options = dict_ref(this->options);
++ new_trans->ctx = this->ctx;
++
+ ret = socket_init(new_trans);
++
++ /* reset options to NULL to avoid double free */
++ if (new_sockaddr.ss_family != AF_UNIX) {
++ dict_unref(new_trans->options);
++ new_trans->options = NULL;
++ }
++
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "initialization of new_trans "
+@@ -4150,7 +4233,6 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ char *cipher_list = DEFAULT_CIPHER_LIST;
+ char *dh_param = DEFAULT_DH_PARAM;
+ char *ec_curve = DEFAULT_EC_CURVE;
+- char *crl_path = NULL;
+
+ priv = this->private;
+
+@@ -4192,6 +4274,7 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ }
+ priv->ssl_ca_list = gf_strdup(priv->ssl_ca_list);
+
++ optstr = NULL;
+ if (dict_get_str(this->options, SSL_CRL_PATH_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+@@ -4199,9 +4282,9 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ SSL_ENABLED_OPT);
+ }
+ if (strcasecmp(optstr, "NULL") == 0)
+- crl_path = NULL;
++ priv->crl_path = NULL;
+ else
+- crl_path = optstr;
++ priv->crl_path = gf_strdup(optstr);
+ }
+
+ gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG,
+@@ -4343,24 +4426,15 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ }
+
+ if (!SSL_CTX_load_verify_locations(priv->ssl_ctx, priv->ssl_ca_list,
+- crl_path)) {
++ priv->crl_path)) {
+ gf_log(this->name, GF_LOG_ERROR, "could not load CA list");
+ goto err;
+ }
+
+ SSL_CTX_set_verify_depth(priv->ssl_ctx, cert_depth);
+
+- if (crl_path) {
+-#ifdef X509_V_FLAG_CRL_CHECK_ALL
+- X509_STORE *x509store;
+-
+- x509store = SSL_CTX_get_cert_store(priv->ssl_ctx);
+- X509_STORE_set_flags(
+- x509store, X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
+-#else
+- gf_log(this->name, GF_LOG_ERROR,
+- "OpenSSL version does not support CRL");
+-#endif
++ if (priv->crl_path) {
++ ssl_set_crl_verify_flags(priv->ssl_ctx);
+ }
+
+ priv->ssl_session_id = session_id++;
+diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h
+index e1ccae2..e7c0090 100644
+--- a/rpc/rpc-transport/socket/src/socket.h
++++ b/rpc/rpc-transport/socket/src/socket.h
+@@ -14,6 +14,7 @@
+ #include <openssl/ssl.h>
+ #include <openssl/err.h>
+ #include <openssl/x509v3.h>
++#include <openssl/x509_vfy.h>
+ #ifdef HAVE_OPENSSL_DH_H
+ #include <openssl/dh.h>
+ #endif
+@@ -246,6 +247,7 @@ typedef struct {
+ char *ssl_own_cert;
+ char *ssl_private_key;
+ char *ssl_ca_list;
++ char *crl_path;
+ int pipe[2];
+ struct gf_sock_incoming incoming;
+ /* -1 = not connected. 0 = in progress. 1 = connected */
+diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t
+index 563d37c..7e1e199 100644
+--- a/tests/features/ssl-ciphers.t
++++ b/tests/features/ssl-ciphers.t
+@@ -175,8 +175,6 @@ BRICK_PORT=`brick_port $V0`
+ EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+ # test revocation
+-# no need to restart the volume since the options are used
+-# by the client here.
+ TEST $CLI volume set $V0 ssl.crl-path $TMPDIR
+ EXPECT $TMPDIR volume_option $V0 ssl.crl-path
+ $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+@@ -189,14 +187,25 @@ TEST openssl ca -batch -config $SSL_CFG -revoke $SSL_CERT 2>&1
+ TEST openssl ca -config $SSL_CFG -gencrl -out $SSL_CRL 2>&1
+
+ # Failed once revoked
++# Although client fails to mount without restarting the server after crl-path
++# is set when no actual crl file is found on the client, it would also fail
++# when server is restarted for the same reason. Since the socket initialization
++# code is the same for client and server, the crl verification flags need to
++# be turned off for the client to avoid SSL searching for CRLs in the
++# ssl.crl-path. If no CRL files are found in the ssl.crl-path, SSL fails the
++# connect() attempt on the client.
++TEST $CLI volume stop $V0
++TEST $CLI volume start $V0
+ $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+ EXPECT "N" wait_mount $M0
+ TEST ! test -f $TEST_FILE
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+ # Succeed with CRL disabled
++TEST $CLI volume stop $V0
+ TEST $CLI volume set $V0 ssl.crl-path NULL
+ EXPECT NULL volume_option $V0 ssl.crl-path
++TEST $CLI volume start $V0
+ $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+ EXPECT "Y" wait_mount $M0
+ TEST test -f $TEST_FILE
+--
+1.8.3.1
+
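The helpers above drive stock OpenSSL APIs: fetch the context's verification
parameters with SSL_CTX_get0_param() and flip the CRL bits on them. A minimal
sketch of both directions, assuming OpenSSL 1.1.0 or later (TLS_method,
SSL_CTX_get0_param) and with error handling trimmed:

    #include <openssl/ssl.h>
    #include <openssl/x509_vfy.h>
    #include <stdio.h>

    int main(void)
    {
        SSL_CTX *ctx = SSL_CTX_new(TLS_method());
        if (!ctx)
            return 1;

        X509_VERIFY_PARAM *vpm = SSL_CTX_get0_param(ctx);

        /* server side: require CRL checking for the whole chain */
        X509_VERIFY_PARAM_set_flags(
            vpm, X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
        printf("server flags: 0x%lx\n", X509_VERIFY_PARAM_get_flags(vpm));

        /* client side: clear them again; with the flags set, a missing
         * CRL file fails every connect() */
        X509_VERIFY_PARAM_clear_flags(
            vpm, X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
        printf("client flags: 0x%lx\n", X509_VERIFY_PARAM_get_flags(vpm));

        SSL_CTX_free(ctx);
        return 0;
    }
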
diff --git a/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch b/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch
new file mode 100644
index 0000000..ec6ed8a
--- /dev/null
+++ b/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch
@@ -0,0 +1,36 @@
+From e44b75fdb86dcf759204816c873b4f9f4efbefa8 Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Tue, 21 May 2019 16:17:09 +0530
+Subject: [PATCH 142/169] lock: check null value of dict to avoid log flooding
+
+> updates: bz#1712322
+> Change-Id: I120a1d23506f9ebcf88c7ea2f2eff4978a61cf4a
+> Signed-off-by: Susant Palai <spalai@redhat.com>
+(backport of fix https://review.gluster.org/#/c/glusterfs/+/22756/)
+
+BUG: bz#1704181
+Change-Id: I2a192236328ebb39666ffef1146df312c08a377d
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/171325
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/locks/src/posix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 3f1c7a7..adb0df5 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -121,7 +121,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
+
+ #define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \
+ do { \
+- if (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK) || \
++ if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \
+ (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \
+ inode_t *__inode = (loc ? loc->inode : fd->inode); \
+ pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \
+--
+1.8.3.1
+
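The macro change is plain short-circuit evaluation: never call dict_get()
before checking the dict pointer, because dict_get() logs on a NULL dict and
that log line is what flooded. A minimal sketch with a mock one-entry dict;
dict_t and dict_get here are stand-ins, not libglusterfs, and the key string
is illustrative.

    #include <stdio.h>
    #include <string.h>

    typedef struct { const char *key; void *value; } dict_t;

    static void *dict_get(dict_t *d, const char *key)
    {
        /* the real dict_get logs an error when d is NULL */
        return (d->key && strcmp(d->key, key) == 0) ? d->value : NULL;
    }

    static int lock_enforced(dict_t *d)
    {
        /* short-circuit: dict_get is only reached when d is non-NULL */
        return d && dict_get(d, "enforce-mandatory-lock") != NULL;
    }

    int main(void)
    {
        dict_t d = { "enforce-mandatory-lock", (void *)1 };
        printf("%d %d\n", lock_enforced(&d), lock_enforced(NULL));
        return 0;
    }
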
diff --git a/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch b/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch
new file mode 100644
index 0000000..fd25a69
--- /dev/null
+++ b/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch
@@ -0,0 +1,42 @@
+From 43fb1d9d3890c44108b466d308177428fb8217aa Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Mon, 27 May 2019 10:11:39 +0530
+Subject: [PATCH 143/169] packaging: Change the dependency on nfs-ganesha to
+ 2.7 for glusterfs-ganesha
+
+Change-Id: I16a3f32eddfcbf745d67de9dc7440e2fc6ef2315
+fixes: bz#1714078
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/171471
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 86a1527..ed58356 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -460,7 +460,7 @@ Summary: NFS-Ganesha configuration
+ Group: Applications/File
+
+ Requires: %{name}-server%{?_isa} = %{version}-%{release}
+-Requires: nfs-ganesha-gluster >= 2.4.1
++Requires: nfs-ganesha-gluster >= 2.7.3
+ Requires: pcs, dbus
+ %if ( 0%{?rhel} && 0%{?rhel} == 6 )
+ Requires: cman, pacemaker, corosync
+@@ -1933,6 +1933,9 @@ fi
+ %endif
+
+ %changelog
++* Mon May 27 2019 Jiffin Tony Thottan <jthottan@redhat.com>
++- Change the dependency to 2.7.3 on nfs-ganesha for glusterfs-ganesha (#1714078)
++
+ * Sun Apr 7 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+ - DOWNSTREAM ONLY - revert of 83abcb(gnfs in an optional subpackage)
+
+--
+1.8.3.1
+
diff --git a/0144-cluster-ec-honor-contention-notifications-for-partia.patch b/0144-cluster-ec-honor-contention-notifications-for-partia.patch
new file mode 100644
index 0000000..40a6aa8
--- /dev/null
+++ b/0144-cluster-ec-honor-contention-notifications-for-partia.patch
@@ -0,0 +1,114 @@
+From ff8a74250209f4279f67dd89c3e57b2289a1b7d1 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Thu, 9 May 2019 11:07:18 +0200
+Subject: [PATCH 144/169] cluster/ec: honor contention notifications for
+ partially acquired locks
+
+EC was ignoring lock contention notifications received while a lock was
+being acquired. When a lock is partially acquired (some bricks have
+granted the lock but others not yet), we can receive notifications
+from the acquired bricks, which should be honored, since we may not
+receive more notifications after that.
+
+Since EC was ignoring them, once the lock was acquired, it was not
+released until the eager-lock timeout, causing unnecessary delays on
+other clients.
+
+This fix takes into consideration the notifications received before
+the full lock acquisition has completed. After that, the lock will
+be released as soon as possible.
+
+Upstream patch:
+> BUG: 1708156
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22690
+> Change-Id: I2a306dbdb29fb557dcab7788a258bd75d826cc12
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Fixes: bz#1703455
+Change-Id: I2a306dbdb29fb557dcab7788a258bd75d826cc12
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/171525
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ ...or-inodelk-contention-notify-on-partial-locks.t | 54 ++++++++++++++++++++++
+ xlators/cluster/ec/src/ec-common.c | 2 +-
+ 2 files changed, 55 insertions(+), 1 deletion(-)
+ create mode 100644 tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
+
+diff --git a/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
+new file mode 100644
+index 0000000..67fdb18
+--- /dev/null
++++ b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
+@@ -0,0 +1,54 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++function do_ls() {
++ local dir="${1}"
++ local i
++
++ for i in {1..50}; do
++ ls -l $M0/${dir} >/dev/null &
++ ls -l $M1/${dir} >/dev/null &
++ ls -l $M2/${dir} >/dev/null &
++ ls -l $M3/${dir} >/dev/null &
++ done
++ wait
++}
++
++function measure_time() {
++ {
++ LC_ALL=C
++ time -p "${@}"
++ } 2>&1 | awk '/^real/ { print $2 * 1000 }'
++}
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
++
++TEST $CLI volume set $V0 disperse.eager-lock on
++TEST $CLI volume set $V0 disperse.other-eager-lock on
++TEST $CLI volume set $V0 features.locks-notify-contention on
++TEST $CLI volume set $V0 disperse.eager-lock-timeout 10
++TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 10
++
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M2
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M3
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M3
++TEST mkdir $M0/dir
++TEST touch $M0/dir/file.{1..10}
++
++# Run multiple 'ls' concurrently from multiple clients so that they collide and
++# cause partial locks.
++TEST [[ $(measure_time do_ls dir) -lt 10000 ]]
++
++cleanup
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index b1ba5e9..e85aa8b 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -2497,7 +2497,7 @@ ec_lock_release(ec_t *ec, inode_t *inode)
+ goto done;
+ }
+ lock = ctx->inode_lock;
+- if ((lock == NULL) || !lock->acquired || lock->release) {
++ if ((lock == NULL) || lock->release) {
+ goto done;
+ }
+
+--
+1.8.3.1
+
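The one-line change means a contention notification now marks the lock for
release even while acquisition is only partial. A sketch of the old and new
decision logic side by side; ec_lock_t and its two flags are illustrative
reductions of the real structure.

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        bool acquired;   /* all bricks granted the lock */
        bool release;    /* marked for early release    */
    } ec_lock_t;

    /* old: notifications during partial acquisition were dropped by the
     * '!acquired' test, so the lock lived until the eager-lock timeout */
    static void on_contention_old(ec_lock_t *l)
    {
        if (!l->acquired || l->release)
            return;
        l->release = true;
    }

    /* new: honor the notification whenever the lock exists */
    static void on_contention_new(ec_lock_t *l)
    {
        if (l->release)
            return;
        l->release = true;   /* released as soon as pending fops drain */
    }

    int main(void)
    {
        ec_lock_t a = { false, false }, b = { false, false };

        on_contention_old(&a);   /* partially acquired: ignored */
        on_contention_new(&b);   /* partially acquired: honored */
        printf("old release=%d, new release=%d\n", a.release, b.release);
        return 0;
    }
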
diff --git a/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch b/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch
new file mode 100644
index 0000000..398f460
--- /dev/null
+++ b/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch
@@ -0,0 +1,65 @@
+From 55d47524c0c8a88204129c3a94d71779aae00beb Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Tue, 28 May 2019 08:18:12 +0530
+Subject: [PATCH 145/169] core: Capture process memory usage at the time of
+ calling gf_msg_nomem
+
+Problem: All gluster processes call gf_msg_nomem when calloc/malloc/realloc
+         fails, but the message does not capture the current memory usage
+         of the gluster process.
+
+Solution: Call getrusage to capture the current memory usage of the
+          gluster process.
+
+> Change-Id: I2e0319da1f33b177fa042fdc9e7268068576c9c3
+> fixes: bz#1708051
+> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22688/
+> Cherry pick from commit 8e1d53f14730ac1b1ca0ce9d9a0ccb32578fd4fb
+
+BUG: 1709087
+Change-Id: I2e0319da1f33b177fa042fdc9e7268068576c9c3
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/171587
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/logging.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c
+index 5d46916..7f0eff6 100644
+--- a/libglusterfs/src/logging.c
++++ b/libglusterfs/src/logging.c
+@@ -17,6 +17,7 @@
+ #include <string.h>
+ #include <stdlib.h>
+ #include <syslog.h>
++#include <sys/resource.h>
+
+ #ifdef HAVE_BACKTRACE
+ #include <execinfo.h>
+@@ -1196,6 +1197,7 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function,
+ glusterfs_ctx_t *ctx = NULL;
+ int wlen = 0;
+ int priority;
++ struct rusage r_usage;
+
+ this = THIS;
+ ctx = this->ctx;
+@@ -1231,10 +1233,11 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function,
+ "]"
+ " [%s:%d:%s] %s: no memory "
+ "available for size (%" GF_PRI_SIZET
+- ")"
++ ") current memory usage in kilobytes %ld"
+ " [call stack follows]\n",
+ timestr, gf_level_strings[level], (uint64_t)0, basename,
+- line, function, domain, size);
++ line, function, domain, size,
++ (!getrusage(RUSAGE_SELF, &r_usage) ? r_usage.ru_maxrss : 0));
+ if (-1 == ret) {
+ goto out;
+ }
+--
+1.8.3.1
+
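getrusage(RUSAGE_SELF) is the portable way to read the calling process's peak
resident set size; on Linux, ru_maxrss is reported in kilobytes. A standalone
version of what the patched log line prints:

    #include <stdio.h>
    #include <sys/resource.h>

    int main(void)
    {
        struct rusage r;

        /* same call the patched _gf_msg_nomem makes; falls back to 0 on
         * failure, exactly like the conditional in the patch */
        long kb = (getrusage(RUSAGE_SELF, &r) == 0) ? r.ru_maxrss : 0;
        printf("current memory usage in kilobytes %ld\n", kb);
        return 0;
    }
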
diff --git a/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch b/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch
new file mode 100644
index 0000000..50747cc
--- /dev/null
+++ b/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch
@@ -0,0 +1,146 @@
+From 8cc721ee43ac8038eecb712278378710ad0745ed Mon Sep 17 00:00:00 2001
+From: root <root@localhost.localdomain>
+Date: Sun, 7 Apr 2019 19:31:17 +0530
+Subject: [PATCH 146/169] dht: Custom xattrs are not healed in case of
+ add-brick
+
+Problem: If any custom xattrs are set on a directory before
+         adding a brick, the xattrs are not healed on the directory
+         after adding the brick.
+
+Solution: The xattrs are not healed because dht_selfheal_dir_mkdir_lookup_cbk
+          checks the value of MDS and, if the MDS value is not negative, the
+          selfheal code path does not take a reference on the MDS xattrs.
+          Change the condition to take a reference on the MDS xattr so that
+          custom xattrs are populated on the newly added brick.
+
+> Updates: bz#1702299
+> Change-Id: Id14beedb98cce6928055f294e1594b22132e811c
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit aa52259de7b50625b754ce9fb5c0f38e22d79dd6)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22520/)
+
+BUG: 1702298
+Change-Id: Id14beedb98cce6928055f294e1594b22132e811c
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/171591
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/bug-1702299.t | 67 ++++++++++++++++++++++++++++++++++
+ xlators/cluster/dht/src/dht-selfheal.c | 9 +----
+ 2 files changed, 68 insertions(+), 8 deletions(-)
+ create mode 100644 tests/bugs/bug-1702299.t
+
+diff --git a/tests/bugs/bug-1702299.t b/tests/bugs/bug-1702299.t
+new file mode 100644
+index 0000000..1cff2ed
+--- /dev/null
++++ b/tests/bugs/bug-1702299.t
+@@ -0,0 +1,67 @@
++#!/bin/bash
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../dht.rc
++cleanup;
++
++function get_getfattr {
++ local path=$1
++ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
++}
++
++function set_fattr {
++ for i in `seq 1 10`
++ do
++ setfattr -n user.foo -v "newabc" ./tmp${i}
++ if [ "$?" = "0" ]
++ then
++ succ=$((succ+1))
++ else
++ fail=$((fail+1))
++ fi
++ done
++}
++
++
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
++TEST $CLI volume start $V0
++
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
++
++cd $M0
++TEST mkdir tmp{1..10}
++
++succ=fail=0
++## set user.foo xattr with value newabc after kill one brick
++set_fattr
++count=10
++EXPECT "$succ" echo $count
++count=0
++EXPECT "$fail" echo $count
++
++cd -
++
++# Add-brick
++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5}
++
++cd $M0
++## At this point dht code will heal xattr on down brick only for those dirs
++## hashed subvol was up at the time of update xattr
++TEST stat ./tmp{1..10}
++
++
++## Count the user.foo xattr value with newabc on brick and compare with succ value
++count=`getfattr -n user.foo $B0/${V0}4/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
++EXPECT "$succ" echo $count
++
++## Count the user.foo xattr value with newabc on brick and compare with succ value
++count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
++EXPECT "$succ" echo $count
++
++
++cd -
++TEST umount $M0
++cleanup
+diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
+index 5420fca..f5dfff9 100644
+--- a/xlators/cluster/dht/src/dht-selfheal.c
++++ b/xlators/cluster/dht/src/dht-selfheal.c
+@@ -1310,12 +1310,8 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
+ int this_call_cnt = 0;
+ int missing_dirs = 0;
+ dht_layout_t *layout = NULL;
+- dht_conf_t *conf = 0;
+ xlator_t *prev = 0;
+ loc_t *loc = NULL;
+- int check_mds = 0;
+- int errst = 0;
+- int32_t mds_xattr_val[1] = {0};
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ int index = -1;
+
+@@ -1324,7 +1320,6 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
+- conf = this->private;
+ prev = cookie;
+
+ if (!gf_uuid_is_null(local->gfid))
+@@ -1347,9 +1342,7 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
+
+ if (!op_ret) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+- check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+- mds_xattr_val, 1, &errst);
+- if (dict_get(xattr, conf->mds_xattr_key) && check_mds && !errst) {
++ if (prev == local->mds_subvol) {
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+ }
+--
+1.8.3.1
+
diff --git a/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch b/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch
new file mode 100644
index 0000000..27f8a4e
--- /dev/null
+++ b/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch
@@ -0,0 +1,80 @@
+From d7795a592883cfb01da76b6905a7c9eb1e912bef Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Tue, 28 May 2019 08:28:29 +0530
+Subject: [PATCH 147/169] glusterd: bulkvoldict thread is not handling all
+ volumes
+
+Problem: In commit ac70f66c5805e10b3a1072bd467918730c0aeeb4 I
+         missed one condition to populate the volume dictionary in
+         multiple threads while brick_multiplex is enabled. Due
+         to that, glusterd is not sending the volume dictionary for
+         all volumes to the peer.
+
+Solution: Update the condition in the code, and update the test case
+          as well, to avoid the issue.
+
+> Change-Id: I06522dbdfee4f7e995d9cc7b7098fdf35340dc52
+> fixes: bz#1711250
+> Cherry pick from commit 4a5fb52eb1c5387a0fb8bfa1253e5227c7c255e8
+> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22739/
+
+BUG: 1711249
+Change-Id: I06522dbdfee4f7e995d9cc7b7098fdf35340dc52
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/171589
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/bug-1699339.t | 16 ++++++++++------
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +-
+ 2 files changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t
+index 3e950f4..bb8d4f4 100644
+--- a/tests/bugs/glusterd/bug-1699339.t
++++ b/tests/bugs/glusterd/bug-1699339.t
+@@ -52,18 +52,22 @@ done
+
+ TEST kill_glusterd 1
+
+-vol1=$(printf "%s-vol%02d" $V0 1)
++TESTS_EXPECTED_IN_LOOP=4
++for i in `seq 1 3 15`
++do
++vol1=$(printf "%s-vol%02d" $V0 $i)
+ TEST $CLI_2 volume set $vol1 performance.readdir-ahead on
+-vol2=$(printf "%s-vol%02d" $V0 2)
+-TEST $CLI_2 volume set $vol2 performance.readdir-ahead on
++done
+
+ # Bring back 1st glusterd
+ TEST $glusterd_1
+ EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
++TESTS_EXPECTED_IN_LOOP=4
++for i in `seq 1 3 15`
++do
++vol1=$(printf "%s-vol%02d" $V0 $i)
+ EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead
+-
+-vol_name=$(printf "%s-vol%02d" $V0 2)
+-EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead
++done
+
+ cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index efa5a86..8f1525e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -3542,7 +3542,7 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+ if ((i + 1) != totthread) {
+ arg->end = ((i + 1) * vol_per_thread_limit);
+ } else {
+- arg->end = ((i * vol_per_thread_limit) + endindex);
++ arg->end = (((i + 1) * vol_per_thread_limit) + endindex);
+ }
+ th_ret = gf_thread_create_detached(
+ &th_id, glusterd_add_bulk_volumes_create_thread, arg,
+--
+1.8.3.1
+
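The off-by-one sits in how the last thread's slice is computed when volumes
are split across threads: the remainder must ride on top of the last full
slice, not replace it. A sketch of the corrected partitioning, assuming
1-based volume indices as the bulkvoldict loop uses:

    #include <stdio.h>

    int main(void)
    {
        int volumes = 17, threads = 4;
        int per = volumes / threads;   /* vol_per_thread_limit */
        int rem = volumes % threads;   /* endindex             */

        for (int i = 0; i < threads; i++) {
            int start = i * per + 1;
            int end = (i + 1) * per;   /* full slice for this thread */
            if (i + 1 == threads)
                end += rem;            /* fixed: ((i + 1) * per) + endindex */
            /* the buggy code used (i * per) + rem here, silently dropping
             * the last 'per' volumes from the handshake dictionary */
            printf("thread %d handles volumes %d..%d\n", i, start, end);
        }
        return 0;
    }
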
diff --git a/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch b/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch
new file mode 100644
index 0000000..b1a5651
--- /dev/null
+++ b/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch
@@ -0,0 +1,70 @@
+From 92aadb6a5eeec75edf7f5a11a0ebd861dd85ca6b Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Mon, 20 May 2019 15:23:42 +0530
+Subject: [PATCH 148/169] cluster/dht: Lookup all files when processing
+ directory
+
+A rebalance process currently only looks up files
+that it is supposed to migrate. This could cause issues
+when lookup-optimize is enabled as the dir layout can be
+updated with the commit hash before all files are looked up.
+This is especially problematic if one of the rebalance processes
+fails to complete, as clients will try to access files whose
+linkto files might not have been created.
+Each process will now look up every file in the directory it is
+processing.
+Pros: Less likely that files will be inaccessible.
+Cons: More lookup requests sent to the bricks and a potential
+performance hit.
+Note: this does not handle races such as when a layout is updated on disk
+just as the create fop is sent by the client.
+
+upstream: https://review.gluster.org/#/c/glusterfs/+/22746/
+
+>Change-Id: I22b55846effc08d3b827c3af9335229335f67fb8
+>fixes: bz#1711764
+
+BUG: 1714124
+
+Change-Id: Ica6a9459befe53957f080001a2dda525b3b14d1c
+Signed-off-by: N Balachandran <nbalacha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172080
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index efbe8a4..559f046 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -2741,12 +2741,6 @@ gf_defrag_migrate_single_file(void *opaque)
+ goto out;
+ }
+
+- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
+- entry->d_stat.ia_gfid)) {
+- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+- goto out;
+- }
+-
+ gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ gf_uuid_copy(entry_loc.pargfid, loc->gfid);
+@@ -2772,6 +2766,12 @@ gf_defrag_migrate_single_file(void *opaque)
+ goto out;
+ }
+
++ if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
++ entry->d_stat.ia_gfid)) {
++ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
++ goto out;
++ }
++
+ iatt_ptr = &iatt;
+
+ hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
+--
+1.8.3.1
+
diff --git a/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch b/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch
new file mode 100644
index 0000000..5caf3d4
--- /dev/null
+++ b/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch
@@ -0,0 +1,452 @@
+From 86eee7e829bb33cac9b611da511ecbd2f03fab25 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Fri, 17 May 2019 19:26:48 +0530
+Subject: [PATCH 149/169] glusterd: Optimize code to copy dictionary in
+ handshake code path
+
+Problem: When a high number of volumes (around 2000) is configured,
+         glusterd hits a bottleneck during handshake at the time
+         of copying the dictionary.
+
+Solution: To avoid the bottleneck, serialize the dictionary instead
+          of copying key-value pairs one by one.
+
+> Change-Id: I9fb332f432e4f915bc3af8dcab38bed26bda2b9a
+> fixes: bz#1711297
+> Cherry picked from commit f8f09178bb890924a8050b466cc2e7a0a30e35a7
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22742/)
+
+BUG: 1711296
+Change-Id: I9fb332f432e4f915bc3af8dcab38bed26bda2b9a
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172255
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ libglusterfs/src/dict.c | 6 +-
+ libglusterfs/src/glusterfs/dict.h | 6 +
+ libglusterfs/src/libglusterfs.sym | 1 +
+ xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 27 ++--
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 187 +++++++++++++++++++++++----
+ xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +-
+ xlators/mgmt/glusterd/src/glusterd.h | 5 +
+ 7 files changed, 194 insertions(+), 41 deletions(-)
+
+diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
+index 4cd1fcf..6917df9 100644
+--- a/libglusterfs/src/dict.c
++++ b/libglusterfs/src/dict.c
+@@ -2799,10 +2799,6 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
+ * 4 4 4 <key len> <value len>
+ */
+
+-#define DICT_HDR_LEN 4
+-#define DICT_DATA_HDR_KEY_LEN 4
+-#define DICT_DATA_HDR_VAL_LEN 4
+-
+ /**
+ * dict_serialized_length_lk - return the length of serialized dict. This
+ * procedure has to be called with this->lock held.
+@@ -2812,7 +2808,7 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
+ * : failure: -errno
+ */
+
+-static int
++int
+ dict_serialized_length_lk(dict_t *this)
+ {
+ int ret = -EINVAL;
+diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h
+index 52b833f..022f564 100644
+--- a/libglusterfs/src/glusterfs/dict.h
++++ b/libglusterfs/src/glusterfs/dict.h
+@@ -91,6 +91,9 @@ typedef struct _data_pair data_pair_t;
+ #define DICT_MAX_FLAGS 256
+ #define DICT_FLAG_SET 1
+ #define DICT_FLAG_CLEAR 0
++#define DICT_HDR_LEN 4
++#define DICT_DATA_HDR_KEY_LEN 4
++#define DICT_DATA_HDR_VAL_LEN 4
+
+ struct _data {
+ char *data;
+@@ -412,4 +415,7 @@ are_dicts_equal(dict_t *one, dict_t *two,
+ gf_boolean_t (*value_ignore)(char *k));
+ int
+ dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result);
++
++int
++dict_serialized_length_lk(dict_t *this);
+ #endif
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index cf5757c..ec474e7 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -405,6 +405,7 @@ dict_rename_key
+ dict_reset
+ dict_serialize
+ dict_serialized_length
++dict_serialized_length_lk
+ dict_serialize_value_with_delim
+ dict_set
+ dict_setn
+diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+index 4ec9700..45f8f17 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+@@ -1528,11 +1528,9 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
+
+ RCU_READ_UNLOCK;
+
+- ret = glusterd_add_volumes_to_export_dict(&peer_data);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+- "Unable to add list of volumes "
+- "in the peer_data dict for handshake");
++ peer_data = dict_new();
++ if (!peer_data) {
++ errno = ENOMEM;
+ goto out;
+ }
+
+@@ -1563,10 +1561,23 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
+ }
+ }
+
+- ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val,
+- &req.vols.vols_len);
+- if (ret)
++    /* Don't add any key-value to the peer_data dictionary after calling
++     * this function */
++ ret = glusterd_add_volumes_to_export_dict(peer_data, &req.vols.vols_val,
++ &req.vols.vols_len);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
++ "Unable to add list of volumes "
++ "in the peer_data dict for handshake");
+ goto out;
++ }
++
++ if (!req.vols.vols_len) {
++ ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val,
++ &req.vols.vols_len);
++ if (ret)
++ goto out;
++ }
+
+ ret = glusterd_submit_request(
+ peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_FRIEND_ADD, NULL,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 8f1525e..2bc4836 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -3466,11 +3466,118 @@ out:
+ return NULL;
+ }
+
++int
++glusterd_dict_searialize(dict_t *dict_arr[], int count, int totcount, char *buf)
++{
++ int i = 0;
++ int32_t keylen = 0;
++ int64_t netword = 0;
++ data_pair_t *pair = NULL;
++ int dict_count = 0;
++ int ret = 0;
++
++ netword = hton32(totcount);
++ memcpy(buf, &netword, sizeof(netword));
++ buf += DICT_HDR_LEN;
++
++ for (i = 0; i < count; i++) {
++ if (dict_arr[i]) {
++ dict_count = dict_arr[i]->count;
++ pair = dict_arr[i]->members_list;
++ while (dict_count) {
++ if (!pair) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0,
++ LG_MSG_PAIRS_LESS_THAN_COUNT,
++ "less than count data pairs found!");
++ ret = -1;
++ goto out;
++ }
++
++ if (!pair->key) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR,
++ "pair->key is null!");
++ ret = -1;
++ goto out;
++ }
++
++ keylen = strlen(pair->key);
++ netword = hton32(keylen);
++ memcpy(buf, &netword, sizeof(netword));
++ buf += DICT_DATA_HDR_KEY_LEN;
++ if (!pair->value) {
++ gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR,
++ "pair->value is null!");
++ ret = -1;
++ goto out;
++ }
++
++ netword = hton32(pair->value->len);
++ memcpy(buf, &netword, sizeof(netword));
++ buf += DICT_DATA_HDR_VAL_LEN;
++
++ memcpy(buf, pair->key, keylen);
++ buf += keylen;
++ *buf++ = '\0';
++
++ if (pair->value->data) {
++ memcpy(buf, pair->value->data, pair->value->len);
++ buf += pair->value->len;
++ }
++
++ pair = pair->next;
++ dict_count--;
++ }
++ }
++ }
++
++out:
++ for (i = 0; i < count; i++) {
++ if (dict_arr[i])
++ dict_unref(dict_arr[i]);
++ }
++ return ret;
++}
++
++int
++glusterd_dict_arr_serialize(dict_t *dict_arr[], int count, char **buf,
++ u_int *length)
++{
++ ssize_t len = 0;
++ int i = 0;
++ int totcount = 0;
++ int ret = 0;
++
++ for (i = 0; i < count; i++) {
++ if (dict_arr[i]) {
++ len += dict_serialized_length_lk(dict_arr[i]);
++ totcount += dict_arr[i]->count;
++ }
++ }
++
++    // Keep a single DICT_HDR_LEN header: subtract it for all but one dictionary
++ len = len - ((count - 1) * DICT_HDR_LEN);
++
++ *buf = GF_MALLOC(len, gf_common_mt_char);
++ if (*buf == NULL) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ if (length != NULL) {
++ *length = len;
++ }
++
++ ret = glusterd_dict_searialize(dict_arr, count, totcount, *buf);
++
++out:
++ return ret;
++}
++
+ int32_t
+-glusterd_add_volumes_to_export_dict(dict_t **peer_data)
++glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf,
++ u_int *length)
+ {
+ int32_t ret = -1;
+- dict_t *dict = NULL;
+ dict_t *dict_arr[128] = {
+ 0,
+ };
+@@ -3496,10 +3603,6 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+ priv = this->private;
+ GF_ASSERT(priv);
+
+- dict = dict_new();
+- if (!dict)
+- goto out;
+-
+ /* Count the total number of volumes */
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++;
+
+@@ -3520,14 +3623,15 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ count++;
+- ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
++ ret = glusterd_add_volume_to_dict(volinfo, peer_data, count,
++ "volume");
+ if (ret)
+ goto out;
+
+ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
+ continue;
+
+- ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
++ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, peer_data, count,
+ "volume");
+ if (ret)
+ goto out;
+@@ -3569,34 +3673,34 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+
+ gf_log(this->name, GF_LOG_INFO,
+ "Finished dictionary popluation in all threads");
+- for (i = 0; i < totthread; i++) {
+- dict_copy_with_ref(dict_arr[i], dict);
+- dict_unref(dict_arr[i]);
+- }
+- gf_log(this->name, GF_LOG_INFO,
+- "Finished merger of all dictionraies into single one");
+ }
+
+- ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt);
++ ret = dict_set_int32n(peer_data, "count", SLEN("count"), volcnt);
+ if (ret)
+ goto out;
+
+- ctx.dict = dict;
++ ctx.dict = peer_data;
+ ctx.prefix = "global";
+ ctx.opt_count = 1;
+ ctx.key_name = "key";
+ ctx.val_name = "val";
+ dict_foreach(priv->opts, _add_dict_to_prdict, &ctx);
+ ctx.opt_count--;
+- ret = dict_set_int32n(dict, "global-opt-count", SLEN("global-opt-count"),
+- ctx.opt_count);
++ ret = dict_set_int32n(peer_data, "global-opt-count",
++ SLEN("global-opt-count"), ctx.opt_count);
+ if (ret)
+ goto out;
+
+- *peer_data = dict;
++ if (totthread) {
++ gf_log(this->name, GF_LOG_INFO,
++               "Finished merger of all dictionaries into single one");
++ dict_arr[totthread++] = peer_data;
++ ret = glusterd_dict_arr_serialize(dict_arr, totthread, buf, length);
++ gf_log(this->name, GF_LOG_INFO,
++ "Serialize dictionary data return is %d", ret);
++ }
++
+ out:
+- if (ret)
+- dict_unref(dict);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+@@ -4940,6 +5044,7 @@ glusterd_import_friend_volumes_synctask(void *opaque)
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ dict_t *peer_data = NULL;
++ glusterd_friend_synctask_args_t *arg = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -4947,8 +5052,20 @@ glusterd_import_friend_volumes_synctask(void *opaque)
+ conf = this->private;
+ GF_ASSERT(conf);
+
+- peer_data = (dict_t *)opaque;
+- GF_ASSERT(peer_data);
++ arg = opaque;
++ if (!arg)
++ goto out;
++
++ peer_data = dict_new();
++ if (!peer_data) {
++ goto out;
++ }
++
++ ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data);
++ if (ret) {
++ errno = ENOMEM;
++ goto out;
++ }
+
+ ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count);
+ if (ret)
+@@ -4980,6 +5097,11 @@ glusterd_import_friend_volumes_synctask(void *opaque)
+ out:
+ if (peer_data)
+ dict_unref(peer_data);
++ if (arg) {
++ if (arg->dict_buf)
++ GF_FREE(arg->dict_buf);
++ GF_FREE(arg);
++ }
+
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
+@@ -5146,7 +5268,7 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname)
+ gf_boolean_t update = _gf_false;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+- dict_t *peer_data_copy = NULL;
++ glusterd_friend_synctask_args_t *arg = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -5188,12 +5310,23 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname)
+ * first brick to come up before attaching the subsequent bricks
+ * in case brick multiplexing is enabled
+ */
+- peer_data_copy = dict_copy_with_ref(peer_data, NULL);
+- glusterd_launch_synctask(glusterd_import_friend_volumes_synctask,
+- peer_data_copy);
++ arg = GF_CALLOC(1, sizeof(*arg), gf_common_mt_char);
++ ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf,
++ &arg->dictlen);
++ if (ret < 0) {
++ gf_log(this->name, GF_LOG_ERROR,
++ "dict_serialize failed while handling "
++ " import friend volume request");
++ goto out;
++ }
++
++ glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg);
+ }
+
+ out:
++ if (ret && arg) {
++ GF_FREE(arg);
++ }
+ gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret,
+ *status);
+ return ret;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index 3647c34..6ad8062 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -227,7 +227,8 @@ glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo,
+ gf_boolean_t construct_real_path);
+
+ int32_t
+-glusterd_add_volumes_to_export_dict(dict_t **peer_data);
++glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf,
++ u_int *length);
+
+ int32_t
+ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status,
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 2ea8560..f96bca3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -240,6 +240,11 @@ typedef struct glusterd_add_dict_args {
+ int end;
+ } glusterd_add_dict_args_t;
+
++typedef struct glusterd_friend_synctask_args {
++ char *dict_buf;
++ u_int dictlen;
++} glusterd_friend_synctask_args_t;
++
+ typedef enum gf_brick_status {
+ GF_BRICK_STOPPED,
+ GF_BRICK_STARTED,
+--
+1.8.3.1
+
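Patch 0149 above replaces per-pair dict copying with one-shot serialization. The buffer layout it relies on is the one documented in the dict.c comment ("4 4 4 <key len> <value len>"): a 4-byte pair-count header, then for each pair a 4-byte key length, a 4-byte value length, the NUL-terminated key, and the raw value bytes. When several per-thread dicts are merged into one buffer only a single header is kept, which is why glusterd_dict_arr_serialize() subtracts (count - 1) * DICT_HDR_LEN. Below is a self-contained sketch of that layout for plain string pairs; it uses htonl in place of gluster's hton32 macro and toy types throughout, and treats values as raw bytes without a trailing NUL.

#include <arpa/inet.h> /* htonl */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct kv { const char *key; const char *val; };

/* 4-byte count header + per pair: 4-byte keylen, 4-byte vallen,
 * key bytes + '\0', value bytes -- the layout documented in dict.c. */
static size_t serialized_length(const struct kv *pairs, uint32_t n)
{
    size_t len = 4; /* DICT_HDR_LEN */
    for (uint32_t i = 0; i < n; i++)
        len += 4 + 4 + strlen(pairs[i].key) + 1 + strlen(pairs[i].val);
    return len;
}

static void serialize(const struct kv *pairs, uint32_t n, char *buf)
{
    uint32_t net = htonl(n); /* total pair count, network byte order */
    memcpy(buf, &net, 4);
    buf += 4;
    for (uint32_t i = 0; i < n; i++) {
        uint32_t klen = strlen(pairs[i].key);
        uint32_t vlen = strlen(pairs[i].val);
        net = htonl(klen); memcpy(buf, &net, 4); buf += 4;
        net = htonl(vlen); memcpy(buf, &net, 4); buf += 4;
        memcpy(buf, pairs[i].key, klen); buf += klen;
        *buf++ = '\0';
        memcpy(buf, pairs[i].val, vlen); buf += vlen;
    }
}

int main(void)
{
    struct kv pairs[] = {{"volume1.name", "gv0"}, {"count", "1"}};
    size_t len = serialized_length(pairs, 2);
    char *buf = malloc(len);
    if (!buf)
        return 1;
    serialize(pairs, 2, buf);
    printf("serialized %zu bytes\n", len);
    free(buf);
    return 0;
}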
diff --git a/0150-libglusterfs-define-macros-needed-for-cloudsync.patch b/0150-libglusterfs-define-macros-needed-for-cloudsync.patch
new file mode 100644
index 0000000..b5fbef2
--- /dev/null
+++ b/0150-libglusterfs-define-macros-needed-for-cloudsync.patch
@@ -0,0 +1,38 @@
+From 4c410d99792808b0c8deb601d50c66df19f73ca7 Mon Sep 17 00:00:00 2001
+From: Anuradha Talur <atalur@commvault.com>
+Date: Thu, 25 Oct 2018 17:23:10 -0400
+Subject: [PATCH 150/169] libglusterfs: define macros needed for cloudsync
+
+backport of patch: https://review.gluster.org/#/c/glusterfs/+/21585/
+
+> Change-Id: Iec5ce7f17fbf899f881a58cd20c4c967e3b71668
+> fixes: bz#1642168
+> Signed-off-by: Anuradha Talur <atalur@commvault.com>
+
+Change-Id: I79e5d955559acdec7cbeb8f35c8482b3b6ff8b0f
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172189
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/glusterfs/glusterfs.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index fb727fc..516b497 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -364,6 +364,10 @@ enum gf_internal_fop_indicator {
+ } while (0)
+
+ #define GF_CS_OBJECT_SIZE "trusted.glusterfs.cs.object_size"
++#define GF_CS_BLOCK_SIZE "trusted.glusterfs.cs.block_size"
++#define GF_CS_NUM_BLOCKS "trusted.glusterfs.cs.num_blocks"
++
++#define GF_CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"
+
+ #define GF_CS_OBJECT_UPLOAD_COMPLETE "trusted.glusterfs.csou.complete"
+ #define GF_CS_OBJECT_REMOTE "trusted.glusterfs.cs.remote"
+--
+1.8.3.1
+
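Patch 0150 above only introduces the xattr key names; later patches (0152 in particular) read GF_CS_XATTR_ARCHIVE_UUID as 16 raw bytes via sys_lgetxattr. A hypothetical standalone reader using the plain Linux xattr API instead of gluster's sys_* wrappers could look like the sketch below; note that trusted.* xattrs normally require privilege to read.

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

#define GF_CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"

int main(int argc, char *argv[])
{
    unsigned char uuid[16];
    ssize_t ret;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }
    /* The xattr stores the UUID as 16 raw bytes, not as text. */
    ret = lgetxattr(argv[1], GF_CS_XATTR_ARCHIVE_UUID, uuid, sizeof(uuid));
    if (ret != (ssize_t)sizeof(uuid)) {
        perror("lgetxattr");
        return 1;
    }
    for (int i = 0; i < 16; i++) /* print in 8-4-4-4-12 UUID grouping */
        printf("%02x%s", uuid[i],
               (i == 3 || i == 5 || i == 7 || i == 9) ? "-" : "");
    printf("\n");
    return 0;
}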
diff --git a/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch b/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch
new file mode 100644
index 0000000..d95db3d
--- /dev/null
+++ b/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch
@@ -0,0 +1,156 @@
+From bffdcce7119f3ed68694df918e504cc241502835 Mon Sep 17 00:00:00 2001
+From: Anuradha Talur <atalur@commvault.com>
+Date: Mon, 19 Nov 2018 17:57:18 -0800
+Subject: [PATCH 151/169] mgmt/glusterd: Make changes related to cloudsync
+ xlator
+
+1) The placement of the cloudsync xlator has been changed
+to make it the shard xlator's child. If cloudsync has to
+work with shard in the graph, it needs to be a child of shard.
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/21681/
+
+> Change-Id: Ib55424fdcb7ce8edae9f19b8a6e3d3ba86c1f0c4
+> fixes: bz#1642168
+> Signed-off-by: Anuradha Talur <atalur@commvault.com>
+
+Change-Id: I68fd43b2c559cc2d9f05e1ab19784b174233d690
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172190
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/glusterd/check-cloudsync-ancestry.t | 48 +++++++++++++++++++++++++
+ tests/volume.rc | 21 +++++++++++
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 24 ++++++-------
+ 3 files changed, 81 insertions(+), 12 deletions(-)
+ create mode 100644 tests/basic/glusterd/check-cloudsync-ancestry.t
+
+diff --git a/tests/basic/glusterd/check-cloudsync-ancestry.t b/tests/basic/glusterd/check-cloudsync-ancestry.t
+new file mode 100644
+index 0000000..ff6ffee
+--- /dev/null
++++ b/tests/basic/glusterd/check-cloudsync-ancestry.t
+@@ -0,0 +1,48 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++# When shard and cloudsync xlators are enabled on a volume, the shard xlator
++# should be an ancestor of cloudsync. This testcase checks that condition.
++
++cleanup;
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
++
++volfile=$(gluster system:: getwd)"/vols/$V0/trusted-$V0.tcp-fuse.vol"
++
++#Test that both shard and cloudsync are not loaded
++EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard
++EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
++
++#Enable shard and cloudsync in that order and check if volfile is correct
++TEST $CLI volume set $V0 shard on
++TEST $CLI volume set $V0 cloudsync on
++
++#Test that both shard and cloudsync are loaded
++EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard
++EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
++
++EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync
++
++#Disable shard and cloudsync
++TEST $CLI volume set $V0 shard off
++TEST $CLI volume set $V0 cloudsync off
++
++#Test that both shard and cloudsync are not loaded
++EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard
++EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
++
++#Enable cloudsync and shard in that order and check if volfile is correct
++TEST $CLI volume set $V0 cloudsync on
++TEST $CLI volume set $V0 shard on
++
++#Test that both shard and cloudsync are loaded
++EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard
++EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
++
++EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync
++
++cleanup;
+diff --git a/tests/volume.rc b/tests/volume.rc
+index b326098..a0ea3b8 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -891,3 +891,24 @@ function check_changelog_op {
+
+ $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l
+ }
++
++function volgen_check_ancestry {
++    #Returns Y if $ancestor_xl is an ancestor of $child_xl according to the volfile
++ local volfile="$1"
++
++ local child_xl_type="$2"
++ local child_xl="$3"
++
++ local ancestor_xl_type="$4"
++ local ancestor_xl="$5"
++
++    child_linenum=$(awk "/type $child_xl_type\/$child_xl/ {print FNR}" $volfile)
++    ancestor_linenum=$(awk "/type $ancestor_xl_type\/$ancestor_xl/ {print FNR}" $volfile)
++
++ if [ $child_linenum -lt $ancestor_linenum ];
++ then
++ echo "Y"
++ else
++ echo "N"
++ fi
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 77aa705..8b58d40 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4360,6 +4360,18 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ "tcp", set_dict);
+ }
+
++ ret = dict_get_str_boolean(set_dict, "features.cloudsync", _gf_false);
++ if (ret == -1)
++ goto out;
++
++ if (ret) {
++ xl = volgen_graph_add(graph, "features/cloudsync", volname);
++ if (!xl) {
++ ret = -1;
++ goto out;
++ }
++ }
++
+ ret = dict_get_str_boolean(set_dict, "features.shard", _gf_false);
+ if (ret == -1)
+ goto out;
+@@ -4567,18 +4579,6 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ if (ret)
+ return -1;
+
+- ret = dict_get_str_boolean(set_dict, "features.cloudsync", _gf_false);
+- if (ret == -1)
+- goto out;
+-
+- if (ret) {
+- xl = volgen_graph_add(graph, "features/cloudsync", volname);
+- if (!xl) {
+- ret = -1;
+- goto out;
+- }
+- }
+-
+ /* if the client is part of 'gfproxyd' server, then we need to keep the
+ volume name as 'gfproxyd-<volname>', for better portmapper options */
+ subvol = volname;
+--
+1.8.3.1
+
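The volgen change in 0151 above relies on how client_graph_builder() stacks xlators: each volgen_graph_add() puts the new xlator on top of everything added before it, so adding cloudsync before shard leaves shard above (an ancestor of) cloudsync, and in the emitted volfile the child's "type" line appears earlier than its ancestor's, which is exactly what volgen_check_ancestry compares. Here is a toy C sketch of that stacking rule, with hypothetical names rather than the real volgen API.

#include <stdio.h>

struct xl { const char *name; struct xl *child; };

/* Toy volgen_graph_add(): the newly added xlator becomes the graph top,
 * with everything added earlier below it (its descendants). */
static struct xl *graph_add(struct xl *top, struct xl *xl)
{
    xl->child = top;
    return xl;
}

static void print_volfile_order(struct xl *top)
{
    /* Volfiles are written bottom-up: children first, ancestors later. */
    if (!top)
        return;
    print_volfile_order(top->child);
    printf("type features/%s\n", top->name);
}

int main(void)
{
    struct xl cloudsync = {"cloudsync", NULL};
    struct xl shard = {"shard", NULL};
    struct xl *top = NULL;

    top = graph_add(top, &cloudsync); /* added first -> ends up lower */
    top = graph_add(top, &shard);     /* added later -> ancestor of cloudsync */
    print_volfile_order(top);         /* cloudsync's line precedes shard's */
    return 0;
}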
diff --git a/0152-storage-posix-changes-with-respect-to-cloudsync.patch b/0152-storage-posix-changes-with-respect-to-cloudsync.patch
new file mode 100644
index 0000000..1610009
--- /dev/null
+++ b/0152-storage-posix-changes-with-respect-to-cloudsync.patch
@@ -0,0 +1,403 @@
+From 10e9f850017d58fcd813ccce253784280326f1d0 Mon Sep 17 00:00:00 2001
+From: Anuradha Talur <atalur@commvault.com>
+Date: Tue, 20 Nov 2018 13:15:26 -0800
+Subject: [PATCH 152/169] storage/posix: changes with respect to cloudsync
+
+The main change is logic in posix to update the iatt buf
+with the file size from extended attributes,
+rather than having this logic in the cloudsync xlator.
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/21694/
+
+> Change-Id: I44f5f8df7a01e496372557fe2f4eff368dbdaa33
+> fixes: bz#1642168
+> Signed-off-by: Anuradha Talur <atalur@commvault.com>
+
+Change-Id: I34880d856fb3add4ce88d64021d08d95405fc1c1
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172191
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/storage/posix/src/posix-entry-ops.c | 1 +
+ xlators/storage/posix/src/posix-helpers.c | 50 +++++++++
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 139 ++++++++++++++++++++++---
+ xlators/storage/posix/src/posix.h | 2 +
+ 4 files changed, 177 insertions(+), 15 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index fbd83c4..b24a052 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -272,6 +272,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ }
+ }
+
++ posix_update_iatt_buf(&buf, -1, real_path, xdata);
+ if (priv->update_pgfid_nlinks) {
+ if (!gf_uuid_is_null(loc->pargfid) && !IA_ISDIR(buf.ia_type)) {
+ MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 37e33a9..d0fd45a 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -3453,3 +3453,53 @@ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno)
+ out:
+ return ret;
+ }
++
++void
++posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req)
++{
++ int ret = 0;
++ char val[4096] = {
++ 0,
++ };
++
++ if (!xattr_req)
++ return;
++
++ if (!(dict_getn(xattr_req, GF_CS_OBJECT_STATUS,
++ strlen(GF_CS_OBJECT_STATUS))))
++ return;
++
++ if (fd != -1) {
++ ret = sys_fgetxattr(fd, GF_CS_OBJECT_SIZE, &val, sizeof(val));
++ if (ret > 0) {
++ buf->ia_size = atoll(val);
++ } else {
++            /* Safe to assume that the other 2 xattrs are also not set */
++ return;
++ }
++ ret = sys_fgetxattr(fd, GF_CS_BLOCK_SIZE, &val, sizeof(val));
++ if (ret > 0) {
++ buf->ia_blksize = atoll(val);
++ }
++ ret = sys_fgetxattr(fd, GF_CS_NUM_BLOCKS, &val, sizeof(val));
++ if (ret > 0) {
++ buf->ia_blocks = atoll(val);
++ }
++ } else {
++ ret = sys_lgetxattr(loc, GF_CS_OBJECT_SIZE, &val, sizeof(val));
++ if (ret > 0) {
++ buf->ia_size = atoll(val);
++ } else {
++            /* Safe to assume that the other 2 xattrs are also not set */
++ return;
++ }
++ ret = sys_lgetxattr(loc, GF_CS_BLOCK_SIZE, &val, sizeof(val));
++ if (ret > 0) {
++ buf->ia_blksize = atoll(val);
++ }
++ ret = sys_lgetxattr(loc, GF_CS_NUM_BLOCKS, &val, sizeof(val));
++ if (ret > 0) {
++ buf->ia_blocks = atoll(val);
++ }
++ }
++}
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 7dbbd3d..065fced 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -108,6 +108,63 @@ extern char *marker_xattrs[];
+ static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY,
+ NULL};
+
++void
++posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd,
++ char *loc)
++{
++ int ret = 0;
++ uuid_t uuid;
++
++ if (!(dict_getn(req, GF_CS_OBJECT_STATUS, strlen(GF_CS_OBJECT_STATUS))))
++ return;
++
++ if (!(*rsp)) {
++ *rsp = dict_new();
++ if (!(*rsp)) {
++ return;
++ }
++ }
++
++ if (fd != -1) {
++ if (dict_getn(req, GF_CS_XATTR_ARCHIVE_UUID,
++ strlen(GF_CS_XATTR_ARCHIVE_UUID))) {
++ ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16);
++ if (ret > 0) {
++ ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid,
++ true);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
++ "%s: Failed to set "
++ "dictionary value for %s for fd %d",
++ uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd);
++ }
++ } else {
++ gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d",
++ GF_CS_XATTR_ARCHIVE_UUID, fd);
++ }
++ }
++ } else {
++ if (dict_getn(req, GF_CS_XATTR_ARCHIVE_UUID,
++ strlen(GF_CS_XATTR_ARCHIVE_UUID))) {
++ ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16);
++ if (ret > 0) {
++ ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid,
++ true);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
++ "%s: Failed to set "
++ "dictionary value for %s for loc %s",
++ uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, loc);
++ }
++ } else {
++ gf_msg_debug(this->name, 0, "getxattr failed on %s for %s",
++ GF_CS_XATTR_ARCHIVE_UUID, loc);
++ }
++ }
++ }
++ return;
++}
++
+ int32_t
+ posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ {
+@@ -150,8 +207,11 @@ posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+
+ posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata,
+ &xattr_rsp, _gf_true);
++
++ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path);
+ }
+
++ posix_update_iatt_buf(&buf, -1, real_path, xdata);
+ op_ret = 0;
+
+ out:
+@@ -422,6 +482,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ if (xdata)
+ xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ &statpost);
++ posix_update_iatt_buf(&statpre, -1, real_path, xdata);
++ posix_update_iatt_buf(&statpost, -1, real_path, xdata);
+ op_ret = 0;
+
+ out:
+@@ -898,6 +960,7 @@ posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ }
+ }
+
++ posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata);
+ /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill.
+ * If it fails, fall back to _posix_do_zerofill() and an optional fsync.
+ */
+@@ -1366,6 +1429,7 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ }
+ }
+
++ posix_update_iatt_buf(&prebuf, -1, real_path, xdata);
+ op_ret = sys_truncate(real_path, offset);
+ if (op_ret == -1) {
+ op_errno = errno;
+@@ -1405,6 +1469,10 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ int32_t _fd = -1;
+ struct posix_fd *pfd = NULL;
+ struct posix_private *priv = NULL;
++ struct iatt preop = {
++ 0,
++ };
++ dict_t *rsp_xdata = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+@@ -1471,6 +1539,18 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ pfd->flags = flags;
+ pfd->fd = _fd;
+
++ if (xdata) {
++ op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
++ if (op_ret == -1) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
++ "pre-operation fstat failed on fd=%p", fd);
++ goto out;
++ }
++
++ posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata,
++ &rsp_xdata, _gf_true);
++ }
++
+ op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
+@@ -1488,7 +1568,7 @@ out:
+
+ SET_TO_OLD_FS_ID();
+
+- STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, NULL);
++ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata);
+
+ return 0;
+ }
+@@ -1573,6 +1653,7 @@ posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ }
+ }
+
++ posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+ op_ret = sys_pread(_fd, iobuf->ptr, size, offset);
+ if (op_ret == -1) {
+ op_errno = errno;
+@@ -1878,6 +1959,7 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ }
+ }
+
++ posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+ if (locked && write_append) {
+ if (preop.ia_size == offset || (fd->flags & O_APPEND))
+ is_append = 1;
+@@ -2531,10 +2613,8 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ 0,
+ };
+ data_t *tdata = NULL;
+- char stime[4096];
+- char sxattr[4096];
++ char *cs_var = NULL;
+ gf_cs_obj_state state = -1;
+- char remotepath[4096] = {0};
+ int i = 0;
+ int len;
+
+@@ -2588,10 +2668,11 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ goto unlock;
+ }
+
+- sprintf(stime, "%" PRId64, tmp_stbuf.ia_mtime);
++ cs_var = alloca(4096);
++ sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime);
+
+ /*TODO: may be should consider nano-second also */
+- if (strncmp(stime, tdata->data, tdata->len) != 0) {
++ if (strncmp(cs_var, tdata->data, tdata->len) > 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "mtime "
+ "passed is different from seen by file now."
+@@ -2601,31 +2682,54 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ goto unlock;
+ }
+
+- len = sprintf(sxattr, "%" PRIu64, tmp_stbuf.ia_size);
++ len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size);
+
+- ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, sxattr, len,
++ ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len,
+ flags);
+ if (ret) {
++ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE,
+ ret);
++ goto unlock;
++ }
++
++ len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks);
++
++ ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len,
++ flags);
++ if (ret) {
+ op_errno = errno;
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "setxattr failed. key %s err %d", GF_CS_NUM_BLOCKS, ret);
+ goto unlock;
+ }
+
++ len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize);
++
++ ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len,
++ flags);
++ if (ret) {
++ op_errno = errno;
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret);
++ goto unlock;
++ }
++
++ memset(cs_var, 0, 4096);
+ if (loc->path[0] == '/') {
+ for (i = 1; i < strlen(loc->path); i++) {
+- remotepath[i - 1] = loc->path[i];
++ cs_var[i - 1] = loc->path[i];
+ }
+
+- remotepath[i] = '\0';
+- gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s",
+- remotepath);
++ cs_var[i] = '\0';
++ gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var);
+ }
+
+- ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, remotepath,
+- strlen(loc->path), flags);
++ ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var,
++ strlen(cs_var), flags);
+ if (ret) {
++ op_errno = errno;
+ gf_log("POSIX", GF_LOG_ERROR,
+ "setxattr failed - %s"
+ " %d",
+@@ -2635,13 +2739,14 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+
+ ret = sys_truncate(real_path, 0);
+ if (ret) {
++ op_errno = errno;
+ gf_log("POSIX", GF_LOG_ERROR,
+ "truncate failed - %s"
+ " %d",
+ GF_CS_OBJECT_SIZE, ret);
+- op_errno = errno;
+ ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE);
+ if (ret) {
++ op_errno = errno;
+ gf_log("POSIX", GF_LOG_ERROR,
+ "removexattr "
+ "failed post processing- %s"
+@@ -2659,6 +2764,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ }
+ unlock:
+ UNLOCK(&loc->inode->lock);
++ op_ret = ret;
+ goto out;
+ }
+
+@@ -4927,6 +5033,7 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ }
+ }
+
++ posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+ op_ret = sys_ftruncate(_fd, offset);
+
+ if (op_ret == -1) {
+@@ -5008,8 +5115,10 @@ posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ }
++ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL);
+ }
+
++ posix_update_iatt_buf(&buf, _fd, NULL, xdata);
+ op_ret = 0;
+
+ out:
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index d5ba08c..1da4d01 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -664,4 +664,6 @@ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
+ int
+ posix_spawn_ctx_janitor_thread(xlator_t *this);
+
++void
++posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
+ #endif /* _POSIX_H */
+--
+1.8.3.1
+
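posix_update_iatt_buf() in 0152 above rewrites the iatt a fop would otherwise return from disk with the logical size/block values stashed in the cs xattrs, so a truncated stub file still advertises the archived file's real size. A rough standalone analogue over struct stat, following the lgetxattr/atoll pattern from the patch, is sketched below; the helper is hypothetical, and trusted.* xattrs need privilege to read.

#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/xattr.h>

#define GF_CS_OBJECT_SIZE "trusted.glusterfs.cs.object_size"
#define GF_CS_BLOCK_SIZE  "trusted.glusterfs.cs.block_size"
#define GF_CS_NUM_BLOCKS  "trusted.glusterfs.cs.num_blocks"

/* Read a numeric xattr (stored as decimal text, as the setxattr hunk above
 * writes it); return dflt when the xattr is absent or unreadable. */
static long long xattr_ll(const char *path, const char *key, long long dflt)
{
    char val[64];
    ssize_t n = lgetxattr(path, key, val, sizeof(val) - 1);

    if (n <= 0)
        return dflt;
    val[n] = '\0'; /* the stored value carries no trailing NUL */
    return atoll(val);
}

int main(int argc, char *argv[])
{
    struct stat st;
    long long size;

    if (argc != 2 || lstat(argv[1], &st) != 0) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }
    /* As in posix_update_iatt_buf(): if the size xattr is missing, the
     * other two are assumed missing too and the on-disk stat stands. */
    size = xattr_ll(argv[1], GF_CS_OBJECT_SIZE, -1);
    if (size >= 0) {
        st.st_size = size;
        st.st_blksize = xattr_ll(argv[1], GF_CS_BLOCK_SIZE, st.st_blksize);
        st.st_blocks = xattr_ll(argv[1], GF_CS_NUM_BLOCKS, st.st_blocks);
    }
    printf("logical size: %lld\n", (long long)st.st_size);
    return 0;
}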
diff --git a/0153-features-cloudsync-Added-some-new-functions.patch b/0153-features-cloudsync-Added-some-new-functions.patch
new file mode 100644
index 0000000..d28a7a5
--- /dev/null
+++ b/0153-features-cloudsync-Added-some-new-functions.patch
@@ -0,0 +1,1077 @@
+From 90254e4ae9455fa0a126f83700978a9314eb79ea Mon Sep 17 00:00:00 2001
+From: Anuradha Talur <atalur@commvault.com>
+Date: Thu, 29 Nov 2018 12:54:21 -0800
+Subject: [PATCH 153/169] features/cloudsync : Added some new functions
+
+This patch contains the following changes:
+1) Store ID info will now be stored in the inode ctx
+2) Added a new readv type where the read is made directly
+ from the remote store. This choice is made by a
+ volume set operation.
+3) cs_forget() was missing. Added it.
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/21757/
+
+> Change-Id: Ie3232b3d7ffb5313a03f011b0553b19793eedfa2
+> fixes: bz#1642168
+> Signed-off-by: Anuradha Talur <atalur@commvault.com>
+
+Change-Id: I089e5a8c93049cf6bfabf011673796e38e78d7ee
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172192
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/features/cloudsync/src/cloudsync-common.c | 16 +
+ xlators/features/cloudsync/src/cloudsync-common.h | 35 ++
+ xlators/features/cloudsync/src/cloudsync-fops-c.py | 12 +-
+ .../features/cloudsync/src/cloudsync-mem-types.h | 1 +
+ xlators/features/cloudsync/src/cloudsync.c | 600 ++++++++++++++++++---
+ xlators/features/cloudsync/src/cloudsync.h | 20 +
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 +-
+ 7 files changed, 597 insertions(+), 94 deletions(-)
+
+diff --git a/xlators/features/cloudsync/src/cloudsync-common.c b/xlators/features/cloudsync/src/cloudsync-common.c
+index aee1f06..445a31b 100644
+--- a/xlators/features/cloudsync/src/cloudsync-common.c
++++ b/xlators/features/cloudsync/src/cloudsync-common.c
+@@ -11,6 +11,20 @@
+ #include "cloudsync-common.h"
+
+ void
++cs_xattrinfo_wipe(cs_local_t *local)
++{
++ if (local->xattrinfo.lxattr) {
++ if (local->xattrinfo.lxattr->file_path)
++ GF_FREE(local->xattrinfo.lxattr->file_path);
++
++ if (local->xattrinfo.lxattr->volname)
++ GF_FREE(local->xattrinfo.lxattr->volname);
++
++ GF_FREE(local->xattrinfo.lxattr);
++ }
++}
++
++void
+ cs_local_wipe(xlator_t *this, cs_local_t *local)
+ {
+ if (!local)
+@@ -40,5 +54,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local)
+ if (local->remotepath)
+ GF_FREE(local->remotepath);
+
++ cs_xattrinfo_wipe(local);
++
+ mem_put(local);
+ }
+diff --git a/xlators/features/cloudsync/src/cloudsync-common.h b/xlators/features/cloudsync/src/cloudsync-common.h
+index 7b3520c..11d2334 100644
+--- a/xlators/features/cloudsync/src/cloudsync-common.h
++++ b/xlators/features/cloudsync/src/cloudsync-common.h
+@@ -14,9 +14,23 @@
+ #include <glusterfs/call-stub.h>
+ #include <glusterfs/xlator.h>
+ #include <glusterfs/syncop.h>
++#include <glusterfs/compat-errno.h>
+ #include "cloudsync-mem-types.h"
+ #include "cloudsync-messages.h"
+
++typedef struct cs_loc_xattr {
++ char *file_path;
++ uuid_t uuid;
++ uuid_t gfid;
++ char *volname;
++} cs_loc_xattr_t;
++
++typedef struct cs_size_xattr {
++ uint64_t size;
++ uint64_t blksize;
++ uint64_t blocks;
++} cs_size_xattr_t;
++
+ typedef struct cs_local {
+ loc_t loc;
+ fd_t *fd;
+@@ -34,10 +48,25 @@ typedef struct cs_local {
+ int call_cnt;
+ inode_t *inode;
+ char *remotepath;
++
++ struct {
++ /* offset, flags and size are the information needed
++ * by read fop for remote read operation. These will be
++ * populated in cloudsync read fop, before being passed
++ * on to the plugin performing remote read.
++ */
++ off_t offset;
++ uint32_t flags;
++ size_t size;
++ cs_loc_xattr_t *lxattr;
++ } xattrinfo;
++
+ } cs_local_t;
+
+ typedef int (*fop_download_t)(call_frame_t *frame, void *config);
+
++typedef int (*fop_remote_read_t)(call_frame_t *, void *);
++
+ typedef void *(*store_init)(xlator_t *this);
+
+ typedef int (*store_reconfigure)(xlator_t *this, dict_t *options);
+@@ -48,6 +77,7 @@ struct cs_remote_stores {
+ char *name; /* store name */
+ void *config; /* store related information */
+ fop_download_t dlfop; /* store specific download function */
++ fop_remote_read_t rdfop; /* store specific read function */
+ store_init init; /* store init to initialize store config */
+ store_reconfigure reconfigure; /* reconfigure store config */
+ store_fini fini;
+@@ -59,11 +89,15 @@ typedef struct cs_private {
+ struct cs_remote_stores *stores;
+ gf_boolean_t abortdl;
+ pthread_spinlock_t lock;
++ gf_boolean_t remote_read;
+ } cs_private_t;
+
+ void
+ cs_local_wipe(xlator_t *this, cs_local_t *local);
+
++void
++cs_xattrinfo_wipe(cs_local_t *local);
++
+ #define CS_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ cs_local_t *__local = NULL; \
+@@ -90,6 +124,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local);
+
+ typedef struct store_methods {
+ int (*fop_download)(call_frame_t *frame, void *config);
++ int (*fop_remote_read)(call_frame_t *, void *);
+ /* return type should be the store config */
+ void *(*fop_init)(xlator_t *this);
+ int (*fop_reconfigure)(xlator_t *this, dict_t *options);
+diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+index 3122bd3..a7a2201 100755
+--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py
++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+@@ -137,15 +137,15 @@ cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ } else {
+ __cs_inode_ctx_update (this, fd->inode, val);
+ gf_msg (this->name, GF_LOG_INFO, 0, 0,
+- " state = %ld", val);
++ " state = %" PRIu64, val);
+
+ if (local->call_cnt == 1 &&
+ (val == GF_CS_REMOTE ||
+ val == GF_CS_DOWNLOADING)) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ 0, " will repair and download "
+- "the file, current state : %ld",
+- val);
++ "the file, current state : %"
++ PRIu64, val);
+ goto repair;
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+@@ -274,7 +274,7 @@ fd_ops = ['readv', 'writev', 'flush', 'fsync', 'fsyncdir', 'ftruncate',
+ # These are the current actual lists used to generate the code
+
+ # The following list contains fops which are fd based that modifies data
+-fd_data_modify_op_fop_template = ['readv', 'writev', 'flush', 'fsync',
++fd_data_modify_op_fop_template = ['writev', 'flush', 'fsync',
+ 'ftruncate', 'rchecksum', 'fallocate',
+ 'discard', 'zerofill', 'seek']
+
+@@ -284,8 +284,8 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr',
+ 'getattr']
+
+ # These fops need a separate implementation
+-special_fops = ['readdirp', 'statfs', 'setxattr', 'unlink', 'getxattr',
+- 'truncate', 'fstat']
++special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr',
++ 'truncate', 'fstat', 'readv']
+
+ def gen_defaults():
+ for name in ops:
+diff --git a/xlators/features/cloudsync/src/cloudsync-mem-types.h b/xlators/features/cloudsync/src/cloudsync-mem-types.h
+index 9e6837a..2203464 100644
+--- a/xlators/features/cloudsync/src/cloudsync-mem-types.h
++++ b/xlators/features/cloudsync/src/cloudsync-mem-types.h
+@@ -16,6 +16,7 @@ enum cs_mem_types_ {
+ gf_cs_mt_cs_private_t = gf_common_mt_end + 1,
+ gf_cs_mt_cs_remote_stores_t,
+ gf_cs_mt_cs_inode_ctx_t,
++ gf_cs_mt_cs_lxattr_t,
+ gf_cs_mt_end
+ };
+ #endif /* __CLOUDSYNC_MEM_TYPES_H__ */
+diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
+index fbdcdf7..2240fc3 100644
+--- a/xlators/features/cloudsync/src/cloudsync.c
++++ b/xlators/features/cloudsync/src/cloudsync.c
+@@ -16,6 +16,7 @@
+ #include <glusterfs/call-stub.h>
+ #include "cloudsync-autogen-fops.h"
+
++#include <string.h>
+ #include <dlfcn.h>
+
+ void
+@@ -72,6 +73,8 @@ cs_init(xlator_t *this)
+
+ this->private = priv;
+
++ GF_OPTION_INIT("cloudsync-remote-read", priv->remote_read, bool, out);
++
+ /* temp workaround. Should be configurable through glusterd*/
+ per_vol = _gf_true;
+
+@@ -135,6 +138,18 @@ cs_init(xlator_t *this)
+
+ (void)dlerror();
+
++ if (priv->remote_read) {
++ priv->stores->rdfop = store_methods->fop_remote_read;
++ if (!priv->stores->rdfop) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "failed to get"
++ " read fop %s",
++ dlerror());
++ ret = -1;
++ goto out;
++ }
++ }
++
+ priv->stores->dlfop = store_methods->fop_download;
+ if (!priv->stores->dlfop) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+@@ -196,6 +211,22 @@ out:
+ return ret;
+ }
+
++int
++cs_forget(xlator_t *this, inode_t *inode)
++{
++ uint64_t ctx_int = 0;
++ cs_inode_ctx_t *ctx = NULL;
++
++ inode_ctx_del(inode, this, &ctx_int);
++ if (!ctx_int)
++ return 0;
++
++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctx_int;
++
++ GF_FREE(ctx);
++ return 0;
++}
++
+ void
+ cs_fini(xlator_t *this)
+ {
+@@ -217,6 +248,9 @@ cs_reconfigure(xlator_t *this, dict_t *options)
+ goto out;
+ }
+
++ GF_OPTION_RECONF("cloudsync-remote-read", priv->remote_read, options, bool,
++ out);
++
+ /* needed only for per volume configuration*/
+ ret = priv->stores->reconfigure(this, options);
+
+@@ -242,59 +276,6 @@ out:
+ }
+
+ int32_t
+-cs_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+- dict_t *xdata)
+-{
+- gf_dirent_t *tmp = NULL;
+- char *sxattr = NULL;
+- uint64_t ia_size = 0;
+- int ret = 0;
+-
+- list_for_each_entry(tmp, &entries->list, list)
+- {
+- ret = dict_get_str(tmp->dict, GF_CS_OBJECT_SIZE, &sxattr);
+- if (ret) {
+- gf_msg_trace(this->name, 0, "size xattr found");
+- continue;
+- }
+-
+- ia_size = atoll(sxattr);
+- tmp->d_stat.ia_size = ia_size;
+- }
+-
+- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+- return 0;
+-}
+-
+-int32_t
+-cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+- off_t off, dict_t *xdata)
+-{
+- int ret = 0;
+- int op_errno = ENOMEM;
+-
+- if (!xdata) {
+- xdata = dict_new();
+- if (!xdata) {
+- goto err;
+- }
+- }
+-
+- ret = dict_set_int32(xdata, GF_CS_OBJECT_SIZE, 1);
+- if (ret) {
+- goto err;
+- }
+-
+- STACK_WIND(frame, cs_readdirp_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL);
+- return 0;
+-}
+-
+-int32_t
+ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+@@ -305,7 +286,6 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ local = frame->local;
+
+- /* Do we need lock here? */
+ local->call_cnt++;
+
+ if (op_ret == -1) {
+@@ -320,13 +300,13 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ goto unwind;
+ } else {
+ __cs_inode_ctx_update(this, local->loc.inode, val);
+- gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %ld", val);
++ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val);
+
+ if (local->call_cnt == 1 &&
+ (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "will repair and download "
+- "the file, current state : %ld",
++ "the file, current state : %" PRIu64,
+ val);
+ goto repair;
+ } else {
+@@ -665,7 +645,7 @@ cs_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ if (op_ret == 0) {
+ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
+ if (!ret) {
+- gf_msg_debug(this->name, 0, "state %ld", val);
++ gf_msg_debug(this->name, 0, "state %" PRIu64, val);
+ ret = __cs_inode_ctx_update(this, fd->inode, val);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
+@@ -831,7 +811,7 @@ out:
+ return 0;
+ }
+
+-void *
++int
+ cs_download_task(void *arg)
+ {
+ call_frame_t *frame = NULL;
+@@ -842,7 +822,6 @@ cs_download_task(void *arg)
+ fd_t *fd = NULL;
+ cs_local_t *local = NULL;
+ dict_t *dict = NULL;
+- int *retval = NULL;
+
+ frame = (call_frame_t *)arg;
+
+@@ -850,13 +829,6 @@ cs_download_task(void *arg)
+
+ priv = this->private;
+
+- retval = GF_CALLOC(1, sizeof(int), gf_common_mt_int);
+- if (!retval) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+- ret = -1;
+- goto out;
+- }
+-
+ if (!priv->stores) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "No remote store "
+@@ -972,20 +944,13 @@ out:
+ local->dlfd = NULL;
+ }
+
+- if (retval) {
+- *retval = ret;
+- pthread_exit(retval);
+- } else {
+- pthread_exit(&ret);
+- }
++ return ret;
+ }
+
+ int
+ cs_download(call_frame_t *frame)
+ {
+- int *retval = NULL;
+ int ret = 0;
+- pthread_t dthread;
+ cs_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+@@ -1000,16 +965,406 @@ cs_download(call_frame_t *frame)
+ goto out;
+ }
+
+- ret = gf_thread_create(&dthread, NULL, &cs_download_task, (void *)frame,
+- "downloadthread");
++ ret = cs_download_task((void *)frame);
++out:
++ return ret;
++}
+
+- pthread_join(dthread, (void **)&retval);
++int
++cs_set_xattr_req(call_frame_t *frame)
++{
++ cs_local_t *local = NULL;
++ GF_UNUSED int ret = 0;
++
++ local = frame->local;
++
++ /* When remote reads are performed (i.e. reads on remote store),
++ * there needs to be a way to associate a file on gluster volume
++     * with its corresponding file on the remote store. In order to do
++     * that, a unique key can be maintained as an xattr
++     * (GF_CS_XATTR_ARCHIVE_UUID) on the stub file on gluster bricks.
++ * This xattr should be provided to the plugin to
++ * perform the read fop on the correct file. This assumes that the file
++ * hierarchy and name need not be the same on remote store as that of
++ * the gluster volume.
++ */
++ ret = dict_set_str(local->xattr_req, GF_CS_XATTR_ARCHIVE_UUID, "1");
++
++ return 0;
++}
+
+- ret = *retval;
++int
++cs_update_xattrs(call_frame_t *frame, dict_t *xdata)
++{
++ cs_local_t *local = NULL;
++ xlator_t *this = NULL;
++ int size = -1;
++ GF_UNUSED int ret = 0;
++
++ local = frame->local;
++ this = frame->this;
++
++ local->xattrinfo.lxattr = GF_CALLOC(1, sizeof(cs_loc_xattr_t),
++ gf_cs_mt_cs_lxattr_t);
++ if (!local->xattrinfo.lxattr) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++
++ gf_uuid_copy(local->xattrinfo.lxattr->gfid, local->loc.gfid);
++
++ if (local->remotepath) {
++ local->xattrinfo.lxattr->file_path = gf_strdup(local->remotepath);
++ if (!local->xattrinfo.lxattr->file_path) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ }
++
++ ret = dict_get_gfuuid(xdata, GF_CS_XATTR_ARCHIVE_UUID,
++ &(local->xattrinfo.lxattr->uuid));
++
++ if (ret) {
++ gf_uuid_clear(local->xattrinfo.lxattr->uuid);
++ }
++ size = strlen(this->name) - strlen("-cloudsync") + 1;
++ local->xattrinfo.lxattr->volname = GF_CALLOC(1, size, gf_common_mt_char);
++ if (!local->xattrinfo.lxattr->volname) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ strncpy(local->xattrinfo.lxattr->volname, this->name, size - 1);
++ local->xattrinfo.lxattr->volname[size - 1] = '\0';
++
++ return 0;
++err:
++ cs_xattrinfo_wipe(local);
++ return -1;
++}
++
++int
++cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags)
++{
++ xlator_t *this = NULL;
++ cs_private_t *priv = NULL;
++ int ret = -1;
++ fd_t *fd = NULL;
++ cs_local_t *local = NULL;
++
++ local = frame->local;
++ this = frame->this;
++ priv = this->private;
++
++ if (!local->remotepath) {
++ ret = -1;
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "remote path not"
++ " available. Check posix logs to resolve");
++ goto out;
++ }
++
++ if (!priv->stores) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "No remote store "
++ "plugins found");
++ ret = -1;
++ goto out;
++ }
++
++ if (local->fd) {
++ fd = fd_anonymous(local->fd->inode);
++ } else {
++ fd = fd_anonymous(local->loc.inode);
++ }
++
++ local->xattrinfo.size = size;
++ local->xattrinfo.offset = offset;
++ local->xattrinfo.flags = flags;
++
++ if (!fd) {
++ gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed");
++ ret = -1;
++ goto out;
++ }
++
++ local->dlfd = fd;
++ local->dloffset = offset;
++
++ /*this calling method is for per volume setting */
++ ret = priv->stores->rdfop(frame, priv->stores->config);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "read failed"
++ ", remotepath: %s",
++ local->remotepath);
++ ret = -1;
++ goto out;
++ } else {
++ gf_msg(this->name, GF_LOG_INFO, 0, 0,
++ "read success, path"
++ " : %s",
++ local->remotepath);
++ }
+
+ out:
+- if (retval)
+- GF_FREE(retval);
++ if (fd) {
++ fd_unref(fd);
++ local->dlfd = NULL;
++ }
++ return ret;
++}
++
++int32_t
++cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
++ int32_t op_errno, struct iovec *vector, int32_t count,
++ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
++{
++ cs_local_t *local = NULL;
++ int ret = 0;
++ uint64_t val = 0;
++ fd_t *fd = NULL;
++
++ local = frame->local;
++ fd = local->fd;
++
++ local->call_cnt++;
++
++ if (op_ret == -1) {
++ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
++ if (ret == 0) {
++ if (val == GF_CS_ERROR) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "could not get file state, unwinding");
++ op_ret = -1;
++ op_errno = EIO;
++ goto unwind;
++ } else {
++ __cs_inode_ctx_update(this, fd->inode, val);
++ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val);
++
++ if (local->call_cnt == 1 &&
++ (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) {
++ gf_msg(this->name, GF_LOG_INFO, 0, 0,
++ " will read from remote : %" PRIu64, val);
++ goto repair;
++ } else {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "second readv, Unwinding");
++ goto unwind;
++ }
++ }
++ } else {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "file state "
++ "could not be figured, unwinding");
++ goto unwind;
++ }
++ } else {
++ /* successful readv => file is local */
++ __cs_inode_ctx_update(this, fd->inode, GF_CS_LOCAL);
++ gf_msg(this->name, GF_LOG_INFO, 0, 0,
++ "state : GF_CS_LOCAL"
++ ", readv successful");
++
++ goto unwind;
++ }
++
++repair:
++ ret = locate_and_execute(frame);
++ if (ret) {
++ goto unwind;
++ }
++
++ return 0;
++
++unwind:
++ CS_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
++ iobref, xdata);
++
++ return 0;
++}
++
++int32_t
++cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t offset, uint32_t flags, dict_t *xdata)
++{
++ int ret = 0;
++
++ ret = cs_resume_postprocess(this, frame, fd->inode);
++ if (ret) {
++ goto unwind;
++ }
++
++ cs_inodelk_unlock(frame);
++
++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
++
++ return 0;
++
++unwind:
++ cs_inodelk_unlock(frame);
++
++ cs_common_cbk(frame);
++
++ return 0;
++}
++
++int32_t
++cs_resume_remote_readv(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
++{
++ int ret = 0;
++ cs_local_t *local = NULL;
++ gf_cs_obj_state state = -1;
++ cs_inode_ctx_t *ctx = NULL;
++
++ cs_inodelk_unlock(frame);
++
++ local = frame->local;
++ if (!local) {
++ ret = -1;
++ goto unwind;
++ }
++
++ __cs_inode_ctx_get(this, fd->inode, &ctx);
++
++ state = __cs_get_file_state(this, fd->inode, ctx);
++ if (state == GF_CS_ERROR) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "status is GF_CS_ERROR."
++ " Aborting readv");
++ local->op_ret = -1;
++ local->op_errno = EREMOTE;
++ ret = -1;
++ goto unwind;
++ }
++
++ /* Serve readv from remote store only if it is remote. */
++ gf_msg_debug(this->name, 0, "status of file %s is %d",
++ local->remotepath ? local->remotepath : "", state);
++
++ /* We will reach this condition if local inode ctx had REMOTE
++ * state when the control was in cs_readv but after stat
++ * we got an updated state saying that the file is LOCAL.
++ */
++ if (state == GF_CS_LOCAL) {
++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
++ xdata);
++ } else if (state == GF_CS_REMOTE) {
++ ret = cs_resume_remote_readv_postprocess(this, frame, fd->inode, offset,
++ size, flags);
++ /* Failed to submit the remote readv fop to plugin */
++ if (ret) {
++ local->op_ret = -1;
++ local->op_errno = EREMOTE;
++ goto unwind;
++ }
++ /* When the file is in any other intermediate state,
++ * we should not perform remote reads.
++ */
++ } else {
++ local->op_ret = -1;
++ local->op_errno = EINVAL;
++ goto unwind;
++ }
++
++ return 0;
++
++unwind:
++ cs_common_cbk(frame);
++
++ return 0;
++}
++
++int32_t
++cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t offset, uint32_t flags, dict_t *xdata)
++{
++ int op_errno = -1;
++ cs_local_t *local = NULL;
++ int ret = 0;
++ cs_inode_ctx_t *ctx = NULL;
++ gf_cs_obj_state state = -1;
++ cs_private_t *priv = NULL;
++
++ VALIDATE_OR_GOTO(frame, err);
++ VALIDATE_OR_GOTO(this, err);
++ VALIDATE_OR_GOTO(fd, err);
++
++ priv = this->private;
++
++ local = cs_local_init(this, frame, NULL, fd, GF_FOP_READ);
++ if (!local) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed");
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ __cs_inode_ctx_get(this, fd->inode, &ctx);
++
++ if (ctx)
++ state = __cs_get_file_state(this, fd->inode, ctx);
++ else
++ state = GF_CS_LOCAL;
++
++ local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new());
++
++ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "dict_set failed key:"
++ " %s",
++ GF_CS_OBJECT_STATUS);
++ goto err;
++ }
++
++ if (priv->remote_read) {
++ local->stub = fop_readv_stub(frame, cs_resume_remote_readv, fd, size,
++ offset, flags, xdata);
++ } else {
++ local->stub = fop_readv_stub(frame, cs_resume_readv, fd, size, offset,
++ flags, xdata);
++ }
++ if (!local->stub) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ if (state == GF_CS_LOCAL) {
++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
++ xdata);
++ } else {
++ local->call_cnt++;
++ ret = locate_and_execute(frame);
++ if (ret) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++ }
++
++ return 0;
++
++err:
++ CS_STACK_UNWIND(readv, frame, -1, op_errno, NULL, -1, NULL, NULL, NULL);
++
++ return 0;
++}
++
++int
++cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame,
++ inode_t *inode, off_t offset, size_t size,
++ uint32_t flags)
++{
++ int ret = 0;
++
++ ret = cs_serve_readv(frame, offset, size, flags);
+
+ return ret;
+ }
+@@ -1059,7 +1414,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ goto err;
+ } else {
+ ret = __cs_inode_ctx_update(this, inode, val);
+- gf_msg_debug(this->name, 0, "status : %lu", val);
++ gf_msg_debug(this->name, 0, "status : %" PRIu64, val);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
+ local->op_ret = -1;
+@@ -1087,6 +1442,10 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ gf_msg_debug(this->name, 0, "NULL filepath");
+ }
+
++ ret = cs_update_xattrs(frame, xdata);
++ if (ret)
++ goto err;
++
+ local->op_ret = 0;
+ local->xattr_rsp = dict_ref(xdata);
+ memcpy(&local->stbuf, stbuf, sizeof(struct iatt));
+@@ -1121,6 +1480,8 @@ cs_do_stat_check(call_frame_t *main_frame)
+ goto err;
+ }
+
++ cs_set_xattr_req(main_frame);
++
+ if (local->fd) {
+ STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, local->fd, local->xattr_req);
+@@ -1177,6 +1538,10 @@ cs_common_cbk(call_frame_t *frame)
+ NULL, NULL, NULL);
+ break;
+
++ case GF_FOP_TRUNCATE:
++ CS_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
++ NULL, NULL, NULL);
++ break;
+ default:
+ break;
+ }
+@@ -1427,7 +1792,7 @@ __cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx)
+ if (ret)
+ *ctx = NULL;
+ else
+- *ctx = (cs_inode_ctx_t *)ctxint;
++ *ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
+
+ return;
+ }
+@@ -1452,7 +1817,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val)
+
+ ctx->state = val;
+
+- ctxint = (uint64_t)ctx;
++ ctxint = (uint64_t)(uintptr_t)ctx;
+
+ ret = __inode_ctx_set(inode, this, &ctxint);
+ if (ret) {
+@@ -1460,7 +1825,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val)
+ goto out;
+ }
+ } else {
+- ctx = (cs_inode_ctx_t *)ctxint;
++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
+
+ ctx->state = val;
+ }
+@@ -1483,7 +1848,7 @@ cs_inode_ctx_reset(xlator_t *this, inode_t *inode)
+ return 0;
+ }
+
+- ctx = (cs_inode_ctx_t *)ctxint;
++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
+
+ GF_FREE(ctx);
+ return 0;
+@@ -1532,6 +1897,57 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode)
+ out:
+ return ret;
+ }
++
++int32_t
++__cs_get_dict_str(char **str, dict_t *xattr, const char *name, int *errnum)
++{
++ data_t *data = NULL;
++ int ret = -1;
++
++ assert(str != NULL);
++
++ data = dict_get(xattr, (char *)name);
++ if (!data) {
++ *errnum = ENODATA;
++ goto out;
++ }
++
++ *str = GF_CALLOC(data->len + 1, sizeof(char), gf_common_mt_char);
++ if (!(*str)) {
++ *errnum = ENOMEM;
++ goto out;
++ }
++
++ memcpy(*str, data->data, sizeof(char) * (data->len));
++ return 0;
++
++out:
++ return ret;
++}
++
++int32_t
++__cs_get_dict_uuid(uuid_t uuid, dict_t *xattr, const char *name, int *errnum)
++{
++ data_t *data = NULL;
++ int ret = -1;
++
++ assert(uuid != NULL);
++
++ data = dict_get(xattr, (char *)name);
++ if (!data) {
++ *errnum = ENODATA;
++ goto out;
++ }
++
++ assert(data->len == sizeof(uuid_t));
++
++ gf_uuid_copy(uuid, (unsigned char *)data->data);
++ return 0;
++
++out:
++ return ret;
++}
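++
++/* Hypothetical usage sketch (names illustrative, not part of this
++ * patch): pulling the remote-store id out of a stat reply, assuming
++ * the response dict carries the "trusted.cloudsync.store-id" key:
++ *
++ *   char *store_id = NULL;
++ *   int errnum = 0;
++ *
++ *   if (!__cs_get_dict_str(&store_id, xattr_rsp,
++ *                          "trusted.cloudsync.store-id", &errnum)) {
++ *       ... use store_id ...
++ *       GF_FREE(store_id);
++ *   }
++ */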
++
+ int32_t
+ cs_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict)
+ {
+@@ -1606,7 +2022,6 @@ cs_notify(xlator_t *this, int event, void *data, ...)
+
+ struct xlator_fops cs_fops = {
+ .stat = cs_stat,
+- .readdirp = cs_readdirp,
+ .truncate = cs_truncate,
+ .seek = cs_seek,
+ .statfs = cs_statfs,
+@@ -1627,7 +2042,9 @@ struct xlator_fops cs_fops = {
+ .zerofill = cs_zerofill,
+ };
+
+-struct xlator_cbks cs_cbks = {};
++struct xlator_cbks cs_cbks = {
++ .forget = cs_forget,
++};
+
+ struct xlator_dumpops cs_dumpops = {
+ .fdctx_to_dict = cs_fdctx_to_dict,
+@@ -1647,6 +2064,15 @@ struct volume_options cs_options[] = {
+ {.key = {"cloudsync-storetype"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Defines which remote store is enabled"},
++ {.key = {"cloudsync-remote-read"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .description = "Defines a remote read fop when on"},
++ {.key = {"cloudsync-store-id"},
++ .type = GF_OPTION_TYPE_STR,
++ .description = "Defines a volume wide store id"},
++ {.key = {"cloudsync-product-id"},
++ .type = GF_OPTION_TYPE_STR,
++ .description = "Defines a volume wide product id"},
+ {.key = {NULL}},
+ };
+
+diff --git a/xlators/features/cloudsync/src/cloudsync.h b/xlators/features/cloudsync/src/cloudsync.h
+index dbdb207..0cb800a 100644
+--- a/xlators/features/cloudsync/src/cloudsync.h
++++ b/xlators/features/cloudsync/src/cloudsync.h
+@@ -19,6 +19,7 @@
+ #include "cloudsync-common.h"
+ #include "cloudsync-autogen-fops.h"
+
++#define ALIGN_SIZE 4096
+ #define CS_LOCK_DOMAIN "cs.protect.file.stat"
+ typedef struct cs_dlstore {
+ off_t off;
+@@ -29,6 +30,7 @@ typedef struct cs_dlstore {
+ } cs_dlstore;
+
+ typedef struct cs_inode_ctx {
++ cs_loc_xattr_t locxattr;
+ gf_cs_obj_state state;
+ } cs_inode_ctx_t;
+
+@@ -100,4 +102,22 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t
+ cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xattr_req);
++
++int32_t
++cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
++ int32_t op_errno, struct iovec *vector, int32_t count,
++ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
++int32_t
++cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t offset, uint32_t flags, dict_t *xdata);
++int32_t
++cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t offset, uint32_t flags, dict_t *xdata);
++
++int
++cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame,
++ inode_t *inode, off_t offset, size_t size,
++ uint32_t flags);
++int
++cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags);
+ #endif /* __CLOUDSYNC_H__ */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 4b32fb6..73abf37 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3693,7 +3693,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .op_version = GD_OP_VERSION_5_0,
+ .description = "enable/disable noatime option with ctime enabled.",
+ .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT},
+- {.key = "feature.cloudsync-storetype",
++ {.key = "features.cloudsync-storetype",
+ .voltype = "features/cloudsync",
+ .op_version = GD_OP_VERSION_5_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+@@ -3721,4 +3721,9 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .validate_fn = validate_boolean,
+ .description = "option to enforce mandatory lock on a file",
+ .flags = VOLOPT_FLAG_XLATOR_OPT},
++ {.key = "features.cloudsync-remote-read",
++ .voltype = "features/cloudsync",
++ .value = "off",
++ .op_version = GD_OP_VERSION_6_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = NULL}};
+--
+1.8.3.1
+
diff --git a/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch b/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch
new file mode 100644
index 0000000..6068678
--- /dev/null
+++ b/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch
@@ -0,0 +1,1394 @@
+From b402b89f71a3ebabca24c459f106af1f9610939a Mon Sep 17 00:00:00 2001
+From: Anuradha Talur <atalur@commvault.com>
+Date: Fri, 30 Nov 2018 11:23:07 -0800
+Subject: [PATCH 154/169] cloudsync/cvlt: Cloudsync plugin for commvault store
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/21771/
+
+> Change-Id: Icbe53e78e9c4f6699c7a26a806ef4b14b39f5019
+> updates: bz#1642168
+> Signed-off-by: Anuradha Talur <atalur@commvault.com>
+
+Change-Id: Ib543605daa51fa1cfe77ed475390a30ef14e6452
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172194
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ configure.ac | 13 +
+ glusterfs.spec.in | 1 +
+ libglusterfs/src/glusterfs/glfs-message-id.h | 1 +
+ .../src/cloudsync-plugins/src/Makefile.am | 6 +-
+ .../src/cloudsync-plugins/src/cvlt/Makefile.am | 3 +
+ .../src/cloudsync-plugins/src/cvlt/src/Makefile.am | 12 +
+ .../cloudsync-plugins/src/cvlt/src/archivestore.h | 203 +++++
+ .../cloudsync-plugins/src/cvlt/src/cvlt-messages.h | 30 +
+ .../src/cvlt/src/libcloudsynccvlt.sym | 1 +
+ .../src/cvlt/src/libcvlt-mem-types.h | 19 +
+ .../src/cloudsync-plugins/src/cvlt/src/libcvlt.c | 842 +++++++++++++++++++++
+ .../src/cloudsync-plugins/src/cvlt/src/libcvlt.h | 84 ++
+ xlators/features/cloudsync/src/cloudsync.c | 6 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 10 +-
+ 14 files changed, 1228 insertions(+), 3 deletions(-)
+ create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
+ create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
+ create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
+ create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
+ create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
+ create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
+ create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
+ create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
+
+diff --git a/configure.ac b/configure.ac
+index 0e11d4c..f597b86 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -170,6 +170,8 @@ AC_CONFIG_FILES([Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile
++ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile
++ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile
+ xlators/playground/Makefile
+ xlators/playground/template/Makefile
+ xlators/playground/template/src/Makefile
+@@ -937,6 +939,17 @@ AM_CONDITIONAL([BUILD_AMAZONS3_PLUGIN], [test "x$HAVE_AMAZONS3" = "xyes"])
+ if test "x$HAVE_AMAZONS3" = "xyes";then
+ BUILD_CLOUDSYNC="yes"
+ fi
++BUILD_CVLT_PLUGIN="no"
++case $host_os in
++#enable cvlt plugin only for linux platforms
++ linux*)
++ BUILD_CVLT_PLUGIN="yes"
++ BUILD_CLOUDSYNC="yes"
++ ;;
++ *)
++ ;;
++esac
++AM_CONDITIONAL([BUILD_CVLT_PLUGIN], [test "x$BUILD_CVLT_PLUGIN" = "xyes"])
+ AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"])
+ dnl end cloudsync section
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index ed58356..85e75f2 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1199,6 +1199,7 @@ exit 0
+ %files cloudsync-plugins
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsyncs3.so
++ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsynccvlt.so
+
+ %files devel
+ %dir %{_includedir}/glusterfs
+diff --git a/libglusterfs/src/glusterfs/glfs-message-id.h b/libglusterfs/src/glusterfs/glfs-message-id.h
+index 001f4ab..a1a16ca 100644
+--- a/libglusterfs/src/glusterfs/glfs-message-id.h
++++ b/libglusterfs/src/glusterfs/glfs-message-id.h
+@@ -93,6 +93,7 @@ enum _msgid_comp {
+ GLFS_MSGID_COMP(TEMPLATE, 1),
+ GLFS_MSGID_COMP(UTIME, 1),
+ GLFS_MSGID_COMP(SNAPVIEW_SERVER, 1),
++ GLFS_MSGID_COMP(CVLT, 1),
+ /* --- new segments for messages goes above this line --- */
+
+ GLFS_MSGID_END
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
+index 4deefb6..fb6b058 100644
+--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
+@@ -2,6 +2,10 @@ if BUILD_AMAZONS3_PLUGIN
+ AMAZONS3_DIR = cloudsyncs3
+ endif
+
+-SUBDIRS = ${AMAZONS3_DIR}
++if BUILD_CVLT_PLUGIN
++ CVLT_DIR = cvlt
++endif
++
++SUBDIRS = ${AMAZONS3_DIR} ${CVLT_DIR}
+
+ CLEANFILES =
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
+new file mode 100644
+index 0000000..a985f42
+--- /dev/null
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
+@@ -0,0 +1,3 @@
++SUBDIRS = src
++
++CLEANFILES =
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
+new file mode 100644
+index 0000000..b512464
+--- /dev/null
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
+@@ -0,0 +1,12 @@
++csp_LTLIBRARIES = cloudsynccvlt.la
++cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins
++
++cloudsynccvlt_la_SOURCES = libcvlt.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c
++cloudsynccvlt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
++cloudsynccvlt_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
++AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
++noinst_HEADERS = archivestore.h libcvlt.h libcvlt-mem-types.h cvlt-messages.h
++AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -I$(top_srcdir)/xlators/features/cloudsync/src
++CLEANFILES =
++
++EXTRA_DIST = libcloudsynccvlt.sym
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
+new file mode 100644
+index 0000000..7230ef7
+--- /dev/null
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
+@@ -0,0 +1,203 @@
++/*
++ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef __ARCHIVESTORE_H__
++#define __ARCHIVESTORE_H__
++
++#include <stdlib.h>
++#include <stddef.h>
++#include <stdint.h>
++#include <dlfcn.h>
++#include <uuid/uuid.h>
++
++#define CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"
++#define CS_XATTR_PRODUCT_ID "trusted.cloudsync.product-id"
++#define CS_XATTR_STORE_ID "trusted.cloudsync.store-id"
++
++struct _archstore_methods;
++typedef struct _archstore_methods archstore_methods_t;
++
++struct _archstore_desc {
++ void *priv; /* Private field for store mgmt. */
++ /* To be used only by archive store */
++};
++typedef struct _archstore_desc archstore_desc_t;
++
++struct _archstore_info {
++ char *id; /* Identifier for the archivestore */
++ uint32_t idlen; /* Length of identifier string */
++ char *prod; /* Name of the data mgmt. product */
++ uint32_t prodlen; /* Length of the product string */
++};
++typedef struct _archstore_info archstore_info_t;
++
++struct _archstore_fileinfo {
++ uuid_t uuid; /* uuid of the file */
++ char *path; /* file path */
++ uint32_t pathlength; /* length of file path */
++};
++typedef struct _archstore_fileinfo archstore_fileinfo_t;
++
++struct _app_callback_info {
++ archstore_info_t *src_archstore;
++ archstore_fileinfo_t *src_archfile;
++ archstore_info_t *dest_archstore;
++ archstore_fileinfo_t *dest_archfile;
++};
++typedef struct _app_callback_info app_callback_info_t;
++
++typedef void (*app_callback_t)(archstore_desc_t *, app_callback_info_t *,
++ void *, int64_t, int32_t);
++
++enum _archstore_scan_type { FULL = 1, INCREMENTAL = 2 };
++typedef enum _archstore_scan_type archstore_scan_type_t;
++
++typedef int32_t archstore_errno_t;
++
++/*
++ * Initialize archive store.
++ * arg1 pointer to structure containing archive store information
++ * arg2 error number if any generated during the initialization
++ * arg3 name of the log file
++ */
++typedef int32_t (*init_archstore_t)(archstore_desc_t *, archstore_errno_t *,
++ const char *);
++
++/*
++ * Clean up archive store.
++ * arg1 pointer to structure containing archive store information
++ * arg2 error number if any generated during the cleanup
++ */
++typedef int32_t (*term_archstore_t)(archstore_desc_t *, archstore_errno_t *);
++
++/*
++ * Read the contents of the file from archive store
++ * arg1 pointer to structure containing archive store description
++ * arg2 pointer to structure containing archive store information
++ * arg3 pointer to structure containing information about file to be read
++ * arg4 offset in the file from which data should be read
++ * arg5 buffer where the data should be read
++ * arg6 number of bytes of data to be read
++ * arg7 error number if any generated during the read from file
++ * arg8 callback handler to be invoked after the data is read
++ * arg9 cookie to be passed when callback is invoked
++ */
++typedef int32_t (*read_archstore_t)(archstore_desc_t *, archstore_info_t *,
++ archstore_fileinfo_t *, off_t, char *,
++ size_t, archstore_errno_t *, app_callback_t,
++ void *);
++
++/*
++ * Restore the contents of the file from archive store
++ * This is basically in-place restore
++ * arg1 pointer to structure containing archive store description
++ * arg2 pointer to structure containing archive store information
++ * arg3 pointer to structure containing information about file to be restored
++ * arg4 error number if any generated during the file restore
++ * arg5 callback to be invoked after the file is restored
++ * arg6 cookie to be passed when callback is invoked
++ */
++typedef int32_t (*recall_archstore_t)(archstore_desc_t *, archstore_info_t *,
++ archstore_fileinfo_t *,
++ archstore_errno_t *, app_callback_t,
++ void *);
++
++/*
++ * Restore the contents of the file from archive store to a different store
++ * This is basically out-of-place restore
++ * arg1 pointer to structure containing archive store description
++ * arg2 pointer to structure containing source archive store information
++ * arg3 pointer to structure containing information about file to be restored
++ * arg4 pointer to structure containing destination archive store information
++ * arg5 pointer to structure containing information about the location to
++ * which the file will be restored
++ * arg6 error number if any generated during the file restore
++ * arg7 callback to be invoked after the file is restored
++ * arg8 cookie to be passed when callback is invoked
++ */
++typedef int32_t (*restore_archstore_t)(archstore_desc_t *, archstore_info_t *,
++ archstore_fileinfo_t *,
++ archstore_info_t *,
++ archstore_fileinfo_t *,
++ archstore_errno_t *, app_callback_t,
++ void *);
++
++/*
++ * Archive the contents of the file to archive store
++ * arg1 pointer to structure containing archive store description
++ * arg2 pointer to structure containing source archive store information
++ * arg3 pointer to structure containing information about files to be archived
++ * arg4 pointer to structure containing destination archive store information
++ * arg5 pointer to structure containing information about files that failed
++ * to be archived
++ * arg6 error number if any generated during the file archival
++ * arg7 callback to be invoked after the file is archived
++ * arg8 cookie to be passed when callback is invoked
++ */
++typedef int32_t (*archive_archstore_t)(archstore_desc_t *, archstore_info_t *,
++ archstore_fileinfo_t *,
++ archstore_info_t *,
++ archstore_fileinfo_t *,
++ archstore_errno_t *, app_callback_t,
++ void *);
++
++/*
++ * Backup list of files provided in the input file
++ * arg1 pointer to structure containing archive store description
++ * arg2 pointer to structure containing source archive store information
++ * arg3 pointer to structure containing information about files to be backed up
++ * arg4 pointer to structure containing destination archive store information
++ * arg5 pointer to structure containing information about files that failed
++ * to be backed up
++ * arg6 error number if any generated during the file archival
++ * arg7 callback to be invoked after the file is archived
++ * arg8 cookie to be passed when callback is invoked
++ */
++typedef int32_t (*backup_archstore_t)(archstore_desc_t *, archstore_info_t *,
++ archstore_fileinfo_t *,
++ archstore_info_t *,
++ archstore_fileinfo_t *,
++ archstore_errno_t *, app_callback_t,
++ void *);
++
++/*
++ * Scan the contents of a store and determine the files which need to be
++ * backed up.
++ * arg1 pointer to structure containing archive store description
++ * arg2 pointer to structure containing archive store information
++ * arg3 type of scan whether full or incremental
++ * arg4 path to file that contains list of files to be backed up
++ * arg5 error number if any generated during scan operation
++ */
++typedef int32_t (*scan_archstore_t)(archstore_desc_t *, archstore_info_t *,
++ archstore_scan_type_t, char *,
++ archstore_errno_t *);
++
++struct _archstore_methods {
++ init_archstore_t init;
++ term_archstore_t fini;
++ backup_archstore_t backup;
++ archive_archstore_t archive;
++ scan_archstore_t scan;
++ restore_archstore_t restore;
++ recall_archstore_t recall;
++ read_archstore_t read;
++};
++
++typedef int (*get_archstore_methods_t)(archstore_methods_t *);
++
++/*
++ * Single function that will be invoked by applications for extracting
++ * the function pointers to all data management functions.
++ */
++int32_t
++get_archstore_methods(archstore_methods_t *);
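++
++/*
++ * Illustrative only (not part of the interface): a store library
++ * would typically define its methods statically and hand them out
++ * from get_archstore_methods(), e.g.
++ *
++ *   static archstore_methods_t mystore_methods = {
++ *       .init = mystore_init,
++ *       .fini = mystore_fini,
++ *       .read = mystore_read,
++ *       .restore = mystore_restore,
++ *   };
++ *
++ *   int32_t
++ *   get_archstore_methods(archstore_methods_t *m)
++ *   {
++ *       *m = mystore_methods;
++ *       return 0;
++ *   }
++ *
++ * The mystore_* names are hypothetical.
++ */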
++
++#endif /* End of __ARCHIVESTORE_H__ */
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
+new file mode 100644
+index 0000000..57c9aa7
+--- /dev/null
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
+@@ -0,0 +1,30 @@
++/*
++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++ */
++
++#ifndef _CVLT_MESSAGES_H_
++#define _CVLT_MESSAGES_H_
++
++#include <glusterfs/glfs-message-id.h>
++
++/* To add new message IDs, append new identifiers at the end of the list.
++ *
++ * Never remove a message ID. If it's not used anymore, you can rename it or
++ * leave it as it is, but not delete it. This is to prevent reutilization of
++ * IDs by other messages.
++ *
++ * The component name must match one of the entries defined in
++ * glfs-message-id.h.
++ */
++
++GLFS_MSGID(CVLT, CVLT_EXTRACTION_FAILED, CVLT_FREE,
++ CVLT_RESOURCE_ALLOCATION_FAILED, CVLT_RESTORE_FAILED,
++ CVLT_READ_FAILED, CVLT_NO_MEMORY, CVLT_DLOPEN_FAILED);
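++
++/* For example, a future message would be appended as the last argument:
++ * GLFS_MSGID(CVLT, ..., CVLT_DLOPEN_FAILED, CVLT_NEW_MSG);
++ * (CVLT_NEW_MSG is a hypothetical identifier, shown only to illustrate
++ * appending at the end of the list.) */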
++
++#endif /* !_CVLT_MESSAGES_H_ */
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
+new file mode 100644
+index 0000000..0bc2736
+--- /dev/null
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
+@@ -0,0 +1 @@
++store_ops
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
+new file mode 100644
+index 0000000..c24fab8
+--- /dev/null
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
+@@ -0,0 +1,19 @@
++/*
++ * Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
++ * This file is part of GlusterFS.
++ *
++ * This file is licensed to you under your choice of the GNU Lesser
++ * General Public License, version 3 or any later version (LGPLv3 or
++ * later), or the GNU General Public License, version 2 (GPLv2), in all
++ * cases as published by the Free Software Foundation.
++ */
++
++#ifndef __LIBCVLT_MEM_TYPES_H__
++#define __LIBCVLT_MEM_TYPES_H__
++
++#include <glusterfs/mem-types.h>
++enum libcvlt_mem_types_ {
++ gf_libcvlt_mt_cvlt_private_t = gf_common_mt_end + 1,
++ gf_libcvlt_mt_end
++};
++#endif /* __LIBCVLT_MEM_TYPES_H__ */
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
+new file mode 100644
+index 0000000..e827882
+--- /dev/null
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
+@@ -0,0 +1,842 @@
++#include <stdlib.h>
++#include <glusterfs/xlator.h>
++#include <glusterfs/glusterfs.h>
++#include "libcvlt.h"
++#include "cloudsync-common.h"
++#include "cvlt-messages.h"
++
++#define LIBARCHIVE_SO "libopenarchive.so"
++#define ALIGN_SIZE 4096
++#define CVLT_TRAILER "cvltv1"
++
++store_methods_t store_ops = {
++ .fop_download = cvlt_download,
++ .fop_init = cvlt_init,
++ .fop_reconfigure = cvlt_reconfigure,
++ .fop_fini = cvlt_fini,
++ .fop_remote_read = cvlt_read,
++};
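++
++/* cloudsync resolves this "store_ops" table through the symbol exported
++ * in libcloudsynccvlt.sym. Note the split in call styles below:
++ * fop_download (cvlt_download) is synchronous and waits on a semaphore
++ * until cvlt_download_complete() posts it, while fop_remote_read
++ * (cvlt_read) is asynchronous and lets cvlt_readv_complete() unwind the
++ * readv fop directly. */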
++
++static const int32_t num_req = 32;
++static const int32_t num_iatt = 32;
++static char *plugin = "cvlt_cloudSync";
++
++int32_t
++mem_acct_init(xlator_t *this)
++{
++ int ret = -1;
++
++ if (!this)
++ return ret;
++
++ ret = xlator_mem_acct_init(this, gf_libcvlt_mt_end + 1);
++
++ if (ret != 0) {
++ return ret;
++ }
++
++ return ret;
++}
++
++static void
++cvlt_free_resources(archive_t *arch)
++{
++ /*
++ * We will release all the resources that were allocated by the xlator.
++ * Check whether there are any buffers which have not been released
++ * back to a mempool.
++ */
++
++ if (arch->handle) {
++ dlclose(arch->handle);
++ }
++
++ if (arch->iobuf_pool) {
++ iobuf_pool_destroy(arch->iobuf_pool);
++ }
++
++ if (arch->req_pool) {
++ mem_pool_destroy(arch->req_pool);
++ arch->req_pool = NULL;
++ }
++
++ return;
++}
++
++static int32_t
++cvlt_extract_store_fops(xlator_t *this, archive_t *arch)
++{
++ int32_t op_ret = -1;
++ get_archstore_methods_t get_archstore_methods;
++
++ /*
++ * libopenarchive.so defines methods for performing data management
++ * operations. We will extract the methods from the library, and these
++ * methods will be invoked for moving data between the glusterfs volume
++ * and the data management product.
++ */
++
++ VALIDATE_OR_GOTO(arch, err);
++
++ arch->handle = dlopen(LIBARCHIVE_SO, RTLD_NOW);
++ if (!arch->handle) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_DLOPEN_FAILED,
++ " failed to open %s ", LIBARCHIVE_SO);
++ return op_ret;
++ }
++
++ dlerror(); /* Clear any existing error */
++
++ get_archstore_methods = dlsym(arch->handle, "get_archstore_methods");
++ if (!get_archstore_methods) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " Error extracting get_archstore_methods()");
++ dlclose(arch->handle);
++ arch->handle = NULL;
++ return op_ret;
++ }
++
++ op_ret = get_archstore_methods(&(arch->fops));
++ if (op_ret) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " Failed to extract methods in get_archstore_methods");
++ dlclose(arch->handle);
++ arch->handle = NULL;
++ return op_ret;
++ }
++
++err:
++ return op_ret;
++}
++
++static int32_t
++cvlt_alloc_resources(xlator_t *this, archive_t *arch, int num_req, int num_iatt)
++{
++ /*
++ * Initialize information about all the memory pools that will be
++ * used by this xlator.
++ */
++ arch->nreqs = 0;
++
++ arch->req_pool = NULL;
++
++ arch->handle = NULL;
++ arch->xl = this;
++
++ arch->req_pool = mem_pool_new(cvlt_request_t, num_req);
++ if (!arch->req_pool) {
++ goto err;
++ }
++
++ arch->iobuf_pool = iobuf_pool_new();
++ if (!arch->iobuf_pool) {
++ goto err;
++ }
++
++ if (cvlt_extract_store_fops(this, arch)) {
++ goto err;
++ }
++
++ return 0;
++
++err:
++
++ return -1;
++}
++
++static void
++cvlt_req_init(cvlt_request_t *req)
++{
++ sem_init(&(req->sem), 0, 0);
++
++ return;
++}
++
++static void
++cvlt_req_destroy(cvlt_request_t *req)
++{
++ if (req->iobuf) {
++ iobuf_unref(req->iobuf);
++ }
++
++ if (req->iobref) {
++ iobref_unref(req->iobref);
++ }
++
++ sem_destroy(&(req->sem));
++
++ return;
++}
++
++static cvlt_request_t *
++cvlt_alloc_req(archive_t *arch)
++{
++ cvlt_request_t *reqptr = NULL;
++
++ if (!arch) {
++ goto err;
++ }
++
++ if (arch->req_pool) {
++ reqptr = mem_get0(arch->req_pool);
++ if (reqptr) {
++ cvlt_req_init(reqptr);
++ }
++ }
++
++ if (reqptr) {
++ LOCK(&(arch->lock));
++ arch->nreqs++;
++ UNLOCK(&(arch->lock));
++ }
++
++err:
++ return reqptr;
++}
++
++static int32_t
++cvlt_free_req(archive_t *arch, cvlt_request_t *reqptr)
++{
++ if (!reqptr) {
++ goto err;
++ }
++
++ if (!arch) {
++ goto err;
++ }
++
++ if (arch->req_pool) {
++ /*
++ * Free the request resources if they exist.
++ */
++
++ cvlt_req_destroy(reqptr);
++ mem_put(reqptr);
++
++ LOCK(&(arch->lock));
++ arch->nreqs--;
++ UNLOCK(&(arch->lock));
++ }
++
++ return 0;
++
++err:
++ return -1;
++}
++
++static int32_t
++cvlt_init_xlator(xlator_t *this, archive_t *arch, int num_req, int num_iatt)
++{
++ int32_t ret = -1;
++ int32_t errnum = -1;
++ int32_t locked = 0;
++
++ /*
++ * Perform all the initializations needed for bringing up the xlator.
++ */
++ if (!arch) {
++ goto err;
++ }
++
++ LOCK_INIT(&(arch->lock));
++ LOCK(&(arch->lock));
++
++ locked = 1;
++
++ ret = cvlt_alloc_resources(this, arch, num_req, num_iatt);
++
++ if (ret) {
++ goto err;
++ }
++
++ /*
++ * Now that the fops have been extracted initialize the store
++ */
++ ret = arch->fops.init(&(arch->descinfo), &errnum, plugin);
++ if (ret) {
++ goto err;
++ }
++
++ UNLOCK(&(arch->lock));
++ locked = 0;
++ ret = 0;
++
++ return ret;
++
++err:
++ cvlt_free_resources(arch);
++
++ if (locked) {
++ UNLOCK(&(arch->lock));
++ }
++
++ return ret;
++}
++
++static int32_t
++cvlt_term_xlator(archive_t *arch)
++{
++ int32_t errnum = -1;
++
++ if (!arch) {
++ goto err;
++ }
++
++ LOCK(&(arch->lock));
++
++ /*
++ * Release the resources that have been allocated inside store
++ */
++ arch->fops.fini(&(arch->descinfo), &errnum);
++
++ cvlt_free_resources(arch);
++
++ UNLOCK(&(arch->lock));
++
++ GF_FREE(arch);
++
++ return 0;
++
++err:
++ return -1;
++}
++
++static int32_t
++cvlt_init_store_info(archive_t *priv, archstore_info_t *store_info)
++{
++ if (!store_info) {
++ return -1;
++ }
++
++ store_info->prod = priv->product_id;
++ store_info->prodlen = strlen(priv->product_id);
++
++ store_info->id = priv->store_id;
++ store_info->idlen = strlen(priv->store_id);
++
++ return 0;
++}
++
++static int32_t
++cvlt_init_file_info(cs_loc_xattr_t *xattr, archstore_fileinfo_t *file_info)
++{
++ if (!xattr || !file_info) {
++ return -1;
++ }
++
++ gf_uuid_copy(file_info->uuid, xattr->uuid);
++ file_info->path = xattr->file_path;
++ file_info->pathlength = strlen(xattr->file_path);
++
++ return 0;
++}
++
++static int32_t
++cvlt_init_gluster_store_info(cs_loc_xattr_t *xattr,
++ archstore_info_t *store_info)
++{
++ static char *product = "glusterfs";
++
++ if (!xattr || !store_info) {
++ return -1;
++ }
++
++ store_info->prod = product;
++ store_info->prodlen = strlen(product);
++
++ store_info->id = xattr->volname;
++ store_info->idlen = strlen(xattr->volname);
++
++ return 0;
++}
++
++static int32_t
++cvlt_init_gluster_file_info(cs_loc_xattr_t *xattr,
++ archstore_fileinfo_t *file_info)
++{
++ if (!xattr || !file_info) {
++ return -1;
++ }
++
++ gf_uuid_copy(file_info->uuid, xattr->gfid);
++ file_info->path = xattr->file_path;
++ file_info->pathlength = strlen(xattr->file_path);
++
++ return 0;
++}
++
++static void
++cvlt_copy_stat_info(struct iatt *buf, cs_size_xattr_t *xattrs)
++{
++ /*
++ * If the file was archived then the reported size will not be a
++ * correct one. We need to fix this.
++ */
++ if (buf && xattrs) {
++ buf->ia_size = xattrs->size;
++ buf->ia_blksize = xattrs->blksize;
++ buf->ia_blocks = xattrs->blocks;
++ }
++
++ return;
++}
++
++static void
++cvlt_readv_complete(archstore_desc_t *desc, app_callback_info_t *cbkinfo,
++ void *cookie, int64_t op_ret, int32_t op_errno)
++{
++ struct iovec iov;
++ xlator_t *this = NULL;
++ struct iatt postbuf = {
++ 0,
++ };
++ call_frame_t *frame = NULL;
++ cvlt_request_t *req = (cvlt_request_t *)cookie;
++ cs_local_t *local = NULL;
++ cs_private_t *cspriv = NULL;
++ archive_t *priv = NULL;
++
++ frame = req->frame;
++ this = frame->this;
++ local = frame->local;
++
++ cspriv = this->private;
++ priv = (archive_t *)cspriv->stores->config;
++
++ if (strcmp(priv->trailer, CVLT_TRAILER)) {
++ op_ret = -1;
++ op_errno = EINVAL;
++ goto out;
++ }
++
++ gf_msg_debug(plugin, 0,
++ " Read callback invoked offset:%" PRIu64 "bytes: %" PRIu64
++ " op : %d ret : %" PRId64 " errno : %d",
++ req->offset, req->bytes, req->op_type, op_ret, op_errno);
++
++ if (op_ret < 0) {
++ goto out;
++ }
++
++ req->iobref = iobref_new();
++ if (!req->iobref) {
++ op_ret = -1;
++ op_errno = ENOMEM;
++ goto out;
++ }
++
++ iobref_add(req->iobref, req->iobuf);
++ iov.iov_base = iobuf_ptr(req->iobuf);
++ iov.iov_len = op_ret;
++
++ cvlt_copy_stat_info(&postbuf, &(req->szxattr));
++
++ /*
++ * Hack to notify higher layers of EOF.
++ */
++ if (!postbuf.ia_size || (req->offset + iov.iov_len >= postbuf.ia_size)) {
++ gf_msg_debug(plugin, 0, " signalling end-of-file for uuid=%s",
++ uuid_utoa(req->file_info.uuid));
++ op_errno = ENOENT;
++ }
++
++out:
++
++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf,
++ req->iobref, local->xattr_rsp);
++
++ if (req) {
++ cvlt_free_req(priv, req);
++ }
++
++ return;
++}
++
++static void
++cvlt_download_complete(archstore_desc_t *store, app_callback_info_t *cbk_info,
++ void *cookie, int64_t ret, int errcode)
++{
++ cvlt_request_t *req = (cvlt_request_t *)cookie;
++
++ gf_msg_debug(plugin, 0,
++ " Download callback invoked ret : %" PRId64 " errno : %d",
++ ret, errcode);
++
++ req->op_ret = ret;
++ req->op_errno = errcode;
++ sem_post(&(req->sem));
++
++ return;
++}
++
++void *
++cvlt_init(xlator_t *this)
++{
++ int ret = 0;
++ archive_t *priv = NULL;
++
++ if (!this->children || this->children->next) {
++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0,
++ "should have exactly one child");
++ ret = -1;
++ goto out;
++ }
++
++ if (!this->parents) {
++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0,
++ "dangling volume. check volfile");
++ ret = -1;
++ goto out;
++ }
++
++ priv = GF_CALLOC(1, sizeof(archive_t), gf_libcvlt_mt_cvlt_private_t);
++ if (!priv) {
++ ret = -1;
++ goto out;
++ }
++
++ priv->trailer = CVLT_TRAILER;
++ if (cvlt_init_xlator(this, priv, num_req, num_iatt)) {
++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, "xlator init failed");
++ ret = -1;
++ goto out;
++ }
++
++ GF_OPTION_INIT("cloudsync-store-id", priv->store_id, str, out);
++ GF_OPTION_INIT("cloudsync-product-id", priv->product_id, str, out);
++
++ gf_msg(plugin, GF_LOG_INFO, 0, 0,
++ "store id is : %s "
++ "product id is : %s.",
++ priv->store_id, priv->product_id);
++out:
++ if (ret == -1) {
++ cvlt_term_xlator(priv);
++ return (NULL);
++ }
++ return priv;
++}
++
++int
++cvlt_reconfigure(xlator_t *this, dict_t *options)
++{
++ cs_private_t *cspriv = NULL;
++ archive_t *priv = NULL;
++
++ cspriv = this->private;
++ priv = (archive_t *)cspriv->stores->config;
++
++ if (strcmp(priv->trailer, CVLT_TRAILER))
++ goto out;
++
++ GF_OPTION_RECONF("cloudsync-store-id", priv->store_id, options, str, out);
++
++ GF_OPTION_RECONF("cloudsync-product-id", priv->product_id, options, str,
++ out);
++ gf_msg_debug(plugin, 0,
++ "store id is : %s "
++ "product id is : %s.",
++ priv->store_id, priv->product_id);
++ return 0;
++out:
++ return -1;
++}
++
++void
++cvlt_fini(void *config)
++{
++ archive_t *priv = NULL;
++
++ priv = (archive_t *)config;
++
++ if (strcmp(priv->trailer, CVLT_TRAILER))
++ return;
++
++ cvlt_term_xlator(priv);
++ gf_msg(plugin, GF_LOG_INFO, 0, CVLT_FREE, " released xlator resources");
++ return;
++}
++
++int
++cvlt_download(call_frame_t *frame, void *config)
++{
++ archive_t *parch = NULL;
++ cs_local_t *local = frame->local;
++ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr;
++ cvlt_request_t *req = NULL;
++ archstore_info_t dest_storeinfo;
++ archstore_fileinfo_t dest_fileinfo;
++ int32_t op_ret, op_errno;
++
++ parch = (archive_t *)config;
++
++ if (strcmp(parch->trailer, CVLT_TRAILER)) {
++ op_ret = -1;
++ op_errno = EINVAL;
++ goto err;
++ }
++
++ gf_msg_debug(plugin, 0, " download invoked for uuid = %s gfid=%s ",
++ locxattr->uuid, uuid_utoa(locxattr->gfid));
++
++ if (!(parch->fops.restore)) {
++ op_errno = ELIBBAD;
++ goto err;
++ }
++
++ /*
++ * Download needs to be processed. Allocate a request.
++ */
++ req = cvlt_alloc_req(parch);
++
++ if (!req) {
++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_RESOURCE_ALLOCATION_FAILED,
++ " failed to allocated request for gfid=%s",
++ uuid_utoa(locxattr->gfid));
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ /*
++ * Initialize the request object.
++ */
++ req->op_type = CVLT_RESTORE_OP;
++ req->frame = frame;
++
++ /*
++ * The file is currently residing inside a data management store.
++ * To restore the file contents we need to provide the information
++ * about the data management store.
++ */
++ op_ret = cvlt_init_store_info(parch, &(req->store_info));
++ if (op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " failed to extract store info for gfid=%s",
++ uuid_utoa(locxattr->gfid));
++ goto err;
++ }
++
++ op_ret = cvlt_init_file_info(locxattr, &(req->file_info));
++ if (op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " failed to extract file info for gfid=%s",
++ uuid_utoa(locxattr->gfid));
++ goto err;
++ }
++
++ /*
++ * We need to perform an in-place restore of the file from the data
++ * management store to the glusterfs volume.
++ */
++ op_ret = cvlt_init_gluster_store_info(locxattr, &dest_storeinfo);
++ if (op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " failed to extract destination store info for gfid=%s",
++ uuid_utoa(locxattr->gfid));
++ goto err;
++ }
++
++ op_ret = cvlt_init_gluster_file_info(locxattr, &dest_fileinfo);
++ if (op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " failed to extract file info for gfid=%s",
++ uuid_utoa(locxattr->gfid));
++ goto err;
++ }
++
++ /*
++ * Submit the restore request.
++ */
++ op_ret = parch->fops.restore(&(parch->descinfo), &(req->store_info),
++ &(req->file_info), &dest_storeinfo,
++ &dest_fileinfo, &op_errno,
++ cvlt_download_complete, req);
++ if (op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED,
++ " failed to restore file gfid=%s from data managment store",
++ uuid_utoa(locxattr->gfid));
++ goto err;
++ }
++
++ /*
++ * Wait for the restore to complete.
++ */
++ sem_wait(&(req->sem));
++
++ if (req->op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED,
++ " restored failed for gfid=%s", uuid_utoa(locxattr->gfid));
++ goto err;
++ }
++
++ if (req) {
++ cvlt_free_req(parch, req);
++ }
++
++ return 0;
++
++err:
++
++ if (req) {
++ cvlt_free_req(parch, req);
++ }
++
++ return -1;
++}
++
++int
++cvlt_read(call_frame_t *frame, void *config)
++{
++ int32_t op_ret = -1;
++ int32_t op_errno = 0;
++ archive_t *parch = NULL;
++ cvlt_request_t *req = NULL;
++ struct iovec iov = {
++ 0,
++ };
++ struct iobref *iobref;
++ size_t size = 0;
++ off_t off = 0;
++
++ cs_local_t *local = frame->local;
++ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr;
++
++ size = local->xattrinfo.size;
++ off = local->xattrinfo.offset;
++
++ parch = (archive_t *)config;
++
++ if (strcmp(parch->trailer, CVLT_TRAILER)) {
++ op_ret = -1;
++ op_errno = EINVAL;
++ goto err;
++ }
++
++ gf_msg_debug(plugin, 0,
++ " read invoked for gfid = %s offset = %" PRIu64
++ " file_size = %" PRIu64,
++ uuid_utoa(locxattr->gfid), off, local->stbuf.ia_size);
++
++ if (off >= local->stbuf.ia_size) {
++ /*
++ * Hack to notify higher layers of EOF.
++ */
++
++ op_errno = ENOENT;
++ op_ret = 0;
++
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED,
++ " reporting end-of-file for gfid=%s", uuid_utoa(locxattr->gfid));
++
++ goto err;
++ }
++
++ if (!size) {
++ op_errno = EINVAL;
++
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED,
++ " zero size read attempted on gfid=%s",
++ uuid_utoa(locxattr->gfid));
++ goto err;
++ }
++
++ if (!(parch->fops.read)) {
++ op_errno = ELIBBAD;
++ goto err;
++ }
++
++ /*
++ * The read request needs to be processed. Allocate a request.
++ */
++ req = cvlt_alloc_req(parch);
++
++ if (!req) {
++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_NO_MEMORY,
++ " failed to allocated request for gfid=%s",
++ uuid_utoa(locxattr->gfid));
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ req->iobuf = iobuf_get_page_aligned(parch->iobuf_pool, size, ALIGN_SIZE);
++ if (!req->iobuf) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ /*
++ * Initialize the request object.
++ */
++ req->op_type = CVLT_READ_OP;
++ req->offset = off;
++ req->bytes = size;
++ req->frame = frame;
++ req->szxattr.size = local->stbuf.ia_size;
++ req->szxattr.blocks = local->stbuf.ia_blocks;
++ req->szxattr.blksize = local->stbuf.ia_blksize;
++
++ /*
++ * The file is currently residing inside a data management store.
++ * To read the file contents we need to provide the information
++ * about the data management store.
++ */
++ op_ret = cvlt_init_store_info(parch, &(req->store_info));
++ if (op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " failed to extract store info for gfid=%s"
++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
++ ", "
++ " buf=%p",
++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
++ goto err;
++ }
++
++ op_ret = cvlt_init_file_info(locxattr, &(req->file_info));
++ if (op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " failed to extract file info for gfid=%s"
++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
++ ", "
++ " buf=%p",
++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
++ goto err;
++ }
++
++ /*
++ * Submit the read request.
++ */
++ op_ret = parch->fops.read(&(parch->descinfo), &(req->store_info),
++ &(req->file_info), off, req->iobuf->ptr, size,
++ &op_errno, cvlt_readv_complete, req);
++
++ if (op_ret < 0) {
++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
++ " read failed on gfid=%s"
++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
++ ", "
++ " buf=%p",
++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
++ goto err;
++ }
++
++ return 0;
++
++err:
++
++ iobref = iobref_new();
++ gf_msg_debug(plugin, 0, " read unwinding stack op_ret = %d, op_errno = %d",
++ op_ret, op_errno);
++
++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1,
++ &(local->stbuf), iobref, local->xattr_rsp);
++
++ if (iobref) {
++ iobref_unref(iobref);
++ }
++
++ if (req) {
++ cvlt_free_req(parch, req);
++ }
++
++ return 0;
++}
+diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
+new file mode 100644
+index 0000000..c45ac94
+--- /dev/null
++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
+@@ -0,0 +1,84 @@
++/*
++ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++#ifndef _LIBCVLT_H
++#define _LIBCVLT_H
++
++#include <semaphore.h>
++#include <glusterfs/xlator.h>
++#include <glusterfs/glusterfs.h>
++#include <glusterfs/call-stub.h>
++#include <glusterfs/syncop.h>
++#include <glusterfs/compat-errno.h>
++#include "cloudsync-common.h"
++#include "libcvlt-mem-types.h"
++#include "archivestore.h"
++
++enum _cvlt_op {
++ CVLT_READ_OP = 1,
++ CVLT_WRITE_OP = 2,
++ CVLT_RESTORE_OP = 3,
++ CVLT_ARCHIVE_OP = 4,
++ CVLT_LOOKUP_OP = 5,
++ CVLT_XATTR_OP = 6,
++ CVLT_STAT_OP = 7,
++ CVLT_FSTAT_op = 8,
++ CVLT_UNDEF_OP = 127
++};
++typedef enum _cvlt_op cvlt_op_t;
++
++struct _archive;
++struct _cvlt_request {
++ uint64_t offset;
++ uint64_t bytes;
++ struct iobuf *iobuf;
++ struct iobref *iobref;
++ call_frame_t *frame;
++ cvlt_op_t op_type;
++ int32_t op_ret;
++ int32_t op_errno;
++ xlator_t *this;
++ sem_t sem;
++ archstore_info_t store_info;
++ archstore_fileinfo_t file_info;
++ cs_size_xattr_t szxattr;
++};
++typedef struct _cvlt_request cvlt_request_t;
++
++struct _archive {
++ gf_lock_t lock; /* lock for controlling access */
++ xlator_t *xl; /* xlator */
++ void *handle; /* handle returned from dlopen */
++ int32_t nreqs; /* num requests active */
++ struct mem_pool *req_pool; /* pool for requests */
++ struct iobuf_pool *iobuf_pool; /* iobuff pool */
++ archstore_desc_t descinfo; /* Archive store descriptor info */
++ archstore_methods_t fops; /* function pointers */
++ char *product_id;
++ char *store_id;
++ char *trailer;
++};
++typedef struct _archive archive_t;
++
++void *
++cvlt_init(xlator_t *);
++
++int
++cvlt_reconfigure(xlator_t *, dict_t *);
++
++void
++cvlt_fini(void *);
++
++int
++cvlt_download(call_frame_t *, void *);
++
++int
++cvlt_read(call_frame_t *, void *);
++
++#endif
+diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
+index 2240fc3..8026b05 100644
+--- a/xlators/features/cloudsync/src/cloudsync.c
++++ b/xlators/features/cloudsync/src/cloudsync.c
+@@ -39,7 +39,11 @@ struct cs_plugin plugins[] = {
+ {.name = "cloudsyncs3",
+ .library = "cloudsyncs3.so",
+ .description = "cloudsync s3 store."},
+-
++#if defined(__linux__)
++ {.name = "cvlt",
++ .library = "cloudsynccvlt.so",
++ .description = "Commvault content store."},
++#endif
+ {.name = NULL},
+ };
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 73abf37..7a83124 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3724,6 +3724,14 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "features.cloudsync-remote-read",
+ .voltype = "features/cloudsync",
+ .value = "off",
+- .op_version = GD_OP_VERSION_6_0,
++ .op_version = GD_OP_VERSION_7_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT},
++ {.key = "features.cloudsync-store-id",
++ .voltype = "features/cloudsync",
++ .op_version = GD_OP_VERSION_7_0,
++ .flags = VOLOPT_FLAG_CLIENT_OPT},
++ {.key = "features.cloudsync-product-id",
++ .voltype = "features/cloudsync",
++ .op_version = GD_OP_VERSION_7_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = NULL}};
+--
+1.8.3.1
+
diff --git a/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch b/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch
new file mode 100644
index 0000000..937a772
--- /dev/null
+++ b/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch
@@ -0,0 +1,114 @@
+From 693fcf327eace37fe698953b90050d67fc840ac6 Mon Sep 17 00:00:00 2001
+From: Anuradha Talur <atalur@commvault.com>
+Date: Wed, 24 Apr 2019 12:06:23 -0700
+Subject: [PATCH 155/169] cloudsync: Make readdirp return stat info of all the
+ dirents
+
+This change got missed while the initial changes were sent.
+It should have been a part of:
+ https://review.gluster.org/#/c/glusterfs/+/21757/
+
+Gist of the change:
+ The function that fills in stat info for dirents is
+invoked in posix readdirp when cloudsync populates the xdata
+request with GF_CS_OBJECT_STATUS.
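+
+In effect (a sketch of the hunks below), cs_readdirp() marks the
+request before winding it down:
+
+    ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1);
+
+and posix_readdirp_fill() responds by calling posix_update_iatt_buf()
+on each dirent's stbuf so that archived files report correct stat info.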
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/22616/
+
+> Change-Id: Ide0c4e80afb74cd2120f74ba934ed40123152d69
+> updates: bz#1642168
+> Signed-off-by: Anuradha Talur <atalur@commvault.com>
+
+Change-Id: I77de3f9d8ae01a0280a9d1753f94d74b5e5ce2fd
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172193
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/features/cloudsync/src/cloudsync-fops-c.py | 2 +-
+ xlators/features/cloudsync/src/cloudsync.c | 35 ++++++++++++++++++++++
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 2 ++
+ 3 files changed, 38 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+index a7a2201..8878b70 100755
+--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py
++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+@@ -285,7 +285,7 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr',
+
+ # These fops need a separate implementation
+ special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr',
+- 'truncate', 'fstat', 'readv']
++ 'truncate', 'fstat', 'readv', 'readdirp']
+
+ def gen_defaults():
+ for name in ops:
+diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
+index 8026b05..26e512c 100644
+--- a/xlators/features/cloudsync/src/cloudsync.c
++++ b/xlators/features/cloudsync/src/cloudsync.c
+@@ -280,6 +280,40 @@ out:
+ }
+
+ int32_t
++cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t off, dict_t *xdata)
++{
++ int ret = 0;
++ int op_errno = ENOMEM;
++
++ if (!xdata) {
++ xdata = dict_new();
++ if (!xdata) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
++ "failed to create "
++ "dict");
++ goto err;
++ }
++ }
++
++ ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
++ "dict_set failed key:"
++ " %s",
++ GF_CS_OBJECT_STATUS);
++ goto err;
++ }
++
++ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
++ return 0;
++err:
++ STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL);
++ return 0;
++}
++
++int32_t
+ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+@@ -2026,6 +2060,7 @@ cs_notify(xlator_t *this, int event, void *data, ...)
+
+ struct xlator_fops cs_fops = {
+ .stat = cs_stat,
++ .readdirp = cs_readdirp,
+ .truncate = cs_truncate,
+ .seek = cs_seek,
+ .statfs = cs_statfs,
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 065fced..2c19ce1 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -5472,6 +5472,8 @@ posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+ continue;
+ }
+
++ posix_update_iatt_buf(&stbuf, -1, hpath, dict);
++
+ if (!inode)
+ inode = inode_find(itable, stbuf.ia_gfid);
+
+--
+1.8.3.1
+
diff --git a/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch b/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch
new file mode 100644
index 0000000..1a73388
--- /dev/null
+++ b/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch
@@ -0,0 +1,94 @@
+From d8c98e9785e652692d928a2efbbb571703f728b0 Mon Sep 17 00:00:00 2001
+From: Anuradha Talur <atalur@commvault.com>
+Date: Wed, 24 Apr 2019 12:35:08 -0700
+Subject: [PATCH 156/169] cloudsync: Fix bug in cloudsync-fops-c.py
+
+In some of the fops generated by generator.py, the xdata request
+was not being wound to the child xlator correctly.
+
+This was happening because, even though the logic in
+cloudsync-fops-c.py was correct, generator.py was generating
+resultant code that omitted this logic.
+
+Made changes in cloudsync-fops-c.py so that correct code is
+produced.
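+
+The problematic generated pattern (visible in the hunk below) was:
+
+    local->xattr_req = xdata ? dict_ref (xdata) : (xdata = dict_new ());
+
+where a NULL return from dict_new() was never checked. The fix takes
+the reference (or creates the dict) first, checks the result, and only
+then assigns it to local->xattr_req.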
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/22617/
+
+> Change-Id: I6f25bdb36ede06fd03be32c04087a75639d79150
+> updates: bz#1642168
+> Signed-off-by: Anuradha Talur <atalur@commvault.com>
+
+Change-Id: I87cc71e98c2c6cec78a6e84850fc8d82f8dd4dfd
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172195
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/features/cloudsync/src/cloudsync-fops-c.py | 24 +++++++++++++++++++---
+ 1 file changed, 21 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+index 8878b70..c444ea6 100755
+--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py
++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+@@ -39,7 +39,15 @@ cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ else
+ state = GF_CS_LOCAL;
+
+- local->xattr_req = xdata ? dict_ref (xdata) : (xdata = dict_new ());
++ xdata = xdata ? dict_ref (xdata) : dict_new ();
++
++ if (!xdata) {
++ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ local->xattr_req = xdata;
+
+ ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+@@ -187,19 +195,29 @@ int32_t
+ cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+ {
++ int op_errno = EINVAL;
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = cs_local_init (this, frame, loc, NULL, GF_FOP_@UPNAME@);
+ if (!local) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local is NULL");
++ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (loc->inode->ia_type == IA_IFDIR)
+ goto wind;
+
+- local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
++ xdata = xdata ? dict_ref (xdata) : dict_new ();
++
++ if (!xdata) {
++ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ local->xattr_req = xdata;
+
+ ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+@@ -215,7 +233,7 @@ wind:
+
+ return 0;
+ err:
+- CS_STACK_UNWIND (@NAME@, frame, -1, errno, @CBK_ERROR_ARGS@);
++ CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@);
+
+ return 0;
+ }
+--
+1.8.3.1
+
diff --git a/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch b/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch
new file mode 100644
index 0000000..185a24a
--- /dev/null
+++ b/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch
@@ -0,0 +1,68 @@
+From 4a72ac20f728aa5c3141359ff89f1b61d4cd210a Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Fri, 17 May 2019 23:03:35 +0530
+Subject: [PATCH 157/169] afr/frame: Destroy frame after
+ afr_selfheal_entry_granular
+
+In function "afr_selfheal_entry_granular", after completing the
+heal we are not destroying the frame. This will lead to crash.
+when we execute statedump operation, where it tried to access
+xlator object. If this xlator object is freed as part of the
+graph destroy this will lead to an invalid memory access
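+
+The fix (see the hunk below) funnels every exit path through a common
+label that releases the copied frame:
+
+    out:
+        if (args.frame)
+            AFR_STACK_DESTROY(args.frame);
+        return ret;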
+
+Upstream patch: https://review.gluster.org/22743
+
+>Change-Id: I0a5e78e704ef257c3ac0087eab2c310e78fbe36d
+>fixes: bz#1708926
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I326354008e6d98376c8333d270f2f80036ad07f0
+BUG: 1716626
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172282
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index fc09b4c..a6890fa 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -832,6 +832,8 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ subvol = priv->children[subvol_idx];
+
+ args.frame = afr_copy_frame(frame);
++ if (!args.frame)
++ goto out;
+ args.xl = this;
+ /* args.heal_fd represents the fd associated with the original directory
+ * on which entry heal is being attempted.
+@@ -850,9 +852,10 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ * do not treat heal as failure.
+ */
+ if (is_src)
+- return -errno;
++ ret = -errno;
+ else
+- return 0;
++ ret = 0;
++ goto out;
+ }
+
+ ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args,
+@@ -862,7 +865,9 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+
+ if (args.mismatch == _gf_true)
+ ret = -1;
+-
++out:
++ if (args.frame)
++ AFR_STACK_DESTROY(args.frame);
+ return ret;
+ }
+
+--
+1.8.3.1
+
diff --git a/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch b/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch
new file mode 100644
index 0000000..d12e81d
--- /dev/null
+++ b/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch
@@ -0,0 +1,162 @@
+From 11b64d494c52004002f900888694d20ef8af6df6 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Sat, 11 May 2019 22:40:22 +0530
+Subject: [PATCH 158/169] glusterfsd/cleanup: Protect graph object under a lock
+
+While processing a cleanup_and_exit function, we are accessing a
+graph object without protecting it under a lock. A parallel cleanup
+of the graph is quite possible, which might lead to an invalid
+memory access.
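+
+The pattern applied by the fix (sketch of the hunks below):
+
+    pthread_mutex_lock(&ctx->cleanup_lock);
+    {
+        glusterfs_graph_fini(graph);
+        glusterfs_graph_destroy(graph);
+    }
+    pthread_mutex_unlock(&ctx->cleanup_lock);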
+
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22709/
+
+>Change-Id: Id05ca70d5b57e172b0401d07b6a1f5386c044e79
+>fixes: bz#1708926
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I55ab0525c79baa99a3bd929ee979c5519be5ab21
+BUG: 1716626
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172283
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ libglusterfs/src/graph.c | 58 +++++++++++++++----------
+ libglusterfs/src/statedump.c | 16 +++++--
+ tests/bugs/glusterd/optimized-basic-testcases.t | 4 +-
+ 3 files changed, 50 insertions(+), 28 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 4c8b02d..18fb2d9 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1392,8 +1392,12 @@ glusterfs_graph_cleanup(void *arg)
+ }
+ pthread_mutex_unlock(&ctx->notify_lock);
+
+- glusterfs_graph_fini(graph);
+- glusterfs_graph_destroy(graph);
++ pthread_mutex_lock(&ctx->cleanup_lock);
++ {
++ glusterfs_graph_fini(graph);
++ glusterfs_graph_destroy(graph);
++ }
++ pthread_mutex_unlock(&ctx->cleanup_lock);
+ out:
+ return NULL;
+ }
+@@ -1468,31 +1472,37 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+
+ if (!ctx || !ctx->active || !volfile_obj)
+ goto out;
+- parent_graph = ctx->active;
+- graph = volfile_obj->graph;
+- if (!graph)
+- goto out;
+- if (graph->first)
+- xl = graph->first;
+
+- last_xl = graph->last_xl;
+- if (last_xl)
+- last_xl->next = NULL;
+- if (!xl || xl->cleanup_starting)
+- goto out;
++ pthread_mutex_lock(&ctx->cleanup_lock);
++ {
++ parent_graph = ctx->active;
++ graph = volfile_obj->graph;
++ if (!graph)
++ goto unlock;
++ if (graph->first)
++ xl = graph->first;
++
++ last_xl = graph->last_xl;
++ if (last_xl)
++ last_xl->next = NULL;
++ if (!xl || xl->cleanup_starting)
++ goto unlock;
+
+- xl->cleanup_starting = 1;
+- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
+- "detaching child %s", volfile_obj->vol_id);
++ xl->cleanup_starting = 1;
++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
++ "detaching child %s", volfile_obj->vol_id);
+
+- list_del_init(&volfile_obj->volfile_list);
+- glusterfs_mux_xlator_unlink(parent_graph->top, xl);
+- parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+- parent_graph->xl_count -= graph->xl_count;
+- parent_graph->leaf_count -= graph->leaf_count;
+- default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
+- parent_graph->id++;
+- ret = 0;
++ list_del_init(&volfile_obj->volfile_list);
++ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
++ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
++ parent_graph->xl_count -= graph->xl_count;
++ parent_graph->leaf_count -= graph->leaf_count;
++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
++ parent_graph->id++;
++ ret = 0;
++ }
++unlock:
++ pthread_mutex_unlock(&ctx->cleanup_lock);
+ out:
+ if (!ret) {
+ list_del_init(&volfile_obj->volfile_list);
+diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
+index 0cf80c0..0d58f8f 100644
+--- a/libglusterfs/src/statedump.c
++++ b/libglusterfs/src/statedump.c
+@@ -805,11 +805,17 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+ int brick_count = 0;
+ int len = 0;
+
+- gf_proc_dump_lock();
+-
+ if (!ctx)
+ goto out;
+
++ /*
++ * Multiplexed daemons can change the active graph when attach/detach
++ * is called. So this has to be protected with the cleanup lock.
++ */
++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
++ pthread_mutex_lock(&ctx->cleanup_lock);
++ gf_proc_dump_lock();
++
+ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) &&
+ (ctx && ctx->active)) {
+ top = ctx->active->first;
+@@ -923,7 +929,11 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+ out:
+ GF_FREE(dump_options.dump_path);
+ dump_options.dump_path = NULL;
+- gf_proc_dump_unlock();
++ if (ctx) {
++ gf_proc_dump_unlock();
++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
++ pthread_mutex_unlock(&ctx->cleanup_lock);
++ }
+
+ return;
+ }
+diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
+index d700b5e..110f1b9 100644
+--- a/tests/bugs/glusterd/optimized-basic-testcases.t
++++ b/tests/bugs/glusterd/optimized-basic-testcases.t
+@@ -289,7 +289,9 @@ mkdir -p /xyz/var/lib/glusterd/abc
+ TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc
+ EXPECT 'Created' volinfo_field "test" 'Status';
+
+-EXPECT "1" generate_statedump_and_check_for_glusterd_info
+#While taking a statedump, there is a TRY_LOCK on call_frame, which may cause
+#failure. So adding an EXPECT_WITHIN
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" generate_statedump_and_check_for_glusterd_info
+
+ cleanup_statedump `pidof glusterd`
+ cleanup
+--
+1.8.3.1
+
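In essence the patch serializes graph teardown and its readers on `ctx->cleanup_lock`. A rough Python analogue of the idea, assuming a single shared lock (toy names, not the glusterfs API):

```python
import threading

cleanup_lock = threading.Lock()
graph = {"xlators": ["posix", "io-threads"]}  # toy stand-in for the graph

def graph_cleanup():
    global graph
    with cleanup_lock:  # like pthread_mutex_lock(&ctx->cleanup_lock)
        graph = None    # fini + destroy happen while the lock is held

def statedump():
    with cleanup_lock:  # the dumper takes the same lock first
        if graph is not None:
            print("dumping", graph["xlators"])

t = threading.Thread(target=graph_cleanup)
t.start()
statedump()  # either sees the full graph or none at all, never a torn one
t.join()
```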
diff --git a/0159-glusterd-add-an-op-version-check.patch b/0159-glusterd-add-an-op-version-check.patch
new file mode 100644
index 0000000..323ae95
--- /dev/null
+++ b/0159-glusterd-add-an-op-version-check.patch
@@ -0,0 +1,66 @@
+From bd087c3d2766b81b25ea7bbe425b55023fd12545 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Wed, 15 May 2019 07:35:45 +0530
+Subject: [PATCH 159/169] glusterd: add an op-version check
+
+Problem: "gluster v status" hangs in a heterogeneous cluster
+when issued from a non-upgraded node.
+
+Cause: commit 34e010d64 fixes the txn-opinfo mem leak in the
+op-sm framework by not setting the txn-opinfo if some
+conditions are true. When vol status is issued from a
+non-upgraded node, the command hangs on its upgraded peer,
+as the upgraded node sets the txn-opinfo based on the new
+conditions whereas non-upgraded nodes follow different
+conditions.
+
+Fix: Add an op-version check, so that all the nodes follow
+the same set of conditions to set txn-opinfo.
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22730/
+
+BUG: 1707246
+
+> fixes: bz#1710159
+> Change-Id: Ie1f353212c5931ddd1b728d2e6949dfe6225c4ab
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+Change-Id: Ie1f353212c5931ddd1b728d2e6949dfe6225c4ab
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172307
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 94a5e1f..d0c1a2c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -8158,9 +8158,12 @@ glusterd_op_sm()
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ xlator_t *this = NULL;
+ glusterd_op_info_t txn_op_info;
++ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
++ priv = this->private;
++ GF_ASSERT(priv);
+
+ ret = synclock_trylock(&gd_op_sm_lock);
+ if (ret) {
+@@ -8238,7 +8241,8 @@ glusterd_op_sm()
+ "Unable to clear "
+ "transaction's opinfo");
+ } else {
+- if (!(event_type == GD_OP_EVENT_STAGE_OP &&
++ if ((priv->op_version < GD_OP_VERSION_6_0) ||
++ !(event_type == GD_OP_EVENT_STAGE_OP &&
+ opinfo.state.state == GD_OP_STATE_STAGED &&
+ opinfo.skip_locking)) {
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+--
+1.8.3.1
+
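The fix gates the txn-opinfo decision on the cluster op-version so that every peer takes the same branch. A hedged sketch of that gating logic (the constant value and names here are illustrative):

```python
GD_OP_VERSION_6_0 = 60000  # illustrative; use the real op-version constant

def should_set_txn_opinfo(op_version, event_type, state, skip_locking):
    # Pre-6.0 clusters always set the txn opinfo, so upgraded and
    # non-upgraded peers follow the same conditions and nothing hangs.
    if op_version < GD_OP_VERSION_6_0:
        return True
    # On >= 6.0, skip only the staged, lock-skipping stage-op case.
    return not (event_type == "STAGE_OP" and state == "STAGED"
                and skip_locking)

print(should_set_txn_opinfo(50000, "STAGE_OP", "STAGED", True))  # True
print(should_set_txn_opinfo(60000, "STAGE_OP", "STAGED", True))  # False
```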
diff --git a/0160-geo-rep-Geo-rep-help-text-issue.patch b/0160-geo-rep-Geo-rep-help-text-issue.patch
new file mode 100644
index 0000000..efba5a4
--- /dev/null
+++ b/0160-geo-rep-Geo-rep-help-text-issue.patch
@@ -0,0 +1,41 @@
+From 77df6b8930fd4acf3d0c38220fa4317ee97d530f Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Thu, 9 May 2019 10:43:01 +0530
+Subject: [PATCH 160/169] geo-rep: Geo-rep help text issue
+
+Modified the geo-rep help text for better clarity.
+
+>fixes: bz#1652887
+>Change-Id: I40ef7ef709eaecf0125ab4b4a7517e2c5d1ef4a0
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/22689/
+
+BUG: 1480907
+Change-Id: I40ef7ef709eaecf0125ab4b4a7517e2c5d1ef4a0
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172316
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-volume.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 3432dbe..564aef7 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -3422,8 +3422,8 @@ struct cli_cmd volume_cmds[] = {
+ "reset all the reconfigured options"},
+
+ #if (SYNCDAEMON_COMPILE)
+- {"volume " GEOREP " [<VOLNAME>] [<SLAVE-URL>] {\\\n create [[ssh-port n] "
+- "[[no-verify] | [push-pem]]] [force] \\\n"
++ {"volume " GEOREP " [<MASTER-VOLNAME>] [<SLAVE-IP>]::[<SLAVE-VOLNAME>] {"
++ "\\\n create [[ssh-port n] [[no-verify] \\\n | [push-pem]]] [force] \\\n"
+ " | start [force] \\\n | stop [force] \\\n | pause [force] \\\n | resume "
+ "[force] \\\n"
+ " | config [[[\\!]<option>] [<value>]] \\\n | status "
+--
+1.8.3.1
+
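For reference, one way to drive the create command the corrected help text describes, sketched from Python; the volume names and host are hypothetical, and this assumes a working gluster CLI on PATH:

```python
import subprocess

# volume geo-replication <MASTER-VOLNAME> <SLAVE-IP>::<SLAVE-VOLNAME> create push-pem
cmd = ["gluster", "volume", "geo-replication",
       "mastervol", "192.0.2.10::slavevol",   # hypothetical names
       "create", "push-pem"]
subprocess.run(cmd, check=True)
```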
diff --git a/0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch b/0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch
new file mode 100644
index 0000000..4522ec4
--- /dev/null
+++ b/0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch
@@ -0,0 +1,289 @@
+From 69ac1fd2da7a57f2f0854412863911959bf71fde Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Tue, 2 Apr 2019 12:38:09 +0530
+Subject: [PATCH 161/169] geo-rep: Fix rename with existing destination with
+ same gfid
+
+Problem:
+ Geo-rep fails to sync the rename properly if the destination exists.
+The source file remains on the slave, leaving more files on the slave
+than on the master. A heavy rename workload like logrotate also
+caused a lot of ESTALE errors.
+
+Cause:
+ Geo-rep fails to sync a rename with an existing destination when the
+creation of the source file falls into the same batch of changelogs
+being processed. After fixing problematic gfids by verifying against
+the master, the CREATE was re-processed along with the original
+entries, leaving extra files on the slave and making the rename fail.
+
+Solution:
+ Entries need to be removed from the retrial list after fixing
+problematic gfids on the slave so that they are not re-created there.
+ Also treat ESTALE as EEXIST so that the error is properly handled
+by verifying the op on the master volume.
+
+Backport of:
+ > Patch: https://review.gluster.org/22519
+ > Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9
+ > BUG: 1694820
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+ > Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9
+fixes: bz#1708121
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172393
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/syncdaemon/master.py | 41 +++++++++++++++++++++--
+ geo-replication/syncdaemon/resource.py | 10 ++++--
+ tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t | 5 +++
+ tests/00-geo-rep/georep-basic-dr-rsync.t | 5 +++
+ tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t | 5 +++
+ tests/00-geo-rep/georep-basic-dr-tarssh.t | 5 +++
+ tests/geo-rep.rc | 36 ++++++++++++++++++++
+ 7 files changed, 102 insertions(+), 5 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
+index 3da7610..42c86d7 100644
+--- a/geo-replication/syncdaemon/master.py
++++ b/geo-replication/syncdaemon/master.py
+@@ -65,6 +65,9 @@ def _volinfo_hook_relax_foreign(self):
+ def edct(op, **ed):
+ dct = {}
+ dct['op'] = op
++ # This is used in automatic gfid conflict resolution.
++ # When marked True, it's skipped during re-processing.
++ dct['skip_entry'] = False
+ for k in ed:
+ if k == 'stat':
+ st = ed[k]
+@@ -792,6 +795,7 @@ class GMasterChangelogMixin(GMasterCommon):
+ pfx = gauxpfx()
+ fix_entry_ops = []
+ failures1 = []
++ remove_gfids = set()
+ for failure in failures:
+ if failure[2]['name_mismatch']:
+ pbname = failure[2]['slave_entry']
+@@ -822,6 +826,18 @@ class GMasterChangelogMixin(GMasterCommon):
+ edct('UNLINK',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
++ remove_gfids.add(slave_gfid)
++ if op in ['RENAME']:
++ # If renamed gfid doesn't exists on master, remove
++ # rename entry and unlink src on slave
++ st = lstat(os.path.join(pfx, failure[0]['gfid']))
++ if isinstance(st, int) and st == ENOENT:
++ logging.debug("Unlink source %s" % repr(failure))
++ remove_gfids.add(failure[0]['gfid'])
++ fix_entry_ops.append(
++ edct('UNLINK',
++ gfid=failure[0]['gfid'],
++ entry=failure[0]['entry']))
+ # Takes care of scenarios of hardlinks/renames on master
+ elif not isinstance(st, int):
+ if matching_disk_gfid(slave_gfid, pbname):
+@@ -831,7 +847,12 @@ class GMasterChangelogMixin(GMasterCommon):
+ ' Safe to ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+- entries.remove(failure[0])
++ remove_gfids.add(failure[0]['gfid'])
++ if op == 'RENAME':
++ fix_entry_ops.append(
++ edct('UNLINK',
++ gfid=failure[0]['gfid'],
++ entry=failure[0]['entry']))
+ # The file exists on master but with different name.
+ # Probably renamed and got missed during xsync crawl.
+ elif failure[2]['slave_isdir']:
+@@ -856,7 +877,10 @@ class GMasterChangelogMixin(GMasterCommon):
+ 'take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+- entries.remove(failure[0])
++ try:
++ entries.remove(failure[0])
++ except ValueError:
++ pass
+ else:
+ rename_dict = edct('RENAME', gfid=slave_gfid,
+ entry=src_entry,
+@@ -896,7 +920,10 @@ class GMasterChangelogMixin(GMasterCommon):
+ 'ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+- entries.remove(failure[0])
++ try:
++ entries.remove(failure[0])
++ except ValueError:
++ pass
+ else:
+ logging.info(lf('Fixing ENOENT error in slave. Create '
+ 'parent directory on slave.',
+@@ -913,6 +940,14 @@ class GMasterChangelogMixin(GMasterCommon):
+ edct('MKDIR', gfid=pargfid, entry=dir_entry,
+ mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
+
++ logging.debug("remove_gfids: %s" % repr(remove_gfids))
++ if remove_gfids:
++ for e in entries:
++ if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \
++ and e['gfid'] in remove_gfids:
++ logging.debug("Removed entry op from retrial list: entry: %s" % repr(e))
++ e['skip_entry'] = True
++
+ if fix_entry_ops:
+ # Process deletions of entries whose gfids are mismatched
+ failures1 = self.slave.server.entry_ops(fix_entry_ops)
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index c290d86..f54ccd9 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -426,7 +426,7 @@ class Server(object):
+ e['stat']['uid'] = uid
+ e['stat']['gid'] = gid
+
+- if cmd_ret == EEXIST:
++ if cmd_ret in [EEXIST, ESTALE]:
+ if dst:
+ en = e['entry1']
+ else:
+@@ -510,6 +510,12 @@ class Server(object):
+ entry = e['entry']
+ uid = 0
+ gid = 0
++
++ # Skip entry processing if it's marked true during gfid
++ # conflict resolution
++ if e['skip_entry']:
++ continue
++
+ if e.get("stat", {}):
+ # Copy UID/GID value and then reset to zero. Copied UID/GID
+ # will be used to run chown once entry is created.
+@@ -688,7 +694,7 @@ class Server(object):
+ if blob:
+ cmd_ret = errno_wrap(Xattr.lsetxattr,
+ [pg, 'glusterfs.gfid.newfile', blob],
+- [EEXIST, ENOENT],
++ [EEXIST, ENOENT, ESTALE],
+ [ESTALE, EINVAL, EBUSY])
+ collect_failure(e, cmd_ret, uid, gid)
+
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
+index 67ac167..1a55ed2 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
+@@ -203,6 +203,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+ TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+ TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
++#rename with existing destination case BUG:1694820
++TEST create_rename_with_existing_destination ${master_mnt}
++#verify rename with existing destination case BUG:1694820
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
++
+ #Verify arequal for whole volume
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
+
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
+index 8b64370..d0c0fc9 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
+@@ -204,6 +204,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+ TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+ TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
++#rename with existing destination case BUG:1694820
++TEST create_rename_with_existing_destination ${master_mnt}
++#verify rename with existing destination case BUG:1694820
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
++
+ #Verify arequal for whole volume
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
+
+diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
+index 1726d0b..cb530ad 100644
+--- a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
++++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
+@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/s
+ #rsnapshot usecase
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
++#rename with existing destination case BUG:1694820
++TEST create_rename_with_existing_destination ${master_mnt}
++#verify rename with existing destination case BUG:1694820
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
++
+ #Verify arequal for whole volume
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
+
+diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t
+index c5d16ac..9e2f613 100644
+--- a/tests/00-geo-rep/georep-basic-dr-tarssh.t
++++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t
+@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+ #rsnapshot usecase
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
++#rename with existing destination case BUG:1694820
++TEST create_rename_with_existing_destination ${master_mnt}
++#verify rename with existing destination case BUG:1694820
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
++
+ #Verify arequal for whole volume
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
+
+diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
+index d723129..e357ba8 100644
+--- a/tests/geo-rep.rc
++++ b/tests/geo-rep.rc
+@@ -403,3 +403,39 @@ function check_slave_read_only()
+ gluster volume info $1 | grep 'features.read-only: on'
+ echo $?
+ }
++
++function create_rename_with_existing_destination()
++{
++ dir=$1/rename_with_existing_destination
++ mkdir $dir
++ for i in {1..5}
++ do
++ echo "Data_set$i" > $dir/data_set$i
++ mv $dir/data_set$i $dir/data_set -f
++ done
++}
++
++function verify_rename_with_existing_destination()
++{
++ dir=$1/rename_with_existing_destination
++
++ if [ ! -d $dir ]; then
++ echo 1
++ elif [ ! -f $dir/data_set ]; then
++ echo 2
++ elif [ -f $dir/data_set1 ]; then
++ echo 3
++ elif [ -f $dir/data_set2 ]; then
++ echo 4
++ elif [ -f $dir/data_set3 ]; then
++ echo 5
++ elif [ -f $dir/data_set4 ]; then
++ echo 6
++ elif [ -f $dir/data_set5 ]; then
++ echo 7
++ elif test "XData_set5" != "X$(cat $dir/data_set)"; then
++ echo 8
++ else
++ echo 0
++ fi
++}
+--
+1.8.3.1
+
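The heart of the fix is the bookkeeping around `remove_gfids` and `skip_entry`: gfids whose slave copies were unlinked during conflict resolution are collected, and matching create-like ops are marked so the retry pass skips them. A condensed Python sketch of that flow (simplified from the gsyncd code above):

```python
def mark_skippable(entries, remove_gfids):
    """Mark entry ops whose gfid was unlinked on the slave so that
    re-processing does not re-create them (the 'skip_entry' flag)."""
    for e in entries:
        if e["op"] in ("MKDIR", "MKNOD", "CREATE", "RENAME") \
                and e["gfid"] in remove_gfids:
            e["skip_entry"] = True

entries = [
    {"op": "CREATE", "gfid": "g1", "skip_entry": False},
    {"op": "RENAME", "gfid": "g2", "skip_entry": False},
]
mark_skippable(entries, {"g1"})
# Only entries not marked skip_entry are replayed, as in entry_ops().
print([e["op"] for e in entries if not e["skip_entry"]])  # ['RENAME']
```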
diff --git a/0162-geo-rep-Fix-sync-method-config.patch b/0162-geo-rep-Fix-sync-method-config.patch
new file mode 100644
index 0000000..ed8ab94
--- /dev/null
+++ b/0162-geo-rep-Fix-sync-method-config.patch
@@ -0,0 +1,210 @@
+From d148248aa3f0dfe7356a13d6fd029f0c6b3746cf Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Wed, 8 May 2019 10:56:31 +0530
+Subject: [PATCH 162/169] geo-rep: Fix sync-method config
+
+Problem:
+When 'use_tarssh' is set to true, the command exits with a success
+message but the default 'rsync' is still used as the sync engine.
+The new config 'sync-method' cannot be set from the cli.
+
+Analysis and Fix:
+The 'use_tarssh' config is deprecated with the new
+config framework, and 'sync-method' is the new
+config to choose the sync method, i.e. tarssh or rsync.
+This patch fixes the 'sync-method' config. The allowed
+values are tarssh and rsync.
+
+Backport of:
+ > Patch: https://review.gluster.org/22683
+ > Change-Id: I0edb0319cad0455b29e49f2f08a64ce324735e84
+ > BUG: 1707686
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: I0edb0319cad0455b29e49f2f08a64ce324735e84
+fixes: bz#1708067
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172394
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/gsyncd.conf.in | 9 +++++----
+ geo-replication/syncdaemon/resource.py | 7 ++++---
+ tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t | 4 ++--
+ tests/00-geo-rep/georep-basic-dr-rsync.t | 4 ++--
+ tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t | 6 +++---
+ tests/00-geo-rep/georep-basic-dr-tarssh.t | 6 +++---
+ tests/geo-rep.rc | 3 ++-
+ 7 files changed, 21 insertions(+), 18 deletions(-)
+
+diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
+index 6160c7c..c2e4f0d 100644
+--- a/geo-replication/gsyncd.conf.in
++++ b/geo-replication/gsyncd.conf.in
+@@ -128,10 +128,11 @@ value=
+ value=5
+ type=int
+
+-[use-tarssh]
+-value=false
+-type=bool
+-help=Use sync-mode as tarssh
++[sync-method]
++value=rsync
++help=Sync method for data sync. Available methods are tar over ssh and rsync. Default is rsync.
++validation=choice
++allowed_values=tarssh,rsync
+
+ [remote-gsyncd]
+ value =
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index f54ccd9..522279b 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -1512,7 +1512,7 @@ class SSH(object):
+
+ return po
+
+- def tarssh(self, files, slaveurl, log_err=False):
++ def tarssh(self, files, log_err=False):
+ """invoke tar+ssh
+ -z (compress) can be use if needed, but omitting it now
+ as it results in weird error (tar+ssh errors out (errcode: 2)
+@@ -1520,10 +1520,11 @@ class SSH(object):
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+- (host, rdir) = slaveurl.split(':')
++ (host, rdir) = self.slaveurl.split(':')
++
+ tar_cmd = ["tar"] + \
+ ["--sparse", "-cf", "-", "--files-from", "-"]
+- ssh_cmd = gconf.get("ssh-command-tar").split() + \
++ ssh_cmd = gconf.get("ssh-command").split() + \
+ gconf.get("ssh-options-tar").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ [host, "tar"] + \
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
+index 1a55ed2..8b90347 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
+@@ -159,7 +159,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+ #logrotate
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+ #CREATE+RENAME
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+@@ -209,7 +209,7 @@ TEST create_rename_with_existing_destination ${master_mnt}
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+ #Verify arequal for whole volume
+-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave stop
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
+index d0c0fc9..428e9ed 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
+@@ -160,7 +160,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+ #logrotate
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+ #CREATE+RENAME
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+@@ -210,7 +210,7 @@ TEST create_rename_with_existing_destination ${master_mnt}
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+ #Verify arequal for whole volume
+-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave stop
+diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
+index cb530ad..8fed929 100644
+--- a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
++++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
+@@ -81,7 +81,7 @@ TEST $GEOREP_CLI $master $slave config use_meta_volume true
+ TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+ #Config tarssh as sync-engine
+-TEST $GEOREP_CLI $master $slave config use_tarssh true
++TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+ #Wait for common secret pem file to be created
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+@@ -162,7 +162,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+ #logrotate
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+ #CREATE+RENAME
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+@@ -208,7 +208,7 @@ TEST create_rename_with_existing_destination ${master_mnt}
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+ #Verify arequal for whole volume
+-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave stop
+diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t
+index 9e2f613..feb2de7 100644
+--- a/tests/00-geo-rep/georep-basic-dr-tarssh.t
++++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t
+@@ -81,7 +81,7 @@ TEST $GEOREP_CLI $master $slave config use_meta_volume true
+ TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+ #Config tarssh as sync-engine
+-TEST $GEOREP_CLI $master $slave config use_tarssh true
++TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+ #Wait for common secret pem file to be created
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+@@ -162,7 +162,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+ #logrotate
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+ #CREATE+RENAME
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+@@ -208,7 +208,7 @@ TEST create_rename_with_existing_destination ${master_mnt}
+ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+ #Verify arequal for whole volume
+-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave stop
+diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
+index e357ba8..2035b9f 100644
+--- a/tests/geo-rep.rc
++++ b/tests/geo-rep.rc
+@@ -168,7 +168,8 @@ function arequal_checksum()
+ {
+ master=$1
+ slave=$2
+- diff <(arequal-checksum -p $master) <(arequal-checksum -p $slave) | wc -l
++ ret=$(diff <(arequal-checksum -p $master) <(arequal-checksum -p $slave) | wc -l)
++ echo x$ret
+ }
+
+ function symlink_ok()
+--
+1.8.3.1
+
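The new option is declared with `validation=choice` and `allowed_values=tarssh,rsync`; the test change also prefixes the arequal output with `x` so an empty result cannot be mistaken for a matching `0`. A minimal sketch of that kind of choice validation (not gsyncd's actual config framework):

```python
ALLOWED_SYNC_METHODS = ("rsync", "tarssh")  # rsync is the default

def set_sync_method(config, value):
    # Mirrors validation=choice / allowed_values=tarssh,rsync
    if value not in ALLOWED_SYNC_METHODS:
        raise ValueError("sync-method must be one of %s, got %r"
                         % (", ".join(ALLOWED_SYNC_METHODS), value))
    config["sync-method"] = value

cfg = {"sync-method": "rsync"}
set_sync_method(cfg, "tarssh")
print(cfg)                      # {'sync-method': 'tarssh'}
set_sync_method(cfg, "scp")     # raises ValueError
```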
diff --git a/0163-geo-rep-Fix-sync-hang-with-tarssh.patch b/0163-geo-rep-Fix-sync-hang-with-tarssh.patch
new file mode 100644
index 0000000..9b90128
--- /dev/null
+++ b/0163-geo-rep-Fix-sync-hang-with-tarssh.patch
@@ -0,0 +1,255 @@
+From 29ec87484b1ee3ad6417c37726db8aa9296f3a83 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Wed, 8 May 2019 11:26:06 +0530
+Subject: [PATCH 163/169] geo-rep: Fix sync hang with tarssh
+
+Problem:
+Geo-rep sync hangs when tarssh is used as the sync
+engine under heavy workload.
+
+Analysis and Root cause:
+The tar process was found to be hung. On further
+debugging, the stderr buffer of the tar process on the
+master turned out to be full, i.e., 64k.
+When the buffer was copied to a file from /proc/pid/fd/2,
+the hang resolved.
+
+This can happen when files picked by the tar process
+to sync no longer exist on the master. If this count
+reaches around 1k, the stderr buffer fills up.
+
+Fix:
+The tar process is executed using Popen with stderr as PIPE.
+The final execution is something like below.
+
+tar | ssh <args> root@slave tar --overwrite -xf - -C <path>
+
+The code waited on the ssh process first using communicate() and
+then on tar. Note that communicate() reads stdout and stderr. So
+when the stderr of the tar process fills up, nobody reads it until
+the untar via ssh completes; that can never happen, so it deadlocks.
+Hence we should wait on both processes in parallel, so that stderr
+is read on both.
+
+Backport of:
+ > Patch: https://review.gluster.org/22684
+ > Change-Id: I609c7cc5c07e210c504771115b4d551a2e891adf
+ > BUG: 1707728
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: I609c7cc5c07e210c504771115b4d551a2e891adf
+fixes: bz#1708116
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172395
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/syncdaemon/resource.py | 22 ++++--
+ tests/00-geo-rep/georep-stderr-hang.t | 128 +++++++++++++++++++++++++++++++++
+ tests/geo-rep.rc | 17 +++++
+ 3 files changed, 163 insertions(+), 4 deletions(-)
+ create mode 100644 tests/00-geo-rep/georep-stderr-hang.t
+
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index 522279b..b16db60 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -1540,15 +1540,29 @@ class SSH(object):
+
+ p0.stdin.close()
+ p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits.
+- # wait for tar to terminate, collecting any errors, further
+- # waiting for transfer to complete
+- _, stderr1 = p1.communicate()
+
+ # stdin and stdout of p0 is already closed, Reset to None and
+ # wait for child process to complete
+ p0.stdin = None
+ p0.stdout = None
+- p0.communicate()
++
++ def wait_for_tar(p0):
++ _, stderr = p0.communicate()
++ if log_err:
++ for errline in stderr.strip().split("\n")[:-1]:
++ if "No such file or directory" not in errline:
++ logging.error(lf("SYNC Error",
++ sync_engine="Tarssh",
++ error=errline))
++
++ t = syncdutils.Thread(target=wait_for_tar, args=(p0, ))
++ # wait for tar to terminate, collecting any errors, further
++ # waiting for transfer to complete
++ t.start()
++
++ # wait for ssh process
++ _, stderr1 = p1.communicate()
++ t.join()
+
+ if log_err:
+ for errline in stderr1.strip().split("\n")[:-1]:
+diff --git a/tests/00-geo-rep/georep-stderr-hang.t b/tests/00-geo-rep/georep-stderr-hang.t
+new file mode 100644
+index 0000000..496f0e6
+--- /dev/null
++++ b/tests/00-geo-rep/georep-stderr-hang.t
+@@ -0,0 +1,128 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../geo-rep.rc
++. $(dirname $0)/../env.rc
++
++SCRIPT_TIMEOUT=500
++
++AREQUAL_PATH=$(dirname $0)/../utils
++test "`uname -s`" != "Linux" && {
++ CFLAGS="$CFLAGS -lintl";
++}
++build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
++
++### Basic Tests with Distribute Replicate volumes
++
++##Cleanup and start glusterd
++cleanup;
++TEST glusterd;
++TEST pidof glusterd
++
++
++##Variables
++GEOREP_CLI="$CLI volume geo-replication"
++master=$GMV0
++SH0="127.0.0.1"
++slave=${SH0}::${GSV0}
++num_active=2
++num_passive=2
++master_mnt=$M0
++slave_mnt=$M1
++
++############################################################
++#SETUP VOLUMES AND GEO-REPLICATION
++############################################################
++
++##create_and_start_master_volume
++TEST $CLI volume create $GMV0 $H0:$B0/${GMV0}1;
++TEST $CLI volume start $GMV0
++
++##create_and_start_slave_volume
++TEST $CLI volume create $GSV0 $H0:$B0/${GSV0}1;
++TEST $CLI volume start $GSV0
++TEST $CLI volume set $GSV0 performance.stat-prefetch off
++TEST $CLI volume set $GSV0 performance.quick-read off
++TEST $CLI volume set $GSV0 performance.readdir-ahead off
++TEST $CLI volume set $GSV0 performance.read-ahead off
++
++##Mount master
++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
++
++##Mount slave
++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
++
++############################################################
++#BASIC GEO-REPLICATION TESTS
++############################################################
++
++TEST create_georep_session $master $slave
++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Created"
++
++#Config gluster-command-dir
++TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
++
++#Config gluster-command-dir
++TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
++
++#Set changelog roll-over time to 45 secs
++TEST $CLI volume set $GMV0 changelog.rollover-time 45
++
++#Wait for common secret pem file to be created
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
++
++#Verify the keys are distributed
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
++
++#Set sync-jobs to 1
++TEST $GEOREP_CLI $master $slave config sync-jobs 1
++
++#Start_georep
++TEST $GEOREP_CLI $master $slave start
++
++touch $M0
++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Changelog Crawl"
++
++#Check History Crawl.
++TEST $GEOREP_CLI $master $slave stop
++TEST create_data_hang "rsync_hang"
++TEST create_data "history_rsync"
++TEST $GEOREP_CLI $master $slave start
++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
++
++#Verify arequal for whole volume
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
++
++#Stop Geo-rep
++TEST $GEOREP_CLI $master $slave stop
++
++#Config tarssh as sync-engine
++TEST $GEOREP_CLI $master $slave config sync-method tarssh
++
++#Create tarssh hang data
++TEST create_data_hang "tarssh_hang"
++TEST create_data "history_tar"
++
++TEST $GEOREP_CLI $master $slave start
++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
++
++#Verify arequal for whole volume
++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
++
++#Stop Geo-rep
++TEST $GEOREP_CLI $master $slave stop
++
++#Delete Geo-rep
++TEST $GEOREP_CLI $master $slave delete
++
++#Cleanup are-equal binary
++TEST rm $AREQUAL_PATH/arequal-checksum
++
++#Cleanup authorized keys
++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
++
++cleanup;
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
+index 2035b9f..e4f014e 100644
+--- a/tests/geo-rep.rc
++++ b/tests/geo-rep.rc
+@@ -101,6 +101,23 @@ function create_data()
+ chown 1000:1000 ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ
+ }
+
++function create_data_hang()
++{
++ prefix=$1
++ mkdir ${master_mnt}/${prefix}
++ cd ${master_mnt}/${prefix}
++ # ~1k files is required with 1 sync-job and hang happens if
++ # stderr buffer of tar/ssh executed with Popen is full (i.e., 64k).
++ # 64k is hit when ~800 files were not found while syncing data
++ # from master. So around 1k files is required to hit the condition.
++ for i in {1..1000}
++ do
++ echo "test data" > file$i
++ mv -f file$i file
++ done
++ cd -
++}
++
+ function chown_file_ok()
+ {
+ local file_owner=$(stat --format "%u:%g" "$1")
+--
+1.8.3.1
+
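The deadlock is the classic Popen pitfall: a process whose stderr is a PIPE blocks once the 64k pipe buffer fills, so each pipe needs its own reader. The patch drains tar's stderr in a helper thread while the main thread waits on ssh; a self-contained Python sketch of the pattern (using `cat` in place of the ssh side):

```python
import subprocess
import threading

# tar | <downstream>, both created with stderr=PIPE as in gsyncd
p0 = subprocess.Popen(["tar", "-cf", "-", "missing1", "missing2"],
                      stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p1 = subprocess.Popen(["cat"], stdin=p0.stdout,
                      stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
p0.stdout.close()  # allow p0 to receive SIGPIPE if p1 exits
p0.stdout = None   # communicate() should now only read stderr

tar_err = {}
def drain_tar(p):
    # Reading p0's stderr concurrently keeps its pipe from filling
    # while the main thread blocks on the downstream process.
    _, tar_err["stderr"] = p.communicate()

t = threading.Thread(target=drain_tar, args=(p0,))
t.start()
_, down_err = p1.communicate()  # wait for the "ssh" side
t.join()
print(tar_err["stderr"].decode(errors="replace"))
```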
diff --git a/0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch b/0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch
new file mode 100644
index 0000000..78fa34b
--- /dev/null
+++ b/0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch
@@ -0,0 +1,165 @@
+From c2b1c50f06cc59b47c9c834617dff2aed7177a78 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Mon, 18 Mar 2019 12:54:54 +0530
+Subject: [PATCH 164/169] cluster/ec: Fix handling of heal info cases without
+ locks
+
+When we use the heal info command, it takes a lot of time, as in
+some cases it takes a lock on entries to find out whether the
+entry actually needs heal or not.
+
+There are some cases where we can avoid these locks and still
+conclude whether the entry needs heal or not.
+
+1 - We do a lookup (without a lock) on an entry, which we found in
+.glusterfs/indices/xattrop, and find that the lock count is
+zero. Now if the file has the dirty bit set on any or all
+bricks, we can say that this entry needs heal.
+
+2 - If the lock count is one and dirty is greater than 1,
+it means that some fop left the dirty bit set, which made the
+dirty count of the current fop (which has taken the lock)
+more than one. At this point too we can definitely say that
+this entry needs heal.
+
+This patch modifies the code to take the above two points into
+consideration.
+It also changes the code not to call ec_heal_inspect if ec_heal_do
+was called from client-side heal. Client-side heal triggers heal
+only when it is sure that heal is required.
+
+[We have changed the code not to call heal for lookup]
+
+Upstream patch -
+https://review.gluster.org/#/c/glusterfs/+/22372/
+
+Fixes: bz#1716385
+Change-Id: I7f09f0ecd12f65a353297aefd57026fd2bebdf9c
+Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172579
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/cluster/ec/src/ec-heal.c | 42 ++++++++++++++++------------------------
+ 1 file changed, 17 insertions(+), 25 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 3aa04fb..2fa1f11 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2541,13 +2541,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+
+ /* Mount triggers heal only when it detects that it must need heal, shd
+ * triggers heals periodically which need not be thorough*/
+- ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false,
+- !ec->shd.iamshd, &need_heal);
++ if (ec->shd.iamshd) {
++ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
++ &need_heal);
+
+- if (need_heal == EC_HEAL_NONEED) {
+- gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+- "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+- goto out;
++ if (need_heal == EC_HEAL_NONEED) {
++ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
++ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
++ goto out;
++ }
+ }
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+@@ -2902,7 +2904,7 @@ out:
+ static int32_t
+ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
+ gf_boolean_t self_locked, int32_t lock_count,
+- ec_heal_need_t *need_heal)
++ ec_heal_need_t *need_heal, uint64_t *versions)
+ {
+ int i = 0;
+ int source_count = 0;
+@@ -2912,7 +2914,7 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
+ *need_heal = EC_HEAL_NONEED;
+ if (self_locked || lock_count == 0) {
+ for (i = 0; i < ec->nodes; i++) {
+- if (dirty[i]) {
++ if (dirty[i] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MUST;
+ goto out;
+ }
+@@ -2928,6 +2930,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
+ *need_heal = EC_HEAL_MUST;
+ goto out;
+ }
++ if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
++ *need_heal = EC_HEAL_MAYBE;
++ }
+ }
+ }
+ } else {
+@@ -2948,7 +2953,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ unsigned char *healed_sinks = NULL;
+ uint64_t *meta_versions = NULL;
+ int ret = 0;
+- int i = 0;
+
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+@@ -2961,15 +2965,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ }
+
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+- need_heal);
+- if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
+- for (i = 1; i < ec->nodes; i++) {
+- if (meta_versions[i] != meta_versions[0]) {
+- *need_heal = EC_HEAL_MUST;
+- goto out;
+- }
+- }
+- }
++ need_heal, meta_versions);
+ out:
+ return ret;
+ }
+@@ -3005,7 +3001,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ }
+
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+- need_heal);
++ need_heal, data_versions);
+ out:
+ return ret;
+ }
+@@ -3033,7 +3029,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ }
+
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+- need_heal);
++ need_heal, data_versions);
+ out:
+ return ret;
+ }
+@@ -3131,10 +3127,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ need_heal:
+ ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
+ need_heal);
+-
+- if (!self_locked && *need_heal == EC_HEAL_MUST) {
+- *need_heal = EC_HEAL_MAYBE;
+- }
+ out:
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+@@ -3220,7 +3212,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
+
+ ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
+ _gf_false, &need_heal);
+- if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
++ if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
+ goto set_heal;
+ }
+ need_heal = EC_HEAL_NONEED;
+--
+1.8.3.1
+
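The resulting decision table can be read off `_need_heal_calculate`: with no lock (or a self lock), any dirty bit or version mismatch means the entry must be healed; with exactly one lock, dirty > 1 means must-heal, while mismatched dirty/version counts only mean maybe. A hedged Python rendering of that logic (field names illustrative, multi-lock fallback assumed):

```python
MUST, MAYBE, NONEED = "MUST", "MAYBE", "NONEED"

def need_heal(dirty, versions, self_locked, lock_count):
    """Condensed view of _need_heal_calculate() after the patch."""
    if self_locked or lock_count == 0:
        # No competing fop: a dirty bit or version mismatch on any
        # brick is conclusive.
        if any(d or v != versions[0] for d, v in zip(dirty, versions)):
            return MUST
        return NONEED
    if lock_count == 1:
        # One in-flight fop: dirty > 1 means a previous fop left its
        # dirty bit behind, so heal is definitely needed.
        if any(d > 1 for d in dirty):
            return MUST
        if any(d != dirty[0] or v != versions[0]
               for d, v in zip(dirty, versions)):
            return MAYBE
        return NONEED
    return MAYBE  # several locks in flight: can't conclude without locking

print(need_heal([0, 0, 1], [5, 5, 5], False, 0))  # MUST
print(need_heal([1, 1, 1], [5, 5, 5], False, 1))  # NONEED
```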
diff --git a/0165-tests-shd-Add-test-coverage-for-shd-mux.patch b/0165-tests-shd-Add-test-coverage-for-shd-mux.patch
new file mode 100644
index 0000000..5398a18
--- /dev/null
+++ b/0165-tests-shd-Add-test-coverage-for-shd-mux.patch
@@ -0,0 +1,442 @@
+From b7f832288d2d2e57231d90765afc049ad7cb2f9d Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 9 May 2019 14:07:48 +0530
+Subject: [PATCH 165/169] tests/shd: Add test coverage for shd mux
+
+This patch adds more test cases for shd mux.
+The test cases include:
+1) Creating multiple volumes to check the attach and detach
+ of self-heal daemon requests.
+2) Making sure the healing happens in all scenarios
+3) After a volume detach, making sure the threads of the detached
+ volume are all cleaned up.
+4) Repeating all the above tests for an ec volume
+5) Node reboot case
+6) glusterd restart cases
+7) Add-brick/remove-brick
+8) Converting a distributed volume to a disperse volume
+9) Converting a replicated volume to a distributed volume
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22697/
+
+>Change-Id: I7c317ef9d23a45ffd831157e4890d7c83a8fce7b
+>fixes: bz#1708929
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: Ie732ead9413bd32b8c262303468a0720538334fb
+BUG: 1704562
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172634
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/glusterd-restart-shd-mux.t | 96 +++++++++++++++++++++
+ tests/basic/shd-mux.t | 149 +++++++++++++++++++++++++++++++++
+ tests/basic/volume-scale-shd-mux.t | 112 +++++++++++++++++++++++++
+ tests/volume.rc | 15 ++++
+ 4 files changed, 372 insertions(+)
+ create mode 100644 tests/basic/glusterd-restart-shd-mux.t
+ create mode 100644 tests/basic/shd-mux.t
+ create mode 100644 tests/basic/volume-scale-shd-mux.t
+
+diff --git a/tests/basic/glusterd-restart-shd-mux.t b/tests/basic/glusterd-restart-shd-mux.t
+new file mode 100644
+index 0000000..a50af9d
+--- /dev/null
++++ b/tests/basic/glusterd-restart-shd-mux.t
+@@ -0,0 +1,96 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++
++cleanup;
++
++TESTS_EXPECTED_IN_LOOP=20
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
++TEST $CLI volume set $V0 cluster.background-self-heal-count 0
++TEST $CLI volume set $V0 cluster.eager-lock off
++TEST $CLI volume set $V0 performance.flush-behind off
++TEST $CLI volume start $V0
++
++for i in $(seq 1 3); do
++ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
++ TEST $CLI volume start ${V0}_afr$i
++ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
++ TEST $CLI volume start ${V0}_ec$i
++done
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++
++#Stop the glusterd
++TEST pkill glusterd
++#Only stopping glusterd, so there will be one shd
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" shd_count
++TEST glusterd
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
++#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++shd_pid=$(get_shd_mux_pid $V0)
++for i in $(seq 1 3); do
++ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
++ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
++ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
++ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
++done
++
++#Reboot a node scenario
++TEST pkill gluster
++#Killed all gluster processes, so there will be no shd
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
++
++TEST glusterd
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++
++#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
++#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++shd_pid=$(get_shd_mux_pid $V0)
++for i in $(seq 1 3); do
++ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
++ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
++ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
++ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
++done
++
++for i in $(seq 1 3); do
++ TEST $CLI volume stop ${V0}_afr$i
++ TEST $CLI volume stop ${V0}_ec$i
++done
++
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST kill_brick $V0 $H0 $B0/${V0}3
++
++TEST touch $M0/foo{1..100}
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^204$" get_pending_heal_count $V0
++
++TEST $CLI volume start ${V0} force
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++TEST rm -rf $M0/*
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++
++
++TEST $CLI volume stop ${V0}
++TEST $CLI volume delete ${V0}
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^0$" shd_count
++
++cleanup
+diff --git a/tests/basic/shd-mux.t b/tests/basic/shd-mux.t
+new file mode 100644
+index 0000000..e42a34a
+--- /dev/null
++++ b/tests/basic/shd-mux.t
+@@ -0,0 +1,149 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++
++cleanup;
++
++TESTS_EXPECTED_IN_LOOP=16
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
++TEST $CLI volume set $V0 cluster.background-self-heal-count 0
++TEST $CLI volume set $V0 cluster.eager-lock off
++TEST $CLI volume set $V0 performance.flush-behind off
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++shd_pid=$(get_shd_mux_pid $V0)
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++#Create a one more volume
++TEST $CLI volume create ${V0}_1 replica 3 $H0:$B0/${V0}_1{0,1,2,3,4,5}
++TEST $CLI volume start ${V0}_1
++
++#Check whether the shd has multiplexed or not
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
++
++TEST $CLI volume set ${V0}_1 cluster.background-self-heal-count 0
++TEST $CLI volume set ${V0}_1 cluster.eager-lock off
++TEST $CLI volume set ${V0}_1 performance.flush-behind off
++TEST $GFS --volfile-id=/${V0}_1 --volfile-server=$H0 $M1
++
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST kill_brick $V0 $H0 $B0/${V0}4
++TEST kill_brick ${V0}_1 $H0 $B0/${V0}_10
++TEST kill_brick ${V0}_1 $H0 $B0/${V0}_14
++
++TEST touch $M0/foo{1..100}
++TEST touch $M1/foo{1..100}
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count ${V0}_1
++
++TEST $CLI volume start ${V0} force
++TEST $CLI volume start ${V0}_1 force
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_1
++
++TEST rm -rf $M0/*
++TEST rm -rf $M1/*
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
++
++#Stop the volume
++TEST $CLI volume stop ${V0}_1
++TEST $CLI volume delete ${V0}_1
++
++#Check the stop succeeded and detached the volume with out restarting it
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
++
++#Check the thread count become to earlier number after stopping
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++
++#Now create a ec volume and check mux works
++TEST $CLI volume create ${V0}_2 disperse 6 redundancy 2 $H0:$B0/${V0}_2{0,1,2,3,4,5}
++TEST $CLI volume start ${V0}_2
++
++#Check whether the shd has multiplexed or not
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
++
++TEST $CLI volume set ${V0}_2 cluster.background-self-heal-count 0
++TEST $CLI volume set ${V0}_2 cluster.eager-lock off
++TEST $CLI volume set ${V0}_2 performance.flush-behind off
++TEST $GFS --volfile-id=/${V0}_2 --volfile-server=$H0 $M1
++
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST kill_brick $V0 $H0 $B0/${V0}4
++TEST kill_brick ${V0}_2 $H0 $B0/${V0}_20
++TEST kill_brick ${V0}_2 $H0 $B0/${V0}_22
++
++TEST touch $M0/foo{1..100}
++TEST touch $M1/foo{1..100}
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^404$" get_pending_heal_count ${V0}_2
++
++TEST $CLI volume start ${V0} force
++TEST $CLI volume start ${V0}_2 force
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_2
++
++TEST rm -rf $M0/*
++TEST rm -rf $M1/*
++
++
++#Stop the volume
++TEST $CLI volume stop ${V0}_2
++TEST $CLI volume delete ${V0}_2
++
++#Check the stop succeeded and detached the volume with out restarting it
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
++
++#Check the thread count become to zero for ec related threads
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
++#Check the thread count become to earlier number after stopping
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++for i in $(seq 1 3); do
++ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
++ TEST $CLI volume start ${V0}_afr$i
++ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
++ TEST $CLI volume start ${V0}_ec$i
++done
++
++#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
++#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++#Delete the volumes
++for i in $(seq 1 3); do
++ TEST $CLI volume stop ${V0}_afr$i
++ TEST $CLI volume stop ${V0}_ec$i
++ TEST $CLI volume delete ${V0}_afr$i
++ TEST $CLI volume delete ${V0}_ec$i
++done
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++TEST $CLI volume stop ${V0}
++TEST $CLI volume delete ${V0}
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
++
++cleanup
+diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t
+new file mode 100644
+index 0000000..dd9cf83
+--- /dev/null
++++ b/tests/basic/volume-scale-shd-mux.t
+@@ -0,0 +1,112 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++
++cleanup;
++
++TESTS_EXPECTED_IN_LOOP=6
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
++TEST $CLI volume set $V0 cluster.background-self-heal-count 0
++TEST $CLI volume set $V0 cluster.eager-lock off
++TEST $CLI volume set $V0 performance.flush-behind off
++TEST $CLI volume start $V0
++
++for i in $(seq 1 2); do
++ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
++ TEST $CLI volume start ${V0}_afr$i
++ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
++ TEST $CLI volume start ${V0}_ec$i
++done
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
++#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{6,7,8};
++#Check the thread count become to number of volumes*number of afr subvolume plus 3 additional threads from newly added bricks (3*6+3=21)
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++#Remove the brick and check the detach is successful
++$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5};
++#Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18)
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
++
++#Remove the brick and check the detach is successful
++$CLI volume remove-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5} force
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
++
++
++for i in $(seq 1 2); do
++ TEST $CLI volume stop ${V0}_afr$i
++ TEST $CLI volume stop ${V0}_ec$i
++done
++
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST kill_brick $V0 $H0 $B0/${V0}4
++
++TEST touch $M0/foo{1..100}
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
++
++TEST $CLI volume start ${V0} force
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++TEST rm -rf $M0/*
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++shd_pid=$(get_shd_mux_pid $V0)
++TEST $CLI volume create ${V0}_distribute1 $H0:$B0/${V0}_distribute10
++TEST $CLI volume start ${V0}_distribute1
++
++#Creating a non-replicate/non-ec volume should not have any effect in shd
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++EXPECT "^${shd_pid}$" get_shd_mux_pid $V0
++
++TEST mkdir $B0/add/
++#Now convert the distributed volume to replicate
++TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3}
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++#scale down the volume
++TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
++
++TEST $CLI volume stop ${V0}
++TEST $CLI volume delete ${V0}
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
++
++TEST rm -rf $B0/add/
++TEST mkdir $B0/add/
++#Now convert the distributed volume back to replicate and make sure that a new shd is spawned
++TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3};
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
++EXPECT_WITHIN $HEAL_TIMEOUT "^3$" number_healer_threads_shd ${V0}_distribute1 "__afr_shd_healer_wait"
++
++#Now convert the replica volume to distribute again and make sure the shd is now stopped
++TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
++TEST rm -rf $B0/add/
++
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
++
++cleanup
+diff --git a/tests/volume.rc b/tests/volume.rc
+index a0ea3b8..bb400cc 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -912,3 +912,18 @@ function volgen_check_ancestry {
+ echo "N"
+ fi
+ }
++
++function get_shd_mux_pid {
++ local volume=$1
++ pid=$($CLI volume status $volume shd | awk '/Self-heal/{print $8}')
++ echo $pid
++}
++
++function shd_count {
++ ps aux | grep "glustershd" | grep -v grep | wc -l
++}
++
++function number_healer_threads_shd {
++ local pid=$(get_shd_mux_pid $1)
++ pstack $pid | grep $2 | wc -l
++}
+--
+1.8.3.1
+
diff --git a/0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch b/0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch
new file mode 100644
index 0000000..3aa441d
--- /dev/null
+++ b/0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch
@@ -0,0 +1,92 @@
+From 79fff98f9ca5f815cf0227312b9a997d555dad29 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Wed, 22 May 2019 13:32:23 +0530
+Subject: [PATCH 166/169] glusterd/svc: glusterd_svcs_stop should call
+ individual wrapper function
+
+glusterd_svcs_stop should call the individual wrapper function to stop
+a daemon rather than calling glusterd_svc_stop. For example, for shd it
+should call glusterd_shdsvc_stop instead of the basic API function,
+because the individual wrapper function for each daemon may perform
+daemon-specific operations.
+
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22761/
+
+>Change-Id: Ie6d40590251ad470ef3901d1141ab7b22c3498f5
+>fixes: bz#1712741
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I6df03e53f08c337d5d9b0e855a0b77894a2aacc9
+BUG: 1716865
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172288
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 12 ++++++++++--
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 10 +++++-----
+ 2 files changed, 15 insertions(+), 7 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 75f9a07..981cc87 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -656,10 +656,18 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+ int pid = -1;
+
+ conf = THIS->private;
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ svc_proc = svc->svc_proc;
+- GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out);
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
++ if (!svc_proc) {
++ /*
++ * This can happen when stop was called on a volume that is not shd
++ * compatible.
++ */
++ gf_msg_debug("glusterd", 0, "svc_proc is null, ie shd already stopped");
++ ret = 0;
++ goto out;
++ }
+
+ /* Get volinfo->shd from svc object */
+ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index f7be394..6a3ca52 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -86,25 +86,25 @@ glusterd_svcs_stop(glusterd_volinfo_t *volinfo)
+ priv = this->private;
+ GF_ASSERT(priv);
+
+- ret = glusterd_svc_stop(&(priv->nfs_svc), SIGKILL);
++ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
+ if (ret)
+ goto out;
+
+- ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM);
++ ret = priv->quotad_svc.stop(&(priv->quotad_svc), SIGTERM);
+ if (ret)
+ goto out;
+
+ if (volinfo) {
+- ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT);
++ ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
+ if (ret)
+ goto out;
+ }
+
+- ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM);
++ ret = priv->bitd_svc.stop(&(priv->bitd_svc), SIGTERM);
+ if (ret)
+ goto out;
+
+- ret = glusterd_svc_stop(&(priv->scrub_svc), SIGTERM);
++ ret = priv->scrub_svc.stop(&(priv->scrub_svc), SIGTERM);
+ out:
+ return ret;
+ }
+--
+1.8.3.1
+
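The fix in 0166 above hinges on a simple dispatch idiom: each service object carries a stop callback, so calling through svc->stop reaches the daemon-specific wrapper, while calling the generic helper directly silently skips any wrapper-side bookkeeping. A minimal standalone C sketch of that idiom follows; svc_t, generic_stop and shd_stop are invented names for illustration, not the actual glusterd types.

    #include <stdio.h>

    /* A service object carrying its own stop callback, mirroring the
     * svc->stop(...) calls introduced by the patch. */
    typedef struct svc {
        const char *name;
        int (*stop)(struct svc *svc, int sig);
    } svc_t;

    static int
    generic_stop(svc_t *svc, int sig)
    {
        printf("sending signal %d to %s\n", sig, svc->name);
        return 0;
    }

    static int
    shd_stop(svc_t *svc, int sig)
    {
        /* Daemon-specific bookkeeping runs only when the wrapper is the
         * entry point; direct generic_stop() calls would skip it. */
        printf("shd-specific cleanup for %s\n", svc->name);
        return generic_stop(svc, sig);
    }

    int
    main(void)
    {
        svc_t shd = {"glustershd", shd_stop};
        return shd.stop(&shd, 15); /* dispatches to the shd wrapper */
    }
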
diff --git a/0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch b/0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch
new file mode 100644
index 0000000..44e971a
--- /dev/null
+++ b/0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch
@@ -0,0 +1,64 @@
+From 321080e55f0ae97115a9542ba5de8494e7610860 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Tue, 14 May 2019 23:12:44 +0530
+Subject: [PATCH 167/169] glusterd/shd: Optimize the glustershd manager to send
+ reconfigure
+
+Traditionally, every svc manager executes a process stop followed by a
+start each time it is called. That is not required for shd, because the
+attach request implemented in the shd multiplex is intelligent enough
+to check whether a detach is required prior to attaching the graph. So
+there is no need to send an explicit detach request if we are sure that
+the next call is an attach request.
+
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22729/
+>Change-Id: I9157c8dcaffdac038f73286bcf5646a3f1d3d8ec
+>fixes: bz#1710054
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I56aaaf3b4d28215307e160c1ba0e09bb74c30fbe
+BUG: 1716865
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172289
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/graph.c | 1 -
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 9 +++++----
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 18fb2d9..27d9335 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1497,7 +1497,6 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+ parent_graph->xl_count -= graph->xl_count;
+ parent_graph->leaf_count -= graph->leaf_count;
+- default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
+ parent_graph->id++;
+ ret = 0;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 981cc87..d81d760 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -311,10 +311,11 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ */
+ ret = svc->stop(svc, SIGTERM);
+ } else if (volinfo) {
+- ret = svc->stop(svc, SIGTERM);
+- if (ret)
+- goto out;
+-
++ if (volinfo->status != GLUSTERD_STATUS_STARTED) {
++ ret = svc->stop(svc, SIGTERM);
++ if (ret)
++ goto out;
++ }
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = svc->start(svc, flags);
+ if (ret)
+--
+1.8.3.1
+
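The optimization in 0167 reduces to: do not tear down what the very next step will replace anyway. Below is a condensed standalone C sketch of the reordered manager logic, assuming a toy status enum and stop/start stubs in place of the real glusterd service calls.

    #include <stdio.h>

    typedef enum { VOL_STOPPED, VOL_STARTED } vol_status_t;

    static int
    stop_svc(void)
    {
        puts("stop (explicit detach)");
        return 0;
    }

    static int
    start_svc(void)
    {
        puts("start (attach; detaches internally if needed)");
        return 0;
    }

    /* Post-patch shape of the manager: stop only when the volume is not
     * running, because a start/attach already handles any needed detach. */
    static int
    manage(vol_status_t status)
    {
        int ret = 0;

        if (status != VOL_STARTED) {
            ret = stop_svc();
            if (ret)
                return ret;
        }
        if (status == VOL_STARTED)
            ret = start_svc();
        return ret;
    }

    int
    main(void)
    {
        manage(VOL_STARTED); /* no redundant stop before the attach */
        manage(VOL_STOPPED); /* only the stop path runs */
        return 0;
    }
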
diff --git a/0168-cluster-dht-Fix-directory-perms-during-selfheal.patch b/0168-cluster-dht-Fix-directory-perms-during-selfheal.patch
new file mode 100644
index 0000000..3cf8dff
--- /dev/null
+++ b/0168-cluster-dht-Fix-directory-perms-during-selfheal.patch
@@ -0,0 +1,46 @@
+From 6198461bce7d264b71fe91e981aa3af3a19a8abe Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Tue, 4 Jun 2019 14:51:44 +0530
+Subject: [PATCH 168/169] cluster/dht: Fix directory perms during selfheal
+
+Fixed a bug in the revalidate code path that wiped out
+directory permissions if no mds subvol was found.
+
+upstream: https://review.gluster.org/#/c/glusterfs/+/22813/
+
+> Change-Id: I8b4239ffee7001493c59d4032a2d3062586ea115
+> fixes: bz#1716830
+> Signed-off-by: N Balachandran <nbalacha@redhat.com>
+
+BUG: 1716821
+Change-Id: I6d84d381d07a27d1ef9113a2104a62ceaf2110e3
+Signed-off-by: N Balachandran <nbalacha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172622
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 183872f..e1edb38 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -1739,9 +1739,11 @@ unlock:
+
+ if (dht_needs_selfheal(frame, this)) {
+ if (!__is_root_gfid(local->loc.inode->gfid)) {
+- local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+- local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+- local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
++ if (local->mds_subvol) {
++ local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
++ local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
++ local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
++ }
+ } else {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+--
+1.8.3.1
+
diff --git a/0169-Build-Fix-spec-to-enable-rhel8-client-build.patch b/0169-Build-Fix-spec-to-enable-rhel8-client-build.patch
new file mode 100644
index 0000000..294574d
--- /dev/null
+++ b/0169-Build-Fix-spec-to-enable-rhel8-client-build.patch
@@ -0,0 +1,62 @@
+From 2e6241a800c98ba95b3420255d8089e0271b46eb Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <sheggodu@redhat.com>
+Date: Thu, 6 Jun 2019 16:18:26 +0530
+Subject: [PATCH 169/169] Build: Fix spec to enable rhel8 client build
+
+Updated the spec file with required changes to enable RHGS RHEL8
+client build. As Ganesha scripts are not python3 compatible, we
+will not be generating RHGS RHEL8 server build until the required
+changes are backported from upstream.
+
+Label : DOWNSTREAM ONLY
+
+BUG: 1717927
+Change-Id: I2a8d37d24405a8b2d5533ebf7b85327485f810d7
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172668
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 85e75f2..9c7d7a7 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -91,7 +91,7 @@
+
+ # disable server components forcefully as rhel <= 6
+ %if ( 0%{?rhel} )
+-%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" )))
++%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ) || ( "%{?dist}" == ".el8rhgs" )))
+ %global _without_server --without-server
+ %endif
+ %endif
+@@ -270,7 +270,7 @@ BuildRequires: python%{_pythonver}-devel
+ %if ( 0%{?rhel} && 0%{?rhel} < 8 )
+ BuildRequires: python-ctypes
+ %endif
+-%if ( 0%{?_with_ipv6default:1} ) || ( 0%{!?_without_libtirpc:1} )
++%if ( 0%{?_with_ipv6default:1} ) || ( 0%{!?_without_libtirpc:1} ) || ( 0%{?rhel} && ( 0%{?rhel} >= 8 ) )
+ BuildRequires: libtirpc-devel
+ %endif
+ %if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+@@ -722,12 +722,10 @@ GlusterFS Events
+
+ %prep
+ %setup -q -n %{name}-%{version}%{?prereltag}
+-%if ( ! %{_usepython3} )
+ echo "fixing python shebangs..."
+-for f in api events extras geo-replication libglusterfs tools xlators; do
+-find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \;
++for i in `find . -type f -exec bash -c "if file {} | grep 'Python script, ASCII text executable' >/dev/null; then echo {}; fi" ';'`; do
++ sed -i -e 's|^#!/usr/bin/python.*|#!%{__python3}|' -e 's|^#!/usr/bin/env python.*|#!%{__python3}|' $i
+ done
+-%endif
+
+ %build
+
+--
+1.8.3.1
+
diff --git a/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch b/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch
new file mode 100644
index 0000000..a7ea429
--- /dev/null
+++ b/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch
@@ -0,0 +1,119 @@
+From c4e292379928eaf1ebb47ee1c8e9b1eabbe90574 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Tue, 14 May 2019 11:05:45 +0530
+Subject: [PATCH 170/178] geo-rep: Convert gfid conflict resolution logs into
+ debug
+
+The gfid conflict resolution code path is not supposed
+to be hit in the generic code path. But a few heavy rename
+workloads (BUG: 1694820) make it a generic case. So
+logging the entries to be fixed at INFO level floods the log
+in these particular workloads. Hence convert them to DEBUG.
+
+Backport of:
+ > Patch: https://review.gluster.org/22720
+ > fixes: bz#1709653
+ > Change-Id: I4d5e102b87be5fe5b54f78f329e588882d72b9d9
+
+BUG: 1714536
+Change-Id: I4d5e102b87be5fe5b54f78f329e588882d72b9d9
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172731
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/syncdaemon/master.py | 21 ++++++++++++---------
+ 1 file changed, 12 insertions(+), 9 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
+index 42c86d7..3f98337 100644
+--- a/geo-replication/syncdaemon/master.py
++++ b/geo-replication/syncdaemon/master.py
+@@ -811,7 +811,7 @@ class GMasterChangelogMixin(GMasterCommon):
+ st = lstat(os.path.join(pfx, slave_gfid))
+ # Takes care of scenarios with no hardlinks
+ if isinstance(st, int) and st == ENOENT:
+- logging.info(lf('Entry not present on master. Fixing gfid '
++ logging.debug(lf('Entry not present on master. Fixing gfid '
+ 'mismatch in slave. Deleting the entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+@@ -843,7 +843,7 @@ class GMasterChangelogMixin(GMasterCommon):
+ if matching_disk_gfid(slave_gfid, pbname):
+ # Safe to ignore the failure as master contains same
+ # file with same gfid. Remove entry from entries list
+- logging.info(lf('Fixing gfid mismatch in slave. '
++ logging.debug(lf('Fixing gfid mismatch in slave. '
+ ' Safe to ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+@@ -865,14 +865,14 @@ class GMasterChangelogMixin(GMasterCommon):
+ dst_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ src_entry = pbname
+- logging.info(lf('Fixing dir name/gfid mismatch in '
++ logging.debug(lf('Fixing dir name/gfid mismatch in '
+ 'slave', retry_count=retry_count,
+ entry=repr(failure)))
+ if src_entry == dst_entry:
+ # Safe to ignore the failure as master contains
+ # same directory as in slave with same gfid.
+ # Remove the failure entry from entries list
+- logging.info(lf('Fixing dir name/gfid mismatch'
++ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Safe to ignore, '
+ 'take out entry',
+ retry_count=retry_count,
+@@ -886,7 +886,7 @@ class GMasterChangelogMixin(GMasterCommon):
+ entry=src_entry,
+ entry1=dst_entry, stat=st,
+ link=None)
+- logging.info(lf('Fixing dir name/gfid mismatch'
++ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Renaming',
+ retry_count=retry_count,
+ entry=repr(rename_dict)))
+@@ -896,7 +896,7 @@ class GMasterChangelogMixin(GMasterCommon):
+ # renamed file exists and we are sure from
+ # matching_disk_gfid check that the entry doesn't
+ # exist with same gfid so we can safely delete on slave
+- logging.info(lf('Fixing file gfid mismatch in slave. '
++ logging.debug(lf('Fixing file gfid mismatch in slave. '
+ 'Hardlink/Rename Case. Deleting entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+@@ -915,7 +915,7 @@ class GMasterChangelogMixin(GMasterCommon):
+ # Safe to ignore the failure as master doesn't contain
+ # parent directory.
+ if isinstance(st, int):
+- logging.info(lf('Fixing ENOENT error in slave. Parent '
++ logging.debug(lf('Fixing ENOENT error in slave. Parent '
+ 'does not exist on master. Safe to '
+ 'ignore, take out entry',
+ retry_count=retry_count,
+@@ -925,7 +925,7 @@ class GMasterChangelogMixin(GMasterCommon):
+ except ValueError:
+ pass
+ else:
+- logging.info(lf('Fixing ENOENT error in slave. Create '
++ logging.debug(lf('Fixing ENOENT error in slave. Create '
+ 'parent directory on slave.',
+ retry_count=retry_count,
+ entry=repr(failure)))
+@@ -1223,10 +1223,13 @@ class GMasterChangelogMixin(GMasterCommon):
+
+ if gconf.get("gfid-conflict-resolution"):
+ count = 0
++ if failures:
++ logging.info(lf('Entry ops failed with gfid mismatch',
++ count=len(failures)))
+ while failures and count < self.MAX_OE_RETRIES:
+ count += 1
+ self.handle_entry_failures(failures, entries)
+- logging.info("Retry original entries. count = %s" % count)
++ logging.info(lf('Retry original entries', count=count))
+ failures = self.slave.server.entry_ops(entries)
+ if not failures:
+ logging.info("Successfully fixed all entry ops with "
+--
+1.8.3.1
+
diff --git a/0171-posix-add-storage.reserve-size-option.patch b/0171-posix-add-storage.reserve-size-option.patch
new file mode 100644
index 0000000..2e76908
--- /dev/null
+++ b/0171-posix-add-storage.reserve-size-option.patch
@@ -0,0 +1,295 @@
+From 4d82c7879387e6f7963b4d9c84c4ff8a1788055d Mon Sep 17 00:00:00 2001
+From: Sheetal Pamecha <sheetal.pamecha08@gmail.com>
+Date: Mon, 19 Nov 2018 22:15:25 +0530
+Subject: [PATCH 171/178] posix: add storage.reserve-size option
+
+The storage.reserve-size option takes a size as input
+instead of a percentage. If set, priority is given to
+storage.reserve-size over storage.reserve. The default value
+of this option is 0.
+
+> fixes: bz#1651445
+> Change-Id: I7a7342c68e436e8bf65bd39c567512ee04abbcea
+> Signed-off-by: Sheetal Pamecha <sheetal.pamecha08@gmail.com>
+> Cherry pick from commit 950726dfc8e3171bef625b563c0c6dbba1ec2928
+> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21686/
+
+BUG: 1573077
+Change-Id: I7a7342c68e436e8bf65bd39c567512ee04abbcea
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172709
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ tests/bugs/posix/bug-1651445.t | 58 +++++++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 33 ++++++++++++++
+ xlators/storage/posix/src/posix-common.c | 34 ++++++++++++---
+ xlators/storage/posix/src/posix-helpers.c | 13 ++++--
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 10 +++--
+ xlators/storage/posix/src/posix.h | 3 +-
+ 6 files changed, 138 insertions(+), 13 deletions(-)
+ create mode 100644 tests/bugs/posix/bug-1651445.t
+
+diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t
+new file mode 100644
+index 0000000..f6f1833
+--- /dev/null
++++ b/tests/bugs/posix/bug-1651445.t
+@@ -0,0 +1,58 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../snapshot.rc
++
++cleanup
++
++TEST verify_lvm_version
++TEST glusterd
++TEST pidof glusterd
++TEST init_n_bricks 3
++TEST setup_lvm 3
++
++TEST $CLI volume create $V0 replica 3 $H0:$L{1,2,3}
++TEST $CLI volume start $V0
++
++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST $CLI volume set $V0 storage.reserve-size 10MB
++
++#No effect as priority to reserve-size
++TEST $CLI volume set $V0 storage.reserve 20
++
++TEST dd if=/dev/zero of=$M0/a bs=100M count=1
++sleep 5
++
++#The dd below confirms posix gives priority to reserve-size
++TEST dd if=/dev/zero of=$M0/b bs=40M count=1
++
++sleep 5
++TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
++
++rm -rf $M0/*
++#With reserve-size set to 0, the reserve falls back to the previously set reserve option (20%)
++TEST $CLI volume set $V0 storage.reserve-size 0
++
++#Overwrite reserve option
++TEST $CLI volume set $V0 storage.reserve-size 40MB
++
++#wait 5s to reset disk_space_full flag
++sleep 5
++
++TEST dd if=/dev/zero of=$M0/a bs=100M count=1
++TEST dd if=/dev/zero of=$M0/b bs=10M count=1
++
++# Wait 5s for the disk_space_full flag to update, because the thread
++# checks disk space every 5s
++
++sleep 5
++# setup_lvm creates an lvm partition of 150M and 40M is reserved, so after
++# consuming more than 110M the next dd should fail
++TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
++
++TEST $CLI volume stop $V0
++TEST $CLI volume delete $V0
++
++cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 7a83124..3a7ab83 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -1231,6 +1231,30 @@ out:
+
+ return ret;
+ }
++static int
++validate_size(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value,
++ char **op_errstr)
++{
++ xlator_t *this = NULL;
++ uint64_t size = 0;
++ int ret = -1;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++ ret = gf_string2bytesize_uint64(value, &size);
++ if (ret < 0) {
++ gf_asprintf(op_errstr,
++ "%s is not a valid size. %s "
++ "expects a valid value in bytes",
++ value, key);
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s",
++ *op_errstr);
++ }
++out:
++ gf_msg_debug("glusterd", 0, "Returning %d", ret);
++
++ return ret;
++}
+
+ /* dispatch table for VOLUME SET
+ * -----------------------------
+@@ -2830,6 +2854,15 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .op_version = GD_OP_VERSION_3_13_0,
+ },
+ {
++ .key = "storage.reserve-size",
++ .voltype = "storage/posix",
++ .value = "0",
++ .validate_fn = validate_size,
++ .description = "If set, priority will be given to "
++ "storage.reserve-size over storage.reserve",
++ .op_version = GD_OP_VERSION_7_0,
++ },
++ {
+ .option = "health-check-timeout",
+ .key = "storage.health-check-timeout",
+ .type = NO_DOC,
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index ed82e35..0f70af5 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -345,11 +345,18 @@ posix_reconfigure(xlator_t *this, dict_t *options)
+ " fallback to <hostname>:<export>");
+ }
+
+- GF_OPTION_RECONF("reserve", priv->disk_reserve, options, uint32, out);
+- if (priv->disk_reserve) {
++ GF_OPTION_RECONF("reserve-size", priv->disk_reserve_size, options, size,
++ out);
++
++ GF_OPTION_RECONF("reserve", priv->disk_reserve_percent, options, uint32,
++ out);
++ if (priv->disk_reserve_size || priv->disk_reserve_percent) {
+ ret = posix_spawn_disk_space_check_thread(this);
+- if (ret)
++ if (ret) {
++ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
++ "Getting disk space check from thread failed");
+ goto out;
++ }
+ }
+
+ GF_OPTION_RECONF("health-check-interval", priv->health_check_interval,
+@@ -968,11 +975,17 @@ posix_init(xlator_t *this)
+
+ _private->disk_space_check_active = _gf_false;
+ _private->disk_space_full = 0;
+- GF_OPTION_INIT("reserve", _private->disk_reserve, uint32, out);
+- if (_private->disk_reserve) {
++ GF_OPTION_INIT("reserve-size", _private->disk_reserve_size, size, out);
++
++ GF_OPTION_INIT("reserve", _private->disk_reserve_percent, uint32, out);
++
++ if (_private->disk_reserve_size || _private->disk_reserve_percent) {
+ ret = posix_spawn_disk_space_check_thread(this);
+- if (ret)
++ if (ret) {
++ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
++ "Getting disk space check from thread failed ");
+ goto out;
++ }
+ }
+
+ _private->health_check_active = _gf_false;
+@@ -1216,6 +1229,15 @@ struct volume_options posix_options[] = {
+ " Set to 0 to disable",
+ .op_version = {GD_OP_VERSION_3_13_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
++ {.key = {"reserve-size"},
++ .type = GF_OPTION_TYPE_SIZET,
++ .min = 0,
++ .default_value = "0",
++ .validate = GF_OPT_VALIDATE_MIN,
++ .description = "size in megabytes to be reserved for disk space."
++ " Set to 0 to disable",
++ .op_version = {GD_OP_VERSION_7_0},
++ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"batch-fsync-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "reverse-fsync",
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index d0fd45a..aecf4f8 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2246,6 +2246,7 @@ posix_disk_space_check(xlator_t *this)
+ struct posix_private *priv = NULL;
+ char *subvol_path = NULL;
+ int op_ret = 0;
++ uint64_t size = 0;
+ int percent = 0;
+ struct statvfs buf = {0};
+ uint64_t totsz = 0;
+@@ -2256,7 +2257,6 @@ posix_disk_space_check(xlator_t *this)
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ subvol_path = priv->base_path;
+- percent = priv->disk_reserve;
+
+ op_ret = sys_statvfs(subvol_path, &buf);
+
+@@ -2265,10 +2265,17 @@ posix_disk_space_check(xlator_t *this)
+ "statvfs failed on %s", subvol_path);
+ goto out;
+ }
+- totsz = (buf.f_blocks * buf.f_bsize);
++
++ if (priv->disk_reserve_size) {
++ size = priv->disk_reserve_size;
++ } else {
++ percent = priv->disk_reserve_percent;
++ totsz = (buf.f_blocks * buf.f_bsize);
++ size = ((totsz * percent) / 100);
++ }
+ freesz = (buf.f_bfree * buf.f_bsize);
+
+- if (freesz <= ((totsz * percent) / 100)) {
++ if (freesz <= size) {
+ priv->disk_space_full = 1;
+ } else {
+ priv->disk_space_full = 0;
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 2c19ce1..7ca4d26 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -720,7 +720,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ thread after every 5 sec sleep to working correctly storage.reserve
+ option behaviour
+ */
+- if (priv->disk_reserve)
++ if (priv->disk_reserve_size || priv->disk_reserve_percent)
+ posix_disk_space_check(this);
+
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out);
+@@ -2331,8 +2331,12 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ goto out;
+ }
+
+- percent = priv->disk_reserve;
+- reserved_blocks = (buf.f_blocks * percent) / 100;
++ if (priv->disk_reserve_size) {
++ reserved_blocks = priv->disk_reserve_size / buf.f_bsize;
++ } else {
++ percent = priv->disk_reserve_percent;
++ reserved_blocks = (buf.f_blocks * percent) / 100;
++ }
+
+ if (buf.f_bfree > reserved_blocks) {
+ buf.f_bfree = (buf.f_bfree - reserved_blocks);
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 1da4d01..4364b96 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -225,7 +225,8 @@ struct posix_private {
+ pthread_t health_check;
+ gf_boolean_t health_check_active;
+
+- uint32_t disk_reserve;
++ uint32_t disk_reserve_percent;
++ uint64_t disk_reserve_size;
+ uint32_t disk_space_full;
+ pthread_t disk_space_check;
+ gf_boolean_t disk_space_check_active;
+--
+1.8.3.1
+
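To make the semantics of 0171 concrete, here is a small self-contained C sketch of the threshold computation it introduces: an absolute reserve-size wins over the percentage, and the percentage is only meaningful once filesystem totals are known (hence the statvfs ordering above). reserve_threshold and the sample figures are illustrative, not glusterfs code.

    #include <stdint.h>
    #include <stdio.h>

    /* Reserved bytes: an absolute reserve-size takes priority; otherwise
     * fall back to a percentage of the total size, which requires the
     * statvfs totals to be available first. */
    static uint64_t
    reserve_threshold(uint64_t reserve_size, uint32_t reserve_percent,
                      uint64_t total_bytes)
    {
        if (reserve_size)
            return reserve_size;
        return (total_bytes * reserve_percent) / 100;
    }

    int
    main(void)
    {
        uint64_t total = 150ULL * 1024 * 1024; /* 150M, as in the test */

        /* reserve-size=40MB beats reserve=20% */
        printf("%llu\n", (unsigned long long)reserve_threshold(
                             40ULL * 1024 * 1024, 20, total));
        /* reserve-size=0 falls back to 20% of the total */
        printf("%llu\n", (unsigned long long)reserve_threshold(0, 20, total));
        return 0;
    }
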
diff --git a/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch b/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch
new file mode 100644
index 0000000..0dc15ba
--- /dev/null
+++ b/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch
@@ -0,0 +1,141 @@
+From 998d9b8b5e271f407e1c654c34f45f0db36abc71 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Tue, 21 May 2019 17:15:07 +0530
+Subject: [PATCH 172/178] ec/fini: Fix race with ec_fini and ec_notify
+
+During a graph cleanup, we first send a PARENT_DOWN and wait for
+a CHILD_DOWN to ultimately free the xlator and the graph.
+
+In the ec xlator, we clean up the threads when we get a PARENT_DOWN event.
+But a racing event like CHILD_UP or event xl_op may trigger healing threads
+after the threads have been cleaned up.
+
+So there is a chance that the threads might access a freed private variable.
+
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22758/
+
+>Change-Id: I252d10181bb67b95900c903d479de707a8489532
+>fixes: bz#1703948
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I84a10352d9fb3e68d4147b3791e3af45ab79050e
+BUG: 1703434
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172285
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/glusterfs/xlator.h | 3 +++
+ libglusterfs/src/libglusterfs.sym | 1 +
+ libglusterfs/src/xlator.c | 21 +++++++++++++++++++++
+ xlators/cluster/ec/src/ec-heal.c | 4 ++++
+ xlators/cluster/ec/src/ec-heald.c | 6 ++++++
+ xlators/cluster/ec/src/ec.c | 3 +++
+ 6 files changed, 38 insertions(+)
+
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index 8998976..09e463e 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -1092,4 +1092,7 @@ gluster_graph_take_reference(xlator_t *tree);
+
+ gf_boolean_t
+ mgmt_is_multiplexed_daemon(char *name);
++
++gf_boolean_t
++xlator_is_cleanup_starting(xlator_t *this);
+ #endif /* _XLATOR_H */
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index ec474e7..7a2edef 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1161,3 +1161,4 @@ glusterfs_process_svc_attach_volfp
+ glusterfs_mux_volfile_reconfigure
+ glusterfs_process_svc_detach
+ mgmt_is_multiplexed_daemon
++xlator_is_cleanup_starting
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index 022c3ed..fbfbbe2 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -1486,3 +1486,24 @@ mgmt_is_multiplexed_daemon(char *name)
+ }
+ return _gf_false;
+ }
++
++gf_boolean_t
++xlator_is_cleanup_starting(xlator_t *this)
++{
++ gf_boolean_t cleanup = _gf_false;
++ glusterfs_graph_t *graph = NULL;
++ xlator_t *xl = NULL;
++
++ if (!this)
++ goto out;
++ graph = this->graph;
++
++ if (!graph)
++ goto out;
++
++ xl = graph->first;
++ if (xl && xl->cleanup_starting)
++ cleanup = _gf_true;
++out:
++ return cleanup;
++}
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 2fa1f11..8844c29 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2855,6 +2855,10 @@ ec_replace_brick_heal_wrap(void *opaque)
+ itable = ec->xl->itable;
+ else
+ goto out;
++
++ if (xlator_is_cleanup_starting(ec->xl))
++ goto out;
++
+ ret = ec_replace_heal(ec, itable->root);
+ out:
+ return ret;
+diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
+index edf5e11..91512d7 100644
+--- a/xlators/cluster/ec/src/ec-heald.c
++++ b/xlators/cluster/ec/src/ec-heald.c
+@@ -444,6 +444,9 @@ unlock:
+ int
+ ec_shd_full_healer_spawn(xlator_t *this, int subvol)
+ {
++ if (xlator_is_cleanup_starting(this))
++ return -1;
++
+ return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
+ ec_shd_full_healer);
+ }
+@@ -451,6 +454,9 @@ ec_shd_full_healer_spawn(xlator_t *this, int subvol)
+ int
+ ec_shd_index_healer_spawn(xlator_t *this, int subvol)
+ {
++ if (xlator_is_cleanup_starting(this))
++ return -1;
++
+ return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
+ ec_shd_index_healer);
+ }
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 264582a..df5912c 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -486,6 +486,9 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
+ {
+ uintptr_t current_state = 0;
+
++ if (xlator_is_cleanup_starting(ec->xl))
++ return _gf_false;
++
+ if ((ec->xl_notify & index_mask) == 0) {
+ ec->xl_notify |= index_mask;
+ ec->xl_notify_count++;
+--
+1.8.3.1
+
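The guard added in 0172 follows a general teardown pattern: once cleanup has begun, refuse to spawn new workers that could touch soon-to-be-freed state. A minimal standalone sketch of that pattern in C11 follows (compile with -pthread); cleanup_starting, spawn_healer and the worker body are stand-ins for the ec xlator code, not its actual functions.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_bool cleanup_starting; /* set when PARENT_DOWN arrives */

    static void *
    healer(void *arg)
    {
        (void)arg;
        puts("healer running");
        return NULL;
    }

    /* Refuse to spawn once cleanup has begun, mirroring the early
     * "return -1" added to the ec_shd_*_healer_spawn() functions. */
    static int
    spawn_healer(pthread_t *tid)
    {
        if (atomic_load(&cleanup_starting))
            return -1;
        return pthread_create(tid, NULL, healer, NULL) ? -1 : 0;
    }

    int
    main(void)
    {
        pthread_t tid;

        if (spawn_healer(&tid) == 0)
            pthread_join(tid, NULL);

        atomic_store(&cleanup_starting, 1); /* teardown begins */
        printf("spawn after cleanup: %d\n", spawn_healer(&tid)); /* -1 */
        return 0;
    }
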
diff --git a/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch b/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch
new file mode 100644
index 0000000..bdaaac5
--- /dev/null
+++ b/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch
@@ -0,0 +1,52 @@
+From c001b60047c73e07f42ee858dd8ae19136ecd61b Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Thu, 6 Jun 2019 13:19:29 +0530
+Subject: [PATCH 173/178] glusterd: store fips-mode-rchecksum option in the
+ info file
+
+commit 146e4b45d0ce906ae50fd6941a1efafd133897ea enabled
+storage.fips-mode-rchecksum option for all new volumes with op-version
+>=GD_OP_VERSION_7_0 but `gluster vol get $volname
+storage.fips-mode-rchecksum` was displaying it as 'off'. This patch fixes it.
+
+>upstream patch link : https://review.gluster.org/#/c/glusterfs/+/22830/
+
+>fixes: bz#1717782
+>Change-Id: Ie09f89838893c5776a3f60569dfe8d409d1494dd
+>Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+BUG: 1715407
+Change-Id: Ie09f89838893c5776a3f60569dfe8d409d1494dd
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172799
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 2bc4836..7768b8e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -13124,6 +13124,17 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option)
+ }
+ }
+ }
++ if (conf->op_version >= GD_OP_VERSION_7_0) {
++ ret = dict_set_dynstr_with_alloc(volinfo->dict,
++ "storage.fips-mode-rchecksum", "on");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
++ "Failed to set option 'storage.fips-mode-rchecksum' "
++ "on volume %s",
++ volinfo->volname);
++ goto out;
++ }
++ }
+ out:
+ return ret;
+ }
+--
+1.8.3.1
+
diff --git a/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch b/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch
new file mode 100644
index 0000000..66c5ff4
--- /dev/null
+++ b/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch
@@ -0,0 +1,53 @@
+From 9b94397a5a735910fab2a29670146a1feb6d890e Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Tue, 4 Jun 2019 11:13:50 +0530
+Subject: [PATCH 174/178] xlator/log: Add more logging in
+ xlator_is_cleanup_starting
+
+This patch adds two extra log messages for invalid arguments.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22810/
+
+>Change-Id: I3950b4f4b9d88b1f1e788ef93d8f09d4bd8d4d8b
+>updates: bz#1703948
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I3950b4f4b9d88b1f1e788ef93d8f09d4bd8d4d8b
+BUG: 1703434
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172800
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/xlator.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index fbfbbe2..71e1ed4 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -1494,12 +1494,18 @@ xlator_is_cleanup_starting(xlator_t *this)
+ glusterfs_graph_t *graph = NULL;
+ xlator_t *xl = NULL;
+
+- if (!this)
++ if (!this) {
++ gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
++ "xlator object is null, returning false");
+ goto out;
+- graph = this->graph;
++ }
+
+- if (!graph)
++ graph = this->graph;
++ if (!graph) {
++ gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
++ "Graph is not set for xlator %s", this->name);
+ goto out;
++ }
+
+ xl = graph->first;
+ if (xl && xl->cleanup_starting)
+--
+1.8.3.1
+
diff --git a/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch b/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch
new file mode 100644
index 0000000..c13d96d
--- /dev/null
+++ b/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch
@@ -0,0 +1,241 @@
+From 9fd966aa6879ac9867381629f82eca24b950d731 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Sun, 2 Jun 2019 01:36:33 +0530
+Subject: [PATCH 175/178] ec/fini: Fix race between xlator cleanup and on going
+ async fop
+
+Problem:
+While we process a cleanup, there is a chance of a race between the
+cleanup and async operations, for example ec_launch_replace_heal.
+This can lead to invalid memory access.
+
+Solution:
+Just like we track ongoing heal fops, we can also track fops like
+ec_launch_replace_heal, so that we can decide when to send a
+PARENT_DOWN request.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22798/
+
+>Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298
+>fixes: bz#1703948
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298
+BUG: 1714588
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172801
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 10 ++++++++++
+ xlators/cluster/ec/src/ec-common.h | 2 ++
+ xlators/cluster/ec/src/ec-data.c | 4 +++-
+ xlators/cluster/ec/src/ec-heal.c | 17 +++++++++++++++--
+ xlators/cluster/ec/src/ec-types.h | 1 +
+ xlators/cluster/ec/src/ec.c | 37 +++++++++++++++++++++++++------------
+ 6 files changed, 56 insertions(+), 15 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index e85aa8b..9cc6395 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -2955,3 +2955,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error)
+
+ __ec_manager(fop, error);
+ }
++
++gf_boolean_t
++__ec_is_last_fop(ec_t *ec)
++{
++ if ((list_empty(&ec->pending_fops)) &&
++ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
++ return _gf_true;
++ }
++ return _gf_false;
++}
+diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
+index e948342..bf6c97d 100644
+--- a/xlators/cluster/ec/src/ec-common.h
++++ b/xlators/cluster/ec/src/ec-common.h
+@@ -204,4 +204,6 @@ void
+ ec_reset_entry_healing(ec_fop_data_t *fop);
+ char *
+ ec_msg_str(ec_fop_data_t *fop);
++gf_boolean_t
++__ec_is_last_fop(ec_t *ec);
+ #endif /* __EC_COMMON_H__ */
+diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
+index 6ef9340..8d2d9a1 100644
+--- a/xlators/cluster/ec/src/ec-data.c
++++ b/xlators/cluster/ec/src/ec-data.c
+@@ -202,11 +202,13 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
+ {
+ ec_t *ec = fop->xl->private;
+
++ *notify = _gf_false;
++
+ if (!list_empty(&fop->pending_list)) {
+ LOCK(&ec->lock);
+ {
+ list_del_init(&fop->pending_list);
+- *notify = list_empty(&ec->pending_fops);
++ *notify = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 8844c29..237fea2 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2814,8 +2814,20 @@ int
+ ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
+ {
+ ec_t *ec = opaque;
++ gf_boolean_t last_fop = _gf_false;
+
++ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
++ LOCK(&ec->lock);
++ {
++ last_fop = __ec_is_last_fop(ec);
++ }
++ UNLOCK(&ec->lock);
++ }
+ gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
++
++ if (last_fop)
++ ec_pending_fops_completed(ec);
++
+ return 0;
+ }
+
+@@ -2869,14 +2881,15 @@ ec_launch_replace_heal(ec_t *ec)
+ {
+ int ret = -1;
+
+- if (!ec)
+- return ret;
+ ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
+ ec_replace_heal_done, NULL, ec);
++
+ if (ret < 0) {
+ gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
+ ret);
++ ec_replace_heal_done(-1, NULL, ec);
+ }
++
+ return ret;
+ }
+
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index 1c295c0..4dbf4a3 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -643,6 +643,7 @@ struct _ec {
+ uintptr_t xl_notify; /* Bit flag representing
+ notification for bricks. */
+ uintptr_t node_mask;
++ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */
+ xlator_t **xl_list;
+ gf_lock_t lock;
+ gf_timer_t *timer;
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index df5912c..f0d58c0 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -355,6 +355,7 @@ ec_notify_cbk(void *data)
+ ec_t *ec = data;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
++ gf_boolean_t launch_heal = _gf_false;
+
+ LOCK(&ec->lock);
+ {
+@@ -384,6 +385,11 @@ ec_notify_cbk(void *data)
+ * still bricks DOWN, they will be healed when they
+ * come up. */
+ ec_up(ec->xl, ec);
++
++ if (ec->shd.iamshd && !ec->shutdown) {
++ launch_heal = _gf_true;
++ GF_ATOMIC_INC(ec->async_fop_count);
++ }
+ }
+
+ propagate = _gf_true;
+@@ -391,13 +397,12 @@ ec_notify_cbk(void *data)
+ unlock:
+ UNLOCK(&ec->lock);
+
++ if (launch_heal) {
++ /* We have just brought the volume UP, so we trigger
++ * a self-heal check on the root directory. */
++ ec_launch_replace_heal(ec);
++ }
+ if (propagate) {
+- if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
+- /* We have just brought the volume UP, so we trigger
+- * a self-heal check on the root directory. */
+- ec_launch_replace_heal(ec);
+- }
+-
+ default_notify(ec->xl, event, NULL);
+ }
+ }
+@@ -425,7 +430,7 @@ ec_disable_delays(ec_t *ec)
+ {
+ ec->shutdown = _gf_true;
+
+- return list_empty(&ec->pending_fops);
++ return __ec_is_last_fop(ec);
+ }
+
+ void
+@@ -603,7 +608,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ if (event == GF_EVENT_CHILD_UP) {
+ /* We need to trigger a selfheal if a brick changes
+ * to UP state. */
+- needs_shd_check = ec_set_up_state(ec, mask, mask);
++ if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
++ !ec->shutdown) {
++ needs_shd_check = _gf_true;
++ }
+ } else if (event == GF_EVENT_CHILD_DOWN) {
+ ec_set_up_state(ec, mask, 0);
+ }
+@@ -633,17 +641,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ }
+ } else {
+ propagate = _gf_false;
++ needs_shd_check = _gf_false;
++ }
++
++ if (needs_shd_check) {
++ GF_ATOMIC_INC(ec->async_fop_count);
+ }
+ }
+ unlock:
+ UNLOCK(&ec->lock);
+
+ done:
++ if (needs_shd_check) {
++ ec_launch_replace_heal(ec);
++ }
+ if (propagate) {
+- if (needs_shd_check && ec->shd.iamshd) {
+- ec_launch_replace_heal(ec);
+- }
+-
+ error = default_notify(this, event, data);
+ }
+
+@@ -705,6 +717,7 @@ init(xlator_t *this)
+ ec->xl = this;
+ LOCK_INIT(&ec->lock);
+
++ GF_ATOMIC_INIT(ec->async_fop_count, 0);
+ INIT_LIST_HEAD(&ec->pending_fops);
+ INIT_LIST_HEAD(&ec->heal_waiting);
+ INIT_LIST_HEAD(&ec->healing);
+--
+1.8.3.1
+
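The heart of 0175 is reference-counting asynchronous fops so that the last-fop decision, and hence PARENT_DOWN propagation, also waits for background heals. A condensed standalone C sketch of that accounting follows, with C11 atomics standing in for the GF_ATOMIC_* macros and all names invented for illustration.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int pending_fops; /* regular fops in flight */
    static atomic_int async_fops;   /* tracked background fops, e.g. heals */

    /* Mirrors __ec_is_last_fop(): teardown may proceed only when both
     * counters have drained. */
    static bool
    is_last_fop(void)
    {
        return atomic_load(&pending_fops) == 0 &&
               atomic_load(&async_fops) == 0;
    }

    static void
    async_fop_start(void)
    {
        atomic_fetch_add(&async_fops, 1);
    }

    static void
    async_fop_done(void)
    {
        /* fetch_sub returns the old value, so 1 means we just hit zero,
         * like the "GF_ATOMIC_DEC(...) == 0" check in the patch. */
        if (atomic_fetch_sub(&async_fops, 1) == 1 && is_last_fop())
            puts("last fop done: safe to propagate PARENT_DOWN");
    }

    int
    main(void)
    {
        async_fop_start(); /* e.g. before ec_launch_replace_heal() */
        async_fop_done();  /* completion callback triggers the check */
        return 0;
    }
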
diff --git a/0176-features-shard-Fix-crash-during-background-shard-del.patch b/0176-features-shard-Fix-crash-during-background-shard-del.patch
new file mode 100644
index 0000000..487d8e2
--- /dev/null
+++ b/0176-features-shard-Fix-crash-during-background-shard-del.patch
@@ -0,0 +1,289 @@
+From 40ac42501d6bbff7206e753e8e988beefe74f5f4 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Fri, 5 Apr 2019 10:30:23 +0530
+Subject: [PATCH 176/178] features/shard: Fix crash during background shard
+ deletion in a specific case
+
+Consider the following case -
+1. A file gets FALLOCATE'd such that > "shard-lru-limit" number of
+ shards are created.
+2. And then it is deleted after that.
+
+The unique thing about FALLOCATE is that unlike WRITE, all of the
+participant shards are resolved and created and fallocated in a single
+batch. This means, in this case, after the first "shard-lru-limit"
+number of shards are resolved and added to lru list, as part of
+resolution of the remaining shards, some of the existing shards in lru
+list will need to be evicted. So these evicted shards will be
+inode_unlink()d as part of eviction. Now once the fop gets to the actual
+FALLOCATE stage, the lru'd-out shards get added to fsync list.
+
+2 things to note at this point:
+i. the lru'd out shards are only part of fsync list, so each holds 1 ref
+ on base shard
+ii. and the more recently used shards are part of both fsync and lru list.
+ So each of these shards holds 2 refs on base inode - one for being
+ part of fsync list, and the other for being part of lru list.
+
+FALLOCATE completes successfully and then this very file is deleted, and
+background shard deletion launched. Here's where the ref counts get mismatched.
+First as part of inode_resolve()s during the deletion, the lru'd-out inodes
+return NULL, because they are inode_unlink()'d by now. So these inodes need to
+be freshly looked up. But as part of linking them in lookup_cbk (precisely in
+shard_link_block_inode()), inode_link() returns the lru'd-out inode object.
+And its inode ctx is still valid and ctx->base_inode valid from the last
+time it was added to list.
+
+But shard_common_lookup_shards_cbk() passes NULL in the place of base_pointer
+to __shard_update_shards_inode_list(). This means, as part of adding the lru'd out
+inode back to the lru list, the base inode is not ref'd since it's NULL.
+
+Whereas post unlinking this shard, during shard_unlink_block_inode(),
+ctx->base_inode is accessible and is unref'd because the shard was found to be part
+of LRU list, although the matching ref didn't occur. This at some point leads to
+the base_inode refcount becoming 0; it gets destroyed and released back while some
+of its associated shards are still being unlinked in parallel, and the client crashes
+whenever the inode is accessed next.
+
+Fix is to pass base shard correctly, if available, in shard_link_block_inode().
+
+Also, the patch fixes the ret value check in tests/bugs/shard/shard-fallocate.c
+
+>Change-Id: Ibd0bc4c6952367608e10701473cbad3947d7559f
+>Updates: bz#1696136
+>Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+
+Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22507/
+
+BUG: 1694595
+Change-Id: Ibd0bc4c6952367608e10701473cbad3947d7559f
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172856
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/bugs/shard/bug-1696136.c | 121 +++++++++++++++++++++++++++++++++++++
+ tests/bugs/shard/bug-1696136.t | 33 ++++++++++
+ tests/bugs/shard/shard-fallocate.c | 2 +-
+ xlators/features/shard/src/shard.c | 12 +++-
+ 4 files changed, 164 insertions(+), 4 deletions(-)
+ create mode 100644 tests/bugs/shard/bug-1696136.c
+ create mode 100644 tests/bugs/shard/bug-1696136.t
+
+diff --git a/tests/bugs/shard/bug-1696136.c b/tests/bugs/shard/bug-1696136.c
+new file mode 100644
+index 0000000..b9e8d13
+--- /dev/null
++++ b/tests/bugs/shard/bug-1696136.c
+@@ -0,0 +1,121 @@
++#define _GNU_SOURCE
++#include <fcntl.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <glusterfs/api/glfs.h>
++#include <glusterfs/api/glfs-handles.h>
++
++enum fallocate_flag {
++ TEST_FALLOCATE_NONE,
++ TEST_FALLOCATE_KEEP_SIZE,
++ TEST_FALLOCATE_ZERO_RANGE,
++ TEST_FALLOCATE_PUNCH_HOLE,
++ TEST_FALLOCATE_MAX,
++};
++
++int
++get_fallocate_flag(int opcode)
++{
++ int ret = 0;
++
++ switch (opcode) {
++ case TEST_FALLOCATE_NONE:
++ ret = 0;
++ break;
++ case TEST_FALLOCATE_KEEP_SIZE:
++ ret = FALLOC_FL_KEEP_SIZE;
++ break;
++ case TEST_FALLOCATE_ZERO_RANGE:
++ ret = FALLOC_FL_ZERO_RANGE;
++ break;
++ case TEST_FALLOCATE_PUNCH_HOLE:
++ ret = FALLOC_FL_PUNCH_HOLE;
++ break;
++ default:
++ ret = -1;
++ break;
++ }
++ return ret;
++}
++
++int
++main(int argc, char *argv[])
++{
++ int ret = 1;
++ int opcode = -1;
++ off_t offset = 0;
++ size_t len = 0;
++ glfs_t *fs = NULL;
++ glfs_fd_t *fd = NULL;
++
++ if (argc != 8) {
++ fprintf(stderr,
++ "Syntax: %s <host> <volname> <opcode> <offset> <len> "
++ "<file-path> <log-file>\n",
++ argv[0]);
++ return 1;
++ }
++
++ fs = glfs_new(argv[2]);
++ if (!fs) {
++ fprintf(stderr, "glfs_new: returned NULL\n");
++ return 1;
++ }
++
++ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
++ if (ret != 0) {
++ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
++ goto out;
++ }
++
++ ret = glfs_set_logging(fs, argv[7], 7);
++ if (ret != 0) {
++ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
++ goto out;
++ }
++
++ ret = glfs_init(fs);
++ if (ret != 0) {
++ fprintf(stderr, "glfs_init: returned %d\n", ret);
++ goto out;
++ }
++
++ opcode = atoi(argv[3]);
++ opcode = get_fallocate_flag(opcode);
++ if (opcode < 0) {
++ fprintf(stderr, "get_fallocate_flag: invalid flag \n");
++ goto out;
++ }
++
++ offset = atoi(argv[4]);
++ len = atoi(argv[5]);
++
++ fd = glfs_open(fs, argv[6], O_RDWR);
++ if (fd == NULL) {
++ fprintf(stderr, "glfs_open: returned NULL\n");
++ goto out;
++ }
++
++ ret = glfs_fallocate(fd, opcode, offset, len);
++ if (ret < 0) {
++ fprintf(stderr, "glfs_fallocate: returned %d\n", ret);
++ goto out;
++ }
++
++ ret = glfs_unlink(fs, argv[6]);
++ if (ret < 0) {
++ fprintf(stderr, "glfs_unlink: returned %d\n", ret);
++ goto out;
++ }
++ /* Sleep for 3s to give enough time for background deletion to complete
++ * during which if the bug exists, the process will crash.
++ */
++ sleep(3);
++ ret = 0;
++
++out:
++ if (fd)
++ glfs_close(fd);
++ glfs_fini(fs);
++ return ret;
++}
+diff --git a/tests/bugs/shard/bug-1696136.t b/tests/bugs/shard/bug-1696136.t
+new file mode 100644
+index 0000000..b6dc858
+--- /dev/null
++++ b/tests/bugs/shard/bug-1696136.t
+@@ -0,0 +1,33 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fallocate.rc
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 features.shard on
++TEST $CLI volume set $V0 features.shard-block-size 4MB
++TEST $CLI volume set $V0 features.shard-lru-limit 120
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2
++
++# Create a file
++TEST touch $M0/file1
++
++# Fallocate a 512M file. This makes sure the number of participant shards is > lru-limit
++TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST $CLI volume stop $V0
++TEST $CLI volume delete $V0
++rm -f $(dirname $0)/bug-1696136
++
++cleanup
+diff --git a/tests/bugs/shard/shard-fallocate.c b/tests/bugs/shard/shard-fallocate.c
+index 3a784d3..45b9ce0 100644
+--- a/tests/bugs/shard/shard-fallocate.c
++++ b/tests/bugs/shard/shard-fallocate.c
+@@ -97,7 +97,7 @@ main(int argc, char *argv[])
+ }
+
+ ret = glfs_fallocate(fd, opcode, offset, len);
+- if (ret <= 0) {
++ if (ret < 0) {
+ fprintf(stderr, "glfs_fallocate: returned %d\n", ret);
+ goto out;
+ }
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index fa3564a..3c4bcdc 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -2213,13 +2213,19 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
+ xlator_t *this = NULL;
+ inode_t *fsync_inode = NULL;
+ shard_priv_t *priv = NULL;
++ inode_t *base_inode = NULL;
+
+ this = THIS;
+ priv = this->private;
+- if (local->loc.inode)
++ if (local->loc.inode) {
+ gf_uuid_copy(gfid, local->loc.inode->gfid);
+- else
++ base_inode = local->loc.inode;
++ } else if (local->resolver_base_inode) {
++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++ base_inode = local->resolver_base_inode;
++ } else {
+ gf_uuid_copy(gfid, local->base_gfid);
++ }
+
+ shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
+
+@@ -2232,7 +2238,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
+ LOCK(&priv->lock);
+ {
+ fsync_inode = __shard_update_shards_inode_list(
+- linked_inode, this, local->loc.inode, block_num, gfid);
++ linked_inode, this, base_inode, block_num, gfid);
+ }
+ UNLOCK(&priv->lock);
+ if (fsync_inode)
+--
+1.8.3.1
+
diff --git a/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch b/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch
new file mode 100644
index 0000000..0156324
--- /dev/null
+++ b/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch
@@ -0,0 +1,161 @@
+From 4f0aa008ed393d7ce222c4ea4bd0fa6ed52b48f6 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Fri, 5 Apr 2019 12:29:23 +0530
+Subject: [PATCH 177/178] features/shard: Fix extra unref when inode object is
+ lru'd out and added back
+
+Long tale of double unref! But do read...
+
+In cases where a shard base inode is evicted from lru list while still
+being part of fsync list but added back soon before its unlink, there
+could be an extra inode_unref() leading to premature inode destruction
+leading to crash.
+
+One such specific case is the following -
+
+Consider features.shard-deletion-rate = features.shard-lru-limit = 2.
+This is an oversimplified example but explains the problem clearly.
+
+First, a file is FALLOCATE'd to a size so that number of shards under
+/.shard = 3 > lru-limit.
+Shards 1, 2 and 3 need to be resolved. 1 and 2 are resolved first.
+Resultant lru list:
+ 1 -----> 2
+refs on base inode - (1) + (1) = 2
+3 needs to be resolved. So 1 is lru'd out. Resultant lru list -
+ 2 -----> 3
+refs on base inode - (1) + (1) = 2
+
+Note that 1 is inode_unlink()d but not destroyed because there are
+non-zero refs on it since it is still participating in this ongoing
+FALLOCATE operation.
+
+FALLOCATE is sent on all participant shards. In the cbk, all of them are
+added to fync_list.
+Resulting fsync list -
+ 1 -----> 2 -----> 3 (order doesn't matter)
+refs on base inode - (1) + (1) + (1) = 3
+Total refs = 3 + 2 = 5
+
+Now an attempt is made to unlink this file. Background deletion is triggered.
+The first $shard-deletion-rate shards need to be unlinked in the first batch.
+So shards 1 and 2 need to be resolved. inode_resolve fails on 1 but succeeds
+on 2 and so it's moved to tail of list.
+lru list now -
+ 3 -----> 2
+No change in refs.
+
+shard 1 is looked up. In lookup_cbk, it's linked and added back to lru list
+at the cost of evicting shard 3.
+lru list now -
+ 2 -----> 1
+refs on base inode: (1) + (1) = 2
+fsync list now -
+ 1 -----> 2 (again order doesn't matter)
+refs on base inode - (1) + (1) = 2
+Total refs = 2 + 2 = 4
+After eviction, it is found 3 needs fsync. So fsync is wound, yet to be ack'd.
+So it is still inode_link()d.
+
+Now deletion of shards 1 and 2 completes. lru list is empty. Base inode unref'd and
+destroyed.
+In the next batched deletion, 3 needs to be deleted. It is inode_resolve()able.
+It is added back to lru list but base inode passed to __shard_update_shards_inode_list()
+is NULL since the inode is destroyed. But its ctx->inode still contains base inode ptr
+from first addition to lru list for no additional ref on it.
+lru list now -
+ 3
+refs on base inode - (0)
+Total refs on base inode = 0
+Unlink is sent on 3. It completes. Now since the ctx contains ptr to base_inode and the
+shard is part of lru list, base shard is unref'd leading to a crash.
+
+FIX:
+When a shard is re-added to the lru list, copy the base inode pointer as is into its inode ctx,
+even if it is NULL. This is needed to prevent double unrefs at the time of deleting it.
+
+Upstream patch:
+> BUG: 1696136
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22517
+> Change-Id: I99a44039da2e10a1aad183e84f644d63ca552462
+> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+
+Change-Id: I99a44039da2e10a1aad183e84f644d63ca552462
+Updates: bz#1694595
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172803
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ .../bug-1696136-lru-limit-equals-deletion-rate.t | 34 ++++++++++++++++++++++
+ xlators/features/shard/src/shard.c | 6 ++--
+ 2 files changed, 36 insertions(+), 4 deletions(-)
+ create mode 100644 tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
+
+diff --git a/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
+new file mode 100644
+index 0000000..3e4a65a
+--- /dev/null
++++ b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
+@@ -0,0 +1,34 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fallocate.rc
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 features.shard on
++TEST $CLI volume set $V0 features.shard-block-size 4MB
++TEST $CLI volume set $V0 features.shard-lru-limit 120
++TEST $CLI volume set $V0 features.shard-deletion-rate 120
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2
++
++# Create a file
++TEST touch $M0/file1
++
++# Fallocate a 512M file. This will make sure the number of participant shards is > lru-limit
++TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST $CLI volume stop $V0
++TEST $CLI volume delete $V0
++rm -f $(dirname $0)/bug-1696136
++
++cleanup
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 3c4bcdc..c1799ad 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -689,8 +689,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
+ ctx->block_num = block_num;
+ list_add_tail(&ctx->ilist, &priv->ilist_head);
+ priv->inode_count++;
+- if (base_inode)
+- ctx->base_inode = inode_ref(base_inode);
++ ctx->base_inode = inode_ref(base_inode);
+ } else {
+ /*If on the other hand there is no available slot for this inode
+ * in the list, delete the lru inode from the head of the list,
+@@ -765,8 +764,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
+ else
+ gf_uuid_copy(ctx->base_gfid, gfid);
+ ctx->block_num = block_num;
+- if (base_inode)
+- ctx->base_inode = inode_ref(base_inode);
++ ctx->base_inode = inode_ref(base_inode);
+ list_add_tail(&ctx->ilist, &priv->ilist_head);
+ }
+ } else {
+--
+1.8.3.1
+
diff --git a/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch b/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch
new file mode 100644
index 0000000..1ff767d
--- /dev/null
+++ b/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch
@@ -0,0 +1,287 @@
+From 307074330db6e9f14941dfbabbe6f299cf841533 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Mon, 10 Jun 2019 23:58:16 +0530
+Subject: [PATCH 178/178] Cluster/afr: Don't treat all bricks having metadata
+ pending as split-brain
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22831/
+
+Problem:
+We currently don't have a roll-back/undoing of post-ops if quorum is not met.
+Though the FOP is still unwound with failure, the xattrs remain on the disk.
+Due to these partial post-ops and partial heals (healing only when 2 bricks
+are up), we can end up in metadata split-brain purely from the afr xattrs
+point of view, i.e. each brick is blamed by at least one of the others for
+metadata. These scenarios are hit when there is frequent connect/disconnect
+of the client/shd to the bricks.
+
+Fix:
+Pick a source based on the xattr values. If 2 bricks blame one, the blamed
+one must be treated as sink. If there is no majority, all are sources. Once
+we pick a source, self-heal will then do the heal instead of erroring out
+due to split-brain.
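+
+For example (a hypothetical blame matrix, mirroring the test below): with
+bricks B0, B1 and B2, if B0 and B2 both blame B1 while B1 blames B0 and B2,
+the majority wins -- B0 and B2 become sources and B1 the sink. With cyclic
+blames (B0->B1, B1->B2, B2->B0) there is no majority, so all three are
+treated as sources and the heal proceeds instead of reporting split-brain.
+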
+This patch also adds a restriction that all the bricks must be up in order
+to perform metadata heal, to avoid any metadata loss.
+
+Removed the test case tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
+as it was doing metadata heal even when only 2 of 3 bricks were up.
+
+Change-Id: I02064ecb7d68d498f75a353af64f75249a633508
+fixes: bz#1715438
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172935
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bug-1468279-source-not-blaming-sinks.t | 64 ----------
+ .../bug-1717819-metadata-split-brain-detection.t | 130 +++++++++++++++++++++
+ xlators/cluster/afr/src/afr-self-heal-common.c | 4 +-
+ xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 +-
+ 4 files changed, 133 insertions(+), 67 deletions(-)
+ delete mode 100644 tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
+ create mode 100644 tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
+
+diff --git a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t b/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
+deleted file mode 100644
+index 054a4ad..0000000
+--- a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
++++ /dev/null
+@@ -1,64 +0,0 @@
+-#!/bin/bash
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+-TEST $CLI volume start $V0
+-TEST $CLI volume set $V0 cluster.self-heal-daemon off
+-TEST $CLI volume set $V0 cluster.metadata-self-heal off
+-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+-TEST touch $M0/file
+-
+-# Kill B1, create a pending metadata heal.
+-TEST kill_brick $V0 $H0 $B0/${V0}0
+-TEST setfattr -n user.xattr -v value1 $M0/file
+-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
+-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
+-
+-# Kill B2, heal from B3 to B1.
+-TEST $CLI volume start $V0 force
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+-TEST kill_brick $V0 $H0 $B0/${V0}1
+-TEST $CLI volume set $V0 cluster.self-heal-daemon on
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+-$CLI volume heal $V0
+-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata"
+-TEST $CLI volume set $V0 cluster.self-heal-daemon off
+-
+-# Create another pending metadata heal.
+-TEST setfattr -n user.xattr -v value2 $M0/file
+-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
+-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
+-
+-# Kill B1, heal from B3 to B2
+-TEST $CLI volume start $V0 force
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+-TEST kill_brick $V0 $H0 $B0/${V0}0
+-TEST $CLI volume set $V0 cluster.self-heal-daemon on
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+-$CLI volume heal $V0
+-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata"
+-TEST $CLI volume set $V0 cluster.self-heal-daemon off
+-
+-# ALL bricks up again.
+-TEST $CLI volume start $V0 force
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+-# B1 and B2 blame each other, B3 doesn't blame anyone.
+-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
+-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
+-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
+-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
+-TEST $CLI volume set $V0 cluster.self-heal-daemon on
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+-TEST $CLI volume heal $V0
+-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+-
+-cleanup;
+diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
+new file mode 100644
+index 0000000..94b8bf3
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
+@@ -0,0 +1,130 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++## Start and create a volume
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume start $V0;
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST $CLI volume heal $V0 disable
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++###############################################################################
++# Case of 2 bricks blaming the third and the third blaming the other two.
++
++TEST mkdir $M0/dir
++
++# B0 and B2 must blame B1
++TEST kill_brick $V0 $H0 $B0/$V0"1"
++TEST setfattr -n user.metadata -v 1 $M0/dir
++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir trusted.afr.$V0-client-1 metadata
++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir trusted.afr.$V0-client-1 metadata
++CLIENT_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $M0/dir)
++
++# B1 must blame B0 and B2
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir
++
++# Launch heal
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir)
++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir)
++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir)
++
++TEST [ "$CLIENT_XATTR" == "$B0_XATTR" ]
++TEST [ "$CLIENT_XATTR" == "$B1_XATTR" ]
++TEST [ "$CLIENT_XATTR" == "$B2_XATTR" ]
++TEST setfattr -x user.metadata $M0/dir
++
++###############################################################################
++# Case of each brick blaming the next one in a cyclic manner
++
++TEST $CLI volume heal $V0 disable
++TEST `echo "hello" >> $M0/dir/file`
++# Mark cyclic xattrs and modify metadata directly on the bricks.
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"2"/dir/file
++
++setfattr -n user.metadata -v 1 $B0/$V0"0"/dir/file
++setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
++setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
++
++# Add entry to xattrop dir to trigger index heal.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/file))
++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
++
++# Launch heal
++TEST $CLI volume heal $V0 enable
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
++
++TEST [ "$B0_XATTR" == "$B1_XATTR" ]
++TEST [ "$B0_XATTR" == "$B2_XATTR" ]
++TEST rm -f $M0/dir/file
++
++###############################################################################
++# Case of 2 bricks having quorum blaming and the other having only one blaming.
++
++TEST $CLI volume heal $V0 disable
++TEST `echo "hello" >> $M0/dir/file`
++# B0 and B2 must blame B1
++TEST kill_brick $V0 $H0 $B0/$V0"1"
++TEST setfattr -n user.metadata -v 1 $M0/dir/file
++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir/file trusted.afr.$V0-client-1 metadata
++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir/file trusted.afr.$V0-client-1 metadata
++
++# B1 must blame B0 and B2
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
++
++# B0 must blame B2
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
++
++# Modify the metadata directly on the bricks B1 & B2.
++setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
++setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
++
++# Launch heal
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++
++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
++
++TEST [ "$B0_XATTR" == "$B1_XATTR" ]
++TEST [ "$B0_XATTR" == "$B2_XATTR" ]
++
++###############################################################################
++
++cleanup
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 595bed4..5157e7d 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -1590,7 +1590,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ }
+ }
+
+- if (type == AFR_DATA_TRANSACTION) {
++ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) {
+ min_participants = priv->child_count;
+ } else {
+ min_participants = AFR_SH_MIN_PARTICIPANTS;
+@@ -1656,7 +1656,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ }
+ }
+
+- if (type == AFR_DATA_TRANSACTION)
++ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
+ afr_selfheal_post_op_failure_accounting(priv, accused, sources,
+ locked_on);
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
+index ba43341..ecfa791 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
+@@ -398,7 +398,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
+ ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+ data_lock);
+ {
+- if (ret < AFR_SH_MIN_PARTICIPANTS) {
++ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+--
+1.8.3.1
+
diff --git a/0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch b/0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch
new file mode 100644
index 0000000..c9df885
--- /dev/null
+++ b/0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch
@@ -0,0 +1,72 @@
+From fe1d641e4666f9a20f656b1799cf6e7b75af1279 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Tue, 11 Jun 2019 11:31:02 +0530
+Subject: [PATCH 179/192] tests: Fix split-brain-favorite-child-policy.t
+ failure
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22850/
+
+Problem:
+The test case is failing to heal the volume within $HEAL_TIMEOUT (at line
+195). This is happening because, as part of split-brain resolution, the file
+gets expunged from the sink and the new entry mark for that file will be done
+on the source bricks as part of impunging. Since the source bricks'
+shd-threads failed to get the heal-domain lock, they will wait for the
+heal-timeout of 10 minutes, which is greater than $HEAL_TIMEOUT.
+
+Fix:
+Set the cluster.heal-timeout to 5 seconds to trigger the heal so that
+one of the source bricks heals the file within the $HEAL_TIMEOUT.
+
+Change-Id: Iae5e819aa564ccde6639c51711f49d1152260c2d
+updates: bz#1704562
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172965
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/afr/split-brain-favorite-child-policy.t | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/basic/afr/split-brain-favorite-child-policy.t
+index 0e321c6..c268c12 100644
+--- a/tests/basic/afr/split-brain-favorite-child-policy.t
++++ b/tests/basic/afr/split-brain-favorite-child-policy.t
+@@ -16,6 +16,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
+ TEST $CLI volume set $V0 cluster.entry-self-heal off
+ TEST $CLI volume set $V0 cluster.data-self-heal off
+ TEST $CLI volume set $V0 cluster.metadata-self-heal off
++TEST $CLI volume set $V0 cluster.heal-timeout 5
+ TEST $CLI volume start $V0
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ TEST touch $M0/file
+@@ -38,7 +39,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+ TEST $CLI volume heal $V0
+
+-#file fill in split-brain
++#file still in split-brain
+ cat $M0/file > /dev/null
+ EXPECT "1" echo $?
+
+@@ -124,7 +125,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+ TEST $CLI volume heal $V0
+
+-#file fill in split-brain
++#file still in split-brain
+ cat $M0/file > /dev/null
+ EXPECT "1" echo $?
+
+@@ -179,7 +180,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+ TEST $CLI volume heal $V0
+
+-#file fill in split-brain
++#file still in split-brain
+ cat $M0/file > /dev/null
+ EXPECT "1" echo $?
+
+--
+1.8.3.1
+
diff --git a/0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch b/0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch
new file mode 100644
index 0000000..7bdc9f2
--- /dev/null
+++ b/0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch
@@ -0,0 +1,44 @@
+From 30b6d3452df0ef6621592a786f0c4347e09aa8f2 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Tue, 11 Jun 2019 12:00:25 +0530
+Subject: [PATCH 180/192] ganesha/scripts : Make generate-epoch.py python3
+ compatible
+
+This helps in building the RHEL8 glusterfs server build. We don't need
+to validate this fix as such, given that RHEL8 glusterfs server support at
+RHGS 3.5.0 is an internal milestone.
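+
+Illustrative of the incompatibility (not part of the patch): Python 3 bytes
+objects no longer support .encode('hex'), so binascii.hexlify() is the
+portable spelling on both versions:
+
+    >>> import binascii
+    >>> binascii.hexlify(b'\xab\xcd')   # 'abcd' on py2, b'abcd' on py3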
+
+Label : DOWNSTREAM ONLY
+
+Change-Id: I738219613680406de5c86a452446035c72a52bc4
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172974
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ extras/ganesha/scripts/generate-epoch.py | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py
+index 5db5e56..61ccda9 100755
+--- a/extras/ganesha/scripts/generate-epoch.py
++++ b/extras/ganesha/scripts/generate-epoch.py
+@@ -36,13 +36,13 @@ def epoch_uuid():
+
+ uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-",""))
+
+- epoch_uuid = int(uuid_bin.encode('hex'), 32) & 0xFFFF0000
++ epoch_uuid = int(binascii.hexlify(uuid_bin), 32) & 0xFFFF0000
+ return epoch_uuid
+
+ # Construct epoch as follows -
+ # first 32-bit contains the now() time
+ # rest 32-bit value contains the local glusterd node uuid
+ epoch = (epoch_now() | epoch_uuid())
+-print str(epoch)
++print(str(epoch))
+
+ exit(0)
+--
+1.8.3.1
+
diff --git a/0181-afr-log-before-attempting-data-self-heal.patch b/0181-afr-log-before-attempting-data-self-heal.patch
new file mode 100644
index 0000000..955441d
--- /dev/null
+++ b/0181-afr-log-before-attempting-data-self-heal.patch
@@ -0,0 +1,59 @@
+From 99f86ae7d45667d86b1b6f9f9540ec2889c6c4ce Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Wed, 8 May 2019 04:51:27 -0400
+Subject: [PATCH 181/192] afr: log before attempting data self-heal.
+
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22685/
+
+I was working on a blog about troubleshooting AFR issues and I wanted to copy
+the messages logged by self-heal for my blog. I then realized that AFR-v2 is not
+logging *before* attempting data heal while it logs it for metadata and entry
+heals.
+
+I [MSGID: 108026] [afr-self-heal-entry.c:883:afr_selfheal_entry_do]
+0-testvol-replicate-0: performing entry selfheal on
+d120c0cf-6e87-454b-965b-0d83a4c752bb
+I [MSGID: 108026] [afr-self-heal-common.c:1741:afr_log_selfheal]
+0-testvol-replicate-0: Completed entry selfheal on
+d120c0cf-6e87-454b-965b-0d83a4c752bb. sources=[0] 2 sinks=1
+I [MSGID: 108026] [afr-self-heal-common.c:1741:afr_log_selfheal]
+0-testvol-replicate-0: Completed data selfheal on
+a9b5f183-21eb-4fb3-a342-287d3a7dddc5. sources=[0] 2 sinks=1
+I [MSGID: 108026] [afr-self-heal-metadata.c:52:__afr_selfheal_metadata_do]
+0-testvol-replicate-0: performing metadata selfheal on
+a9b5f183-21eb-4fb3-a342-287d3a7dddc5
+I [MSGID: 108026] [afr-self-heal-common.c:1741:afr_log_selfheal]
+0-testvol-replicate-0: Completed metadata selfheal on
+a9b5f183-21eb-4fb3-a342-287d3a7dddc5. sources=[0] 2 sinks=1
+
+Adding it in this patch. Now there is a 'performing' and a corresponding
+'Completed' message for every type of heal.
+
+BUG: 1710701
+Change-Id: I91e29dd05af1c78dbc447d1a02dda32b03d64aef
+fixes: bz#1710701
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173108
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heal-data.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
+index 18a0334..cdff4a5 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-data.c
++++ b/xlators/cluster/afr/src/afr-self-heal-data.c
+@@ -324,6 +324,9 @@ afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+ call_frame_t *iter_frame = NULL;
+ unsigned char arbiter_sink_status = 0;
+
++ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
++ "performing data selfheal on %s", uuid_utoa(fd->inode->gfid));
++
+ priv = this->private;
+ if (priv->arbiter_count) {
+ arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
+--
+1.8.3.1
+
diff --git a/0182-geo-rep-fix-mountbroker-setup.patch b/0182-geo-rep-fix-mountbroker-setup.patch
new file mode 100644
index 0000000..64dd617
--- /dev/null
+++ b/0182-geo-rep-fix-mountbroker-setup.patch
@@ -0,0 +1,55 @@
+From 37df54966d5b7f01ad24d329bac5da1cf17f2abe Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Wed, 12 Jun 2019 16:10:52 +0530
+Subject: [PATCH 182/192] geo-rep : fix mountbroker setup
+
+Problem:
+
+Unable to set up the mountbroker root directory while creating a
+geo-replication session for a non-root user.
+
+Cause:
+With patch [1], which defines the max-port for glusterd, one extra space
+got added in the 'option max-port' field.
+[1]. https://review.gluster.org/#/c/glusterfs/+/21872/
+
+In geo-rep, splitting of the key-value pair from the vol file was done on
+the basis of a single space, so this additional space caused "ValueError:
+too many values to unpack".
+
+Solution:
+Use split() without an argument so that it treats consecutive whitespace as
+a single separator.
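+
+Illustrative (not part of the patch; the option line is made up): splitting
+on a literal " " yields empty fields for consecutive spaces, which breaks
+the two-value unpack, while a bare split() collapses the run:
+
+    >>> 'option  max-port  60999'.split(" ")[1:]
+    ['', 'max-port', '', '60999']
+    >>> 'option  max-port  60999'.split()[1:]
+    ['max-port', '60999']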
+
+Backport of:
+
+ >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22716/
+ >Fixes: bz#1709248
+ >Change-Id: Ia22070a43f95d66d84cb35487f23f9ee58b68c73
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1708043
+Change-Id: Ic6d535a6faad62ce185c6aa5adc18f5fdf8f27be
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173149
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/src/peer_mountbroker.py.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in
+index 54f95c4..ce33f97 100644
+--- a/geo-replication/src/peer_mountbroker.py.in
++++ b/geo-replication/src/peer_mountbroker.py.in
+@@ -47,7 +47,7 @@ class MountbrokerUserMgmt(object):
+ for line in f:
+ line = line.strip()
+ if line.startswith("option "):
+- key, value = line.split(" ")[1:]
++ key, value = line.split()[1:]
+ self._options[key] = value
+ if line.startswith("#"):
+ self.commented_lines.append(line)
+--
+1.8.3.1
+
diff --git a/0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch b/0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch
new file mode 100644
index 0000000..0cd6092
--- /dev/null
+++ b/0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch
@@ -0,0 +1,47 @@
+From fe9159ee42f0f67b01e6a495df8105ea0f66738d Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 30 May 2019 23:48:05 +0530
+Subject: [PATCH 183/192] glusterd/svc: Stop stale process using the
+ glusterd_proc_stop
+
+While restarting a glusterd process, when we have a stale pid we were
+doing a simple kill. Instead we can use glusterd_proc_stop, because it
+has more logging, plus a force kill in case there is any problem with
+kill signal handling.
+
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22791/
+
+>Change-Id: I4a2dadc210a7a65762dd714e809899510622b7ec
+>updates: bz#1710054
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I3327528d8ebf90bbb2221265a0cf059c9359f141
+BUG: 1720248
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172290
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index 6a3ca52..a6e662f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -488,9 +488,9 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+
+ if (!mux_proc) {
+ if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) {
+- /* stale pid file, unlink it. */
+- kill(pid, SIGTERM);
+- sys_unlink(svc->proc.pidfile);
++ /* stale pid file, stop and unlink it */
++ glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE);
++ glusterd_unlink_file(svc->proc.pidfile);
+ }
+ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD);
+ }
+--
+1.8.3.1
+
diff --git a/0184-tests-Add-gating-configuration-file-for-rhel8.patch b/0184-tests-Add-gating-configuration-file-for-rhel8.patch
new file mode 100644
index 0000000..9c385b5
--- /dev/null
+++ b/0184-tests-Add-gating-configuration-file-for-rhel8.patch
@@ -0,0 +1,38 @@
+From c6fdb740675999883a8a7942fbcd32f9889dc739 Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <sheggodu@redhat.com>
+Date: Thu, 13 Jun 2019 21:58:43 +0530
+Subject: [PATCH 184/192] tests: Add gating configuration file for rhel8
+
+Adding configuration files to enable automatic execution
+of gating CI for glusterfs.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1720318
+Change-Id: I8b42792d93d1eea455f86acd1576c20e12eed9f0
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173412
+Reviewed-by: Vivek Das <vdas@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ gating.yaml | 7 +++++++
+ 1 file changed, 7 insertions(+)
+ create mode 100644 gating.yaml
+
+diff --git a/gating.yaml b/gating.yaml
+new file mode 100644
+index 0000000..eeab6e9
+--- /dev/null
++++ b/gating.yaml
+@@ -0,0 +1,7 @@
++--- !Policy
++product_versions:
++ - rhel-8
++decision_context: osci_compose_gate_modules
++subject_type: redhat-module
++rules:
++ - !PassingTestCaseRule {test_case_name: manual.sst_rh_gluster_storage.glusterfs.bvt}
+--
+1.8.3.1
+
diff --git a/0185-gfapi-provide-an-api-for-setting-statedump-path.patch b/0185-gfapi-provide-an-api-for-setting-statedump-path.patch
new file mode 100644
index 0000000..8e93247
--- /dev/null
+++ b/0185-gfapi-provide-an-api-for-setting-statedump-path.patch
@@ -0,0 +1,174 @@
+From 462e3988936761317975fd811dd355b81328b60a Mon Sep 17 00:00:00 2001
+From: Amar Tumballi <amarts@redhat.com>
+Date: Thu, 14 Mar 2019 10:04:28 +0530
+Subject: [PATCH 185/192] gfapi: provide an api for setting statedump path
+
+Currently, for an application using gfapi to use glusterfs, when a
+statedump is taken, the /var/run/gluster dir is used to dump the info.
+
+There can be concerns as this directory may be owned by some other
+user, and hence taking the statedump may fail. Such applications
+should have an option to use a different path.
+
+This patch provides an API to do so.
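+
+Usage is a single call on the 'virtual mount' object, e.g. before
+glfs_init(), as in the glfsxmp.c hunk added by the follow-up
+version-number patch:
+
+    ret = glfs_set_statedump_path(fs2, "/tmp");
+    if (ret) {
+        fprintf(stderr, "glfs_set_statedump_path: %s\n", strerror(errno));
+    }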
+
+Upstream details:
+> Updates: bz#1689097
+> Change-Id: I8918e002bc823d83614c972b6c738baa04681b23
+> URL: https://review.gluster.org/22364
+
+BUG: 1720461
+Change-Id: I6079c8d799f35eaf76e62d259b51573bf561ba5b
+Signed-off-by: Amar Tumballi <amarts@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173451
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ api/src/gfapi.aliases | 2 ++
+ api/src/gfapi.map | 5 ++++
+ api/src/glfs.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ api/src/glfs.h | 28 +++++++++++++++++++++++
+ 4 files changed, 98 insertions(+)
+
+diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
+index 09c0fd8..8fdf734 100644
+--- a/api/src/gfapi.aliases
++++ b/api/src/gfapi.aliases
+@@ -195,3 +195,5 @@ _pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_6.0
+ _pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_6.0
+ _pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0
+ _pub_glfs_setattr _glfs_setattr$GFAPI_6.0
++
++_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_future
+diff --git a/api/src/gfapi.map b/api/src/gfapi.map
+index b97a614..cf118e8 100644
+--- a/api/src/gfapi.map
++++ b/api/src/gfapi.map
+@@ -271,3 +271,8 @@ GFAPI_PRIVATE_6.1 {
+ global:
+ glfs_setfspid;
+ } GFAPI_6.0;
++
++GFAPI_future {
++ global:
++ glfs_set_statedump_path;
++} GFAPI_PRIVATE_6.1;
+diff --git a/api/src/glfs.c b/api/src/glfs.c
+index f4a8e08..ba513e6 100644
+--- a/api/src/glfs.c
++++ b/api/src/glfs.c
+@@ -1212,6 +1212,7 @@ glusterfs_ctx_destroy(glusterfs_ctx_t *ctx)
+ glusterfs_graph_destroy_residual(trav_graph);
+ }
+
++ GF_FREE(ctx->statedump_path);
+ FREE(ctx);
+
+ return ret;
+@@ -1738,3 +1739,65 @@ invalid_fs:
+ }
+
+ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0);
++
++int
++pub_glfs_set_statedump_path(struct glfs *fs, const char *path)
++{
++ struct stat st;
++ int ret;
++ DECLARE_OLD_THIS;
++ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
++
++ if (!path) {
++ gf_log("glfs", GF_LOG_ERROR, "path is NULL");
++ errno = EINVAL;
++ goto err;
++ }
++
++ /* If path is not present OR, if it is directory AND has enough permission
++ * to create files, then proceed */
++ ret = sys_stat(path, &st);
++ if (ret && errno != ENOENT) {
++ gf_log("glfs", GF_LOG_ERROR, "%s: not a valid path (%s)", path,
++ strerror(errno));
++ errno = EINVAL;
++ goto err;
++ }
++
++ if (!ret) {
++ /* file is present, now check other things */
++ if (!S_ISDIR(st.st_mode)) {
++ gf_log("glfs", GF_LOG_ERROR, "%s: path is not directory", path);
++ errno = EINVAL;
++ goto err;
++ }
++ if (sys_access(path, W_OK | X_OK) < 0) {
++ gf_log("glfs", GF_LOG_ERROR,
++ "%s: path doesn't have write permission", path);
++ errno = EPERM;
++ goto err;
++ }
++ }
++
++ /* If set, it needs to be freed, so we don't have leak */
++ GF_FREE(fs->ctx->statedump_path);
++
++ fs->ctx->statedump_path = gf_strdup(path);
++ if (!fs->ctx->statedump_path) {
++ gf_log("glfs", GF_LOG_ERROR,
++ "%s: failed to set statedump path, no memory", path);
++ errno = ENOMEM;
++ goto err;
++ }
++
++ __GLFS_EXIT_FS;
++
++ return 0;
++err:
++ __GLFS_EXIT_FS;
++
++invalid_fs:
++ return -1;
++}
++
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, future);
+diff --git a/api/src/glfs.h b/api/src/glfs.h
+index 6714782..a6c12e1 100644
+--- a/api/src/glfs.h
++++ b/api/src/glfs.h
+@@ -1453,5 +1453,33 @@ int
+ glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+ int follow) __THROW GFAPI_PUBLIC(glfs_setattr, 6.0);
+
++/*
++ SYNOPSIS
++
++ glfs_set_statedump_path: Function to set statedump path.
++
++ DESCRIPTION
++
++ This function is used to set statedump directory
++
++ PARAMETERS
++
++ @fs: The 'virtual mount' object to be configured with the volume
++ specification file.
++
++ @path: statedump path. Should be a directory. But the API won't fail if the
++ directory doesn't exist yet, as one may create it later.
++
++ RETURN VALUES
++
++ 0 : Success.
++ -1 : Failure. @errno will be set with the type of failure.
++
++ */
++
++int
++glfs_set_statedump_path(struct glfs *fs, const char *path) __THROW
++ GFAPI_PUBLIC(glfs_set_statedump_path, future);
++
+ __END_DECLS
+ #endif /* !_GLFS_H */
+--
+1.8.3.1
+
diff --git a/0186-cli-Remove-brick-warning-seems-unnecessary.patch b/0186-cli-Remove-brick-warning-seems-unnecessary.patch
new file mode 100644
index 0000000..e81b015
--- /dev/null
+++ b/0186-cli-Remove-brick-warning-seems-unnecessary.patch
@@ -0,0 +1,57 @@
+From be925e84edcecd879e953bdb68c10f98825dba53 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Mon, 3 Jun 2019 18:05:24 +0530
+Subject: [PATCH 186/192] cli: Remove-brick warning seems unnecessary
+
+As the force-migration option is disabled by default,
+the warning seems unnecessary.
+
+Rephrased the warning to read more clearly.
+
+>fixes: bz#1712668
+>Change-Id: Ia18c3c5e7b3fec808fce2194ca0504a837708822
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/22805/
+
+Bug: 1708183
+Change-Id: Ia18c3c5e7b3fec808fce2194ca0504a837708822
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173447
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-volume.c | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 564aef7..a42e663 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -2090,14 +2090,15 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
+ " on the volume.\nDo you want to continue?";
+ } else if (command == GF_OP_CMD_START) {
+ question =
+- "Running remove-brick with cluster.force-migration"
+- " enabled can result in data corruption. It is safer"
+- " to disable this option so that files that receive "
+- "writes during migration are not migrated.\nFiles "
+- "that are not migrated can then be manually copied "
+- "after the remove-brick commit operation.\nDo you "
+- "want to continue with your current "
+- "cluster.force-migration settings?";
++ "It is recommended that remove-brick be run with"
++ " cluster.force-migration option disabled to prevent"
++ " possible data corruption. Doing so will ensure that"
++ " files that receive writes during migration will not"
++ " be migrated and will need to be manually copied"
++ " after the remove-brick commit operation. Please"
++ " check the value of the option and update accordingly."
++ " \nDo you want to continue with your current"
++ " cluster.force-migration settings?";
+ }
+
+ if (!brick_count) {
+--
+1.8.3.1
+
diff --git a/0187-gfapi-statedump_path-add-proper-version-number.patch b/0187-gfapi-statedump_path-add-proper-version-number.patch
new file mode 100644
index 0000000..f1df9c9
--- /dev/null
+++ b/0187-gfapi-statedump_path-add-proper-version-number.patch
@@ -0,0 +1,98 @@
+From a65982755b31fb548ff7a997ee754360a516da94 Mon Sep 17 00:00:00 2001
+From: Amar Tumballi <amarts@redhat.com>
+Date: Fri, 14 Jun 2019 13:58:25 +0530
+Subject: [PATCH 187/192] gfapi: statedump_path() add proper version number
+
+An API should have a proper version number; the 'future' version
+number is just a placeholder, and one shouldn't be using it in
+release versions.
+
+With the previous backport of the patch, the version remained the same
+as that of the 'master' branch, i.e. 'future', but as it is an API
+it needed a fixed version number. This patch corrects that.
+
+Label: DOWNSTREAM_ONLY
+
+> In upstream, this is corrected by a backport to the stable version, 6.4
+> URL: https://review.gluster.org/22864
+
+BUG: 1720461
+Change-Id: I939850689d47d4f240c9d43f6be1a11de29c4760
+Signed-off-by: Amar Tumballi <amarts@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173475
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ api/examples/glfsxmp.c | 5 +++++
+ api/src/gfapi.aliases | 2 +-
+ api/src/gfapi.map | 2 +-
+ api/src/glfs.c | 2 +-
+ api/src/glfs.h | 2 +-
+ 5 files changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/api/examples/glfsxmp.c b/api/examples/glfsxmp.c
+index 9d96eea..33f44df 100644
+--- a/api/examples/glfsxmp.c
++++ b/api/examples/glfsxmp.c
+@@ -1573,6 +1573,11 @@ main(int argc, char *argv[])
+
+ ret = glfs_set_logging(fs2, "/dev/stderr", 7);
+
++ ret = glfs_set_statedump_path(fs2, "/tmp");
++ if (ret) {
++ fprintf(stderr, "glfs_set_statedump_path: %s\n", strerror(errno));
++ }
++
+ ret = glfs_init(fs2);
+
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
+index 8fdf734..692ae13 100644
+--- a/api/src/gfapi.aliases
++++ b/api/src/gfapi.aliases
+@@ -196,4 +196,4 @@ _pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_6.0
+ _pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0
+ _pub_glfs_setattr _glfs_setattr$GFAPI_6.0
+
+-_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_future
++_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_6.4
+diff --git a/api/src/gfapi.map b/api/src/gfapi.map
+index cf118e8..df65837 100644
+--- a/api/src/gfapi.map
++++ b/api/src/gfapi.map
+@@ -272,7 +272,7 @@ GFAPI_PRIVATE_6.1 {
+ glfs_setfspid;
+ } GFAPI_6.0;
+
+-GFAPI_future {
++GFAPI_6.4 {
+ global:
+ glfs_set_statedump_path;
+ } GFAPI_PRIVATE_6.1;
+diff --git a/api/src/glfs.c b/api/src/glfs.c
+index ba513e6..6bbb620 100644
+--- a/api/src/glfs.c
++++ b/api/src/glfs.c
+@@ -1800,4 +1800,4 @@ invalid_fs:
+ return -1;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, future);
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 6.4);
+diff --git a/api/src/glfs.h b/api/src/glfs.h
+index a6c12e1..08b6ca0 100644
+--- a/api/src/glfs.h
++++ b/api/src/glfs.h
+@@ -1479,7 +1479,7 @@ glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+
+ int
+ glfs_set_statedump_path(struct glfs *fs, const char *path) __THROW
+- GFAPI_PUBLIC(glfs_set_statedump_path, future);
++ GFAPI_PUBLIC(glfs_set_statedump_path, 6.4);
+
+ __END_DECLS
+ #endif /* !_GLFS_H */
+--
+1.8.3.1
+
diff --git a/0188-features-shard-Fix-integer-overflow-in-block-count-a.patch b/0188-features-shard-Fix-integer-overflow-in-block-count-a.patch
new file mode 100644
index 0000000..2360ceb
--- /dev/null
+++ b/0188-features-shard-Fix-integer-overflow-in-block-count-a.patch
@@ -0,0 +1,38 @@
+From 7221352670a750e35268573dba36c139a5041b14 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Fri, 3 May 2019 10:50:40 +0530
+Subject: [PATCH 188/192] features/shard: Fix integer overflow in block count
+ accounting
+
+... by holding delta_blocks in 64-bit int as opposed to 32-bit int.
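+
+(Illustrative arithmetic: ia_blocks counts 512-byte blocks, so a 32-bit
+signed delta_blocks overflows once a single accounting delta exceeds
+INT_MAX blocks, i.e. 2147483647 * 512 bytes, roughly 1 TiB -- plausible
+for large sharded VM images.)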
+
+> Upstream: https://review.gluster.org/22655
+> BUG: 1705884
+> Change-Id: I2c1ddab17457f45e27428575ad16fa678fd6c0eb
+
+Change-Id: I2c1ddab17457f45e27428575ad16fa678fd6c0eb
+updates: bz#1668001
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173476
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/features/shard/src/shard.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
+index 570fe46..cd6a663 100644
+--- a/xlators/features/shard/src/shard.h
++++ b/xlators/features/shard/src/shard.h
+@@ -275,7 +275,7 @@ typedef struct shard_local {
+ size_t req_size;
+ size_t readdir_size;
+ int64_t delta_size;
+- int delta_blocks;
++ int64_t delta_blocks;
+ loc_t loc;
+ loc_t dot_shard_loc;
+ loc_t dot_shard_rm_loc;
+--
+1.8.3.1
+
diff --git a/0189-features-shard-Fix-block-count-accounting-upon-trunc.patch b/0189-features-shard-Fix-block-count-accounting-upon-trunc.patch
new file mode 100644
index 0000000..bc07fad
--- /dev/null
+++ b/0189-features-shard-Fix-block-count-accounting-upon-trunc.patch
@@ -0,0 +1,323 @@
+From 369c5772a722b6e346ec8b41f992112785366778 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Wed, 8 May 2019 13:00:51 +0530
+Subject: [PATCH 189/192] features/shard: Fix block-count accounting upon
+ truncate to lower size
+
+ > Upstream: https://review.gluster.org/22681
+ > BUG: 1705884
+ > Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
+
+The way delta_blocks is computed in shard is incorrect when a file
+is truncated to a lower size. The accounting only considers the change
+in size of the last of the truncated shards.
+
+FIX:
+
+Get the block-count of each shard just before an unlink at posix in
+xdata. Their summation plus the change in size of last shard
+(from an actual truncate) is used to compute delta_blocks which is
+used in the xattrop for size update.
+
+Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
+updates: bz#1668001
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173477
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/glusterfs/glusterfs.h | 2 +
+ tests/bugs/shard/bug-1705884.t | 32 +++++++++++++++
+ xlators/features/shard/src/shard.c | 60 +++++++++++++++++++++++------
+ xlators/features/shard/src/shard.h | 2 +-
+ xlators/storage/posix/src/posix-entry-ops.c | 9 +++++
+ 5 files changed, 92 insertions(+), 13 deletions(-)
+ create mode 100644 tests/bugs/shard/bug-1705884.t
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 516b497..9ec2365 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -328,6 +328,8 @@ enum gf_internal_fop_indicator {
+ #define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
+ #define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
+
++#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
++
+ #define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
+
+ #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
+diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
+new file mode 100644
+index 0000000..f6e5037
+--- /dev/null
++++ b/tests/bugs/shard/bug-1705884.t
+@@ -0,0 +1,32 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fallocate.rc
++
++cleanup
++
++require_fallocate -l 1m $M0/file
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 features.shard on
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++TEST fallocate -l 200M $M0/foo
++EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo
++TEST truncate -s 0 $M0/foo
++EXPECT "0" stat -c %b $M0/foo
++TEST fallocate -l 100M $M0/foo
++EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST $CLI volume stop $V0
++TEST $CLI volume delete $V0
++
++cleanup
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index c1799ad..b248767 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -1135,6 +1135,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ {
+ int ret = -1;
+ int64_t *size_attr = NULL;
++ int64_t delta_blocks = 0;
+ inode_t *inode = NULL;
+ shard_local_t *local = NULL;
+ dict_t *xattr_req = NULL;
+@@ -1156,13 +1157,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
+
+ /* If both size and block count have not changed, then skip the xattrop.
+ */
+- if ((local->delta_size + local->hole_size == 0) &&
+- (local->delta_blocks == 0)) {
++ delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
++ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
+ goto out;
+ }
+
+ ret = shard_set_size_attrs(local->delta_size + local->hole_size,
+- local->delta_blocks, &size_attr);
++ delta_blocks, &size_attr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
+ "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
+@@ -1947,6 +1948,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ dict_t *xdata)
+ {
+ inode_t *inode = NULL;
++ int64_t delta_blocks = 0;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+@@ -1967,14 +1969,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ }
+
+ local->postbuf.ia_size = local->offset;
+- local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
+ /* Let the delta be negative. We want xattrop to do subtraction */
+ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+- local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
++ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
++ postbuf->ia_blocks - prebuf->ia_blocks);
++ GF_ASSERT(delta_blocks <= 0);
++ local->postbuf.ia_blocks += delta_blocks;
+ local->hole_size = 0;
+
+- shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES);
+-
++ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
+ shard_update_file_size(frame, this, NULL, &local->loc,
+ shard_post_update_size_truncate_handler);
+ return 0;
+@@ -2034,8 +2037,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+ {
++ int ret = 0;
+ int call_count = 0;
+ int shard_block_num = (long)cookie;
++ uint64_t block_count = 0;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+@@ -2045,6 +2050,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ local->op_errno = op_errno;
+ goto done;
+ }
++ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
++ if (!ret) {
++ GF_ATOMIC_SUB(local->delta_blocks, block_count);
++ } else {
++ /* dict_get failed possibly due to a heterogeneous cluster? */
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to get key %s from dict during truncate of gfid %s",
++ GF_GET_FILE_BLOCK_COUNT,
++ uuid_utoa(local->resolver_base_inode->gfid));
++ }
+
+ shard_unlink_block_inode(local, shard_block_num);
+ done:
+@@ -2074,6 +2089,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+ gf_boolean_t wind_failed = _gf_false;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
++ dict_t *xdata_req = NULL;
+
+ local = frame->local;
+ priv = this->private;
+@@ -2101,7 +2117,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+ local->postbuf.ia_size = local->offset;
+ local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+- local->delta_blocks = 0;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
+ local->hole_size = 0;
+ shard_update_file_size(frame, this, local->fd, &local->loc,
+ shard_post_update_size_truncate_handler);
+@@ -2110,6 +2126,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+
+ local->call_count = call_count;
+ i = 1;
++ xdata_req = dict_new();
++ if (!xdata_req) {
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
++ }
++ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set key %s into dict during truncate of %s",
++ GF_GET_FILE_BLOCK_COUNT,
++ uuid_utoa(local->resolver_base_inode->gfid));
++ dict_unref(xdata_req);
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
++ }
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+ while (cur_block <= last_block) {
+@@ -2148,7 +2179,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+
+ STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
+ (void *)(long)cur_block, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL);
++ FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
+ loc_wipe(&loc);
+ next:
+ i++;
+@@ -2156,6 +2187,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+ if (!--call_count)
+ break;
+ }
++ dict_unref(xdata_req);
+ return 0;
+ }
+
+@@ -2608,7 +2640,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
+ */
+ local->hole_size = local->offset - local->prebuf.ia_size;
+ local->delta_size = 0;
+- local->delta_blocks = 0;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
+ local->postbuf.ia_size = local->offset;
+ tmp_stbuf.ia_size = local->offset;
+ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+@@ -2624,7 +2656,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
+ */
+ local->hole_size = 0;
+ local->delta_size = (local->offset - local->prebuf.ia_size);
+- local->delta_blocks = 0;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
+ tmp_stbuf.ia_size = local->offset;
+ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+ SHARD_INODE_WRITE_MASK);
+@@ -2680,6 +2712,7 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ if (!local->xattr_req)
+ goto err;
+ local->resolver_base_inode = loc->inode;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
+
+ shard_lookup_base_file(frame, this, &local->loc,
+ shard_post_lookup_truncate_handler);
+@@ -2735,6 +2768,7 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ local->resolver_base_inode = fd->inode;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
+
+ shard_lookup_base_file(frame, this, &local->loc,
+ shard_post_lookup_truncate_handler);
+@@ -5295,7 +5329,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
+ local->op_errno = op_errno;
+ } else {
+ local->written_size += op_ret;
+- local->delta_blocks += (post->ia_blocks - pre->ia_blocks);
++ GF_ATOMIC_ADD(local->delta_blocks,
++ post->ia_blocks - pre->ia_blocks);
+ local->delta_size += (post->ia_size - pre->ia_size);
+ shard_inode_ctx_set(local->fd->inode, this, post, 0,
+ SHARD_MASK_TIMES);
+@@ -6599,6 +6634,7 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
+ local->fd = fd_ref(fd);
+ local->block_size = block_size;
+ local->resolver_base_inode = local->fd->inode;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
+
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
+index cd6a663..04abd62 100644
+--- a/xlators/features/shard/src/shard.h
++++ b/xlators/features/shard/src/shard.h
+@@ -275,7 +275,7 @@ typedef struct shard_local {
+ size_t req_size;
+ size_t readdir_size;
+ int64_t delta_size;
+- int64_t delta_blocks;
++ gf_atomic_t delta_blocks;
+ loc_t loc;
+ loc_t dot_shard_loc;
+ loc_t dot_shard_rm_loc;
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index b24a052..34ee2b8 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -1071,6 +1071,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ char *real_path = NULL;
+ char *par_path = NULL;
+ int32_t fd = -1;
++ int ret = -1;
+ struct iatt stbuf = {
+ 0,
+ };
+@@ -1235,6 +1236,14 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ goto out;
+ }
+
++ if (xdata && dict_get(xdata, GF_GET_FILE_BLOCK_COUNT)) {
++ ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT,
++ stbuf.ia_blocks);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
++ "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT);
++ }
++
+ if (xdata && dict_get(xdata, GET_LINK_COUNT))
+ get_link_count = _gf_true;
+ op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf,
+--
+1.8.3.1
+
diff --git a/0190-Build-removing-the-hardcoded-usage-of-python3.patch b/0190-Build-removing-the-hardcoded-usage-of-python3.patch
new file mode 100644
index 0000000..25f01a8
--- /dev/null
+++ b/0190-Build-removing-the-hardcoded-usage-of-python3.patch
@@ -0,0 +1,49 @@
+From c7aae487213e464b2ee7a785d752bd8264ceb371 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Thu, 13 Jun 2019 20:12:14 +0530
+Subject: [PATCH 190/192] Build: removing the hardcoded usage of python3
+
+Label : DOWNSTREAM ONLY
+
+Problem: RHEL8 needed python3, so python3 was hardcoded to be used
+in the gluster build. python2 was still being used by RHEL7 machines,
+and when the shebang was redirected to use python3, glusterfind failed.
+It was not working from the 6.0-5 downstream build onwards.
+
+Fix: revert to the old mechanism where we check the python version
+and redirect the python shebangs accordingly.
+
+Change-Id: I8dc6c9185b2740e20e4c4d734cc1a9e335e9c449
+fixes: bz#1719640
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173392
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 9c7d7a7..0127e8e 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -722,10 +722,12 @@ GlusterFS Events
+
+ %prep
+ %setup -q -n %{name}-%{version}%{?prereltag}
++%if ( ! %{_usepython3} )
+ echo "fixing python shebangs..."
+-for i in `find . -type f -exec bash -c "if file {} | grep 'Python script, ASCII text executable' >/dev/null; then echo {}; fi" ';'`; do
+- sed -i -e 's|^#!/usr/bin/python.*|#!%{__python3}|' -e 's|^#!/usr/bin/env python.*|#!%{__python3}|' $i
++for f in api events extras geo-replication libglusterfs tools xlators; do
++find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \;
+ done
++%endif
+
+ %build
+
+--
+1.8.3.1
+
diff --git a/0191-Build-Update-python-shebangs-based-on-version.patch b/0191-Build-Update-python-shebangs-based-on-version.patch
new file mode 100644
index 0000000..0d88b9d
--- /dev/null
+++ b/0191-Build-Update-python-shebangs-based-on-version.patch
@@ -0,0 +1,49 @@
+From 4f471c25dad4d7d51443005108ec53c2d390daf5 Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <sheggodu@redhat.com>
+Date: Fri, 14 Jun 2019 20:20:26 +0530
+Subject: [PATCH 191/192] Build: Update python shebangs based on version
+
+RHEL 7 uses python2 whereas RHEL 8 uses python3.
+Updating the spec file to use the appropriate shebangs
+to avoid script failures.
+
+Label : DOWNSTREAM ONLY
+
+BUG: 1719640
+Change-Id: I075764b6a00ba53a305451e3fc58584facd75a78
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173518
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Hari Gowtham Gopal <hgowtham@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 0127e8e..29e4a37 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -722,11 +722,15 @@ GlusterFS Events
+
+ %prep
+ %setup -q -n %{name}-%{version}%{?prereltag}
+-%if ( ! %{_usepython3} )
+ echo "fixing python shebangs..."
+-for f in api events extras geo-replication libglusterfs tools xlators; do
+-find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \;
+-done
++%if ( %{_usepython3} )
++ for i in `find . -type f -exec bash -c "if file {} | grep 'Python script, ASCII text executable' >/dev/null; then echo {}; fi" ';'`; do
++ sed -i -e 's|^#!/usr/bin/python.*|#!%{__python3}|' -e 's|^#!/usr/bin/env python.*|#!%{__python3}|' $i
++ done
++%else
++ for f in api events extras geo-replication libglusterfs tools xlators; do
++ find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \;
++ done
+ %endif
+
+ %build
+--
+1.8.3.1
+
diff --git a/0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch b/0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch
new file mode 100644
index 0000000..e1b2ada
--- /dev/null
+++ b/0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch
@@ -0,0 +1,114 @@
+From d2319a4746ba07ada5b3a20462ec2900e1c03c5a Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Thu, 13 Jun 2019 19:56:32 +0530
+Subject: [PATCH 192/192] build: Ensure gluster-cli package is built as part of
+ client build
+
+Till RHGS 3.4.x the RHGS client was shipping the gluster-cli rpm. With
+RHGS 3.5, which is a rebase of glusterfs 6.0, gluster-cli is built only
+for the server. The gluster CLI offers remote execution through the
+--remote-host option, for which the CLI and glusterd need not be
+co-located, so shipping the CLI as part of the client package is
+mandatory. Without this change, client upgrades across RHEL minor
+versions are also broken.
+
+>Fixes: bz#1720615
+>Change-Id: I5071f3255ff615113b36b08cd5326be6e37d907d
+>Signed-off-by: Niels de Vos <ndevos@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22868/
+
+BUG: 1720079
+Change-Id: I11ec3e2b4d98b3e701147c60ca797d54570d598e
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173388
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ cli/src/Makefile.am | 2 --
+ doc/Makefile.am | 4 ++--
+ glusterfs.spec.in | 9 +++------
+ 3 files changed, 5 insertions(+), 10 deletions(-)
+
+diff --git a/cli/src/Makefile.am b/cli/src/Makefile.am
+index 6be070f..3e7511f 100644
+--- a/cli/src/Makefile.am
++++ b/cli/src/Makefile.am
+@@ -1,6 +1,4 @@
+-if WITH_SERVER
+ sbin_PROGRAMS = gluster
+-endif
+
+ gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c cli-cmd-global.c \
+ cli-cmd-volume.c cli-cmd-peer.c cli-rpc-ops.c cli-cmd-parser.c\
+diff --git a/doc/Makefile.am b/doc/Makefile.am
+index 7c04d74..9904767 100644
+--- a/doc/Makefile.am
++++ b/doc/Makefile.am
+@@ -1,9 +1,9 @@
+ EXTRA_DIST = glusterfs.8 mount.glusterfs.8 gluster.8 \
+ glusterd.8 glusterfsd.8
+
+-man8_MANS = glusterfs.8 mount.glusterfs.8
++man8_MANS = gluster.8 glusterfs.8 mount.glusterfs.8
+ if WITH_SERVER
+-man8_MANS += gluster.8 glusterd.8 glusterfsd.8
++man8_MANS += glusterd.8 glusterfsd.8
+ endif
+
+ CLEANFILES =
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 29e4a37..c505cd9 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -353,7 +353,6 @@ is in user space and easily manageable.
+
+ This package provides the api include files.
+
+-%if ( 0%{!?_without_server:1} )
+ %package cli
+ Summary: GlusterFS CLI
+ Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+@@ -368,7 +367,6 @@ called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+ is in user space and easily manageable.
+
+ This package provides the GlusterFS CLI application and its man page
+-%endif
+
+ %package cloudsync-plugins
+ Summary: Cloudsync Plugins
+@@ -891,10 +889,8 @@ touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+ find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs
+
+ ## Install bash completion for cli
+-%if ( 0%{!?_without_server:1} )
+ install -p -m 0744 -D extras/command-completion/gluster.bash \
+ %{buildroot}%{_sysconfdir}/bash_completion.d/gluster
+-%endif
+
+ %if ( 0%{!?_without_server:1} )
+ echo "RHGS 3.5" > %{buildroot}%{_datadir}/glusterfs/release
+@@ -1193,12 +1189,10 @@ exit 0
+ %dir %{_includedir}/glusterfs/api
+ %{_includedir}/glusterfs/api/*
+
+-%if ( 0%{!?_without_server:1} )
+ %files cli
+ %{_sbindir}/gluster
+ %{_mandir}/man8/gluster.8*
+ %{_sysconfdir}/bash_completion.d/gluster
+-%endif
+
+ %files cloudsync-plugins
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins
+@@ -1938,6 +1932,9 @@ fi
+ %endif
+
+ %changelog
++* Fri Jun 14 2019 Atin Mukherjee <amukherj@redhat.com>
++- Ensure gluster-cli package is part of client build (#1720079)
++
+ * Mon May 27 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+ - Change the dependency to 2.7.3 on nfs-ganesha for glusterfs-ganesha (#1714078)
+
+--
+1.8.3.1
+
diff --git a/0193-spec-fixed-python-dependency-for-rhel6.patch b/0193-spec-fixed-python-dependency-for-rhel6.patch
new file mode 100644
index 0000000..6b00b69
--- /dev/null
+++ b/0193-spec-fixed-python-dependency-for-rhel6.patch
@@ -0,0 +1,42 @@
+From 58bc818f19cbc8e4dd97097dc3e4ec7af8fa8d4a Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Tue, 7 May 2019 05:35:11 +0000
+Subject: [PATCH 193/221] spec: fixed python dependency for rhel6
+
+Installing redhat-storage-server was failing with a python dependency
+error for the glusterfs-geo-replication package. This patch
+conditionally sets the python-prettytable dependency for RHEL 6 and
+fixes the problem.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1704207
+
+Change-Id: Ie3b079fd1ccfa6fd2cbf5b08b7a70bd03f090e01
+fixes: bz#1704207
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169555
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfs.spec.in | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index c505cd9..1150101 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -500,7 +500,11 @@ Summary: GlusterFS Geo-replication
+ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Requires: %{name}-server%{?_isa} = %{version}-%{release}
+ Requires: python%{_pythonver}
++%if ( 0%{?rhel} && 0%{?rhel} < 7 )
++Requires: python-prettytable
++%else
+ Requires: python%{_pythonver}-prettytable
++%endif
+ Requires: python%{_pythonver}-gluster = %{version}-%{release}
+
+ Requires: rsync
+--
+1.8.3.1
+
diff --git a/0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch b/0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch
new file mode 100644
index 0000000..7b8371f
--- /dev/null
+++ b/0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch
@@ -0,0 +1,144 @@
+From 783f53b0b09845cd6c38f145eac685a094767ce0 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 27 May 2019 11:43:26 +0530
+Subject: [PATCH 194/221] stack: Make sure to have unique call-stacks in all
+ cases
+
+At the moment a new stack doesn't populate frame->root->unique in all cases.
+This makes it difficult to debug hung frames by examining successive state
+dumps. Fuse and server xlators populate it whenever they can, but other
+xlators can't assign 'unique' when they need to create a new frame/stack,
+because they don't know which 'unique' values the fuse/server xlators have
+already used. What we need is for 'unique' to be correct: if a stack with
+the same 'unique' is present in successive statedumps, the same operation
+is still in progress. This makes the 'finding hung frames' part of
+debugging hung frames easier.
+
+ >upstream: bz#1714098
+ >Upstream-patch: https://review.gluster.org/c/glusterfs/+/22773
+fixes: bz#1716760
+Change-Id: I3e9a8f6b4111e260106c48a2ac3a41ef29361b9e
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172304
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ libglusterfs/src/stack.c | 2 ++
+ xlators/features/quota/src/quotad-helpers.c | 3 ---
+ xlators/mount/fuse/src/fuse-bridge.c | 15 ++++++++-------
+ xlators/mount/fuse/src/fuse-helpers.c | 1 -
+ xlators/protocol/server/src/server-helpers.c | 3 ---
+ 5 files changed, 10 insertions(+), 14 deletions(-)
+
+diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c
+index 82b3577..371f60c 100644
+--- a/libglusterfs/src/stack.c
++++ b/libglusterfs/src/stack.c
+@@ -17,6 +17,7 @@ create_frame(xlator_t *xl, call_pool_t *pool)
+ {
+ call_stack_t *stack = NULL;
+ call_frame_t *frame = NULL;
++ static uint64_t unique = 0;
+
+ if (!xl || !pool) {
+ return NULL;
+@@ -52,6 +53,7 @@ create_frame(xlator_t *xl, call_pool_t *pool)
+ {
+ list_add(&stack->all_frames, &pool->all_frames);
+ pool->cnt++;
++ stack->unique = unique++;
+ }
+ UNLOCK(&pool->lock);
+ GF_ATOMIC_INC(pool->total_count);
+diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
+index be8f908..d9f0351 100644
+--- a/xlators/features/quota/src/quotad-helpers.c
++++ b/xlators/features/quota/src/quotad-helpers.c
+@@ -73,7 +73,6 @@ quotad_aggregator_alloc_frame(rpcsvc_request_t *req)
+ goto out;
+
+ frame->root->state = state;
+- frame->root->unique = 0;
+
+ frame->this = this;
+ out:
+@@ -93,8 +92,6 @@ quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req)
+
+ frame->root->op = req->procnum;
+
+- frame->root->unique = req->xid;
+-
+ frame->root->uid = req->uid;
+ frame->root->gid = req->gid;
+ frame->root->pid = req->pid;
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index c3945d7..c05866b 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -3270,11 +3270,11 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg,
+
+ priv = this->private;
+
+- fuse_log_eh(this, "RELEASE(): %" PRIu64 ":, fd: %p, gfid: %s", finh->unique,
+- fd, uuid_utoa(fd->inode->gfid));
++ fuse_log_eh(this, "RELEASE(): finh->unique: %" PRIu64 ":, fd: %p, gfid: %s",
++ finh->unique, fd, uuid_utoa(fd->inode->gfid));
+
+- gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": RELEASE %p",
+- finh->unique, state->fd);
++ gf_log("glusterfs-fuse", GF_LOG_TRACE,
++ "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd);
+
+ fuse_fd_ctx_destroy(this, state->fd);
+ fd_unref(fd);
+@@ -3759,11 +3759,12 @@ fuse_releasedir(xlator_t *this, fuse_in_header_t *finh, void *msg,
+
+ priv = this->private;
+
+- fuse_log_eh(this, "RELEASEDIR (): %" PRIu64 ": fd: %p, gfid: %s",
++ fuse_log_eh(this,
++ "RELEASEDIR (): finh->unique: %" PRIu64 ": fd: %p, gfid: %s",
+ finh->unique, state->fd, uuid_utoa(state->fd->inode->gfid));
+
+- gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": RELEASEDIR %p",
+- finh->unique, state->fd);
++ gf_log("glusterfs-fuse", GF_LOG_TRACE,
++ "finh->unique: %" PRIu64 ": RELEASEDIR %p", finh->unique, state->fd);
+
+ fuse_fd_ctx_destroy(this, state->fd);
+ fd_unref(state->fd);
+diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
+index cf4f8e1..5bfc40c 100644
+--- a/xlators/mount/fuse/src/fuse-helpers.c
++++ b/xlators/mount/fuse/src/fuse-helpers.c
+@@ -358,7 +358,6 @@ get_call_frame_for_req(fuse_state_t *state)
+ frame->root->uid = finh->uid;
+ frame->root->gid = finh->gid;
+ frame->root->pid = finh->pid;
+- frame->root->unique = finh->unique;
+ set_lk_owner_from_uint64(&frame->root->lk_owner, state->lk_owner);
+ }
+
+diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
+index 1a34239..e74a24d 100644
+--- a/xlators/protocol/server/src/server-helpers.c
++++ b/xlators/protocol/server/src/server-helpers.c
+@@ -459,7 +459,6 @@ server_alloc_frame(rpcsvc_request_t *req)
+
+ frame->root->client = client;
+ frame->root->state = state; /* which socket */
+- frame->root->unique = 0; /* which call */
+
+ frame->this = client->this;
+ out:
+@@ -487,8 +486,6 @@ get_frame_from_request(rpcsvc_request_t *req)
+
+ frame->root->op = req->procnum;
+
+- frame->root->unique = req->xid;
+-
+ client = req->trans->xl_private;
+ this = req->trans->xl;
+ priv = this->private;
+--
+1.8.3.1
+
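The essence of the fix above is that every call stack, no matter which xlator creates it, now draws its ID from a single process-wide counter bumped under the frame-pool lock. A minimal standalone sketch of that idiom, using a pthread mutex in place of gluster's LOCK/UNLOCK macros; all names are illustrative:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t next_unique; /* process-wide, like the static in create_frame */

    struct stack {
        uint64_t unique; /* stable ID that survives into statedumps */
    };

    static void
    stack_init(struct stack *s)
    {
        /* Increment under the same lock that guards the frame pool, so two
         * concurrent creators can never observe the same value. */
        pthread_mutex_lock(&pool_lock);
        s->unique = next_unique++;
        pthread_mutex_unlock(&pool_lock);
    }

    int
    main(void)
    {
        struct stack a, b;
        stack_init(&a);
        stack_init(&b);
        printf("%llu %llu\n", (unsigned long long)a.unique,
               (unsigned long long)b.unique); /* prints: 0 1 */
        return 0;
    }
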
diff --git a/0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch b/0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch
new file mode 100644
index 0000000..949ebb6
--- /dev/null
+++ b/0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch
@@ -0,0 +1,89 @@
+From 909a6461c860fffde5f886891dd53752f60eae67 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Tue, 18 Jun 2019 12:10:55 +0530
+Subject: [PATCH 195/221] build: package glusterfs-ganesha for rhel7 and above
+
+Label : DOWNSTREAM ONLY
+
+Change-Id: If845675b18fe055708d905ec566014baf004cb76
+fixes: bz#1720551
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173748
+Reviewed-by: Sreenath Girijan Menon <sgirijan@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+---
+ glusterfs.spec.in | 19 ++++++++++++++-----
+ 1 file changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 1150101..00603ec 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -302,6 +302,9 @@ Obsoletes: %{name}-ufo
+ %if ( 0%{!?_with_gnfs:1} )
+ Obsoletes: %{name}-gnfs
+ %endif
++%if ( 0%{?rhel} < 7 )
++Obsoletes: %{name}-ganesha
++%endif
+ Provides: %{name}-common = %{version}-%{release}
+ Provides: %{name}-core = %{version}-%{release}
+
+@@ -452,7 +455,7 @@ is in user space and easily manageable.
+ This package provides support to FUSE based clients and inlcudes the
+ glusterfs(d) binary.
+
+-%if ( 0%{!?_without_server:1} )
++%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 )
+ %package ganesha
+ Summary: NFS-Ganesha configuration
+ Group: Applications/File
+@@ -855,7 +858,7 @@ install -D -p -m 0644 extras/glusterfs-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
+
+ # ganesha ghosts
+-%if ( 0%{!?_without_server:1} )
++%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 )
+ mkdir -p %{buildroot}%{_sysconfdir}/ganesha
+ touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf
+ mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/
+@@ -1165,11 +1168,14 @@ exit 0
+ %endif
+ %endif
+
+-%if ( 0%{?_without_server:1} )
+-#exclude ganesha related files
++%if ( 0%{?_without_server:1} || 0%{?rhel} < 7 )
++#exclude ganesha related files for rhel 6 and client builds
+ %exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+ %exclude %{_libexecdir}/ganesha/*
+ %exclude %{_prefix}/lib/ocf/resource.d/heartbeat/*
++%if ( 0%{!?_without_server:1} )
++%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
++%endif
+ %endif
+
+ %exclude %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh
+@@ -1324,7 +1330,7 @@ exit 0
+ %exclude %{_datadir}/glusterfs/tests/vagrant
+ %endif
+
+-%if ( 0%{!?_without_server:1} )
++%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 )
+ %files ganesha
+ %dir %{_libexecdir}/ganesha
+ %{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+@@ -1936,6 +1942,9 @@ fi
+ %endif
+
+ %changelog
++* Tue Jun 18 2019 Jiffin Tony Thottan <jthottan@redhat.com>
++- build glusterfs-ganesha for rhel 7 and above (#1720551)
++
+ * Fri Jun 14 2019 Atin Mukherjee <amukherj@redhat.com>
+ - Ensure gluster-cli package is part of client build (#1720079)
+
+--
+1.8.3.1
+
diff --git a/0196-posix-ctime-Fix-ctime-upgrade-issue.patch b/0196-posix-ctime-Fix-ctime-upgrade-issue.patch
new file mode 100644
index 0000000..1a7b68d
--- /dev/null
+++ b/0196-posix-ctime-Fix-ctime-upgrade-issue.patch
@@ -0,0 +1,384 @@
+From 584ee2dbb8158ee3d3c3f055f1b06ff3d9177192 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Thu, 13 Jun 2019 16:23:21 +0530
+Subject: [PATCH 196/221] posix/ctime: Fix ctime upgrade issue
+
+Problem:
+On an EC volume, during an upgrade from an older version where
+the ctime feature is not enabled (or not present) to the newer
+version where the ctime feature is available (enabled by default),
+self heal hangs and doesn't complete.
+
+Cause:
+The ctime feature has both client side code (utime) and
+server side code (posix). The feature is driven from the client:
+only if the client side sets the time in the frame should
+the server side set the time attributes in the xattr. But posix
+setattr/fsetattr was not honouring that. When one of the server
+nodes is updated, since ctime is enabled by default, it
+starts setting the xattr on setattr/fsetattr on the updated node/brick.
+
+On an EC volume the first two updated nodes (bricks) are not a
+problem because there are 4 other bricks with consistent data.
+However, once the third brick is updated, the new attribute (mdata
+xattr) causes a metadata inconsistency across 3 bricks, which
+prevents the file from being repaired.
+
+Fix:
+Don't create the mdata xattr with the utimes/utimensat system calls.
+Only update it if it is already present.
+
+Backport of:
+ > Patch: https://review.gluster.org/22858
+ > Change-Id: Ieacedecb8a738bb437283ef3e0f042fd49dc4c8c
+ > fixes: bz#1720201
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: Ieacedecb8a738bb437283ef3e0f042fd49dc4c8c
+BUG: 1713664
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174238
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/afr/split-brain-healing.t | 36 ++++---
+ tests/utils/get-mdata-xattr.c | 152 +++++++++++++++++++++++++++++
+ tests/volume.rc | 30 ++++++
+ xlators/storage/posix/src/posix-metadata.c | 21 ++++
+ 4 files changed, 223 insertions(+), 16 deletions(-)
+ create mode 100644 tests/utils/get-mdata-xattr.c
+
+diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
+index c80f900..78553e6 100644
+--- a/tests/basic/afr/split-brain-healing.t
++++ b/tests/basic/afr/split-brain-healing.t
+@@ -20,11 +20,14 @@ function get_replicate_subvol_number {
+ cleanup;
+
+ AREQUAL_PATH=$(dirname $0)/../../utils
++GET_MDATA_PATH=$(dirname $0)/../../utils
+ CFLAGS=""
+ test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+ }
+ build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
++build_tester $GET_MDATA_PATH/get-mdata-xattr.c
++
+ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+@@ -152,13 +155,13 @@ EXPECT $SMALLER_FILE_SIZE stat -c %s file4
+ subvolume=$(get_replicate_subvol_number file5)
+ if [ $subvolume == 0 ]
+ then
+- mtime1=$(stat -c %Y $B0/${V0}1/file5)
+- mtime2=$(stat -c %Y $B0/${V0}2/file5)
++ mtime1=$(get_mtime $B0/${V0}1/file5)
++ mtime2=$(get_mtime $B0/${V0}2/file5)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+ elif [ $subvolume == 1 ]
+ then
+- mtime1=$(stat -c %Y $B0/${V0}3/file5)
+- mtime2=$(stat -c %Y $B0/${V0}4/file5)
++ mtime1=$(get_mtime $B0/${V0}3/file5)
++ mtime2=$(get_mtime $B0/${V0}4/file5)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+ fi
+ $CLI volume heal $V0 split-brain latest-mtime /file5
+@@ -166,12 +169,12 @@ EXPECT "0" echo $?
+
+ if [ $subvolume == 0 ]
+ then
+- mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file5)
+- mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file5)
++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
+ elif [ $subvolume == 1 ]
+ then
+- mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file5)
+- mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file5)
++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
+ fi
+
+ #TODO: To below comparisons on full sub-second resolution
+@@ -188,14 +191,14 @@ subvolume=$(get_replicate_subvol_number file6)
+ if [ $subvolume == 0 ]
+ then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
+- mtime1=$(stat -c %Y $B0/${V0}1/file6)
+- mtime2=$(stat -c %Y $B0/${V0}2/file6)
++ mtime1=$(get_mtime $B0/${V0}1/file6)
++ mtime2=$(get_mtime $B0/${V0}2/file6)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+ elif [ $subvolume == 1 ]
+ then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
+- mtime1=$(stat -c %Y $B0/${V0}3/file6)
+- mtime2=$(stat -c %Y $B0/${V0}4/file6)
++ mtime1=$(get_mtime $B0/${V0}3/file6)
++ mtime2=$(get_mtime $B0/${V0}4/file6)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+ fi
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+@@ -204,12 +207,12 @@ EXPECT "0" echo $?
+
+ if [ $subvolume == 0 ]
+ then
+- mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file6)
+- mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file6)
++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
+ elif [ $subvolume == 1 ]
+ then
+- mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file6)
+- mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file6)
++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
+ fi
+
+ #TODO: To below comparisons on full sub-second resolution
+@@ -253,4 +256,5 @@ EXPECT "1" echo $?
+
+ cd -
+ TEST rm $AREQUAL_PATH/arequal-checksum
++TEST rm $GET_MDATA_PATH/get-mdata-xattr
+ cleanup
+diff --git a/tests/utils/get-mdata-xattr.c b/tests/utils/get-mdata-xattr.c
+new file mode 100644
+index 0000000..e9f5471
+--- /dev/null
++++ b/tests/utils/get-mdata-xattr.c
+@@ -0,0 +1,152 @@
++/*
++ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include <stdlib.h>
++#include <endian.h>
++#include <stdio.h>
++#include <time.h>
++#include <string.h>
++#include <inttypes.h>
++#include <sys/types.h>
++#include <sys/xattr.h>
++#include <errno.h>
++
++typedef struct gf_timespec_disk {
++ uint64_t tv_sec;
++ uint64_t tv_nsec;
++} gf_timespec_disk_t;
++
++/* posix_mdata_t on disk structure */
++typedef struct __attribute__((__packed__)) posix_mdata_disk {
++ /* version of structure, bumped up if any new member is added */
++ uint8_t version;
++ /* flags indicates valid fields in the structure */
++ uint64_t flags;
++ gf_timespec_disk_t ctime;
++ gf_timespec_disk_t mtime;
++ gf_timespec_disk_t atime;
++} posix_mdata_disk_t;
++
++/* In memory representation posix metadata xattr */
++typedef struct {
++ /* version of structure, bumped up if any new member is added */
++ uint8_t version;
++ /* flags indicates valid fields in the structure */
++ uint64_t flags;
++ struct timespec ctime;
++ struct timespec mtime;
++ struct timespec atime;
++} posix_mdata_t;
++
++#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata"
++
++/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order
++ */
++static inline void
++posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
++{
++ out->version = in->version;
++ out->flags = be64toh(in->flags);
++
++ out->ctime.tv_sec = be64toh(in->ctime.tv_sec);
++ out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec);
++
++ out->mtime.tv_sec = be64toh(in->mtime.tv_sec);
++ out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec);
++
++ out->atime.tv_sec = be64toh(in->atime.tv_sec);
++ out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
++}
++
++/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
++static int
++posix_fetch_mdata_xattr(const char *real_path, posix_mdata_t *metadata)
++{
++ size_t size = -1;
++ char *value = NULL;
++ char gfid_str[64] = {0};
++
++ char *key = GF_XATTR_MDATA_KEY;
++
++ if (!metadata || !real_path) {
++ goto err;
++ }
++
++ /* Get size */
++ size = lgetxattr(real_path, key, NULL, 0);
++ if (size == -1) {
++ goto err;
++ }
++
++ value = calloc(size + 1, sizeof(char));
++ if (!value) {
++ goto err;
++ }
++
++ /* Get xattr value */
++ size = lgetxattr(real_path, key, value, size);
++ if (size == -1) {
++ goto err;
++ }
++ posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value);
++
++out:
++ if (value)
++ free(value);
++ return 0;
++err:
++ if (value)
++ free(value);
++ return -1;
++}
++
++int
++main(int argc, char *argv[])
++{
++ posix_mdata_t metadata;
++ uint64_t result;
++
++ if (argc != 3) {
++ /*
++ Usage: get_mdata_xattr -c|-m|-a <file-name>
++ where -c --> ctime
++ -m --> mtime
++ -a --> atime
++ */
++ printf("-1");
++ goto err;
++ }
++
++ if (posix_fetch_mdata_xattr(argv[2], &metadata)) {
++ printf("-1");
++ goto err;
++ }
++
++ switch (argv[1][1]) {
++ case 'c':
++ result = metadata.ctime.tv_sec;
++ break;
++ case 'm':
++ result = metadata.mtime.tv_sec;
++ break;
++ case 'a':
++ result = metadata.atime.tv_sec;
++ break;
++ default:
++ printf("-1");
++ goto err;
++ }
++ printf("%" PRIu64, result);
++ fflush(stdout);
++ return 0;
++err:
++ fflush(stdout);
++ return -1;
++}
+diff --git a/tests/volume.rc b/tests/volume.rc
+index bb400cc..6a78c37 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -927,3 +927,33 @@ function number_healer_threads_shd {
+ local pid=$(get_shd_mux_pid $1)
+ pstack $pid | grep $2 | wc -l
+ }
++
++function get_mtime {
++ local time=$(get-mdata-xattr -m $1)
++ if [ $time == "-1" ];
++ then
++ echo $(stat -c %Y $1)
++ else
++ echo $time
++ fi
++}
++
++function get_ctime {
++ local time=$(get-mdata-xattr -c $1)
++ if [ $time == "-1" ];
++ then
++ echo $(stat -c %Z $1)
++ else
++ echo $time
++ fi
++}
++
++function get_atime {
++ local time=$(get-mdata-xattr -a $1)
++ if [ $time == "-1" ];
++ then
++ echo $(stat -c %X $1)
++ else
++ echo $time
++ fi
++}
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index e96f222..5a5e6cd 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -416,6 +416,22 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ * still fine as the times would get eventually
+ * accurate.
+ */
++
++ /* Don't create xattr with utimes/utimensat, only update if
++ * present. This otherwise causes issues during inservice
++ * upgrade. It causes inconsistent xattr values with in replica
++ * set. The scenario happens during upgrade where clients are
++ * older versions (without the ctime feature) and the server is
++ * upgraded to the new version (with the ctime feature which
++ * is enabled by default).
++ */
++
++ if (update_utime) {
++ UNLOCK(&inode->lock);
++ GF_FREE(mdata);
++ return 0;
++ }
++
+ mdata->version = 1;
+ mdata->flags = 0;
+ mdata->ctime.tv_sec = time->tv_sec;
+@@ -527,6 +543,11 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+
+ priv = this->private;
+
++ /* NOTE:
++ * This routine (utimes) is intentionally allowed for all internal and
++ * external clients even if ctime is not set. This is because AFR and
++ * WORM use time attributes for their internal operations
++ */
+ if (inode && priv->ctime) {
+ if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+ tv.tv_sec = stbuf->ia_atime;
+--
+1.8.3.1
+
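The heart of the fix is the "update only if already present" rule for the mdata xattr when the call originates from utimes. A standalone sketch of such a presence probe, assuming a plain lgetxattr size query stands in for the posix xlator's internal bookkeeping; the key name is taken from the test utility above:

    #include <stdbool.h>
    #include <sys/xattr.h>

    #define MDATA_KEY "trusted.glusterfs.mdata"

    /* Returns true when the mdata xattr already exists on the file, i.e.
     * when an update (as opposed to a fresh create) is safe during a
     * rolling upgrade with pre-ctime clients still writing. */
    static bool
    mdata_xattr_present(const char *real_path)
    {
        /* Size-only probe: no buffer is needed to test for existence.
         * A negative return with errno == ENODATA means the attribute is
         * absent; a full implementation would surface other errors. */
        ssize_t size = lgetxattr(real_path, MDATA_KEY, NULL, 0);

        return size >= 0;
    }

    int
    main(int argc, char *argv[])
    {
        return (argc > 1 && mdata_xattr_present(argv[1])) ? 0 : 1;
    }
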
diff --git a/0197-posix-fix-crash-in-posix_cs_set_state.patch b/0197-posix-fix-crash-in-posix_cs_set_state.patch
new file mode 100644
index 0000000..c17e6c2
--- /dev/null
+++ b/0197-posix-fix-crash-in-posix_cs_set_state.patch
@@ -0,0 +1,71 @@
+From 58070aa568ffbaac267b02428e974b2459ae13b0 Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Tue, 18 Jun 2019 16:43:43 +0530
+Subject: [PATCH 197/221] posix: fix crash in posix_cs_set_state
+
+> Fixes: bz#1721474
+> Change-Id: Ic2a53fa3d1e9e23424c6898e0986f80d52c5e3f6
+> Signed-off-by: Susant Palai <spalai@redhat.com>
+(cherry-pick of https://review.gluster.org/#/c/glusterfs/+/22892/)
+
+BUG: 1721477
+Change-Id: Ic2a53fa3d1e9e23424c6898e0986f80d52c5e3f6
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173936
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/storage/posix/src/posix-helpers.c | 5 +++++
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 7 ++++---
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index aecf4f8..849db3d 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -3235,6 +3235,11 @@ posix_cs_set_state(xlator_t *this, dict_t **rsp, gf_cs_obj_state state,
+ char *value = NULL;
+ size_t xattrsize = 0;
+
++ if (!rsp) {
++ ret = -1;
++ goto out;
++ }
++
+ if (!(*rsp)) {
+ *rsp = dict_new();
+ if (!(*rsp)) {
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 7ca4d26..b92c411 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -1028,6 +1028,7 @@ posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt statpost = {
+ 0,
+ };
++ dict_t *rsp_xdata = NULL;
+
+ #ifdef FALLOC_FL_KEEP_SIZE
+ if (keep_size)
+@@ -1035,15 +1036,15 @@ posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ #endif /* FALLOC_FL_KEEP_SIZE */
+
+ ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre,
+- &statpost, xdata, NULL);
++ &statpost, xdata, &rsp_xdata);
+ if (ret < 0)
+ goto err;
+
+- STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL);
++ STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, rsp_xdata);
+ return 0;
+
+ err:
+- STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL);
++ STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata);
+ return 0;
+ }
+
+--
+1.8.3.1
+
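posix_cs_set_state receives the response dict as a dict_t ** and allocates *rsp lazily, so both levels of the pointer must be validated; the crash came from callers passing a NULL outer pointer. A generic sketch of that guard for lazily allocated out-parameters, with toy types rather than the gluster dict API:

    #include <stdlib.h>

    struct dict { int refcount; };

    static struct dict *
    dict_new_stub(void) /* stand-in for dict_new() */
    {
        return calloc(1, sizeof(struct dict));
    }

    /* Fill an optional, lazily-allocated response object. Both levels of
     * the double pointer must be checked: rsp == NULL (caller wants no
     * response) and *rsp == NULL (response not allocated yet). */
    static int
    set_state(struct dict **rsp)
    {
        if (!rsp)
            return -1;      /* the missing check that caused the crash */

        if (!*rsp) {
            *rsp = dict_new_stub();
            if (!*rsp)
                return -1;  /* allocation failure */
        }

        /* ... populate *rsp ... */
        return 0;
    }

    int
    main(void)
    {
        struct dict *rsp = NULL;
        if (set_state(NULL) != -1) /* would have crashed before the fix */
            return 1;
        return set_state(&rsp);
    }
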
diff --git a/0198-cluster-ec-Prevent-double-pre-op-xattrops.patch b/0198-cluster-ec-Prevent-double-pre-op-xattrops.patch
new file mode 100644
index 0000000..5e7c272
--- /dev/null
+++ b/0198-cluster-ec-Prevent-double-pre-op-xattrops.patch
@@ -0,0 +1,119 @@
+From 9912a432dc3493007462f76c5933d04a160814ae Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Thu, 20 Jun 2019 17:05:49 +0530
+Subject: [PATCH 198/221] cluster/ec: Prevent double pre-op xattrops
+
+Problem:
+Race:
+Thread-1 Thread-2
+1) Does ec_get_size_version() to perform
+pre-op fxattrop as part of write-1
+ 2) Calls ec_set_dirty_flag() in
+ ec_get_size_version() for write-2.
+ This sets dirty[] to 1
+3) Completes executing
+ec_prepare_update_cbk leading to
+ctx->dirty[] = '1'
+ 4) Takes LOCK(inode->lock) to check if there are
+ any flags and sets dirty-flag because
+ lock->waiting_flag is 0 now. This leads to
+ fxattrop to increment on-disk dirty[] to '2'
+
+At the end of the writes the file will be marked for heal even when it doesn't need one.
+
+Fix:
+Perform ec_set_dirty_flag() and the other checks inside LOCK() to prevent dirty[]
+from being marked as '1' in step 2) above.
+
+ > Upstream-patch: https://review.gluster.org/c/glusterfs/+/22907
+
+fixes: bz#1600918
+Change-Id: Icac2ab39c0b1e7e154387800fbededc561612865
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174385
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/ec/ec-dirty-flags.t | 23 +++++++++++++++++++++++
+ xlators/cluster/ec/src/ec-common.c | 13 +++++++------
+ 2 files changed, 30 insertions(+), 6 deletions(-)
+ create mode 100644 tests/basic/ec/ec-dirty-flags.t
+
+diff --git a/tests/basic/ec/ec-dirty-flags.t b/tests/basic/ec/ec-dirty-flags.t
+new file mode 100644
+index 0000000..68e6610
+--- /dev/null
++++ b/tests/basic/ec/ec-dirty-flags.t
+@@ -0,0 +1,23 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++# This checks that the fop leaves the dirty flag settings correct after
++# it finishes.
++
++cleanup
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
++cd $M0
++for i in {1..1000}; do dd if=/dev/zero of=file-${i} bs=512k count=2; done
++cd -
++EXPECT "^0$" get_pending_heal_count $V0
++
++cleanup
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 9cc6395..35c2256 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -1405,6 +1405,10 @@ ec_get_size_version(ec_lock_link_t *link)
+ !ec_is_data_fop(fop->id))
+ link->optimistic_changelog = _gf_true;
+
++ memset(&loc, 0, sizeof(loc));
++
++ LOCK(&lock->loc.inode->lock);
++
+ set_dirty = ec_set_dirty_flag(link, ctx, dirty);
+
+ /* If ec metadata has already been retrieved, do not try again. */
+@@ -1412,20 +1416,16 @@ ec_get_size_version(ec_lock_link_t *link)
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= lock->healing;
+ }
+- return;
++ goto unlock;
+ }
+
+ /* Determine if there's something we need to retrieve for the current
+ * operation. */
+ if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) &&
+ (lock->loc.inode->ia_type != IA_INVAL)) {
+- return;
++ goto unlock;
+ }
+
+- memset(&loc, 0, sizeof(loc));
+-
+- LOCK(&lock->loc.inode->lock);
+-
+ changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty);
+ if (link->waiting_flags) {
+ /* This fop needs to wait until all its flags are cleared which
+@@ -1436,6 +1436,7 @@ ec_get_size_version(ec_lock_link_t *link)
+ GF_ASSERT(!changed_flags);
+ }
+
++unlock:
+ UNLOCK(&lock->loc.inode->lock);
+
+ if (!changed_flags)
+--
+1.8.3.1
+
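The bug is a textbook check-then-act race: the dirty decision was computed before taking inode->lock, so two writers could both conclude the pre-op was still needed. A minimal pthread sketch of the before/after shapes; the flags are illustrative, and the real code also tracks waiting_flags and on-disk counters:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool ctx_dirty; /* in-memory view of the on-disk dirty[] */

    /* Buggy shape: the decision is made outside the lock, so two threads
     * can both read "not dirty yet" and both issue the pre-op xattrop. */
    static bool
    set_dirty_racy(void)
    {
        bool do_xattrop = !ctx_dirty;   /* check ... */
        pthread_mutex_lock(&inode_lock);
        ctx_dirty = true;               /* ... act, too late */
        pthread_mutex_unlock(&inode_lock);
        return do_xattrop;
    }

    /* Fixed shape, as in the patch: check and act inside one critical
     * section, so exactly one thread wins the pre-op. */
    static bool
    set_dirty_locked(void)
    {
        bool do_xattrop;
        pthread_mutex_lock(&inode_lock);
        do_xattrop = !ctx_dirty;
        ctx_dirty = true;
        pthread_mutex_unlock(&inode_lock);
        return do_xattrop;
    }

    int
    main(void)
    {
        (void)set_dirty_racy; /* kept only for comparison */
        return (set_dirty_locked() && !set_dirty_locked()) ? 0 : 1;
    }
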
diff --git a/0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch b/0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch
new file mode 100644
index 0000000..161675e
--- /dev/null
+++ b/0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch
@@ -0,0 +1,80 @@
+From e41b4a45f9f5c07ffa38582d0bb4517f6a66eaa3 Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Fri, 7 Jun 2019 19:33:07 +0530
+Subject: [PATCH 199/221] upcall: Avoid sending notifications for invalid
+ inodes
+
+For nameless LOOKUPs, the server creates a new inode which remains
+invalid until the fop is successfully processed, after which it is
+linked into the inode table.
+
+But in case there is an already linked inode for that entry, the
+newly created inode is discarded, which results in an upcall
+notification. This may bombard the client with unnecessary upcalls,
+affecting performance if the data set is huge.
+
+This issue can be avoided by looking up and storing the upcall
+context in the original linked inode (if it exists), thus saving
+those extra callbacks.
+
+This is backport of below upstream fix -
+mainline: https://review.gluster.org/22840
+release-6: https://review.gluster.org/22873
+
+Change-Id: I044a1737819bb40d1a049d2f53c0566e746d2a17
+fixes: bz#1717784
+Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173507
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+---
+ xlators/features/upcall/src/upcall-internal.c | 19 ++++++++++++++++++-
+ 1 file changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c
+index 46cf6f8..7998dd2 100644
+--- a/xlators/features/upcall/src/upcall-internal.c
++++ b/xlators/features/upcall/src/upcall-internal.c
+@@ -520,6 +520,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
+ upcall_client_t *tmp = NULL;
+ upcall_inode_ctx_t *up_inode_ctx = NULL;
+ gf_boolean_t found = _gf_false;
++ inode_t *linked_inode = NULL;
+
+ if (!is_upcall_enabled(this))
+ return;
+@@ -532,7 +533,20 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
+ return;
+ }
+
+- if (inode)
++ /* For nameless LOOKUPs, inode created shall always be
++ * invalid. Hence check if there is any already linked inode.
++ * If yes, update the inode_ctx of that valid inode
++ */
++ if (inode && (inode->ia_type == IA_INVAL) && stbuf) {
++ linked_inode = inode_find(inode->table, stbuf->ia_gfid);
++ if (linked_inode) {
++ gf_log("upcall", GF_LOG_DEBUG,
++ "upcall_inode_ctx_get of linked inode (%p)", inode);
++ up_inode_ctx = upcall_inode_ctx_get(linked_inode, this);
++ }
++ }
++
++ if (inode && !up_inode_ctx)
+ up_inode_ctx = upcall_inode_ctx_get(inode, this);
+
+ if (!up_inode_ctx) {
+@@ -600,6 +614,9 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
+ }
+ pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
+ out:
++ /* release the ref from inode_find */
++ if (linked_inode)
++ inode_unref(linked_inode);
+ return;
+ }
+
+--
+1.8.3.1
+
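inode_find returns a referenced inode, which is why the patch releases it with inode_unref at the out: label. A toy sketch of that lookup-with-reference discipline, using an illustrative array rather than the gluster inode table:

    #include <string.h>

    struct node {
        char key[16];
        int refcount;
    };

    /* Lookup that hands back a reference: the caller now co-owns the node
     * and must drop the ref when done (mirrors inode_find/inode_unref). */
    static struct node *
    table_find(struct node *tbl, size_t n, const char *key)
    {
        for (size_t i = 0; i < n; i++) {
            if (strcmp(tbl[i].key, key) == 0) {
                tbl[i].refcount++; /* ref taken on behalf of the caller */
                return &tbl[i];
            }
        }
        return NULL;
    }

    static void
    node_unref(struct node *nd)
    {
        if (nd)
            nd->refcount--; /* a real table would free at zero */
    }

    static void
    use(struct node *tbl, size_t n)
    {
        struct node *linked = table_find(tbl, n, "gfid-1");
        /* ... update the context on the linked node ... */
        node_unref(linked); /* release the ref from table_find, as at out: */
    }

    int
    main(void)
    {
        struct node tbl[2] = {{"gfid-1", 1}, {"gfid-2", 1}};
        use(tbl, 2);
        return tbl[0].refcount == 1 ? 0 : 1; /* refs stay balanced */
    }
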
diff --git a/0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch b/0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch
new file mode 100644
index 0000000..ffef4d4
--- /dev/null
+++ b/0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch
@@ -0,0 +1,206 @@
+From bd553499909d2d57fd05696dc7604901cef3a36a Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Fri, 7 Jun 2019 17:20:15 +0530
+Subject: [PATCH 200/221] gfapi: fix incorrect initialization of upcall syncop
+ arguments
+
+While sending upcall notifications via synctasks, the argument used to
+carry the relevant data for these tasks is not initialized properly.
+This patch fixes that.
+
+This is backport of below upstream fix -
+mainline: https://review.gluster.org/22839
+release-6: https://review.gluster.org/22871
+
+Change-Id: I9fa8f841e71d3c37d3819fbd430382928c07176c
+fixes: bz#1717784
+Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173508
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+---
+ api/src/glfs-fops.c | 109 ++++++++++++++++++++++++++++++++++------------------
+ 1 file changed, 72 insertions(+), 37 deletions(-)
+
+diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
+index 01ba60b..396f18c 100644
+--- a/api/src/glfs-fops.c
++++ b/api/src/glfs-fops.c
+@@ -34,7 +34,7 @@
+
+ struct upcall_syncop_args {
+ struct glfs *fs;
+- struct gf_upcall *upcall_data;
++ struct glfs_upcall *up_arg;
+ };
+
+ #define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1)
+@@ -5714,12 +5714,28 @@ out:
+ }
+
+ static int
++upcall_syncop_args_free(struct upcall_syncop_args *args)
++{
++ if (args && args->up_arg)
++ GLFS_FREE(args->up_arg);
++ GF_FREE(args);
++ return 0;
++}
++
++static int
+ glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque)
+ {
+ struct upcall_syncop_args *args = opaque;
+
+- GF_FREE(args->upcall_data);
+- GF_FREE(args);
++ /* Here we not using upcall_syncop_args_free as application
++ * will be cleaning up the args->up_arg using glfs_free
++ * post processing upcall.
++ */
++ if (ret) {
++ upcall_syncop_args_free(args);
++ } else
++ GF_FREE(args);
++
+ return 0;
+ }
+
+@@ -5727,13 +5743,29 @@ static int
+ glfs_cbk_upcall_syncop(void *opaque)
+ {
+ struct upcall_syncop_args *args = opaque;
+- int ret = -1;
+ struct glfs_upcall *up_arg = NULL;
+ struct glfs *fs;
+- struct gf_upcall *upcall_data;
+
+ fs = args->fs;
+- upcall_data = args->upcall_data;
++ up_arg = args->up_arg;
++
++ if (fs->up_cbk && up_arg) {
++ (fs->up_cbk)(up_arg, fs->up_data);
++ return 0;
++ }
++
++ return -1;
++}
++
++static struct upcall_syncop_args *
++upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
++{
++ struct upcall_syncop_args *args = NULL;
++ int ret = -1;
++ struct glfs_upcall *up_arg = NULL;
++
++ if (!fs || !upcall_data)
++ goto out;
+
+ up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall), glfs_release_upcall,
+ glfs_mt_upcall_entry_t);
+@@ -5754,33 +5786,51 @@ glfs_cbk_upcall_syncop(void *opaque)
+ errno = EINVAL;
+ }
+
+- if (!ret && (up_arg->reason != GLFS_UPCALL_EVENT_NULL)) {
+- /* It could so happen that the file which got
+- * upcall notification may have got deleted by
+- * the same client. In such cases up_arg->reason
+- * is set to GLFS_UPCALL_EVENT_NULL. No need to
+- * send upcall then */
+- (fs->up_cbk)(up_arg, fs->up_data);
+- } else if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) {
++ /* It could so happen that the file which got
++ * upcall notification may have got deleted by
++ * the same client. In such cases up_arg->reason
++ * is set to GLFS_UPCALL_EVENT_NULL. No need to
++ * send upcall then
++ */
++ if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) {
+ gf_msg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_INVALID_ENTRY,
+ "Upcall_EVENT_NULL received. Skipping it.");
+ goto out;
+- } else {
++ } else if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY,
+ "Upcall entry validation failed.");
+ goto out;
+ }
+
++ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args),
++ glfs_mt_upcall_entry_t);
++ if (!args) {
++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
++ "Upcall syncop args allocation failed.");
++ goto out;
++ }
++
++ /* Note: we are not taking any ref on fs here.
++ * Ideally applications have to unregister for upcall events
++ * or stop polling for upcall events before performing
++ * glfs_fini. And as for outstanding synctasks created, we wait
++ * for all syncenv threads to finish tasks before cleaning up the
++ * fs->ctx. Hence it seems safe to process these callback
++ * notification without taking any lock/ref.
++ */
++ args->fs = fs;
++ args->up_arg = up_arg;
++
+ /* application takes care of calling glfs_free on up_arg post
+ * their processing */
+- ret = 0;
+
++ return args;
+ out:
+- if (ret && up_arg) {
++ if (up_arg) {
+ GLFS_FREE(up_arg);
+ }
+
+- return 0;
++ return NULL;
+ }
+
+ static void
+@@ -5797,24 +5847,10 @@ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+ goto out;
+ }
+
+- args = GF_CALLOC(1, sizeof(struct upcall_syncop_args),
+- glfs_mt_upcall_entry_t);
+- if (!args) {
+- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
+- "Upcall syncop args allocation failed.");
+- goto out;
+- }
++ args = upcall_syncop_args_init(fs, upcall_data);
+
+- /* Note: we are not taking any ref on fs here.
+- * Ideally applications have to unregister for upcall events
+- * or stop polling for upcall events before performing
+- * glfs_fini. And as for outstanding synctasks created, we wait
+- * for all syncenv threads to finish tasks before cleaning up the
+- * fs->ctx. Hence it seems safe to process these callback
+- * notification without taking any lock/ref.
+- */
+- args->fs = fs;
+- args->upcall_data = gf_memdup(upcall_data, sizeof(*upcall_data));
++ if (!args)
++ goto out;
+
+ ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop,
+ glfs_upcall_syncop_cbk, NULL, args);
+@@ -5823,8 +5859,7 @@ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED,
+ "Synctak for Upcall event_type(%d) and gfid(%s) failed",
+ upcall_data->event_type, (char *)(upcall_data->gfid));
+- GF_FREE(args->upcall_data);
+- GF_FREE(args);
++ upcall_syncop_args_free(args);
+ }
+
+ out:
+--
+1.8.3.1
+
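After this patch, ownership of up_arg depends on the outcome: on synctask failure the helper frees both the wrapper and the payload, while on success only the wrapper is freed, because the application releases up_arg via glfs_free after processing the callback. A condensed sketch of that split ownership, under illustrative names:

    #include <stdlib.h>

    struct payload { int data; }; /* stands in for glfs_upcall */
    struct task_args {            /* stands in for upcall_syncop_args */
        struct payload *payload;
    };

    static void
    args_free_deep(struct task_args *args)
    {
        /* Failure path: nobody else will ever see the payload, so free
         * both the payload and the wrapper. */
        if (args)
            free(args->payload);
        free(args);
    }

    static void
    task_done(int ret, struct task_args *args)
    {
        if (ret)
            args_free_deep(args); /* task failed: payload never delivered */
        else
            free(args);           /* delivered: the consumer frees the
                                     payload later (cf. glfs_free) */
    }

    int
    main(void)
    {
        struct task_args *args = calloc(1, sizeof(*args));
        if (!args)
            return 1;
        args->payload = calloc(1, sizeof(*args->payload));
        task_done(-1, args); /* exercise the failure path */
        return 0;
    }
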
diff --git a/0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch b/0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch
new file mode 100644
index 0000000..0884a87
--- /dev/null
+++ b/0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch
@@ -0,0 +1,44 @@
+From a61c2a81e5731e4e0b5136147f404e60d3c72ad0 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Tue, 18 Jun 2019 16:25:35 +0530
+Subject: [PATCH 201/221] geo-rep: Fix permissions for GEOREP_DIR in non-root
+ setup
+
+During mountbroker setup, the 'gluster-mountbroker <mountbroker-root> <group>'
+command, which sets the permissions and group for the GEOREP_DIR directory
+(/var/lib/glusterd/geo-replication) and is essential for non-root geo-rep
+setup, fails due to an extra argument in the chmod call.
+
+Backport of:
+
+>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22890/
+>fixes: bz#1721441
+>Change-Id: Ia83442733bf0b29f630e8c9e398097316efca092
+>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: bz#1722331
+Change-Id: Ia83442733bf0b29f630e8c9e398097316efca092
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174169
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/src/peer_mountbroker.py.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in
+index ce33f97..96a7264 100644
+--- a/geo-replication/src/peer_mountbroker.py.in
++++ b/geo-replication/src/peer_mountbroker.py.in
+@@ -197,7 +197,7 @@ class NodeSetup(Cmd):
+ execute(["chgrp", "-R", args.group, GEOREP_DIR])
+ execute(["chgrp", "-R", args.group, LOG_DIR])
+ execute(["chgrp", args.group, CLI_LOG])
+- execute(["chmod", "770", args.group, GEOREP_DIR])
++ execute(["chmod", "770", GEOREP_DIR])
+ execute(["find", LOG_DIR, "-type", "d", "-exec", "chmod", "770", "{}",
+ "+"])
+ execute(["find", LOG_DIR, "-type", "f", "-exec", "chmod", "660", "{}",
+--
+1.8.3.1
+
diff --git a/0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch b/0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch
new file mode 100644
index 0000000..7cadb24
--- /dev/null
+++ b/0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch
@@ -0,0 +1,46 @@
+From e386fb4f4baf834e6a8fc25cc2fbbb17eb0a7a56 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 20 Jun 2019 20:43:24 +0530
+Subject: [PATCH 202/221] shd/mux: Fix race between mux_proc unlink and stop
+
+There is a small race window where we have a shd proc
+without a connection: when we stopped the last shd running
+in a process, its entry was removed from the list outside
+of the lock, just after stopping the process.
+
+So there is a window where the process has been stopped but
+the shd proc list still contains the entry.
+
+Backport of: https://review.gluster.org/22909
+
+>Change-Id: Id82a82509e5cd72acac24e8b7b87197626525441
+>fixes: bz#1722541
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I794131ede23f32fcfa5f71181149d8c1e7e439b8
+BUG: 1721802
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174541
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index d81d760..dbe2560 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -694,6 +694,9 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+ gf_is_service_running(svc->proc.pidfile, &pid);
+ cds_list_del_init(&svc->mux_svc);
+ empty = cds_list_empty(&svc_proc->svcs);
++ if (empty) {
++ cds_list_del_init(&svc_proc->svc_proc_list);
++ }
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ if (empty) {
+--
+1.8.3.1
+
diff --git a/0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch b/0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch
new file mode 100644
index 0000000..39c9cd8
--- /dev/null
+++ b/0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch
@@ -0,0 +1,233 @@
+From 541e1400ecaec5fea0f56e8ca18f00c229906d8a Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Tue, 18 Jun 2019 22:15:37 +0530
+Subject: [PATCH 203/221] glusterd/shd: Change shd logfile to a unique name
+
+With the shd mux changes, shd had a logfile named after the
+volname of the first started volume.
+
+This was creating a lot of confusion, as other volumes' data
+was also logged to a logfile bearing a different volume name.
+
+With this change the logfile is changed to a unique name,
+i.e. "/var/log/glusterfs/glustershd.log". This was the
+logfile name before the shd mux.
+
+Backport of: https://review.gluster.org/22895
+
+>Change-Id: I2b94c1f0b2cf3c9493505dddf873687755a46dda
+>fixes: bz#1721601
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: Ia659386dd19f533fbadaf5a9d5453c9ef2acac64
+BUG: 1721351
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174542
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 12 --------
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 6 ----
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ++++-----
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 34 +++++++++++++++++-----
+ xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 4 +--
+ xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 4 +++
+ 6 files changed, 40 insertions(+), 34 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+index 9196758..57ceda9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+@@ -75,18 +75,6 @@ glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+ }
+
+ void
+-glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len)
+-{
+- snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname);
+-}
+-
+-void
+-glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len)
+-{
+- snprintf(logfile, len, "%s/shd.log", logdir);
+-}
+-
+-void
+ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
+ {
+ glusterd_svc_proc_t *svc_proc = NULL;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+index c70702c..59466ec 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+@@ -27,12 +27,6 @@ glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+ void
+-glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len);
+-
+-void
+-glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len);
+-
+-void
+ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd);
+
+ int
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index dbe2560..8ad90a9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -90,8 +90,8 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+ glusterd_svc_create_rundir(rundir);
+
+- glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir));
+- glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile));
++ glusterd_svc_build_logfile_path(shd_svc_name, DEFAULT_LOG_FILE_DIRECTORY,
++ logfile, sizeof(logfile));
+
+ /* Initialize the connection mgmt */
+ if (mux_conn && mux_svc->rpc) {
+@@ -104,7 +104,7 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ if (ret < 0)
+ goto out;
+ } else {
+- ret = mkdir_p(logdir, 0755, _gf_true);
++ ret = mkdir_p(DEFAULT_LOG_FILE_DIRECTORY, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Unable to create logdir %s", logdir);
+@@ -460,6 +460,7 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+ return -1;
+
+ glusterd_volinfo_ref(volinfo);
++
+ if (!svc->inited) {
+ ret = glusterd_shd_svc_mux_init(volinfo, svc);
+ if (ret)
+@@ -471,12 +472,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+ /* Unref will happen from glusterd_svc_attach_cbk */
+ ret = glusterd_attach_svc(svc, volinfo, flags);
+ if (ret) {
+- glusterd_volinfo_unref(volinfo);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+- "Failed to attach shd svc(volume=%s) to pid=%d. Starting"
+- "a new process",
++ "Failed to attach shd svc(volume=%s) to pid=%d",
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+- ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags);
++ glusterd_shd_svcproc_cleanup(&volinfo->shd);
++ glusterd_volinfo_unref(volinfo);
+ }
+ goto out;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index a6e662f..400826f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -469,6 +469,9 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+ glusterd_conf_t *conf = NULL;
+ glusterd_svc_t *parent_svc = NULL;
+ int pid = -1;
++ char pidfile[PATH_MAX] = {
++ 0,
++ };
+
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+@@ -478,8 +481,26 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+
+ pthread_mutex_lock(&conf->attach_lock);
+ {
++ if (svc->inited && !glusterd_proc_is_running(&(svc->proc))) {
++ /* This is the case when shd process was abnormally killed */
++ pthread_mutex_unlock(&conf->attach_lock);
++ glusterd_shd_svcproc_cleanup(&volinfo->shd);
++ pthread_mutex_lock(&conf->attach_lock);
++ }
++
+ if (!svc->inited) {
+- if (gf_is_service_running(svc->proc.pidfile, &pid)) {
++ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
++ ret = snprintf(svc->proc.name, sizeof(svc->proc.name), "%s",
++ "glustershd");
++ if (ret < 0)
++ goto unlock;
++
++ ret = snprintf(svc->proc.pidfile, sizeof(svc->proc.pidfile), "%s",
++ pidfile);
++ if (ret < 0)
++ goto unlock;
++
++ if (gf_is_service_running(pidfile, &pid)) {
+ /* Just connect is required, but we don't know what happens
+ * during the disconnect. So better to reattach.
+ */
+@@ -487,10 +508,10 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+ }
+
+ if (!mux_proc) {
+- if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) {
++ if (pid != -1 && sys_access(pidfile, R_OK) == 0) {
+ /* stale pid file, stop and unlink it */
+ glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE);
+- glusterd_unlink_file(svc->proc.pidfile);
++ glusterd_unlink_file(pidfile);
+ }
+ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD);
+ }
+@@ -684,11 +705,10 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+- "svc %s of volume %s failed to "
+- "attach to pid %d. Starting a new process",
+- svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++ "svc %s of volume %s failed to attach to pid %d", svc->name,
++ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ if (!strcmp(svc->name, "glustershd")) {
+- glusterd_recover_shd_attach_failure(volinfo, svc, *flag);
++ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ }
+ }
+ out:
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+index f32dafc..fa316a6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+@@ -33,14 +33,14 @@ glusterd_svc_create_rundir(char *rundir)
+ return ret;
+ }
+
+-static void
++void
+ glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile,
+ size_t len)
+ {
+ snprintf(logfile, len, "%s/%s.log", logdir, server);
+ }
+
+-static void
++void
+ glusterd_svc_build_volfileid_path(char *server, char *volfileid, size_t len)
+ {
+ snprintf(volfileid, len, "gluster/%s", server);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+index fbc5225..5a5466a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+@@ -74,6 +74,10 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile,
+ size_t len);
+
+ void
++glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile,
++ size_t len);
++
++void
+ glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len);
+
+ void
+--
+1.8.3.1
+
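/* Editor's note, as a hedged C sketch: the hunk above (patch 0203) builds
 * the shd pidfile and, when a pid is found but its process is gone, stops
 * the stale entry and unlinks the pidfile before reattaching.  The
 * standalone version below shows that stale-pidfile pattern with plain
 * POSIX calls; gf_is_service_running() and glusterd_unlink_file() are the
 * glusterd helpers it stands in for, and the path used is hypothetical. */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static int
pid_alive(pid_t pid)
{
    /* signal 0 performs permission/existence checks without sending */
    return pid > 0 && (kill(pid, 0) == 0 || errno == EPERM);
}

static void
reap_stale_pidfile(const char *pidfile)
{
    FILE *fp = fopen(pidfile, "r");
    long pid = -1;

    if (fp == NULL)
        return; /* no pidfile, nothing to clean up */
    if (fscanf(fp, "%ld", &pid) != 1)
        pid = -1;
    fclose(fp);

    if (!pid_alive((pid_t)pid)) {
        printf("stale pidfile %s (pid %ld), unlinking\n", pidfile, pid);
        unlink(pidfile);
    }
}

int
main(void)
{
    reap_stale_pidfile("/tmp/glustershd.pid"); /* hypothetical path */
    return 0;
}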
diff --git a/0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch b/0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch
new file mode 100644
index 0000000..6d05a0b
--- /dev/null
+++ b/0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch
@@ -0,0 +1,60 @@
+From 4d0b11088c4a3a630d71acf902064d1ed10412e8 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 25 Jun 2019 11:11:10 +0530
+Subject: [PATCH 204/221] glusterd: conditionally clear txn_opinfo in stage op
+
+...otherwise this leads to a crash when volume status is run in
+heterogeneous mode (a cluster whose peers run mixed op-versions).
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22939/
+
+>Fixes: bz#1723658
+>Change-Id: I0d39f412b2e5e9d3ef0a3462b90b38bb5364b09d
+>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1722131
+Change-Id: I0d39f412b2e5e9d3ef0a3462b90b38bb5364b09d
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174566
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index d0c1a2c..9ea695e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -5714,9 +5714,14 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx)
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
++ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
++
++ priv = this->private;
++ GF_ASSERT(priv);
++
+ GF_ASSERT(ctx);
+
+ req_ctx = ctx;
+@@ -5768,9 +5773,12 @@ out:
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
+ /* for no volname transactions, the txn_opinfo needs to be cleaned up
+- * as there's no unlock event triggered
++ * as there's no unlock event triggered. However if the originator node of
++ * this transaction is still running with a version lower than 60000,
++ * txn_opinfo can't be cleared, as that would lead to a race where op_ctx
++ * is referenced after it has been freed.
+ */
+- if (txn_op_info.skip_locking)
++ if (txn_op_info.skip_locking && priv->op_version >= GD_OP_VERSION_6_0)
+ ret = glusterd_clear_txn_opinfo(txn_id);
+
+ if (rsp_dict)
+--
+1.8.3.1
+
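/* Editor's note, as a hedged C sketch: patch 0204 above clears the
 * per-transaction opinfo only when the whole cluster runs at op-version
 * >= GD_OP_VERSION_6_0, because an older originator may still reference
 * op_ctx.  The names below are illustrative, not the real glusterd API. */
#include <stdio.h>

#define OP_VERSION_6_0 60000 /* the "60000" threshold cited in the patch */

struct txn_opinfo {
    int skip_locking; /* no-volname txns set this; they get no unlock event */
};

static void
maybe_clear_txn_opinfo(const struct txn_opinfo *info, int cluster_op_version)
{
    /* Clearing while an old peer may still dereference op_ctx would be a
     * use-after-free, so both conditions must hold. */
    if (info->skip_locking && cluster_op_version >= OP_VERSION_6_0)
        printf("clearing txn_opinfo\n");
    else
        printf("keeping txn_opinfo for backward compatibility\n");
}

int
main(void)
{
    struct txn_opinfo info = {.skip_locking = 1};
    maybe_clear_txn_opinfo(&info, 50400); /* heterogeneous cluster: keep */
    maybe_clear_txn_opinfo(&info, 60000); /* fully upgraded: clear */
    return 0;
}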
diff --git a/0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch b/0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch
new file mode 100644
index 0000000..2b23236
--- /dev/null
+++ b/0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch
@@ -0,0 +1,195 @@
+From b1a4947e382c5e2ba1137ed606ecffc69fcf00e9 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Tue, 25 Jun 2019 17:30:17 +0530
+Subject: [PATCH 205/221] glusterd: Can't run rebalance due to long unix socket
+
+Problem: glusterd populates the unix socket file name based
+         on the volname; if the volname is lengthy, socket
+         system calls fail because the name breaches the
+         maximum length defined in the kernel.
+
+Solution: Convert the unix socket name to a hash to resolve the issue.
+
+> Change-Id: I5072e8184013095587537dbfa4767286307fff65
+> fixes: bz#1720566
+> (Cherry pick from commit 2d7b77eb971700c1073db2b74f5877c1ae8293fc)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22869/)
+
+BUG: 1720192
+Change-Id: I5072e8184013095587537dbfa4767286307fff65
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174557
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/bugs/glusterd/bug-1720566.t | 50 ++++++++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-rebalance.c | 38 +-------------------
+ xlators/mgmt/glusterd/src/glusterd.h | 23 +++++-------
+ 3 files changed, 59 insertions(+), 52 deletions(-)
+ create mode 100644 tests/bugs/glusterd/bug-1720566.t
+
+diff --git a/tests/bugs/glusterd/bug-1720566.t b/tests/bugs/glusterd/bug-1720566.t
+new file mode 100644
+index 0000000..99bcf6f
+--- /dev/null
++++ b/tests/bugs/glusterd/bug-1720566.t
+@@ -0,0 +1,50 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../cluster.rc
++. $(dirname $0)/../../volume.rc
++
++
++cleanup;
++V0="TestLongVolnamec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9abcd"
++V1="TestLongVolname3102bd28a16c49440bd5210e4ec4d5d93102bd28a16c49440bd5210e4ec4d5d933102bd28a16c49440bd5210e4ebbcd"
++TEST launch_cluster 2;
++TEST $CLI_1 peer probe $H2;
++
++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
++
++$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
++EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
++$CLI_1 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1
++EXPECT 'Created' cluster_volinfo_field 1 $V1 'Status';
++
++$CLI_1 volume start $V0
++EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
++
++$CLI_1 volume start $V1
++EXPECT 'Started' cluster_volinfo_field 1 $V1 'Status';
++
++#Mount FUSE
++TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
++
++
++#Mount FUSE
++TEST glusterfs -s $H1 --volfile-id=$V1 $M1;
++
++TEST mkdir $M0/dir{1..4};
++TEST touch $M0/dir{1..4}/files{1..4};
++
++TEST mkdir $M1/dir{1..4};
++TEST touch $M1/dir{1..4}/files{1..4};
++
++TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}_1 $H2:$B2/${V0}_1
++TEST $CLI_1 volume add-brick $V1 $H1:$B1/${V1}_1 $H2:$B2/${V1}_1
++
++
++TEST $CLI_1 volume rebalance $V0 start
++TEST $CLI_1 volume rebalance $V1 start
++
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V1
++
++cleanup;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+index cbed9a9..b419a89 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+@@ -266,18 +266,7 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
+
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"),
+- &volfileserver) == 0) {
+- /*In the case of running multiple glusterds on a single machine,
+- *we should ensure that log file and unix socket file should be
+- *unique in given cluster */
+-
+- GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(sockfile, volinfo, priv);
+- snprintf(logfile, PATH_MAX, "%s/%s-%s-%s.log",
+- DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname,
+- (cmd == GF_DEFRAG_CMD_START_TIER ? "tier" : "rebalance"),
+- uuid_utoa(MY_UUID));
+-
+- } else {
++ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+
+@@ -378,9 +367,6 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
+ glusterd_defrag_info_t *defrag = volinfo->rebal.defrag;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+- struct stat buf = {
+- 0,
+- };
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -396,28 +382,6 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
+ goto out;
+
+ GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
+- /* Check if defrag sockfile exists in the new location
+- * in /var/run/ , if it does not try the old location
+- */
+- ret = sys_stat(sockfile, &buf);
+- /* TODO: Remove this once we don't need backward compatibility
+- * with the older path
+- */
+- if (ret && (errno == ENOENT)) {
+- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+- "Rebalance sockfile "
+- "%s does not exist. Trying old path.",
+- sockfile);
+- GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(sockfile, volinfo, priv);
+- ret = sys_stat(sockfile, &buf);
+- if (ret && (ENOENT == errno)) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REBAL_NO_SOCK_FILE,
+- "Rebalance "
+- "sockfile %s does not exist",
+- sockfile);
+- goto out;
+- }
+- }
+
+ /* Setting frame-timeout to 10mins (600seconds).
+ * Unix domain sockets ensures that the connection is reliable. The
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index f96bca3..7d07d33 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -910,30 +910,23 @@ typedef ssize_t (*gd_serialize_t)(struct iovec outmsg, void *args);
+ } \
+ } while (0)
+
+-#define GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(path, volinfo, priv) \
+- do { \
+- char defrag_path[PATH_MAX]; \
+- int32_t _sockfile_old_len; \
+- GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+- _sockfile_old_len = snprintf(path, PATH_MAX, "%s/%s.sock", \
+- defrag_path, uuid_utoa(MY_UUID)); \
+- if ((_sockfile_old_len < 0) || (_sockfile_old_len >= PATH_MAX)) { \
+- path[0] = 0; \
+- } \
+- } while (0)
+-
+ #define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) \
+ do { \
+ char operation[NAME_MAX]; \
++ char tmppath[PATH_MAX] = { \
++ 0, \
++ }; \
+ int32_t _defrag_sockfile_len; \
+ GLUSTERD_GET_DEFRAG_PROCESS(operation, volinfo); \
+ _defrag_sockfile_len = snprintf( \
+- path, UNIX_PATH_MAX, \
+- DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s.sock", operation, \
+- uuid_utoa(volinfo->volume_id)); \
++ tmppath, PATH_MAX, \
++ DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s-%s.sock", operation, \
++ volinfo->volname, uuid_utoa(MY_UUID)); \
+ if ((_defrag_sockfile_len < 0) || \
+ (_defrag_sockfile_len >= PATH_MAX)) { \
+ path[0] = 0; \
++ } else { \
++ glusterd_set_socket_filepath(tmppath, path, sizeof(path)); \
+ } \
+ } while (0)
+
+--
+1.8.3.1
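/* Editor's note, as a hedged C sketch: patch 0205 above fixes rebalance
 * failures on long volume names by routing the socket path through
 * glusterd_set_socket_filepath(), which reduces it to a fixed-length
 * digest.  sun_path in sockaddr_un is typically only ~108 bytes, so a
 * name embedding the volname verbatim can overflow it.  The FNV-1a hash
 * below is only an illustrative stand-in for whatever digest that helper
 * actually applies. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/un.h>

static uint64_t
fnv1a(const char *s)
{
    uint64_t h = 1469598103934665603ULL;
    while (*s) {
        h ^= (unsigned char)*s++;
        h *= 1099511628211ULL;
    }
    return h;
}

int
main(void)
{
    char longname[512];
    char sockpath[sizeof(((struct sockaddr_un *)0)->sun_path)];

    /* A rebalance socket name long enough to breach sun_path if used
     * verbatim (the 128-digit field mimics a very long volname). */
    snprintf(longname, sizeof(longname),
             "/var/run/gluster/gluster-rebalance-%0128d.sock", 7);

    /* Fixed-length replacement: always fits, still unique per input. */
    snprintf(sockpath, sizeof(sockpath), "/var/run/gluster/%016llx.sock",
             (unsigned long long)fnv1a(longname));
    printf("%s (%zu bytes)\n", sockpath, strlen(sockpath));
    return 0;
}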
+
diff --git a/0206-glusterd-ignore-user.-options-from-compatibility-che.patch b/0206-glusterd-ignore-user.-options-from-compatibility-che.patch
new file mode 100644
index 0000000..8908097
--- /dev/null
+++ b/0206-glusterd-ignore-user.-options-from-compatibility-che.patch
@@ -0,0 +1,41 @@
+From f77d4a024cb9b17de7d5add064b34adfb0455d17 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 24 Jun 2019 18:32:52 +0530
+Subject: [PATCH 206/221] glusterd: ignore user.* options from compatibility
+ check in brick mux
+
+user.* options are just custom metadata and contribute nothing to
+determining volume compatibility in brick multiplexing.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22933/
+
+>Fixes: bz#1723402
+>Change-Id: Ic7e0181ab72993d29cab345cde64ae1340bf4faf
+>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1722509
+Change-Id: Ic7e0181ab72993d29cab345cde64ae1340bf4faf
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174589
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 7768b8e..c6e9bb0 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -2425,6 +2425,9 @@ unsafe_option(dict_t *this, char *key, data_t *value, void *arg)
+ if (fnmatch("*diagnostics.client-log*", key, 0) == 0) {
+ return _gf_false;
+ }
++ if (fnmatch("user.*", key, 0) == 0) {
++ return _gf_false;
++ }
+
+ return _gf_true;
+ }
+--
+1.8.3.1
+
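/* Editor's note, as a hedged C sketch: patch 0206 above extends
 * unsafe_option() so that user.* keys are treated as always-safe when
 * comparing volumes for brick multiplexing.  Minimal standalone version
 * of that fnmatch()-based whitelist check: */
#include <fnmatch.h>
#include <stdio.h>

static int
is_ignorable_option(const char *key)
{
    /* user.* options are free-form metadata; they must not make two
     * otherwise identical volumes look incompatible. */
    if (fnmatch("user.*", key, 0) == 0)
        return 1;
    if (fnmatch("*diagnostics.client-log*", key, 0) == 0)
        return 1;
    return 0;
}

int
main(void)
{
    printf("%d\n", is_ignorable_option("user.smb"));             /* 1 */
    printf("%d\n", is_ignorable_option("performance.io-cache")); /* 0 */
    return 0;
}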
diff --git a/0207-glusterd-fix-use-after-free-of-a-dict_t.patch b/0207-glusterd-fix-use-after-free-of-a-dict_t.patch
new file mode 100644
index 0000000..5a92d58
--- /dev/null
+++ b/0207-glusterd-fix-use-after-free-of-a-dict_t.patch
@@ -0,0 +1,44 @@
+From a7a7d497af4230430f8a0cc54d8b49cfea260039 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Tue, 25 Jun 2019 18:00:06 +0200
+Subject: [PATCH 207/221] glusterd: fix use-after-free of a dict_t
+
+A dict was passed to a function that calls dict_unref() without taking
+any additional reference. Given that the same dict is also used after
+the function returns, this was causing a use-after-free situation.
+
+To fix the issue, we simply take an additional reference before calling
+the function.
+
+Upstream patch:
+> BUG: 1723890
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22943
+> Change-Id: I98c6b76b08fe3fa6224edf281a26e9ba1ffe3017
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: I98c6b76b08fe3fa6224edf281a26e9ba1ffe3017
+Updates: bz#1722801
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174656
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index c6e9bb0..4c487d0 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -3697,7 +3697,7 @@ glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf,
+ if (totthread) {
+ gf_log(this->name, GF_LOG_INFO,
+ "Finished merger of all dictionraies into single one");
+- dict_arr[totthread++] = peer_data;
++ dict_arr[totthread++] = dict_ref(peer_data);
+ ret = glusterd_dict_arr_serialize(dict_arr, totthread, buf, length);
+ gf_log(this->name, GF_LOG_INFO,
+ "Serialize dictionary data return is %d", ret);
+--
+1.8.3.1
+
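/* Editor's note, as a hedged C sketch: patch 0207 above applies the usual
 * refcounting rule -- a callee that unrefs its argument consumes one
 * reference, so a caller that keeps using the object must take an extra
 * reference first.  Toy dict below; serialize_and_unref() is a
 * hypothetical simplification of glusterd_dict_arr_serialize(). */
#include <stdio.h>
#include <stdlib.h>

typedef struct dict {
    int refcount;
} dict_t;

static dict_t *
dict_new(void)
{
    dict_t *d = calloc(1, sizeof(*d));
    d->refcount = 1;
    return d;
}

static dict_t *
dict_ref(dict_t *d)
{
    d->refcount++;
    return d;
}

static void
dict_unref(dict_t *d)
{
    if (--d->refcount == 0) {
        printf("freed\n");
        free(d);
    }
}

static void
serialize_and_unref(dict_t *d)
{
    /* ... serialize d ... then consume the caller's reference */
    dict_unref(d);
}

int
main(void)
{
    dict_t *peer_data = dict_new();

    /* The fix: hand the callee its own reference. */
    serialize_and_unref(dict_ref(peer_data));

    /* Still valid here; without the extra ref this would read freed
     * memory, which is exactly the bug the patch closes. */
    printf("refcount=%d\n", peer_data->refcount);
    dict_unref(peer_data);
    return 0;
}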
diff --git a/0208-mem-pool-remove-dead-code.patch b/0208-mem-pool-remove-dead-code.patch
new file mode 100644
index 0000000..c8678f2
--- /dev/null
+++ b/0208-mem-pool-remove-dead-code.patch
@@ -0,0 +1,161 @@
+From d7ddc1cd3af86198ffca2d1958871d4c2c04bd9e Mon Sep 17 00:00:00 2001
+From: Yaniv Kaul <ykaul@redhat.com>
+Date: Thu, 21 Mar 2019 19:51:30 +0200
+Subject: [PATCH 208/221] mem-pool: remove dead code.
+
+Upstream patch:
+> Change-Id: I3bbda719027b45e1289db2e6a718627141bcbdc8
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22394
+> BUG: 1193929
+> Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+
+Updates: bz#1722801
+Change-Id: I3bbda719027b45e1289db2e6a718627141bcbdc8
+Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174710
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/glusterfs/mem-pool.h | 11 ------
+ libglusterfs/src/mem-pool.c | 70 -----------------------------------
+ 2 files changed, 81 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
+index 90905fb..0250b59 100644
+--- a/libglusterfs/src/glusterfs/mem-pool.h
++++ b/libglusterfs/src/glusterfs/mem-pool.h
+@@ -308,15 +308,4 @@ mem_pool_destroy(struct mem_pool *pool);
+ void
+ gf_mem_acct_enable_set(void *ctx);
+
+-/* hit will be set to :
+- * _gf_true if the memory is served from mem pool
+- * _gf_false if the requested size was not present in mem pool and hence
+- * std alloc'd.
+- */
+-void *
+-mem_pool_get(unsigned long sizeof_type, gf_boolean_t *hit);
+-
+-void *
+-mem_pool_get0(unsigned long sizeof_type, gf_boolean_t *hit);
+-
+ #endif /* _MEM_POOL_H */
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index 3934a78..9b4ea52 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -365,10 +365,6 @@ static size_t pool_list_size;
+ #define N_COLD_LISTS 1024
+ #define POOL_SWEEP_SECS 30
+
+-static unsigned long sweep_times;
+-static unsigned long sweep_usecs;
+-static unsigned long frees_to_system;
+-
+ typedef struct {
+ struct list_head death_row;
+ pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
+@@ -426,7 +422,6 @@ free_obj_list(pooled_obj_hdr_t *victim)
+ next = victim->next;
+ free(victim);
+ victim = next;
+- ++frees_to_system;
+ }
+ }
+
+@@ -438,9 +433,6 @@ pool_sweeper(void *arg)
+ per_thread_pool_list_t *next_pl;
+ per_thread_pool_t *pt_pool;
+ unsigned int i;
+- struct timeval begin_time;
+- struct timeval end_time;
+- struct timeval elapsed;
+ gf_boolean_t poisoned;
+
+ /*
+@@ -457,7 +449,6 @@ pool_sweeper(void *arg)
+ state.n_cold_lists = 0;
+
+ /* First pass: collect stuff that needs our attention. */
+- (void)gettimeofday(&begin_time, NULL);
+ (void)pthread_mutex_lock(&pool_lock);
+ list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list)
+ {
+@@ -470,10 +461,6 @@ pool_sweeper(void *arg)
+ }
+ }
+ (void)pthread_mutex_unlock(&pool_lock);
+- (void)gettimeofday(&end_time, NULL);
+- timersub(&end_time, &begin_time, &elapsed);
+- sweep_usecs += elapsed.tv_sec * 1000000 + elapsed.tv_usec;
+- sweep_times += 1;
+
+ /* Second pass: free dead pools. */
+ (void)pthread_mutex_lock(&pool_free_lock);
+@@ -879,63 +866,6 @@ mem_get(struct mem_pool *mem_pool)
+ #endif /* GF_DISABLE_MEMPOOL */
+ }
+
+-void *
+-mem_pool_get(unsigned long sizeof_type, gf_boolean_t *hit)
+-{
+-#if defined(GF_DISABLE_MEMPOOL)
+- return GF_MALLOC(sizeof_type, gf_common_mt_mem_pool);
+-#else
+- pooled_obj_hdr_t *retval;
+- unsigned int power;
+- struct mem_pool_shared *pool = NULL;
+-
+- if (!sizeof_type) {
+- gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+- "invalid argument");
+- return NULL;
+- }
+-
+- /* We ensure sizeof_type > 1 and the next power of two will be, at least,
+- * 2^POOL_SMALLEST */
+- sizeof_type |= (1 << POOL_SMALLEST) - 1;
+- power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1;
+- if (power > POOL_LARGEST) {
+- gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+- "invalid argument");
+- return NULL;
+- }
+- pool = &pools[power - POOL_SMALLEST];
+-
+- retval = mem_get_from_pool(NULL, pool, hit);
+-
+- return retval + 1;
+-#endif /* GF_DISABLE_MEMPOOL */
+-}
+-
+-void *
+-mem_pool_get0(unsigned long sizeof_type, gf_boolean_t *hit)
+-{
+- void *ptr = NULL;
+- unsigned int power;
+- struct mem_pool_shared *pool = NULL;
+-
+- ptr = mem_pool_get(sizeof_type, hit);
+- if (ptr) {
+-#if defined(GF_DISABLE_MEMPOOL)
+- memset(ptr, 0, sizeof_type);
+-#else
+- /* We ensure sizeof_type > 1 and the next power of two will be, at
+- * least, 2^POOL_SMALLEST */
+- sizeof_type |= (1 << POOL_SMALLEST) - 1;
+- power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1;
+- pool = &pools[power - POOL_SMALLEST];
+- memset(ptr, 0, AVAILABLE_SIZE(pool->power_of_two));
+-#endif
+- }
+-
+- return ptr;
+-}
+-
+ void
+ mem_put(void *ptr)
+ {
+--
+1.8.3.1
+
diff --git a/0209-core-avoid-dynamic-TLS-allocation-when-possible.patch b/0209-core-avoid-dynamic-TLS-allocation-when-possible.patch
new file mode 100644
index 0000000..f46f1b6
--- /dev/null
+++ b/0209-core-avoid-dynamic-TLS-allocation-when-possible.patch
@@ -0,0 +1,1059 @@
+From 2f5969a77493814e242e6bac3c6bf7acf3202e0f Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Tue, 5 Mar 2019 18:58:20 +0100
+Subject: [PATCH 209/221] core: avoid dynamic TLS allocation when possible
+
+Some interdependencies between logging and memory management functions
+make it impossible to use the logging framework before initializing the
+memory subsystem, because they both depend on Thread Local Storage
+allocated through pthread_key_create() during initialization.
+
+This causes a crash when we try to log something very early in the
+initialization phase.
+
+To prevent this, several dynamically allocated TLS structures have
+been replaced by static TLS reserved at compile time using '__thread'
+keyword. This also reduces the number of error sources, making
+initialization simpler.
+
+Upstream patch:
+> BUG: 1193929
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22302
+> Change-Id: I8ea2e072411e30790d50084b6b7e909c7bb01d50
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: I8ea2e072411e30790d50084b6b7e909c7bb01d50
+Updates: bz#1722801
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174711
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ api/src/glfs.c | 3 +-
+ cli/src/cli.c | 3 +-
+ glusterfsd/src/glusterfsd.c | 4 +-
+ libglusterfs/src/globals.c | 289 ++++-----------------
+ libglusterfs/src/glusterfs/globals.h | 6 +-
+ libglusterfs/src/glusterfs/mem-pool.h | 7 +-
+ libglusterfs/src/libglusterfs.sym | 3 +-
+ libglusterfs/src/mem-pool.c | 98 +++----
+ libglusterfs/src/syncop.c | 133 ++--------
+ .../changelog/lib/src/gf-changelog-helpers.c | 51 +---
+ xlators/features/changelog/lib/src/gf-changelog.c | 3 +-
+ xlators/nfs/server/src/mount3udp_svc.c | 6 +-
+ 12 files changed, 114 insertions(+), 492 deletions(-)
+
+diff --git a/api/src/glfs.c b/api/src/glfs.c
+index 6bbb620..f36616d 100644
+--- a/api/src/glfs.c
++++ b/api/src/glfs.c
+@@ -829,8 +829,7 @@ pub_glfs_new(const char *volname)
+ * Do this as soon as possible in case something else depends on
+ * pool allocations.
+ */
+- mem_pools_init_early();
+- mem_pools_init_late();
++ mem_pools_init();
+
+ fs = glfs_new_fs(volname);
+ if (!fs)
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index ff39a98..99a16a0 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -795,8 +795,7 @@ main(int argc, char *argv[])
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+
+- mem_pools_init_early();
+- mem_pools_init_late();
++ mem_pools_init();
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 6aee4c1..2172af4 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -2722,8 +2722,6 @@ main(int argc, char *argv[])
+ };
+ cmd_args_t *cmd = NULL;
+
+- mem_pools_init_early();
+-
+ gf_check_and_set_mem_acct(argc, argv);
+
+ ctx = glusterfs_ctx_new();
+@@ -2838,7 +2836,7 @@ main(int argc, char *argv[])
+ * the parent, but we want to do it as soon as possible after that in
+ * case something else depends on pool allocations.
+ */
+- mem_pools_init_late();
++ mem_pools_init();
+
+ #ifdef GF_LINUX_HOST_OS
+ ret = set_oom_score_adj(ctx);
+diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
+index 4fec063..02098e6 100644
+--- a/libglusterfs/src/globals.c
++++ b/libglusterfs/src/globals.c
+@@ -99,16 +99,19 @@ const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = {
+ glusterfs_ctx_t *global_ctx = NULL;
+ pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;
+ xlator_t global_xlator;
+-static pthread_key_t this_xlator_key;
+-static pthread_key_t synctask_key;
+-static pthread_key_t uuid_buf_key;
+-static char global_uuid_buf[GF_UUID_BUF_SIZE];
+-static pthread_key_t lkowner_buf_key;
+-static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE];
+-static pthread_key_t leaseid_buf_key;
+ static int gf_global_mem_acct_enable = 1;
+ static pthread_once_t globals_inited = PTHREAD_ONCE_INIT;
+
++static pthread_key_t free_key;
++
++static __thread xlator_t *thread_xlator = NULL;
++static __thread void *thread_synctask = NULL;
++static __thread void *thread_leaseid = NULL;
++static __thread struct syncopctx thread_syncopctx = {};
++static __thread char thread_uuid_buf[GF_UUID_BUF_SIZE] = {};
++static __thread char thread_lkowner_buf[GF_LKOWNER_BUF_SIZE] = {};
++static __thread char thread_leaseid_buf[GF_LEASE_ID_BUF_SIZE] = {};
++
+ int
+ gf_global_mem_acct_enable_get(void)
+ {
+@@ -122,12 +125,6 @@ gf_global_mem_acct_enable_set(int val)
+ return 0;
+ }
+
+-void
+-glusterfs_this_destroy(void *ptr)
+-{
+- FREE(ptr);
+-}
+-
+ static struct xlator_cbks global_cbks = {
+ .forget = NULL,
+ .release = NULL,
+@@ -212,18 +209,9 @@ struct volume_options global_xl_options[] = {
+
+ static volume_opt_list_t global_xl_opt_list;
+
+-int
++void
+ glusterfs_this_init()
+ {
+- int ret = 0;
+- ret = pthread_key_create(&this_xlator_key, glusterfs_this_destroy);
+- if (ret != 0) {
+- gf_msg("", GF_LOG_WARNING, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED,
+- "failed to create "
+- "the pthread key");
+- return ret;
+- }
+-
+ global_xlator.name = "glusterfs";
+ global_xlator.type = GF_GLOBAL_XLATOR_NAME;
+ global_xlator.cbks = &global_cbks;
+@@ -237,301 +225,120 @@ glusterfs_this_init()
+ global_xl_opt_list.given_opt = global_xl_options;
+
+ list_add_tail(&global_xl_opt_list.list, &global_xlator.volume_options);
+-
+- return ret;
+ }
+
+ xlator_t **
+ __glusterfs_this_location()
+ {
+- xlator_t **this_location = NULL;
+- int ret = 0;
+-
+- this_location = pthread_getspecific(this_xlator_key);
+-
+- if (!this_location) {
+- this_location = CALLOC(1, sizeof(*this_location));
+- if (!this_location)
+- goto out;
++ xlator_t **this_location;
+
+- ret = pthread_setspecific(this_xlator_key, this_location);
+- if (ret != 0) {
+- FREE(this_location);
+- this_location = NULL;
+- goto out;
+- }
+- }
+-out:
+- if (this_location) {
+- if (!*this_location)
+- *this_location = &global_xlator;
++ this_location = &thread_xlator;
++ if (*this_location == NULL) {
++ thread_xlator = &global_xlator;
+ }
++
+ return this_location;
+ }
+
+ xlator_t *
+ glusterfs_this_get()
+ {
+- xlator_t **this_location = NULL;
+-
+- this_location = __glusterfs_this_location();
+- if (!this_location)
+- return &global_xlator;
+-
+- return *this_location;
++ return *__glusterfs_this_location();
+ }
+
+-int
++void
+ glusterfs_this_set(xlator_t *this)
+ {
+- xlator_t **this_location = NULL;
+-
+- this_location = __glusterfs_this_location();
+- if (!this_location)
+- return -ENOMEM;
+-
+- *this_location = this;
+-
+- return 0;
++ thread_xlator = this;
+ }
+
+ /* SYNCOPCTX */
+-static pthread_key_t syncopctx_key;
+-
+-static void
+-syncopctx_key_destroy(void *ptr)
+-{
+- struct syncopctx *opctx = ptr;
+-
+- if (opctx) {
+- if (opctx->groups)
+- GF_FREE(opctx->groups);
+-
+- GF_FREE(opctx);
+- }
+-
+- return;
+-}
+
+ void *
+ syncopctx_getctx()
+ {
+- void *opctx = NULL;
+-
+- opctx = pthread_getspecific(syncopctx_key);
+-
+- return opctx;
+-}
+-
+-int
+-syncopctx_setctx(void *ctx)
+-{
+- int ret = 0;
+-
+- ret = pthread_setspecific(syncopctx_key, ctx);
+-
+- return ret;
+-}
+-
+-static int
+-syncopctx_init(void)
+-{
+- int ret;
+-
+- ret = pthread_key_create(&syncopctx_key, syncopctx_key_destroy);
+-
+- return ret;
++ return &thread_syncopctx;
+ }
+
+ /* SYNCTASK */
+
+-int
+-synctask_init()
+-{
+- int ret = 0;
+-
+- ret = pthread_key_create(&synctask_key, NULL);
+-
+- return ret;
+-}
+-
+ void *
+ synctask_get()
+ {
+- void *synctask = NULL;
+-
+- synctask = pthread_getspecific(synctask_key);
+-
+- return synctask;
++ return thread_synctask;
+ }
+
+-int
++void
+ synctask_set(void *synctask)
+ {
+- int ret = 0;
+-
+- pthread_setspecific(synctask_key, synctask);
+-
+- return ret;
++ thread_synctask = synctask;
+ }
+
+ // UUID_BUFFER
+
+-void
+-glusterfs_uuid_buf_destroy(void *ptr)
+-{
+- FREE(ptr);
+-}
+-
+-int
+-glusterfs_uuid_buf_init()
+-{
+- int ret = 0;
+-
+- ret = pthread_key_create(&uuid_buf_key, glusterfs_uuid_buf_destroy);
+- return ret;
+-}
+-
+ char *
+ glusterfs_uuid_buf_get()
+ {
+- char *buf;
+- int ret = 0;
+-
+- buf = pthread_getspecific(uuid_buf_key);
+- if (!buf) {
+- buf = MALLOC(GF_UUID_BUF_SIZE);
+- ret = pthread_setspecific(uuid_buf_key, (void *)buf);
+- if (ret)
+- buf = global_uuid_buf;
+- }
+- return buf;
++ return thread_uuid_buf;
+ }
+
+ /* LKOWNER_BUFFER */
+
+-void
+-glusterfs_lkowner_buf_destroy(void *ptr)
+-{
+- FREE(ptr);
+-}
+-
+-int
+-glusterfs_lkowner_buf_init()
+-{
+- int ret = 0;
+-
+- ret = pthread_key_create(&lkowner_buf_key, glusterfs_lkowner_buf_destroy);
+- return ret;
+-}
+-
+ char *
+ glusterfs_lkowner_buf_get()
+ {
+- char *buf;
+- int ret = 0;
+-
+- buf = pthread_getspecific(lkowner_buf_key);
+- if (!buf) {
+- buf = MALLOC(GF_LKOWNER_BUF_SIZE);
+- ret = pthread_setspecific(lkowner_buf_key, (void *)buf);
+- if (ret)
+- buf = global_lkowner_buf;
+- }
+- return buf;
++ return thread_lkowner_buf;
+ }
+
+ /* Leaseid buffer */
+-void
+-glusterfs_leaseid_buf_destroy(void *ptr)
+-{
+- FREE(ptr);
+-}
+-
+-int
+-glusterfs_leaseid_buf_init()
+-{
+- int ret = 0;
+-
+- ret = pthread_key_create(&leaseid_buf_key, glusterfs_leaseid_buf_destroy);
+- return ret;
+-}
+
+ char *
+ glusterfs_leaseid_buf_get()
+ {
+ char *buf = NULL;
+- int ret = 0;
+
+- buf = pthread_getspecific(leaseid_buf_key);
+- if (!buf) {
+- buf = CALLOC(1, GF_LEASE_ID_BUF_SIZE);
+- ret = pthread_setspecific(leaseid_buf_key, (void *)buf);
+- if (ret) {
+- FREE(buf);
+- buf = NULL;
+- }
++ buf = thread_leaseid;
++ if (buf == NULL) {
++ buf = thread_leaseid_buf;
++ thread_leaseid = buf;
+ }
++
+ return buf;
+ }
+
+ char *
+ glusterfs_leaseid_exist()
+ {
+- return pthread_getspecific(leaseid_buf_key);
++ return thread_leaseid;
+ }
+
+ static void
+-gf_globals_init_once()
++glusterfs_cleanup(void *ptr)
+ {
+- int ret = 0;
+-
+- ret = glusterfs_this_init();
+- if (ret) {
+- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_TRANSLATOR_INIT_FAILED,
+- "ERROR: glusterfs-translator init failed");
+- goto out;
+- }
+-
+- ret = glusterfs_uuid_buf_init();
+- if (ret) {
+- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_UUID_BUF_INIT_FAILED,
+- "ERROR: glusterfs uuid buffer init failed");
+- goto out;
++ if (thread_syncopctx.groups != NULL) {
++ GF_FREE(thread_syncopctx.groups);
+ }
+
+- ret = glusterfs_lkowner_buf_init();
+- if (ret) {
+- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_LKOWNER_BUF_INIT_FAILED,
+- "ERROR: glusterfs lkowner buffer init failed");
+- goto out;
+- }
++ mem_pool_thread_destructor();
++}
+
+- ret = glusterfs_leaseid_buf_init();
+- if (ret) {
+- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_LEASEID_BUF_INIT_FAILED,
+- "ERROR: glusterfs leaseid buffer init failed");
+- goto out;
+- }
++static void
++gf_globals_init_once()
++{
++ int ret = 0;
+
+- ret = synctask_init();
+- if (ret) {
+- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_SYNCTASK_INIT_FAILED,
+- "ERROR: glusterfs synctask init failed");
+- goto out;
+- }
++ glusterfs_this_init();
+
+- ret = syncopctx_init();
+- if (ret) {
+- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_SYNCOPCTX_INIT_FAILED,
+- "ERROR: glusterfs syncopctx init failed");
+- goto out;
+- }
+-out:
++ /* This is needed only to cleanup the potential allocation of
++ * thread_syncopctx.groups. */
++ ret = pthread_key_create(&free_key, glusterfs_cleanup);
++ if (ret != 0) {
++ gf_msg("", GF_LOG_ERROR, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED,
++ "failed to create the pthread key");
+
+- if (ret) {
+ gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_GLOBAL_INIT_FAILED,
+ "Exiting as global initialization failed");
++
+ exit(ret);
+ }
+ }
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index e45db14..55476f6 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -147,7 +147,7 @@ xlator_t **
+ __glusterfs_this_location(void);
+ xlator_t *
+ glusterfs_this_get(void);
+-int
++void
+ glusterfs_this_set(xlator_t *);
+
+ extern xlator_t global_xlator;
+@@ -156,13 +156,11 @@ extern struct volume_options global_xl_options[];
+ /* syncopctx */
+ void *
+ syncopctx_getctx(void);
+-int
+-syncopctx_setctx(void *ctx);
+
+ /* task */
+ void *
+ synctask_get(void);
+-int
++void
+ synctask_set(void *);
+
+ /* uuid_buf */
+diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
+index 0250b59..c5a486b 100644
+--- a/libglusterfs/src/glusterfs/mem-pool.h
++++ b/libglusterfs/src/glusterfs/mem-pool.h
+@@ -279,9 +279,7 @@ struct mem_pool_shared {
+ };
+
+ void
+-mem_pools_init_early(void); /* basic initialization of memory pools */
+-void
+-mem_pools_init_late(void); /* start the pool_sweeper thread */
++mem_pools_init(void); /* start the pool_sweeper thread */
+ void
+ mem_pools_fini(void); /* cleanup memory pools */
+
+@@ -306,6 +304,9 @@ void
+ mem_pool_destroy(struct mem_pool *pool);
+
+ void
++mem_pool_thread_destructor(void);
++
++void
+ gf_mem_acct_enable_set(void *ctx);
+
+ #endif /* _MEM_POOL_H */
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index 7a2edef..86215d2 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -872,8 +872,7 @@ mem_get0
+ mem_pool_destroy
+ mem_pool_new_fn
+ mem_pools_fini
+-mem_pools_init_early
+-mem_pools_init_late
++mem_pools_init
+ mem_put
+ mkdir_p
+ next_token
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index 9b4ea52..ab78804 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -353,7 +353,6 @@ free:
+ FREE(ptr);
+ }
+
+-static pthread_key_t pool_key;
+ static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
+ static struct list_head pool_threads;
+ static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER;
+@@ -361,6 +360,8 @@ static struct list_head pool_free_threads;
+ static struct mem_pool_shared pools[NPOOLS];
+ static size_t pool_list_size;
+
++static __thread per_thread_pool_list_t *thread_pool_list = NULL;
++
+ #if !defined(GF_DISABLE_MEMPOOL)
+ #define N_COLD_LISTS 1024
+ #define POOL_SWEEP_SECS 30
+@@ -373,7 +374,6 @@ typedef struct {
+
+ enum init_state {
+ GF_MEMPOOL_INIT_NONE = 0,
+- GF_MEMPOOL_INIT_PREINIT,
+ GF_MEMPOOL_INIT_EARLY,
+ GF_MEMPOOL_INIT_LATE,
+ GF_MEMPOOL_INIT_DESTROY
+@@ -486,9 +486,9 @@ pool_sweeper(void *arg)
+ }
+
+ void
+-pool_destructor(void *arg)
++mem_pool_thread_destructor(void)
+ {
+- per_thread_pool_list_t *pool_list = arg;
++ per_thread_pool_list_t *pool_list = thread_pool_list;
+
+ /* The pool-sweeper thread will take it from here.
+ *
+@@ -499,7 +499,10 @@ pool_destructor(void *arg)
+ * This change can modify what mem_put() does, but both possibilities are
+ * fine until the sweeper thread kicks in. The real synchronization must be
+ * between mem_put() and the sweeper thread. */
+- pool_list->poison = 1;
++ if (pool_list != NULL) {
++ pool_list->poison = 1;
++ thread_pool_list = NULL;
++ }
+ }
+
+ static __attribute__((constructor)) void
+@@ -522,46 +525,14 @@ mem_pools_preinit(void)
+ pool_list_size = sizeof(per_thread_pool_list_t) +
+ sizeof(per_thread_pool_t) * (NPOOLS - 1);
+
+- init_done = GF_MEMPOOL_INIT_PREINIT;
++ init_done = GF_MEMPOOL_INIT_EARLY;
+ }
+
+-/* Use mem_pools_init_early() function for basic initialization. There will be
+- * no cleanup done by the pool_sweeper thread until mem_pools_init_late() has
+- * been called. Calling mem_get() will be possible after this function has
+- * setup the basic structures. */
++/* Call mem_pools_init() once threading has been configured completely. This
++ * prevents the pool_sweeper thread from getting killed once the main() thread
++ * exits during daemonizing. */
+ void
+-mem_pools_init_early(void)
+-{
+- pthread_mutex_lock(&init_mutex);
+- /* Use a pthread_key destructor to clean up when a thread exits.
+- *
+- * We won't increase init_count here, that is only done when the
+- * pool_sweeper thread is started too.
+- */
+- if (init_done == GF_MEMPOOL_INIT_PREINIT ||
+- init_done == GF_MEMPOOL_INIT_DESTROY) {
+- /* key has not been created yet */
+- if (pthread_key_create(&pool_key, pool_destructor) != 0) {
+- gf_log("mem-pool", GF_LOG_CRITICAL,
+- "failed to initialize mem-pool key");
+- }
+-
+- init_done = GF_MEMPOOL_INIT_EARLY;
+- } else {
+- gf_log("mem-pool", GF_LOG_CRITICAL,
+- "incorrect order of mem-pool initialization "
+- "(init_done=%d)",
+- init_done);
+- }
+-
+- pthread_mutex_unlock(&init_mutex);
+-}
+-
+-/* Call mem_pools_init_late() once threading has been configured completely.
+- * This prevent the pool_sweeper thread from getting killed once the main()
+- * thread exits during deamonizing. */
+-void
+-mem_pools_init_late(void)
++mem_pools_init(void)
+ {
+ pthread_mutex_lock(&init_mutex);
+ if ((init_count++) == 0) {
+@@ -580,13 +551,12 @@ mem_pools_fini(void)
+ switch (init_count) {
+ case 0:
+ /*
+- * If init_count is already zero (as e.g. if somebody called
+- * this before mem_pools_init_late) then the sweeper was
+- * probably never even started so we don't need to stop it.
+- * Even if there's some crazy circumstance where there is a
+- * sweeper but init_count is still zero, that just means we'll
+- * leave it running. Not perfect, but far better than any
+- * known alternative.
++ * If init_count is already zero (as e.g. if somebody called this
++ * before mem_pools_init) then the sweeper was probably never even
++ * started so we don't need to stop it. Even if there's some crazy
++ * circumstance where there is a sweeper but init_count is still
++ * zero, that just means we'll leave it running. Not perfect, but
++ * far better than any known alternative.
+ */
+ break;
+ case 1: {
+@@ -594,20 +564,17 @@ mem_pools_fini(void)
+ per_thread_pool_list_t *next_pl;
+ unsigned int i;
+
+- /* if only mem_pools_init_early() was called, sweeper_tid will
+- * be invalid and the functions will error out. That is not
+- * critical. In all other cases, the sweeper_tid will be valid
+- * and the thread gets stopped. */
++ /* if mem_pools_init() was not called, sweeper_tid will be invalid
++ * and the functions will error out. That is not critical. In all
++ * other cases, the sweeper_tid will be valid and the thread gets
++ * stopped. */
+ (void)pthread_cancel(sweeper_tid);
+ (void)pthread_join(sweeper_tid, NULL);
+
+- /* Need to clean the pool_key to prevent further usage of the
+- * per_thread_pool_list_t structure that is stored for each
+- * thread.
+- * This also prevents calling pool_destructor() when a thread
+- * exits, so there is no chance on a use-after-free of the
+- * per_thread_pool_list_t structure. */
+- (void)pthread_key_delete(pool_key);
++ /* At this point all threads should have already terminated, so
++ * it should be safe to destroy all pending per_thread_pool_list_t
++ * structures that are stored for each thread. */
++ mem_pool_thread_destructor();
+
+ /* free all objects from all pools */
+ list_for_each_entry_safe(pool_list, next_pl, &pool_threads,
+@@ -642,11 +609,7 @@ mem_pools_fini(void)
+
+ #else
+ void
+-mem_pools_init_early(void)
+-{
+-}
+-void
+-mem_pools_init_late(void)
++mem_pools_init(void)
+ {
+ }
+ void
+@@ -734,7 +697,7 @@ mem_get_pool_list(void)
+ per_thread_pool_list_t *pool_list;
+ unsigned int i;
+
+- pool_list = pthread_getspecific(pool_key);
++ pool_list = thread_pool_list;
+ if (pool_list) {
+ return pool_list;
+ }
+@@ -767,7 +730,8 @@ mem_get_pool_list(void)
+ list_add(&pool_list->thr_list, &pool_threads);
+ (void)pthread_mutex_unlock(&pool_lock);
+
+- (void)pthread_setspecific(pool_key, pool_list);
++ thread_pool_list = pool_list;
++
+ return pool_list;
+ }
+
+diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
+index c05939a..2eb7b49 100644
+--- a/libglusterfs/src/syncop.c
++++ b/libglusterfs/src/syncop.c
+@@ -26,28 +26,10 @@ syncopctx_setfsuid(void *uid)
+
+ opctx = syncopctx_getctx();
+
+- /* alloc for this thread the first time */
+- if (!opctx) {
+- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
+- if (!opctx) {
+- ret = -1;
+- goto out;
+- }
+-
+- ret = syncopctx_setctx(opctx);
+- if (ret != 0) {
+- GF_FREE(opctx);
+- opctx = NULL;
+- goto out;
+- }
+- }
++ opctx->uid = *(uid_t *)uid;
++ opctx->valid |= SYNCOPCTX_UID;
+
+ out:
+- if (opctx && uid) {
+- opctx->uid = *(uid_t *)uid;
+- opctx->valid |= SYNCOPCTX_UID;
+- }
+-
+ return ret;
+ }
+
+@@ -66,28 +48,10 @@ syncopctx_setfsgid(void *gid)
+
+ opctx = syncopctx_getctx();
+
+- /* alloc for this thread the first time */
+- if (!opctx) {
+- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
+- if (!opctx) {
+- ret = -1;
+- goto out;
+- }
+-
+- ret = syncopctx_setctx(opctx);
+- if (ret != 0) {
+- GF_FREE(opctx);
+- opctx = NULL;
+- goto out;
+- }
+- }
++ opctx->gid = *(gid_t *)gid;
++ opctx->valid |= SYNCOPCTX_GID;
+
+ out:
+- if (opctx && gid) {
+- opctx->gid = *(gid_t *)gid;
+- opctx->valid |= SYNCOPCTX_GID;
+- }
+-
+ return ret;
+ }
+
+@@ -107,43 +71,20 @@ syncopctx_setfsgroups(int count, const void *groups)
+
+ opctx = syncopctx_getctx();
+
+- /* alloc for this thread the first time */
+- if (!opctx) {
+- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
+- if (!opctx) {
+- ret = -1;
+- goto out;
+- }
+-
+- ret = syncopctx_setctx(opctx);
+- if (ret != 0) {
+- GF_FREE(opctx);
+- opctx = NULL;
+- goto out;
+- }
+- }
+-
+ /* resize internal groups as required */
+ if (count && opctx->grpsize < count) {
+ if (opctx->groups) {
+- tmpgroups = GF_REALLOC(opctx->groups, (sizeof(gid_t) * count));
+- /* NOTE: Not really required to zero the reallocation,
+- * as ngrps controls the validity of data,
+- * making a note irrespective */
+- if (tmpgroups == NULL) {
+- opctx->grpsize = 0;
+- GF_FREE(opctx->groups);
+- opctx->groups = NULL;
+- ret = -1;
+- goto out;
+- }
+- } else {
+- tmpgroups = GF_CALLOC(count, sizeof(gid_t), gf_common_mt_syncopctx);
+- if (tmpgroups == NULL) {
+- opctx->grpsize = 0;
+- ret = -1;
+- goto out;
+- }
++ /* Group list will be updated later, so no need to keep current
++ * data and waste time copying it. It's better to free the current
++ * allocation and then allocate a fresh memory block. */
++ GF_FREE(opctx->groups);
++ opctx->groups = NULL;
++ opctx->grpsize = 0;
++ }
++ tmpgroups = GF_MALLOC(count * sizeof(gid_t), gf_common_mt_syncopctx);
++ if (tmpgroups == NULL) {
++ ret = -1;
++ goto out;
+ }
+
+ opctx->groups = tmpgroups;
+@@ -177,28 +118,10 @@ syncopctx_setfspid(void *pid)
+
+ opctx = syncopctx_getctx();
+
+- /* alloc for this thread the first time */
+- if (!opctx) {
+- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
+- if (!opctx) {
+- ret = -1;
+- goto out;
+- }
+-
+- ret = syncopctx_setctx(opctx);
+- if (ret != 0) {
+- GF_FREE(opctx);
+- opctx = NULL;
+- goto out;
+- }
+- }
++ opctx->pid = *(pid_t *)pid;
++ opctx->valid |= SYNCOPCTX_PID;
+
+ out:
+- if (opctx && pid) {
+- opctx->pid = *(pid_t *)pid;
+- opctx->valid |= SYNCOPCTX_PID;
+- }
+-
+ return ret;
+ }
+
+@@ -217,28 +140,10 @@ syncopctx_setfslkowner(gf_lkowner_t *lk_owner)
+
+ opctx = syncopctx_getctx();
+
+- /* alloc for this thread the first time */
+- if (!opctx) {
+- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
+- if (!opctx) {
+- ret = -1;
+- goto out;
+- }
+-
+- ret = syncopctx_setctx(opctx);
+- if (ret != 0) {
+- GF_FREE(opctx);
+- opctx = NULL;
+- goto out;
+- }
+- }
++ opctx->lk_owner = *lk_owner;
++ opctx->valid |= SYNCOPCTX_LKOWNER;
+
+ out:
+- if (opctx && lk_owner) {
+- opctx->lk_owner = *lk_owner;
+- opctx->valid |= SYNCOPCTX_LKOWNER;
+- }
+-
+ return ret;
+ }
+
+diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
+index 03dac5e..e5a9db4 100644
+--- a/xlators/features/changelog/lib/src/gf-changelog-helpers.c
++++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
+@@ -64,20 +64,7 @@ gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr)
+ * made a part of libglusterfs.
+ */
+
+-static pthread_key_t rl_key;
+-static pthread_once_t rl_once = PTHREAD_ONCE_INIT;
+-
+-static void
+-readline_destructor(void *ptr)
+-{
+- GF_FREE(ptr);
+-}
+-
+-static void
+-readline_once(void)
+-{
+- pthread_key_create(&rl_key, readline_destructor);
+-}
++static __thread read_line_t thread_tsd = {};
+
+ static ssize_t
+ my_read(read_line_t *tsd, int fd, char *ptr)
+@@ -97,27 +84,6 @@ my_read(read_line_t *tsd, int fd, char *ptr)
+ return 1;
+ }
+
+-static int
+-gf_readline_init_once(read_line_t **tsd)
+-{
+- if (pthread_once(&rl_once, readline_once) != 0)
+- return -1;
+-
+- *tsd = pthread_getspecific(rl_key);
+- if (*tsd)
+- goto out;
+-
+- *tsd = GF_CALLOC(1, sizeof(**tsd), gf_changelog_mt_libgfchangelog_rl_t);
+- if (!*tsd)
+- return -1;
+-
+- if (pthread_setspecific(rl_key, *tsd) != 0)
+- return -1;
+-
+-out:
+- return 0;
+-}
+-
+ ssize_t
+ gf_readline(int fd, void *vptr, size_t maxlen)
+ {
+@@ -125,10 +91,7 @@ gf_readline(int fd, void *vptr, size_t maxlen)
+ size_t rc = 0;
+ char c = ' ';
+ char *ptr = NULL;
+- read_line_t *tsd = NULL;
+-
+- if (gf_readline_init_once(&tsd))
+- return -1;
++ read_line_t *tsd = &thread_tsd;
+
+ ptr = vptr;
+ for (n = 1; n < maxlen; n++) {
+@@ -151,10 +114,7 @@ off_t
+ gf_lseek(int fd, off_t offset, int whence)
+ {
+ off_t off = 0;
+- read_line_t *tsd = NULL;
+-
+- if (gf_readline_init_once(&tsd))
+- return -1;
++ read_line_t *tsd = &thread_tsd;
+
+ off = sys_lseek(fd, offset, whence);
+ if (off == -1)
+@@ -169,10 +129,7 @@ gf_lseek(int fd, off_t offset, int whence)
+ int
+ gf_ftruncate(int fd, off_t length)
+ {
+- read_line_t *tsd = NULL;
+-
+- if (gf_readline_init_once(&tsd))
+- return -1;
++ read_line_t *tsd = &thread_tsd;
+
+ if (sys_ftruncate(fd, 0))
+ return -1;
+diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
+index 7ed9e55..d6acb37 100644
+--- a/xlators/features/changelog/lib/src/gf-changelog.c
++++ b/xlators/features/changelog/lib/src/gf-changelog.c
+@@ -237,9 +237,8 @@ gf_changelog_init_master()
+ {
+ int ret = 0;
+
+- mem_pools_init_early();
+ ret = gf_changelog_init_context();
+- mem_pools_init_late();
++ mem_pools_init();
+
+ return ret;
+ }
+diff --git a/xlators/nfs/server/src/mount3udp_svc.c b/xlators/nfs/server/src/mount3udp_svc.c
+index d5e4169..0688779eb 100644
+--- a/xlators/nfs/server/src/mount3udp_svc.c
++++ b/xlators/nfs/server/src/mount3udp_svc.c
+@@ -216,11 +216,7 @@ mount3udp_thread(void *argv)
+
+ GF_ASSERT(nfsx);
+
+- if (glusterfs_this_set(nfsx)) {
+- gf_msg(GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_XLATOR_SET_FAIL,
+- "Failed to set xlator, nfs.mount-udp will not work");
+- return NULL;
+- }
++ glusterfs_this_set(nfsx);
+
+ transp = svcudp_create(RPC_ANYSOCK);
+ if (transp == NULL) {
+--
+1.8.3.1
+
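/* Editor's note, as a hedged C sketch: patch 0209 above swaps dynamic TLS
 * (pthread_key_create + pthread_getspecific, which needs an init step that
 * can fail) for compiler-managed __thread variables that are usable from
 * the first instruction of every thread.  The one thing __thread lacks is
 * a destructor, which is why the patch still keeps a single pthread key
 * (free_key/glusterfs_cleanup) purely for per-thread cleanup. */
#include <pthread.h>
#include <stdio.h>

/* Per-thread buffer with zero setup, like thread_uuid_buf in the patch. */
static __thread char uuid_buf[64];

static void *
worker(void *arg)
{
    /* Safe even if no global initialization code has run yet. */
    snprintf(uuid_buf, sizeof(uuid_buf), "thread-%ld", (long)arg);
    printf("%s\n", uuid_buf);
    return NULL;
}

int
main(void)
{
    pthread_t t1, t2;

    pthread_create(&t1, NULL, worker, (void *)1L);
    pthread_create(&t2, NULL, worker, (void *)2L);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    return 0;
}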
diff --git a/0210-mem-pool.-c-h-minor-changes.patch b/0210-mem-pool.-c-h-minor-changes.patch
new file mode 100644
index 0000000..c238579
--- /dev/null
+++ b/0210-mem-pool.-c-h-minor-changes.patch
@@ -0,0 +1,129 @@
+From 77a3cac0c8aed9e084296719926a534128c31dee Mon Sep 17 00:00:00 2001
+From: Yaniv Kaul <ykaul@redhat.com>
+Date: Wed, 27 Feb 2019 15:48:42 +0200
+Subject: [PATCH 210/221] mem-pool.{c|h}: minor changes
+
+1. Removed some code that was not needed. It did not really do anything.
+2. CALLOC -> MALLOC in one place.
+
+Compile-tested only!
+
+Upstream patch:
+> BUG: 1193929
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22274
+> Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+> Change-Id: I4419161e1bb636158e32b5d33044b06f1eef2449
+
+Change-Id: I4419161e1bb636158e32b5d33044b06f1eef2449
+Updates: bz#1722801
+Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174712
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/mem-pool.c | 37 ++++++++++++-------------------------
+ 1 file changed, 12 insertions(+), 25 deletions(-)
+
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index ab78804..ca25ffc 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -643,7 +643,7 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ }
+ pool = &pools[power - POOL_SMALLEST];
+
+- new = GF_CALLOC(sizeof(struct mem_pool), 1, gf_common_mt_mem_pool);
++ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
+
+@@ -671,15 +671,7 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ void *
+ mem_get0(struct mem_pool *mem_pool)
+ {
+- void *ptr = NULL;
+-
+- if (!mem_pool) {
+- gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+- "invalid argument");
+- return NULL;
+- }
+-
+- ptr = mem_get(mem_pool);
++ void *ptr = mem_get(mem_pool);
+ if (ptr) {
+ #if defined(GF_DISABLE_MEMPOOL)
+ memset(ptr, 0, mem_pool->sizeof_type);
+@@ -736,12 +728,14 @@ mem_get_pool_list(void)
+ }
+
+ pooled_obj_hdr_t *
+-mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool,
+- gf_boolean_t *hit)
++mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
+ {
+ per_thread_pool_list_t *pool_list;
+ per_thread_pool_t *pt_pool;
+ pooled_obj_hdr_t *retval;
++#ifdef DEBUG
++ gf_boolean_t hit = _gf_true;
++#endif
+
+ pool_list = mem_get_pool_list();
+ if (!pool_list || pool_list->poison) {
+@@ -755,10 +749,6 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool,
+ pt_pool = &pool_list->pools[pool->power_of_two - POOL_SMALLEST];
+ }
+
+-#ifdef DEBUG
+- *hit = _gf_true;
+-#endif
+-
+ (void)pthread_spin_lock(&pool_list->lock);
+
+ retval = pt_pool->hot_list;
+@@ -778,7 +768,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool,
+ retval = malloc((1 << pt_pool->parent->power_of_two) +
+ sizeof(pooled_obj_hdr_t));
+ #ifdef DEBUG
+- *hit = _gf_false;
++ hit = _gf_false;
+ #endif
+ }
+ }
+@@ -788,7 +778,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool,
+ retval->pool = mem_pool;
+ retval->power_of_two = mem_pool->pool->power_of_two;
+ #ifdef DEBUG
+- if (*hit == _gf_true)
++ if (hit == _gf_true)
+ GF_ATOMIC_INC(mem_pool->hit);
+ else
+ GF_ATOMIC_INC(mem_pool->miss);
+@@ -807,19 +797,16 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool,
+ void *
+ mem_get(struct mem_pool *mem_pool)
+ {
+-#if defined(GF_DISABLE_MEMPOOL)
+- return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+-#else
+- pooled_obj_hdr_t *retval;
+- gf_boolean_t hit;
+-
+ if (!mem_pool) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+
+- retval = mem_get_from_pool(mem_pool, NULL, &hit);
++#if defined(GF_DISABLE_MEMPOOL)
++ return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
++#else
++ pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool, NULL);
+ if (!retval) {
+ return NULL;
+ }
+--
+1.8.3.1
+
diff --git a/0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch b/0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch
new file mode 100644
index 0000000..27326a9
--- /dev/null
+++ b/0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch
@@ -0,0 +1,41 @@
+From 4fa3c0be983c3f99c2785036ded5ef5ab390419b Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 6 May 2019 15:57:16 +0530
+Subject: [PATCH 211/221] libglusterfs: Fix compilation when --disable-mempool
+ is used
+
+Upstream patch:
+> BUG: 1193929
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22665
+> Change-Id: I245c065b209bcce5db939b6a0a934ba6fd393b47
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+Updates: bz#1722801
+Change-Id: I245c065b209bcce5db939b6a0a934ba6fd393b47
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174713
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/mem-pool.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index ca25ffc..df167b6 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -616,6 +616,11 @@ void
+ mem_pools_fini(void)
+ {
+ }
++void
++mem_pool_thread_destructor(void)
++{
++}
++
+ #endif
+
+ struct mem_pool *
+--
+1.8.3.1
+
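/* Editor's note, as a hedged C sketch: patch 0211 above illustrates a
 * general rule of conditionally compiled features -- every function that
 * other translation units still call needs a stub in the disabled branch,
 * or the build fails at link time.  GF_DISABLE_MEMPOOL matches the macro
 * used in the patch; the bodies here are simplified. */
#include <stdio.h>

#ifndef GF_DISABLE_MEMPOOL
void
mem_pool_thread_destructor(void)
{
    printf("real per-thread pool cleanup\n");
}
#else
void
mem_pool_thread_destructor(void)
{
    /* intentional no-op stub so callers keep linking */
}
#endif

int
main(void)
{
    mem_pool_thread_destructor();
    return 0;
}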
diff --git a/0212-core-fix-memory-allocation-issues.patch b/0212-core-fix-memory-allocation-issues.patch
new file mode 100644
index 0000000..18da11d
--- /dev/null
+++ b/0212-core-fix-memory-allocation-issues.patch
@@ -0,0 +1,169 @@
+From 0bf728030e0ad7a49e6e1737ea06ae74da9279d3 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 21 Jun 2019 11:28:08 +0200
+Subject: [PATCH 212/221] core: fix memory allocation issues
+
+Two problems have been identified that caused that gluster's memory
+usage were twice higher than required.
+
+1. An off by 1 error caused that all objects allocated from the memory
+ pools were taken from a pool bigger than required. Since each pool
+ corresponds to a size equal to a power of two, this was wasting half
+ of the available memory.
+
+2. The header information used for accounting on each memory object was
+ not taken into consideration when searching for a suitable memory
+ pool. It was added later when each individual block was allocated.
+ This made this space "invisible" to memory accounting.
+
+Credits: Thanks to Nithya Balachandran for identifying this problem and
+ testing this patch.
+
+Upstream patch:
+> BUG: 1722802
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22921
+> Change-Id: I90e27ad795fe51ca11c13080f62207451f6c138c
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Fixes: bz#1722801
+Change-Id: I90e27ad795fe51ca11c13080f62207451f6c138c
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174714
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/glusterfs/mem-pool.h | 5 ++-
+ libglusterfs/src/mem-pool.c | 57 +++++++++++++++++++----------------
+ 2 files changed, 35 insertions(+), 27 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
+index c5a486b..be0a26d 100644
+--- a/libglusterfs/src/glusterfs/mem-pool.h
++++ b/libglusterfs/src/glusterfs/mem-pool.h
+@@ -231,7 +231,10 @@ typedef struct pooled_obj_hdr {
+ struct mem_pool *pool;
+ } pooled_obj_hdr_t;
+
+-#define AVAILABLE_SIZE(p2) (1 << (p2))
++/* Each memory block inside a pool has a fixed size that is a power of two.
++ * However each object will have a header that will reduce the available
++ * space. */
++#define AVAILABLE_SIZE(p2) ((1UL << (p2)) - sizeof(pooled_obj_hdr_t))
+
+ typedef struct per_thread_pool {
+ /* the pool that was used to request this allocation */
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index df167b6..d88041d 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -627,6 +627,7 @@ struct mem_pool *
+ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
+ {
++ unsigned long extra_size, size;
+ unsigned int power;
+ struct mem_pool *new = NULL;
+ struct mem_pool_shared *pool = NULL;
+@@ -637,10 +638,25 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ return NULL;
+ }
+
+- /* We ensure sizeof_type > 1 and the next power of two will be, at least,
+- * 2^POOL_SMALLEST */
+- sizeof_type |= (1 << POOL_SMALLEST) - 1;
+- power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1;
++ /* This is the overhead we'll have because of memory accounting for each
++ * memory block. */
++ extra_size = sizeof(pooled_obj_hdr_t);
++
++ /* We need to compute the total space needed to hold the data type and
++ * the header. Given that the smallest block size we have in the pools
++ * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST).
++ * However, since this value is only needed to compute its rounded
++ * logarithm in base 2, and this only depends on the highest bit set,
++ * we can simply do a bitwise or with the minimum size. We need to
++ * subtract 1 for correct handling of sizes that are exactly a power
++ * of 2. */
++ size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL);
++
++ /* We compute the logarithm in base 2 rounded up of the resulting size.
++ * This value will identify which pool we need to use from the pools of
++ * powers of 2. This is equivalent to finding the position of the highest
++ * bit set. */
++ power = sizeof(size) * 8 - __builtin_clzl(size);
+ if (power > POOL_LARGEST) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+@@ -732,8 +748,8 @@ mem_get_pool_list(void)
+ return pool_list;
+ }
+
+-pooled_obj_hdr_t *
+-mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
++static pooled_obj_hdr_t *
++mem_get_from_pool(struct mem_pool *mem_pool)
+ {
+ per_thread_pool_list_t *pool_list;
+ per_thread_pool_t *pt_pool;
+@@ -747,12 +763,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
+ return NULL;
+ }
+
+- if (mem_pool) {
+- pt_pool = &pool_list
+- ->pools[mem_pool->pool->power_of_two - POOL_SMALLEST];
+- } else {
+- pt_pool = &pool_list->pools[pool->power_of_two - POOL_SMALLEST];
+- }
++ pt_pool = &pool_list->pools[mem_pool->pool->power_of_two - POOL_SMALLEST];
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+@@ -770,8 +781,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
+ } else {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_stdc);
+- retval = malloc((1 << pt_pool->parent->power_of_two) +
+- sizeof(pooled_obj_hdr_t));
++ retval = malloc(1 << pt_pool->parent->power_of_two);
+ #ifdef DEBUG
+ hit = _gf_false;
+ #endif
+@@ -779,19 +789,14 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
+ }
+
+ if (retval != NULL) {
+- if (mem_pool) {
+- retval->pool = mem_pool;
+- retval->power_of_two = mem_pool->pool->power_of_two;
++ retval->pool = mem_pool;
++ retval->power_of_two = mem_pool->pool->power_of_two;
+ #ifdef DEBUG
+- if (hit == _gf_true)
+- GF_ATOMIC_INC(mem_pool->hit);
+- else
+- GF_ATOMIC_INC(mem_pool->miss);
++ if (hit == _gf_true)
++ GF_ATOMIC_INC(mem_pool->hit);
++ else
++ GF_ATOMIC_INC(mem_pool->miss);
+ #endif
+- } else {
+- retval->power_of_two = pool->power_of_two;
+- retval->pool = NULL;
+- }
+ retval->magic = GF_MEM_HEADER_MAGIC;
+ retval->pool_list = pool_list;
+ }
+@@ -811,7 +816,7 @@ mem_get(struct mem_pool *mem_pool)
+ #if defined(GF_DISABLE_MEMPOOL)
+ return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+ #else
+- pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool, NULL);
++ pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool);
+ if (!retval) {
+ return NULL;
+ }
+--
+1.8.3.1
+
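
The sizing logic in the patch above is compact enough to check in isolation. A minimal sketch, assuming POOL_SMALLEST is 7 (i.e. a smallest pool of 128 bytes; the constant's actual value comes from the tree this patch targets):

#include <stdio.h>

#define POOL_SMALLEST 7 /* assumption: smallest pool is 2^7 = 128 bytes */

/* Round a total size (object + header) up to the pool's power of two,
 * mirroring the computation in the new mem_pool_new_fn(). */
static unsigned int
pool_power(unsigned long total)
{
    unsigned long size = (total - 1UL) | ((1UL << POOL_SMALLEST) - 1UL);
    return sizeof(size) * 8 - __builtin_clzl(size);
}

int main(void)
{
    printf("%u\n", pool_power(100)); /* 7: fits the 128-byte pool */
    printf("%u\n", pool_power(128)); /* 7: exact powers of two stay put */
    printf("%u\n", pool_power(129)); /* 8: spills into the 256-byte pool */
    return 0;
}

Subtracting 1 before the bitwise OR is what keeps exact powers of two in their own pool; without it, pool_power(128) would land in the 256-byte pool and waste half of every block.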
diff --git a/0213-cluster-dht-Strip-out-dht-xattrs.patch b/0213-cluster-dht-Strip-out-dht-xattrs.patch
new file mode 100644
index 0000000..225379b
--- /dev/null
+++ b/0213-cluster-dht-Strip-out-dht-xattrs.patch
@@ -0,0 +1,42 @@
+From ff5f06d6ba5ac87094ae5df435d1cfb38802e7ca Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Tue, 18 Jun 2019 15:33:29 +0530
+Subject: [PATCH 213/221] cluster/dht: Strip out dht xattrs
+
+Some internal DHT xattrs were not being
+removed when calling getxattr in pass-through mode.
+This has been fixed.
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22889/
+
+>Change-Id: If7e3dbc7b495db88a566bd560888e3e9c167defa
+>fixes: bz#1721435
+>Signed-off-by: N Balachandran <nbalacha@redhat.com>
+
+
+BUG: 1721357
+Change-Id: I29bce7ea78bb4fd3b493404282cb2c48ef0bf4ee
+Signed-off-by: N Balachandran <nbalacha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174699
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index e1edb38..9a6ea5b 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -11216,6 +11216,8 @@ dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ conf = this->private;
+ dict_del(xattr, conf->xattr_name);
++ dict_del(xattr, conf->mds_xattr_key);
++ dict_del(xattr, conf->commithash_xattr_name);
+
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+--
+1.8.3.1
+
diff --git a/0214-geo-rep-Upgrading-config-file-to-new-version.patch b/0214-geo-rep-Upgrading-config-file-to-new-version.patch
new file mode 100644
index 0000000..711aa3b
--- /dev/null
+++ b/0214-geo-rep-Upgrading-config-file-to-new-version.patch
@@ -0,0 +1,114 @@
+From 76921775b0a6760276060409882c0556f19d8d01 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Wed, 29 May 2019 16:49:01 +0530
+Subject: [PATCH 214/221] geo-rep: Upgrading config file to new version
+
+- configuration handling was enhanced with patch
+https://review.gluster.org/#/c/glusterfs/+/18257/
+- hence, old configurations are not applied when a Geo-rep
+session created in the old version is upgraded.
+
+This patch solves the issue. It:
+- checks if the config file is old.
+- parses required values from old config file and stores in new
+ config file, which ensures that configurations are applied on
+ upgrade.
+- stores old config file as backup.
+- handles changes in options introduced in
+ https://review.gluster.org/#/c/glusterfs/+/18257/
+
+>fixes: bz#1707731
+>Change-Id: Iad8da6c1e1ae8ecf7c84dfdf8ea3ac6966d8a2a0
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/22894/
+
+Bug: 1708064
+Change-Id: Iad8da6c1e1ae8ecf7c84dfdf8ea3ac6966d8a2a0
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174743
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/gsyncd.py | 5 ++++
+ geo-replication/syncdaemon/gsyncdconfig.py | 41 ++++++++++++++++++++++++++++++
+ 2 files changed, 46 insertions(+)
+
+diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
+index effe0ce..a4c6f32 100644
+--- a/geo-replication/syncdaemon/gsyncd.py
++++ b/geo-replication/syncdaemon/gsyncd.py
+@@ -253,6 +253,11 @@ def main():
+ if args.subcmd == "slave":
+ override_from_args = True
+
++ if args.subcmd == "monitor":
++ ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"])
++ if ret is not None:
++ gconf.config_upgrade(config_file, ret)
++
+ # Load Config file
+ gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf",
+ config_file,
+diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
+index 23a1c57..7edc582 100644
+--- a/geo-replication/syncdaemon/gsyncdconfig.py
++++ b/geo-replication/syncdaemon/gsyncdconfig.py
+@@ -14,6 +14,7 @@ try:
+ except ImportError:
+ from configparser import ConfigParser, NoSectionError
+ import os
++import shutil
+ from string import Template
+ from datetime import datetime
+
+@@ -325,6 +326,46 @@ class Gconf(object):
+
+ return False
+
++def is_config_file_old(config_file, mastervol, slavevol):
++ cnf = ConfigParser()
++ cnf.read(config_file)
++ session_section = "peers %s %s" % (mastervol, slavevol)
++ try:
++ return dict(cnf.items(session_section))
++ except NoSectionError:
++ return None
++
++def config_upgrade(config_file, ret):
++ config_file_backup = os.path.join(os.path.dirname(config_file), "gsyncd.conf.bkp")
++
++ #copy old config file in a backup file
++ shutil.copyfile(config_file, config_file_backup)
++
++ #write a new config file
++ config = ConfigParser()
++ config.add_section('vars')
++
++ for key, value in ret.items():
++ #handle option name changes
++ if key == "use_tarssh":
++ new_key = "sync-method"
++ if value == "true":
++ new_value = "tarssh"
++ else:
++ new_value = "rsync"
++ config.set('vars', new_key, new_value)
++
++ if key == "timeout":
++ new_key = "slave-timeout"
++ config.set('vars', new_key, value)
++
++ #for changes like: ignore_deletes to ignore-deletes
++ new_key = key.replace("_", "-")
++ config.set('vars', new_key, value)
++
++ with open(config_file, 'w') as configfile:
++ config.write(configfile)
++
+
+ def validate_unixtime(value):
+ try:
+--
+1.8.3.1
+
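
To make the upgrade in the patch above concrete, here is a hypothetical old-format session section and the [vars] section config_upgrade() would write from it (volume names and values are illustrative). Note that the special-cased keys are also re-emitted under their generic underscore-to-dash spelling, because the loop does not skip to the next key after handling them:

# old gsyncd.conf (preserved as gsyncd.conf.bkp)
[peers mastervol slavevol]
use_tarssh = true
ignore_deletes = false

# new gsyncd.conf written by config_upgrade()
[vars]
sync-method = tarssh
use-tarssh = true
ignore-deletes = false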
diff --git a/0215-posix-modify-storage.reserve-option-to-take-size-and.patch b/0215-posix-modify-storage.reserve-option-to-take-size-and.patch
new file mode 100644
index 0000000..3e4217b
--- /dev/null
+++ b/0215-posix-modify-storage.reserve-option-to-take-size-and.patch
@@ -0,0 +1,319 @@
+From 0c485548b4126ed907dec9941209b1b1312d0b5d Mon Sep 17 00:00:00 2001
+From: Sheetal Pamecha <spamecha@redhat.com>
+Date: Wed, 19 Jun 2019 15:08:58 +0530
+Subject: [PATCH 215/221] posix: modify storage.reserve option to take size and
+ percent
+
+* reverting changes made in
+https://review.gluster.org/#/c/glusterfs/+/21686/
+
+* Now storage.reserve can take a value in percent or in bytes
+
+> fixes: bz#1651445
+> Change-Id: Id4826210ec27991c55b17d1fecd90356bff3e036
+> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+> Cherry pick from commit 5cbc87d8b8f1287e81c38b793b8d13b057208c62
+> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22900/
+
+BUG: 1573077
+Change-Id: Id4826210ec27991c55b17d1fecd90356bff3e036
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174744
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/posix/bug-1651445.t | 29 +++++++++-------------
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 33 -------------------------
+ xlators/storage/posix/src/posix-common.c | 33 +++++++++++--------------
+ xlators/storage/posix/src/posix-helpers.c | 26 +++++++++----------
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 15 ++++++-----
+ xlators/storage/posix/src/posix.h | 4 +--
+ 6 files changed, 51 insertions(+), 89 deletions(-)
+
+diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t
+index f6f1833..5248d47 100644
+--- a/tests/bugs/posix/bug-1651445.t
++++ b/tests/bugs/posix/bug-1651445.t
+@@ -17,39 +17,34 @@ TEST $CLI volume start $V0
+
+ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+-TEST $CLI volume set $V0 storage.reserve-size 10MB
++#Setting the size in bytes
++TEST $CLI volume set $V0 storage.reserve 40MB
+
+-#No effect as priority to reserve-size
+-TEST $CLI volume set $V0 storage.reserve 20
++#wait 5s to reset disk_space_full flag
++sleep 5
+
+ TEST dd if=/dev/zero of=$M0/a bs=100M count=1
+-sleep 5
++TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+-#Below dd confirms posix is giving priority to reserve-size
+-TEST dd if=/dev/zero of=$M0/b bs=40M count=1
++# Wait 5s to update disk_space_full flag because thread check disk space
++# after every 5s
+
+ sleep 5
++# setup_lvm create lvm partition of 150M and 40M are reserve so after
++# consuming more than 110M next dd should fail
+ TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
+
+ rm -rf $M0/*
+-#Size will reserve from the previously set reserve option = 20%
+-TEST $CLI volume set $V0 storage.reserve-size 0
+
+-#Overwrite reserve option
+-TEST $CLI volume set $V0 storage.reserve-size 40MB
++#Setting the size in percent and repeating the above steps
++TEST $CLI volume set $V0 storage.reserve 40
+
+-#wait 5s to reset disk_space_full flag
+ sleep 5
+
+-TEST dd if=/dev/zero of=$M0/a bs=100M count=1
++TEST dd if=/dev/zero of=$M0/a bs=80M count=1
+ TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+-# Wait 5s to update disk_space_full flag because thread check disk space
+-# after every 5s
+-
+ sleep 5
+-# setup_lvm create lvm partition of 150M and 40M are reserve so after
+-# consuming more than 110M next dd should fail
+ TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
+
+ TEST $CLI volume stop $V0
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 3a7ab83..7a83124 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -1231,30 +1231,6 @@ out:
+
+ return ret;
+ }
+-static int
+-validate_size(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value,
+- char **op_errstr)
+-{
+- xlator_t *this = NULL;
+- uint64_t size = 0;
+- int ret = -1;
+-
+- this = THIS;
+- GF_VALIDATE_OR_GOTO("glusterd", this, out);
+- ret = gf_string2bytesize_uint64(value, &size);
+- if (ret < 0) {
+- gf_asprintf(op_errstr,
+- "%s is not a valid size. %s "
+- "expects a valid value in bytes",
+- value, key);
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s",
+- *op_errstr);
+- }
+-out:
+- gf_msg_debug("glusterd", 0, "Returning %d", ret);
+-
+- return ret;
+-}
+
+ /* dispatch table for VOLUME SET
+ * -----------------------------
+@@ -2854,15 +2830,6 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .op_version = GD_OP_VERSION_3_13_0,
+ },
+ {
+- .key = "storage.reserve-size",
+- .voltype = "storage/posix",
+- .value = "0",
+- .validate_fn = validate_size,
+- .description = "If set, priority will be given to "
+- "storage.reserve-size over storage.reserve",
+- .op_version = GD_OP_VERSION_7_0,
+- },
+- {
+ .option = "health-check-timeout",
+ .key = "storage.health-check-timeout",
+ .type = NO_DOC,
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index 0f70af5..bfe2cb0 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -345,12 +345,14 @@ posix_reconfigure(xlator_t *this, dict_t *options)
+ " fallback to <hostname>:<export>");
+ }
+
+- GF_OPTION_RECONF("reserve-size", priv->disk_reserve_size, options, size,
++ GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size,
+ out);
++ /* option can be any one of percent or bytes */
++ priv->disk_unit = 0;
++ if (priv->disk_reserve < 100.0)
++ priv->disk_unit = 'p';
+
+- GF_OPTION_RECONF("reserve", priv->disk_reserve_percent, options, uint32,
+- out);
+- if (priv->disk_reserve_size || priv->disk_reserve_percent) {
++ if (priv->disk_reserve) {
+ ret = posix_spawn_disk_space_check_thread(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
+@@ -975,11 +977,15 @@ posix_init(xlator_t *this)
+
+ _private->disk_space_check_active = _gf_false;
+ _private->disk_space_full = 0;
+- GF_OPTION_INIT("reserve-size", _private->disk_reserve_size, size, out);
+
+- GF_OPTION_INIT("reserve", _private->disk_reserve_percent, uint32, out);
++ GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out);
++
++ /* option can be any one of percent or bytes */
++ _private->disk_unit = 0;
++ if (_private->disk_reserve < 100.0)
++ _private->disk_unit = 'p';
+
+- if (_private->disk_reserve_size || _private->disk_reserve_percent) {
++ if (_private->disk_reserve) {
+ ret = posix_spawn_disk_space_check_thread(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
+@@ -1221,23 +1227,14 @@ struct volume_options posix_options[] = {
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"reserve"},
+- .type = GF_OPTION_TYPE_INT,
++ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .min = 0,
+ .default_value = "1",
+ .validate = GF_OPT_VALIDATE_MIN,
+- .description = "Percentage of disk space to be reserved."
++ .description = "Percentage/Size of disk space to be reserved."
+ " Set to 0 to disable",
+ .op_version = {GD_OP_VERSION_3_13_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+- {.key = {"reserve-size"},
+- .type = GF_OPTION_TYPE_SIZET,
+- .min = 0,
+- .default_value = "0",
+- .validate = GF_OPT_VALIDATE_MIN,
+- .description = "size in megabytes to be reserved for disk space."
+- " Set to 0 to disable",
+- .op_version = {GD_OP_VERSION_7_0},
+- .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"batch-fsync-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "reverse-fsync",
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 849db3d..07169b5 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2246,11 +2246,11 @@ posix_disk_space_check(xlator_t *this)
+ struct posix_private *priv = NULL;
+ char *subvol_path = NULL;
+ int op_ret = 0;
+- uint64_t size = 0;
+- int percent = 0;
++ double size = 0;
++ double percent = 0;
+ struct statvfs buf = {0};
+- uint64_t totsz = 0;
+- uint64_t freesz = 0;
++ double totsz = 0;
++ double freesz = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, this, out);
+ priv = this->private;
+@@ -2258,14 +2258,6 @@ posix_disk_space_check(xlator_t *this)
+
+ subvol_path = priv->base_path;
+
+- if (priv->disk_reserve_size) {
+- size = priv->disk_reserve_size;
+- } else {
+- percent = priv->disk_reserve_percent;
+- totsz = (buf.f_blocks * buf.f_bsize);
+- size = ((totsz * percent) / 100);
+- }
+-
+ op_ret = sys_statvfs(subvol_path, &buf);
+
+ if (op_ret == -1) {
+@@ -2273,8 +2265,16 @@ posix_disk_space_check(xlator_t *this)
+ "statvfs failed on %s", subvol_path);
+ goto out;
+ }
+- freesz = (buf.f_bfree * buf.f_bsize);
+
++ if (priv->disk_unit == 'p') {
++ percent = priv->disk_reserve;
++ totsz = (buf.f_blocks * buf.f_bsize);
++ size = ((totsz * percent) / 100);
++ } else {
++ size = priv->disk_reserve;
++ }
++
++ freesz = (buf.f_bfree * buf.f_bsize);
+ if (freesz <= size) {
+ priv->disk_space_full = 1;
+ } else {
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index b92c411..fc847d6 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -720,7 +720,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ thread after every 5 sec sleep to working correctly storage.reserve
+ option behaviour
+ */
+- if (priv->disk_reserve_size || priv->disk_reserve_percent)
++ if (priv->disk_reserve)
+ posix_disk_space_check(this);
+
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out);
+@@ -2306,7 +2306,7 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ };
+ struct posix_private *priv = NULL;
+ int shared_by = 1;
+- int percent = 0;
++ double percent = 0;
+ uint64_t reserved_blocks = 0;
+
+ VALIDATE_OR_GOTO(frame, out);
+@@ -2332,11 +2332,14 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ goto out;
+ }
+
+- if (priv->disk_reserve_size) {
+- reserved_blocks = priv->disk_reserve_size / buf.f_bsize;
++ if (priv->disk_unit == 'p') {
++ percent = priv->disk_reserve;
++ reserved_blocks = (((buf.f_blocks * percent) / 100) + 0.5);
+ } else {
+- percent = priv->disk_reserve_percent;
+- reserved_blocks = (buf.f_blocks * percent) / 100;
++ if (buf.f_bsize) {
++ reserved_blocks = (priv->disk_reserve + buf.f_bsize - 1) /
++ buf.f_bsize;
++ }
+ }
+
+ if (buf.f_bfree > reserved_blocks) {
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 4364b96..b0935a7 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -225,8 +225,8 @@ struct posix_private {
+ pthread_t health_check;
+ gf_boolean_t health_check_active;
+
+- uint32_t disk_reserve_percent;
+- uint64_t disk_reserve_size;
++ double disk_reserve;
++ char disk_unit;
+ uint32_t disk_space_full;
+ pthread_t disk_space_check;
+ gf_boolean_t disk_space_check_active;
+--
+1.8.3.1
+
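
The rule the patch above reintroduces is worth spelling out: a storage.reserve value below 100 is interpreted as a percentage of the brick, anything else as an absolute byte count. A minimal sketch of the resulting reservation, with field names as in statvfs(3):

#include <sys/statvfs.h>

/* Bytes to keep free under the percent-or-size heuristic:
 * values < 100 are a percentage of the brick, others are bytes. */
static double
reserved_bytes(double reserve, const struct statvfs *buf)
{
    if (reserve < 100.0)
        return (double)buf->f_blocks * buf->f_bsize * reserve / 100.0;
    return reserve;
}

So in the updated test, "storage.reserve 40MB" reserves an absolute byte count while "storage.reserve 40" reserves 40% of the 150M LVM partition, which is why both runs make the final dd fail.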
diff --git a/0216-Test-case-fixe-for-downstream-3.5.0.patch b/0216-Test-case-fixe-for-downstream-3.5.0.patch
new file mode 100644
index 0000000..bc4ce60
--- /dev/null
+++ b/0216-Test-case-fixe-for-downstream-3.5.0.patch
@@ -0,0 +1,29 @@
+From b2204969bb0dba5de32685e1021fa44d0c406813 Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <sheggodu@redhat.com>
+Date: Tue, 25 Jun 2019 12:17:10 +0530
+Subject: [PATCH 216/221] Test case fixe for downstream 3.5.0
+
+Mark bug-1319374-THIS-crash.t as bad.
+
+BUG: 1704562
+Change-Id: I6afeb9a74ab88af7b741454367005250cd4c0e0f
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174652
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ tests/bugs/gfapi/bug-1319374-THIS-crash.t | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tests/bugs/gfapi/bug-1319374-THIS-crash.t b/tests/bugs/gfapi/bug-1319374-THIS-crash.t
+index 8d3db42..429d71e 100755
+--- a/tests/bugs/gfapi/bug-1319374-THIS-crash.t
++++ b/tests/bugs/gfapi/bug-1319374-THIS-crash.t
+@@ -25,3 +25,5 @@ TEST $(dirname $0)/bug-1319374 $H0 $V0 $logdir/bug-1319374.log
+ cleanup_tester $(dirname $0)/bug-1319374
+
+ cleanup;
++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1723673
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1723673
+--
+1.8.3.1
+
diff --git a/0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch b/0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch
new file mode 100644
index 0000000..055b9f2
--- /dev/null
+++ b/0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch
@@ -0,0 +1,75 @@
+From 71ff9b7c6356e521d98ee025554b63dd23db9836 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Thu, 13 Jun 2019 22:43:47 +0530
+Subject: [PATCH 217/221] uss: Fix tar issue with ctime and uss enabled
+
+Problem:
+If ctime and uss are enabled, tar still complains with 'file
+changed as we read it'.
+
+Cause:
+To clear the NFS cache (gluster-nfs), the ctime was incremented
+in the snap-view client on the stat cbk.
+
+Fix:
+The ctime should not be incremented manually. Since gluster-nfs
+is slated for deprecation, this code is being removed to
+fix the issue.
+
+Backport of:
+ > Patch: https://review.gluster.org/22861
+ > Change-Id: Iae7f100c20fce880a50b008ba716077350281404
+ > fixes: bz#1720290
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: Iae7f100c20fce880a50b008ba716077350281404
+BUG: 1709301
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/173922
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ .../features/snapview-client/src/snapview-client.c | 22 +++++++++++++---------
+ 1 file changed, 13 insertions(+), 9 deletions(-)
+
+diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c
+index 5d7986c..9c789ae 100644
+--- a/xlators/features/snapview-client/src/snapview-client.c
++++ b/xlators/features/snapview-client/src/snapview-client.c
+@@ -577,20 +577,24 @@ gf_svc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+ {
+- /* Consider a testcase:
++ /* TODO: FIX ME
++ * Consider a testcase:
+ * #mount -t nfs host1:/vol1 /mnt
+ * #ls /mnt
+ * #ls /mnt/.snaps (As expected this fails)
+ * #gluster volume set vol1 features.uss enable
+- * Now `ls /mnt/.snaps` should work,
+- * but fails with No such file or directory.
+- * This is because NFS client caches the list of files in
+- * a directory. This cache is updated if there are any changes
+- * in the directory attributes. To solve this problem change
+- * a attribute 'ctime' when USS is enabled
++ * Now `ls /mnt/.snaps` should work, but fails with No such file or
++ * directory. This is because NFS client (gNFS) caches the list of files
++ * in a directory. This cache is updated if there are any changes in the
++ * directory attributes. So, one way to solve this problem is to change
++ * 'ctime' attribute when USS is enabled as below.
++ *
++ * if (op_ret == 0 && IA_ISDIR(buf->ia_type))
++ * buf->ia_ctime_nsec++;
++ *
++ * But this is not the ideal solution as applications see the unexpected
++ * ctime change causing failures.
+ */
+- if (op_ret == 0 && IA_ISDIR(buf->ia_type))
+- buf->ia_ctime_nsec++;
+
+ SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+--
+1.8.3.1
+
diff --git a/0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch b/0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch
new file mode 100644
index 0000000..b7db655
--- /dev/null
+++ b/0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch
@@ -0,0 +1,88 @@
+From 8cc6d8af00303c445b94715c92fe9e3e01edb867 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Mon, 24 Jun 2019 15:49:04 +0530
+Subject: [PATCH 218/221] graph/shd: Use glusterfs_graph_deactivate to free the
+ xl rec
+
+We were using glusterfs_graph_fini to free the xl rec from
+glusterfs_process_volfp as well as glusterfs_graph_cleanup.
+
+Instead we can use glusterfs_graph_deactivate, which does
+fini as well as the other common record cleanup.
+
+Backport of:https://review.gluster.org/#/c/glusterfs/+/22904/
+
+>Change-Id: Ie4a5f2771e5254aa5ed9f00c3672a6d2cc8e4bc1
+>Updates: bz#1716695
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I09d7124366bc690ceca9e8d0adee8a0dc8081091
+BUG: 1711939
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174814
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/graph.c | 2 +-
+ libglusterfs/src/xlator.c | 9 ++++++++-
+ xlators/features/shard/src/shard.c | 3 +++
+ 3 files changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 27d9335..5b95fd6 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1394,7 +1394,7 @@ glusterfs_graph_cleanup(void *arg)
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+- glusterfs_graph_fini(graph);
++ glusterfs_graph_deactivate(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index 71e1ed4..d9d3441 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -659,6 +659,7 @@ xlator_fini_rec(xlator_t *xl)
+ trav = trav->next;
+ }
+
++ xl->cleanup_starting = 1;
+ if (xl->init_succeeded) {
+ if (xl->fini) {
+ old_THIS = THIS;
+@@ -666,8 +667,14 @@ xlator_fini_rec(xlator_t *xl)
+
+ xl->fini(xl);
+
+- if (xl->local_pool)
++ if (xl->local_pool) {
+ mem_pool_destroy(xl->local_pool);
++ xl->local_pool = NULL;
++ }
++ if (xl->itable) {
++ inode_table_destroy(xl->itable);
++ xl->itable = NULL;
++ }
+
+ THIS = old_THIS;
+ } else {
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index b248767..31c7eec 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -6785,6 +6785,9 @@ fini(xlator_t *this)
+
+ GF_VALIDATE_OR_GOTO("shard", this, out);
+
++ /*Itable was not created by shard, hence setting to NULL.*/
++ this->itable = NULL;
++
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+
+--
+1.8.3.1
+
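
A pattern worth noting in the patch above (and in patch 0220 that follows) is destroy-then-NULL: every freed member is reset so that a second cleanup pass over the same xlator skips it instead of double-freeing. As a generic sketch:

/* Sketch of the destroy-then-NULL idiom applied to local_pool and
 * itable above: a later pass sees NULL and leaves the member alone. */
#define DESTROY_AND_CLEAR(ptr, destroy_fn)                                     \
    do {                                                                       \
        if (ptr) {                                                             \
            destroy_fn(ptr);                                                   \
            (ptr) = NULL;                                                      \
        }                                                                      \
    } while (0)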
diff --git a/0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch b/0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch
new file mode 100644
index 0000000..eefb890
--- /dev/null
+++ b/0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch
@@ -0,0 +1,35 @@
+From d9781ed4964d9e752fc880c8cd8afcbd2c561ebe Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 26 Jun 2019 15:58:33 +0530
+Subject: [PATCH 219/221] posix : add posix_set_ctime() in posix_ftruncate()
+
+Upstream references :
+> release 6: https://review.gluster.org/#/c/glusterfs/+/22965/
+> mainline: https://review.gluster.org/#/c/glusterfs/+/22948/
+
+Change-Id: I0cb5320fea71306e0283509ae47024f23874b53b
+fixes: bz#1720163
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174837
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index fc847d6..c949f68 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -5059,6 +5059,8 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ goto out;
+ }
+
++ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop);
++
+ op_ret = 0;
+
+ out:
+--
+1.8.3.1
+
diff --git a/0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch b/0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch
new file mode 100644
index 0000000..07f702b
--- /dev/null
+++ b/0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch
@@ -0,0 +1,190 @@
+From b963fa8bb71963127147d33bf609f439dd5bd107 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 27 Jun 2019 19:17:29 +0530
+Subject: [PATCH 220/221] graph/shd: Use top down approach while cleaning
+ xlator
+
+We were cleaning xlators from bottom to top, which might
+lead to problems when upper xlators try to access an
+xlator object loaded below them.
+
+One such scenario is when an fd_unref happens as part of the
+fini call, which might lead to a releasedir call into a lower
+xlator. This will lead to an invalid memory access.
+
+Backport of:https://review.gluster.org/#/c/glusterfs/+/22968/
+
+>Change-Id: I8a6cb619256fab0b0c01a2d564fc88287c4415a0
+>Updates: bz#1716695
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I22bbf99e9451183b3e0fe61b57b2440ab4163fe5
+BUG: 1711939
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174882
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/graph.c | 10 +++++++++-
+ xlators/features/bit-rot/src/stub/bit-rot-stub.c | 1 +
+ xlators/features/changelog/src/changelog.c | 1 +
+ xlators/features/cloudsync/src/cloudsync.c | 4 +++-
+ xlators/features/index/src/index.c | 1 +
+ xlators/features/quiesce/src/quiesce.c | 1 +
+ xlators/features/read-only/src/worm.c | 1 +
+ xlators/features/sdfs/src/sdfs.c | 1 +
+ xlators/features/selinux/src/selinux.c | 2 ++
+ xlators/features/trash/src/trash.c | 1 +
+ 10 files changed, 21 insertions(+), 2 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 5b95fd6..172dc61 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1193,6 +1193,14 @@ glusterfs_graph_fini(glusterfs_graph_t *graph)
+ if (trav->init_succeeded) {
+ trav->cleanup_starting = 1;
+ trav->fini(trav);
++ if (trav->local_pool) {
++ mem_pool_destroy(trav->local_pool);
++ trav->local_pool = NULL;
++ }
++ if (trav->itable) {
++ inode_table_destroy(trav->itable);
++ trav->itable = NULL;
++ }
+ trav->init_succeeded = 0;
+ }
+ trav = trav->next;
+@@ -1394,7 +1402,7 @@ glusterfs_graph_cleanup(void *arg)
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+- glusterfs_graph_deactivate(graph);
++ glusterfs_graph_fini(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+index 3f48a4b..03446be 100644
+--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+@@ -185,6 +185,7 @@ cleanup_lock:
+ pthread_mutex_destroy(&priv->lock);
+ free_mempool:
+ mem_pool_destroy(priv->local_pool);
++ priv->local_pool = NULL;
+ free_priv:
+ GF_FREE(priv);
+ this->private = NULL;
+diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
+index d9025f3..2862d1e 100644
+--- a/xlators/features/changelog/src/changelog.c
++++ b/xlators/features/changelog/src/changelog.c
+@@ -2790,6 +2790,7 @@ cleanup_options:
+ changelog_freeup_options(this, priv);
+ cleanup_mempool:
+ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
+ cleanup_priv:
+ GF_FREE(priv);
+ error_return:
+diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
+index 26e512c..0ad987e 100644
+--- a/xlators/features/cloudsync/src/cloudsync.c
++++ b/xlators/features/cloudsync/src/cloudsync.c
+@@ -200,8 +200,10 @@ cs_init(xlator_t *this)
+
+ out:
+ if (ret == -1) {
+- if (this->local_pool)
++ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
++ }
+
+ cs_cleanup_private(priv);
+
+diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
+index 2f2a6d0..4ece7ff 100644
+--- a/xlators/features/index/src/index.c
++++ b/xlators/features/index/src/index.c
+@@ -2478,6 +2478,7 @@ out:
+ GF_FREE(priv);
+ this->private = NULL;
+ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
+ }
+
+ if (attr_inited)
+diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
+index bfd1116..06f58c9 100644
+--- a/xlators/features/quiesce/src/quiesce.c
++++ b/xlators/features/quiesce/src/quiesce.c
+@@ -2536,6 +2536,7 @@ fini(xlator_t *this)
+ this->private = NULL;
+
+ mem_pool_destroy(priv->local_pool);
++ priv->local_pool = NULL;
+ LOCK_DESTROY(&priv->lock);
+ GF_FREE(priv);
+ out:
+diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
+index 24196f8..7d13180 100644
+--- a/xlators/features/read-only/src/worm.c
++++ b/xlators/features/read-only/src/worm.c
+@@ -569,6 +569,7 @@ fini(xlator_t *this)
+ mem_put(priv);
+ this->private = NULL;
+ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
+ out:
+ return;
+ }
+diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c
+index f0247fd..164c632 100644
+--- a/xlators/features/sdfs/src/sdfs.c
++++ b/xlators/features/sdfs/src/sdfs.c
+@@ -1429,6 +1429,7 @@ void
+ fini(xlator_t *this)
+ {
+ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
+ return;
+ }
+
+diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c
+index 58b4c5d..ce5fc90 100644
+--- a/xlators/features/selinux/src/selinux.c
++++ b/xlators/features/selinux/src/selinux.c
+@@ -256,6 +256,7 @@ out:
+ GF_FREE(priv);
+ }
+ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
+ }
+ return ret;
+ }
+@@ -284,6 +285,7 @@ fini(xlator_t *this)
+ GF_FREE(priv);
+
+ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
+
+ return;
+ }
+diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
+index d668436..eb5007b 100644
+--- a/xlators/features/trash/src/trash.c
++++ b/xlators/features/trash/src/trash.c
+@@ -2523,6 +2523,7 @@ out:
+ GF_FREE(priv);
+ }
+ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
+ }
+ return ret;
+ }
+--
+1.8.3.1
+
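
The ordering argument in the patch above reduces to this: upper xlators hold references (fds, inodes) into the xlators loaded below them, so teardown has to visit parents before children. A minimal sketch with illustrative types:

/* Tear down a translator chain parent-first, so no fini() in an upper
 * xlator can call down into one that has already been cleaned up. */
struct xl {
    struct xl *next;            /* the xlator loaded below this one */
    void (*fini)(struct xl *);
};

static void
cleanup_top_down(struct xl *top)
{
    struct xl *x;

    for (x = top; x != NULL; x = x->next)
        if (x->fini)
            x->fini(x);         /* parents first, children later */
}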
diff --git a/0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch b/0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch
new file mode 100644
index 0000000..74e3796
--- /dev/null
+++ b/0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch
@@ -0,0 +1,75 @@
+From ac216eae4775f7d95877b247937e2a4a4828b1b2 Mon Sep 17 00:00:00 2001
+From: Raghavendra G <rgowdapp@redhat.com>
+Date: Tue, 4 Jun 2019 19:22:45 +0530
+Subject: [PATCH 221/221] protocol/client: propagte GF_EVENT_CHILD_PING only
+ for connections to brick
+
+Two reasons:
+* ping responses from glusterd may not be relevant for Halo
+  replication. Instead, it might be interested only in knowing whether
+  the brick itself is responsive.
+* When a brick is killed, propagating a GF_EVENT_CHILD_PING for the
+  ping response from glusterd results in GF_EVENT_DISCONNECT being
+  spuriously propagated to parent xlators. These DISCONNECT events are
+  from the connections the client establishes with glusterd as part of
+  its reconnect logic. Without GF_EVENT_CHILD_PING, the last event
+  propagated to parent xlators would be the first DISCONNECT event from
+  the brick, and hence subsequent DISCONNECTs to glusterd are not
+  propagated, as protocol/client prevents the same event from being
+  propagated to parent xlators consecutively. Propagating
+  GF_EVENT_CHILD_PING for ping responses from glusterd would change
+  the last_sent_event to GF_EVENT_CHILD_PING, and hence protocol/client
+  cannot prevent subsequent DISCONNECT events.
+
+>Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
+>Fixes: bz#1716979
+>Change-Id: I50276680c52f05ca9e12149a3094923622d6eaef
+
+Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22821/
+
+BUG: 1703423
+Change-Id: I50276680c52f05ca9e12149a3094923622d6eaef
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174883
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/protocol/client/src/client.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index a372807..95e4be5 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -2276,6 +2276,12 @@ client_mark_fd_bad(xlator_t *this)
+ return 0;
+ }
+
++static int
++is_connection_to_brick(struct rpc_clnt *rpc)
++{
++ return (rpc->conn.config.remote_port != 0);
++}
++
+ int
+ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+@@ -2297,10 +2303,12 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+
+ switch (event) {
+ case RPC_CLNT_PING: {
+- ret = default_notify(this, GF_EVENT_CHILD_PING, data);
+- if (ret)
+- gf_log(this->name, GF_LOG_INFO, "CHILD_PING notify failed");
+- conf->last_sent_event = GF_EVENT_CHILD_PING;
++ if (is_connection_to_brick(rpc)) {
++ ret = default_notify(this, GF_EVENT_CHILD_PING, data);
++ if (ret)
++ gf_log(this->name, GF_LOG_INFO, "CHILD_PING notify failed");
++ conf->last_sent_event = GF_EVENT_CHILD_PING;
++ }
+ break;
+ }
+ case RPC_CLNT_CONNECT: {
+--
+1.8.3.1
+
diff --git a/0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch b/0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch
new file mode 100644
index 0000000..3a492cb
--- /dev/null
+++ b/0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch
@@ -0,0 +1,109 @@
+From 5f304e003cc24ff7877ab51bdfded0dbf8ec581b Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Fri, 21 Jun 2019 09:04:19 +0530
+Subject: [PATCH 222/255] cluster/dht: Fixed a memleak in dht_rename_cbk
+
+Fixed a memleak in dht_rename_cbk when creating
+a linkto file.
+
+upstream: https://review.gluster.org/#/c/glusterfs/+/22912/
+
+>Change-Id: I705adef3cb79e33806520fc2b15558e90e2c211c
+>fixes: bz#1722698
+>Signed-off-by: N Balachandran <nbalacha@redhat.com>
+
+BUG:1722512
+Change-Id: I8450cac82a0e1611e698ffac476ea5516e614236
+Signed-off-by: N Balachandran <nbalacha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175181
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Susant Palai <spalai@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rename.c | 44 +++++++++++++++++++++++++++---------
+ 1 file changed, 33 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
+index 893b451..5ba2373 100644
+--- a/xlators/cluster/dht/src/dht-rename.c
++++ b/xlators/cluster/dht/src/dht-rename.c
+@@ -1009,9 +1009,11 @@ dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ {
+ xlator_t *prev = NULL;
+ dht_local_t *local = NULL;
++ call_frame_t *main_frame = NULL;
+
+ prev = cookie;
+ local = frame->local;
++ main_frame = local->main_frame;
+
+ /* TODO: Handle this case in lookup-optimize */
+ if (op_ret == -1) {
+@@ -1024,7 +1026,8 @@ dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ dht_linkfile_attr_heal(frame, this);
+ }
+
+- dht_rename_unlink(frame, this);
++ dht_rename_unlink(main_frame, this);
++ DHT_STACK_DESTROY(frame);
+ return 0;
+ }
+
+@@ -1040,7 +1043,8 @@ dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+- loc_t link_loc = {0};
++ call_frame_t *link_frame = NULL;
++ dht_local_t *link_local = NULL;
+
+ local = frame->local;
+ prev = cookie;
+@@ -1110,18 +1114,36 @@ dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ /* Create the linkto file for the dst file */
+ if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
+- loc_copy(&link_loc, &local->loc2);
+- if (link_loc.inode)
+- inode_unref(link_loc.inode);
+- link_loc.inode = inode_ref(local->loc.inode);
+- gf_uuid_copy(local->gfid, local->loc.inode->gfid);
+- gf_uuid_copy(link_loc.gfid, local->loc.inode->gfid);
+-
+- dht_linkfile_create(frame, dht_rename_links_create_cbk, this,
+- src_cached, dst_hashed, &link_loc);
++ link_frame = copy_frame(frame);
++ if (!link_frame) {
++ goto unlink;
++ }
++
++ /* fop value sent as maxvalue because it is not used
++ * anywhere in this case */
++ link_local = dht_local_init(link_frame, &local->loc2, NULL,
++ GF_FOP_MAXVALUE);
++ if (!link_local) {
++ goto unlink;
++ }
++
++ if (link_local->loc.inode)
++ inode_unref(link_local->loc.inode);
++ link_local->loc.inode = inode_ref(local->loc.inode);
++ link_local->main_frame = frame;
++ link_local->stbuf = local->stbuf;
++ gf_uuid_copy(link_local->gfid, local->loc.inode->gfid);
++
++ dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this,
++ src_cached, dst_hashed, &link_local->loc);
+ return 0;
+ }
+
++unlink:
++
++ if (link_frame) {
++ DHT_STACK_DESTROY(link_frame);
++ }
+ dht_rename_unlink(frame, this);
+ return 0;
+
+--
+1.8.3.1
+
diff --git a/0223-change-get_real_filename-implementation-to-use-ENOAT.patch b/0223-change-get_real_filename-implementation-to-use-ENOAT.patch
new file mode 100644
index 0000000..a533388
--- /dev/null
+++ b/0223-change-get_real_filename-implementation-to-use-ENOAT.patch
@@ -0,0 +1,123 @@
+From 36b0bd86321436a951f225fcf2e921390ed8dc33 Mon Sep 17 00:00:00 2001
+From: Michael Adam <obnox@samba.org>
+Date: Thu, 20 Jun 2019 13:09:37 +0200
+Subject: [PATCH 223/255] change get_real_filename implementation to use
+ ENOATTR instead of ENOENT
+
+get_real_filename is implemented as a virtual extended attribute to help
+Samba implement the case-insensitive but case preserving SMB protocol
+more efficiently. It is implemented as a getxattr call on the parent directory
+with the virtual key of "get_real_filename:<entryname>" by looking for a
+spelling with different case for the provided file/dir name (<entryname>)
+and returning this correct spelling as a result if the entry is found.
+Originally (05aaec645a6262d431486eb5ac7cd702646cfcfb), the
+implementation used the ENOENT errno to return the authoritative answer
+that <entryname> does not exist in any case folding.
+
+Now this implementation is actually a violation or misuse of the defined
+API for the getxattr call which returns ENOENT for the case that the dir
+that the call is made against does not exist and ENOATTR (or the synonym
+ENODATA) for the case that the xattr does not exist.
+
+This was not a problem until the gluster fuse-bridge was changed
+to map ENOENT to ESTALE in 59629f1da9dca670d5dcc6425f7f89b3e96b46bf,
+after which the getxattr call for get_real_filename returned an
+ESTALE instead of ENOENT, breaking the expectation in Samba.
+
+It is an independent problem that ESTALE should not leak out to user
+space but is intended to trigger retries between fuse and gluster.
+But nevertheless, the semantics seem to be incorrect here and should
+be changed.
+
+This patch changes the implementation of the get_real_filename virtual
+xattr to correctly return ENOATTR instead of ENOENT if the file/directory
+being looked up is not found.
+
+The Samba glusterfs_fuse vfs module which takes advantage of the
+get_real_filename over a fuse mount will receive a corresponding change
+to map ENOATTR to ENOENT. Without this change, it will still work
+correctly, but the performance optimization for nonexisting files is
+lost. On the other hand, this change removes the distinction
+between the old not-implemented case and the implemented case.
+So a Samba that treats ENOATTR like ENOENT will no longer work
+correctly against old servers that don't implement get_real_filename,
+i.e. existing files will be reported as non-existing.
+
+Backport of https://review.gluster.org/c/glusterfs/+/22925
+
+Change-Id: I971b427ab8410636d5d201157d9af70e0d075b67
+fixes: bz#1724089
+Signed-off-by: Michael Adam <obnox@samba.org>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175012
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 8 ++++----
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 4 ++--
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 9a6ea5b..219b072 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -4618,7 +4618,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
+
+ LOCK(&frame->lock);
+ {
+- if (local->op_errno == ENODATA || local->op_errno == EOPNOTSUPP) {
++ if (local->op_errno == EOPNOTSUPP) {
+ /* Nothing to do here, we have already found
+ * a subvol which does not have the get_real_filename
+ * optimization. If condition is for simple logic.
+@@ -4627,7 +4627,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
+ }
+
+ if (op_ret == -1) {
+- if (op_errno == ENODATA || op_errno == EOPNOTSUPP) {
++ if (op_errno == EOPNOTSUPP) {
+ /* This subvol does not have the optimization.
+ * Better let the user know we don't support it.
+ * Remove previous results if any.
+@@ -4655,7 +4655,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
+ goto post_unlock;
+ }
+
+- if (op_errno == ENOENT) {
++ if (op_errno == ENOATTR) {
+ /* Do nothing, our defaults are set to this.
+ */
+ goto unlock;
+@@ -4723,7 +4723,7 @@ dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ cnt = local->call_cnt = layout->cnt;
+
+ local->op_ret = -1;
+- local->op_errno = ENOENT;
++ local->op_errno = ENOATTR;
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index c949f68..ea3b69c 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -2954,7 +2954,7 @@ posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ (void)sys_closedir(fd);
+
+ if (!found)
+- return -ENOENT;
++ return -ENOATTR;
+
+ ret = dict_set_dynstr(dict, (char *)key, found);
+ if (ret) {
+@@ -3422,7 +3422,7 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+- if (op_errno == ENOENT) {
++ if (op_errno == ENOATTR) {
+ gf_msg_debug(this->name, 0,
+ "Failed to get "
+ "real filename (%s, %s)",
+--
+1.8.3.1
+
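
From a consumer's point of view, the corrected semantics of the patch above look like an ordinary getxattr(2) call. A small sketch against a FUSE mount (the key spelling follows the get_real_filename convention; the mount path and entry name are illustrative):

#include <errno.h>
#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
    char buf[256];
    ssize_t n = getxattr("/mnt/vol/some-dir",
                         "glusterfs.get_real_filename:README",
                         buf, sizeof(buf));
    if (n >= 0)
        printf("real spelling: %.*s\n", (int)n, buf);
    else if (errno == ENOENT)
        puts("the directory itself does not exist");
    else if (errno == ENODATA) /* ENOATTR where defined */
        puts("no entry matches any case folding of README");
    else
        perror("getxattr");
    return 0;
}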
diff --git a/0224-core-replace-inet_addr-with-inet_pton.patch b/0224-core-replace-inet_addr-with-inet_pton.patch
new file mode 100644
index 0000000..f9a3b56
--- /dev/null
+++ b/0224-core-replace-inet_addr-with-inet_pton.patch
@@ -0,0 +1,53 @@
+From 3528c4fb59ca4d3efda2cf0689b7549e449bb91b Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Fri, 14 Jun 2019 07:53:06 +0000
+Subject: [PATCH 224/255] core: replace inet_addr with inet_pton
+
+Fixes a warning raised by RPMDiff on the use of inet_addr, which may
+impact IPv6 support.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22866/
+
+>fixes: bz#1721385
+>Change-Id: Id2d9afa1747efa64bc79d90dd2566bff54deedeb
+>Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+
+BUG: 1698435
+Change-Id: Id2d9afa1747efa64bc79d90dd2566bff54deedeb
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175318
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/events.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
+index 9b3a226..2509767 100644
+--- a/libglusterfs/src/events.c
++++ b/libglusterfs/src/events.c
+@@ -41,6 +41,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ char *host = NULL;
+ struct addrinfo hints;
+ struct addrinfo *result = NULL;
++ xlator_t *this = THIS;
+
+ /* Global context */
+ ctx = THIS->ctx;
+@@ -82,7 +83,12 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ /* Socket Configurations */
+ server.sin_family = AF_INET;
+ server.sin_port = htons(EVENT_PORT);
+- server.sin_addr.s_addr = inet_addr(host);
++ ret = inet_pton(server.sin_family, host, &server.sin_addr);
++ if (ret <= 0) {
++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
++ "inet_pton failed with return code %d", ret);
++ goto out;
++ }
+ memset(&server.sin_zero, '\0', sizeof(server.sin_zero));
+
+ va_start(arguments, fmt);
+--
+1.8.3.1
+
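
The motivation for the patch above is easy to demonstrate: inet_addr() overloads its error value with a legal address and cannot parse IPv6 at all, while inet_pton() reports both cases unambiguously. A self-contained check:

#include <arpa/inet.h>
#include <stdio.h>

int main(void)
{
    struct in_addr v4;
    struct in6_addr v6;

    /* 1: parses fine; with inet_addr() this address is
     * indistinguishable from the INADDR_NONE error value. */
    printf("%d\n", inet_pton(AF_INET, "255.255.255.255", &v4));
    /* 0: not a parseable address, reported distinctly. */
    printf("%d\n", inet_pton(AF_INET, "not-an-ip", &v4));
    /* 1: IPv6 is handled by the same call. */
    printf("%d\n", inet_pton(AF_INET6, "::1", &v6));
    return 0;
}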
diff --git a/0225-tests-utils-Fix-py2-py3-util-python-scripts.patch b/0225-tests-utils-Fix-py2-py3-util-python-scripts.patch
new file mode 100644
index 0000000..5ad185d
--- /dev/null
+++ b/0225-tests-utils-Fix-py2-py3-util-python-scripts.patch
@@ -0,0 +1,448 @@
+From 9d10b1fd102dc2d5bfa71891ded52b7a8f5e08d8 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Thu, 6 Jun 2019 12:54:04 +0530
+Subject: [PATCH 225/255] tests/utils: Fix py2/py3 util python scripts
+
+Following files are fixed.
+
+tests/bugs/distribute/overlap.py
+tests/utils/changelogparser.py
+tests/utils/create-files.py
+tests/utils/gfid-access.py
+tests/utils/libcxattr.py
+
+> upstream patch link : https://review.gluster.org/#/c/glusterfs/+/22829/
+
+>Change-Id: I3db857cc19e19163d368d913eaec1269fbc37140
+>updates: bz#1193929
+>Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: I3db857cc19e19163d368d913eaec1269fbc37140
+BUG: 1704562
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175483
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/bugs/distribute/overlap.py | 2 +-
+ tests/bugs/glusterfs/bug-902610.t | 2 +-
+ tests/utils/changelogparser.py | 5 +-
+ tests/utils/create-files.py | 9 +-
+ tests/utils/gfid-access.py | 62 +++++++++----
+ tests/utils/libcxattr.py | 22 +++--
+ tests/utils/py2py3.py | 186 ++++++++++++++++++++++++++++++++++++++
+ 7 files changed, 258 insertions(+), 30 deletions(-)
+ create mode 100644 tests/utils/py2py3.py
+
+diff --git a/tests/bugs/distribute/overlap.py b/tests/bugs/distribute/overlap.py
+index 0941d37..2813979 100755
+--- a/tests/bugs/distribute/overlap.py
++++ b/tests/bugs/distribute/overlap.py
+@@ -17,7 +17,7 @@ def calculate_one (ov, nv):
+
+ def calculate_all (values):
+ total = 0
+- nv_index = len(values) / 2
++ nv_index = len(values) // 2
+ for old_val in values[:nv_index]:
+ new_val = values[nv_index]
+ nv_index += 1
+diff --git a/tests/bugs/glusterfs/bug-902610.t b/tests/bugs/glusterfs/bug-902610.t
+index b45e92b..112c947 100755
+--- a/tests/bugs/glusterfs/bug-902610.t
++++ b/tests/bugs/glusterfs/bug-902610.t
+@@ -28,7 +28,7 @@ function get_layout()
+ fi
+
+ # Figure out where the join point is.
+- target=$( $PYTHON -c "print '%08x' % (0x$layout1_e + 1)")
++ target=$( $PYTHON -c "print('%08x' % (0x$layout1_e + 1))")
+ #echo "target for layout2 = $target" > /dev/tty
+
+ # The second layout should cover everything that the first doesn't.
+diff --git a/tests/utils/changelogparser.py b/tests/utils/changelogparser.py
+index e8e252d..3b8f81d 100644
+--- a/tests/utils/changelogparser.py
++++ b/tests/utils/changelogparser.py
+@@ -125,7 +125,10 @@ class Record(object):
+ return repr(self.__dict__)
+
+ def __str__(self):
+- return unicode(self).encode('utf-8')
++ if sys.version_info >= (3,):
++ return self.__unicode__()
++ else:
++ return unicode(self).encode('utf-8')
+
+
+ def get_num_tokens(data, tokens, version=Version.V11):
+diff --git a/tests/utils/create-files.py b/tests/utils/create-files.py
+index b2a1961..04736e9 100755
+--- a/tests/utils/create-files.py
++++ b/tests/utils/create-files.py
+@@ -19,6 +19,11 @@ import argparse
+ datsiz = 0
+ timr = 0
+
++def get_ascii_upper_alpha_digits():
++ if sys.version_info > (3,0):
++ return string.ascii_uppercase+string.digits
++ else:
++ return string.uppercase+string.digits
+
+ def setLogger(filename):
+ global logger
+@@ -111,7 +116,7 @@ def create_tar_file(fil, size, mins, maxs, rand):
+
+ def get_filename(flen):
+ size = flen
+- char = string.uppercase+string.digits
++ char = get_ascii_upper_alpha_digits()
+ st = ''.join(random.choice(char) for i in range(size))
+ ti = str((hex(int(str(time.time()).split('.')[0])))[2:])
+ return ti+"%%"+st
+@@ -175,7 +180,7 @@ def tar_files(files, file_count, inter, size, mins, maxs,
+
+
+ def setxattr_files(files, randname, dir_path):
+- char = string.uppercase+string.digits
++ char = get_ascii_upper_alpha_digits()
+ if not randname:
+ for k in range(files):
+ v = ''.join(random.choice(char) for i in range(10))
+diff --git a/tests/utils/gfid-access.py b/tests/utils/gfid-access.py
+index 556d2b4..c35c122 100755
+--- a/tests/utils/gfid-access.py
++++ b/tests/utils/gfid-access.py
+@@ -33,23 +33,51 @@ def _fmt_mkdir(l):
+ def _fmt_symlink(l1, l2):
+ return "!II%dsI%ds%ds" % (37, l1+1, l2+1)
+
+-def entry_pack_reg(gf, bn, mo, uid, gid):
+- blen = len(bn)
+- return struct.pack(_fmt_mknod(blen),
+- uid, gid, gf, mo, bn,
+- stat.S_IMODE(mo), 0, umask())
+-
+-def entry_pack_dir(gf, bn, mo, uid, gid):
+- blen = len(bn)
+- return struct.pack(_fmt_mkdir(blen),
+- uid, gid, gf, mo, bn,
+- stat.S_IMODE(mo), umask())
+-
+-def entry_pack_symlink(gf, bn, lnk, mo, uid, gid):
+- blen = len(bn)
+- llen = len(lnk)
+- return struct.pack(_fmt_symlink(blen, llen),
+- uid, gid, gf, mo, bn, lnk)
++
++if sys.version_info > (3,):
++ def entry_pack_reg(gf, bn, mo, uid, gid):
++ bn_encoded = bn.encode()
++ blen = len(bn_encoded)
++ return struct.pack(_fmt_mknod(blen),
++ uid, gid, gf.encode(), mo, bn_encoded,
++ stat.S_IMODE(mo), 0, umask())
++
++ # mkdir
++ def entry_pack_dir(gf, bn, mo, uid, gid):
++ bn_encoded = bn.encode()
++ blen = len(bn_encoded)
++ return struct.pack(_fmt_mkdir(blen),
++ uid, gid, gf.encode(), mo, bn_encoded,
++ stat.S_IMODE(mo), umask())
++ # symlink
++ def entry_pack_symlink(gf, bn, lnk, st):
++ bn_encoded = bn.encode()
++ blen = len(bn_encoded)
++ lnk_encoded = lnk.encode()
++ llen = len(lnk_encoded)
++ return struct.pack(_fmt_symlink(blen, llen),
++ st['uid'], st['gid'],
++ gf.encode(), st['mode'], bn_encoded,
++ lnk_encoded)
++
++else:
++ def entry_pack_reg(gf, bn, mo, uid, gid):
++ blen = len(bn)
++ return struct.pack(_fmt_mknod(blen),
++ uid, gid, gf, mo, bn,
++ stat.S_IMODE(mo), 0, umask())
++
++ def entry_pack_dir(gf, bn, mo, uid, gid):
++ blen = len(bn)
++ return struct.pack(_fmt_mkdir(blen),
++ uid, gid, gf, mo, bn,
++ stat.S_IMODE(mo), umask())
++
++ def entry_pack_symlink(gf, bn, lnk, mo, uid, gid):
++ blen = len(bn)
++ llen = len(lnk)
++ return struct.pack(_fmt_symlink(blen, llen),
++ uid, gid, gf, mo, bn, lnk)
+
+ if __name__ == '__main__':
+ if len(sys.argv) < 9:
+diff --git a/tests/utils/libcxattr.py b/tests/utils/libcxattr.py
+index fd0b083..3f3ed1f 100644
+--- a/tests/utils/libcxattr.py
++++ b/tests/utils/libcxattr.py
+@@ -10,7 +10,9 @@
+
+ import os
+ import sys
+-from ctypes import CDLL, c_int, create_string_buffer
++from ctypes import CDLL, c_int
++from py2py3 import bytearray_to_str, gr_create_string_buffer
++from py2py3 import gr_query_xattr, gr_lsetxattr, gr_lremovexattr
+
+
+ class Xattr(object):
+@@ -47,20 +49,23 @@ class Xattr(object):
+ @classmethod
+ def _query_xattr(cls, path, siz, syscall, *a):
+ if siz:
+- buf = create_string_buffer('\0' * siz)
++ buf = gr_create_string_buffer(siz)
+ else:
+ buf = None
+ ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
+ if ret == -1:
+ cls.raise_oserr()
+ if siz:
+- return buf.raw[:ret]
++ # py2 and py3 compatibility. Convert bytes array
++ # to string
++ result = bytearray_to_str(buf.raw)
++ return result[:ret]
+ else:
+ return ret
+
+ @classmethod
+ def lgetxattr(cls, path, attr, siz=0):
+- return cls._query_xattr(path, siz, 'lgetxattr', attr)
++ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr)
+
+ @classmethod
+ def lgetxattr_buf(cls, path, attr):
+@@ -74,20 +79,21 @@ class Xattr(object):
+
+ @classmethod
+ def llistxattr(cls, path, siz=0):
+- ret = cls._query_xattr(path, siz, 'llistxattr')
++ ret = gr_query_xattr(cls, path, siz, 'llistxattr')
+ if isinstance(ret, str):
+- ret = ret.split('\0')
++ ret = ret.strip('\0')
++ ret = ret.split('\0') if ret else []
+ return ret
+
+ @classmethod
+ def lsetxattr(cls, path, attr, val):
+- ret = cls.libc.lsetxattr(path, attr, val, len(val), 0)
++ ret = gr_lsetxattr(cls, path, attr, val)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def lremovexattr(cls, path, attr):
+- ret = cls.libc.lremovexattr(path, attr)
++ ret = gr_lremovexattr(cls, path, attr)
+ if ret == -1:
+ cls.raise_oserr()
+
+diff --git a/tests/utils/py2py3.py b/tests/utils/py2py3.py
+new file mode 100644
+index 0000000..63aca10
+--- /dev/null
++++ b/tests/utils/py2py3.py
+@@ -0,0 +1,186 @@
++#
++# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
++# This file is part of GlusterFS.
++
++# This file is licensed to you under your choice of the GNU Lesser
++# General Public License, version 3 or any later version (LGPLv3 or
++# later), or the GNU General Public License, version 2 (GPLv2), in all
++# cases as published by the Free Software Foundation.
++#
++
++# All python2/python3 compatibility routines
++
++import sys
++import os
++import stat
++import struct
++from ctypes import create_string_buffer
++
++def umask():
++ return os.umask(0)
++
++if sys.version_info >= (3,):
++ def pipe():
++ (r, w) = os.pipe()
++ os.set_inheritable(r, True)
++ os.set_inheritable(w, True)
++ return (r, w)
++
++    # Raw conversion of a bytearray to a string. Used in the cases where
++    # the buffer is created by create_string_buffer, which is an 8-bit char
++    # array, and passed to syscalls to fetch results. Using encode/decode
++    # doesn't work, as the conversion to a string alters the size.
++ def bytearray_to_str(byte_arr):
++ return ''.join([chr(b) for b in byte_arr])
++
++    # Raw conversion of a string to bytes. This is required to convert
++    # the string back into a bytearray (C char array) for use in struct
++    # pack/unpacking. Again, encode/decode can't be used, as the
++    # conversion alters the size.
++ def str_to_bytearray(string):
++ return bytes([ord(c) for c in string])
++
++ def gr_create_string_buffer(size):
++ return create_string_buffer(b'\0', size)
++
++ def gr_query_xattr(cls, path, size, syscall, attr=None):
++ if attr:
++ return cls._query_xattr(path.encode(), size, syscall,
++ attr.encode())
++ else:
++ return cls._query_xattr(path.encode(), size, syscall)
++
++ def gr_lsetxattr(cls, path, attr, val):
++ return cls.libc.lsetxattr(path.encode(), attr.encode(), val,
++ len(val), 0)
++
++ def gr_lremovexattr(cls, path, attr):
++ return cls.libc.lremovexattr(path.encode(), attr.encode())
++
++ def gr_cl_register(cls, brick, path, log_file, log_level, retries):
++ return cls._get_api('gf_changelog_register')(brick.encode(),
++ path.encode(),
++ log_file.encode(),
++ log_level, retries)
++
++ def gr_cl_done(cls, clfile):
++ return cls._get_api('gf_changelog_done')(clfile.encode())
++
++ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel,
++ actual_end):
++ return cls._get_api('gf_history_changelog')(changelog_path.encode(),
++ start, end, num_parallel,
++ actual_end)
++
++ def gr_cl_history_done(cls, clfile):
++ return cls._get_api('gf_history_changelog_done')(clfile.encode())
++
++ # regular file
++
++ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
++ bn_encoded = bn.encode()
++ blen = len(bn_encoded)
++ return struct.pack(cls._fmt_mknod(blen),
++ uid, gid, gf.encode(), mo, bn_encoded,
++ stat.S_IMODE(mo), 0, umask())
++
++ def entry_pack_reg_stat(cls, gf, bn, st):
++ bn_encoded = bn.encode()
++ blen = len(bn_encoded)
++ mo = st['mode']
++ return struct.pack(cls._fmt_mknod(blen),
++ st['uid'], st['gid'],
++ gf.encode(), mo, bn_encoded,
++ stat.S_IMODE(mo), 0, umask())
++ # mkdir
++
++ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
++ bn_encoded = bn.encode()
++ blen = len(bn_encoded)
++ return struct.pack(cls._fmt_mkdir(blen),
++ uid, gid, gf.encode(), mo, bn_encoded,
++ stat.S_IMODE(mo), umask())
++ # symlink
++
++ def entry_pack_symlink(cls, gf, bn, lnk, st):
++ bn_encoded = bn.encode()
++ blen = len(bn_encoded)
++ lnk_encoded = lnk.encode()
++ llen = len(lnk_encoded)
++ return struct.pack(cls._fmt_symlink(blen, llen),
++ st['uid'], st['gid'],
++ gf.encode(), st['mode'], bn_encoded,
++ lnk_encoded)
++else:
++ def pipe():
++ (r, w) = os.pipe()
++ return (r, w)
++
++ # Raw conversion of bytearray to string
++ def bytearray_to_str(byte_arr):
++ return byte_arr
++
++ # Raw conversion of string to bytearray
++ def str_to_bytearray(string):
++ return string
++
++ def gr_create_string_buffer(size):
++ return create_string_buffer('\0', size)
++
++ def gr_query_xattr(cls, path, size, syscall, attr=None):
++ if attr:
++ return cls._query_xattr(path, size, syscall, attr)
++ else:
++ return cls._query_xattr(path, size, syscall)
++
++ def gr_lsetxattr(cls, path, attr, val):
++ return cls.libc.lsetxattr(path, attr, val, len(val), 0)
++
++ def gr_lremovexattr(cls, path, attr):
++ return cls.libc.lremovexattr(path, attr)
++
++ def gr_cl_register(cls, brick, path, log_file, log_level, retries):
++ return cls._get_api('gf_changelog_register')(brick, path, log_file,
++ log_level, retries)
++
++ def gr_cl_done(cls, clfile):
++ return cls._get_api('gf_changelog_done')(clfile)
++
++ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel,
++ actual_end):
++ return cls._get_api('gf_history_changelog')(changelog_path, start, end,
++ num_parallel, actual_end)
++
++ def gr_cl_history_done(cls, clfile):
++ return cls._get_api('gf_history_changelog_done')(clfile)
++
++ # regular file
++
++ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
++ blen = len(bn)
++ return struct.pack(cls._fmt_mknod(blen),
++ uid, gid, gf, mo, bn,
++ stat.S_IMODE(mo), 0, umask())
++
++ def entry_pack_reg_stat(cls, gf, bn, st):
++ blen = len(bn)
++ mo = st['mode']
++ return struct.pack(cls._fmt_mknod(blen),
++ st['uid'], st['gid'],
++ gf, mo, bn,
++ stat.S_IMODE(mo), 0, umask())
++ # mkdir
++
++ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
++ blen = len(bn)
++ return struct.pack(cls._fmt_mkdir(blen),
++ uid, gid, gf, mo, bn,
++ stat.S_IMODE(mo), umask())
++ # symlink
++
++ def entry_pack_symlink(cls, gf, bn, lnk, st):
++ blen = len(bn)
++ llen = len(lnk)
++ return struct.pack(cls._fmt_symlink(blen, llen),
++ st['uid'], st['gid'],
++ gf, st['mode'], bn, lnk)
+--
+1.8.3.1
+
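A minimal, self-contained sketch of the raw-conversion trick used by the py2py3.py helpers above, kept outside the patch for illustration (the buffer contents and sizes here are arbitrary):

    from ctypes import create_string_buffer

    def bytearray_to_str(byte_arr):
        # One chr() per byte: unlike bytes.decode(), multi-byte UTF-8
        # sequences are never merged, so the length is preserved exactly.
        return ''.join(chr(b) for b in byte_arr)

    buf = create_string_buffer(b'\xc3\xa9raw', 8)        # 8-byte C char array
    s = bytearray_to_str(buf.raw)
    assert len(s) == 8                                   # byte-for-byte length
    assert len(buf.raw.decode('utf-8', 'replace')) != 8  # decode() would shrink it

This is why the helpers convert syscall buffers with chr()/ord() loops rather than encode()/decode(): the syscalls report result sizes in bytes, and text codecs can change the character count.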
diff --git a/0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch b/0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch
new file mode 100644
index 0000000..f8f382a
--- /dev/null
+++ b/0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch
@@ -0,0 +1,92 @@
+From 1c55f3633f748629cd0484f79b6c49101eb2df82 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Mon, 8 Jul 2019 11:47:28 +0530
+Subject: [PATCH 226/255] geo-rep : fix gluster command path for non-root
+ session
+
+Problem:
+gluster command not found.
+
+Cause:
+In the Volinfo class we issue the command 'gluster vol info' to get
+information about a volume, such as the brick_root, in order to perform
+various operations. When a geo-rep session is configured for a non-root
+user, the Volinfo class fails to issue the gluster command because the
+gluster binary path is not available for the non-root user.
+
+Solution:
+Use the config value 'slave-gluster-command-dir'/'gluster-command-dir' to
+get the gluster command path, based on the caller.
+
+>Backport of:
+>Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22920/.
+>fixes: bz#1722740
+>Change-Id: I4ec46373da01f5d00ecd160c4e8c6239da8b3859
+>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1712591
+Change-Id: Ifea2927253a9521fa459fea6de8a60085c3413f6
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175485
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/monitor.py | 4 ++--
+ geo-replication/syncdaemon/syncdutils.py | 12 +++++++++---
+ 2 files changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
+index c45ef24..234f3f1 100644
+--- a/geo-replication/syncdaemon/monitor.py
++++ b/geo-replication/syncdaemon/monitor.py
+@@ -369,7 +369,7 @@ def distribute(master, slave):
+ if rconf.args.use_gconf_volinfo:
+ mvol = VolinfoFromGconf(master.volume, master=True)
+ else:
+- mvol = Volinfo(master.volume, master.host)
++ mvol = Volinfo(master.volume, master.host, master=True)
+ logging.debug('master bricks: ' + repr(mvol.bricks))
+ prelude = []
+ slave_host = None
+@@ -385,7 +385,7 @@ def distribute(master, slave):
+ if rconf.args.use_gconf_volinfo:
+ svol = VolinfoFromGconf(slave.volume, master=False)
+ else:
+- svol = Volinfo(slave.volume, "localhost", prelude)
++ svol = Volinfo(slave.volume, "localhost", prelude, master=False)
+
+ sbricks = svol.bricks
+ suuid = svol.uuid
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index 3f41b5f..2ee10ac 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -672,7 +672,7 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
+ dir_path = ENOENT
+
+ if not slv_bricks:
+- slv_info = Volinfo(slv_volume, slv_host)
++ slv_info = Volinfo(slv_volume, slv_host, master=False)
+ slv_bricks = slv_info.bricks
+ # Result of readlink would be of format as below.
+ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename"
+@@ -854,8 +854,14 @@ class Popen(subprocess.Popen):
+
+ class Volinfo(object):
+
+- def __init__(self, vol, host='localhost', prelude=[]):
+- po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host,
++ def __init__(self, vol, host='localhost', prelude=[], master=True):
++ if master:
++ gluster_cmd_dir = gconf.get("gluster-command-dir")
++ else:
++ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
++
++ gluster_cmd = os.path.join(gluster_cmd_dir, 'gluster')
++ po = Popen(prelude + [gluster_cmd, '--xml', '--remote-host=' + host,
+ 'volume', 'info', vol],
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
+ vix = po.stdout.read()
+--
+1.8.3.1
+
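The fix above boils down to resolving the gluster binary per side of the session instead of relying on $PATH. A rough sketch of that selection logic, with a plain dict standing in for gsyncd's gconf (the paths below are made up for illustration):

    import os

    # Stand-in for the geo-rep session config (gsyncd's gconf).
    config = {
        'gluster-command-dir': '/usr/sbin',
        'slave-gluster-command-dir': '/usr/local/sbin',  # e.g. non-root install
    }

    def gluster_cmd_for(master):
        # Master and slave may have gluster installed in different places,
        # so pick the directory configured for the calling side.
        key = 'gluster-command-dir' if master else 'slave-gluster-command-dir'
        return os.path.join(config[key], 'gluster')

    cmd = [gluster_cmd_for(master=False), '--xml', '--remote-host=localhost',
           'volume', 'info', 'myvol']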
diff --git a/0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch b/0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch
new file mode 100644
index 0000000..41d482f
--- /dev/null
+++ b/0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch
@@ -0,0 +1,914 @@
+From b0815b8a84a07d17a1215c55afc38888ee9fc37c Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Mon, 24 Jun 2019 12:00:20 +0530
+Subject: [PATCH 227/255] glusterd/svc: update pid of mux volumes from the shd
+ process
+
+For a normal volume, we update the pid from the process while
+daemonizing, or at the end of init in no-daemon mode. Along with
+updating the pid we also lock the file, to make sure that the
+process is actually running.
+
+With brick mux, we were updating the pidfile from glusterd
+after an attach/detach request.
+
+There are two problems with this approach.
+1) We are not holding a pid lock on any file other than the parent
+   process's pidfile.
+2) There is a chance of race conditions with attach/detach.
+   For example, an shd start and a volume stop could race. Let's say
+   we are starting an shd and it is attached to a volume.
+   While we are trying to link the pid file to the running process,
+   the file could already have been deleted by the thread doing the
+   volume stop.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22935/
+
+>Change-Id: I29a00352102877ce09ea3f376ca52affceb5cf1a
+>Updates: bz#1722541
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I29a00352102877ce09ea3f376ca52affceb5cf1a
+BUG: 1721802
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175723
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfsd/src/gf_attach.c | 2 +
+ glusterfsd/src/glusterfsd-mgmt.c | 66 +++++++--
+ libglusterfs/src/glusterfs/glusterfs.h | 2 +-
+ libglusterfs/src/glusterfs/libglusterfs-messages.h | 3 +-
+ libglusterfs/src/graph.c | 154 ++++++++++++++++++++-
+ rpc/xdr/src/glusterd1-xdr.x | 1 +
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 2 +
+ xlators/mgmt/glusterd/src/glusterd-handshake.c | 42 +++++-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 +
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 25 ++++
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 3 +
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 57 ++++----
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 +
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 6 +-
+ 15 files changed, 325 insertions(+), 52 deletions(-)
+
+diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
+index 6293b9b..1bff854 100644
+--- a/glusterfsd/src/gf_attach.c
++++ b/glusterfsd/src/gf_attach.c
+@@ -65,6 +65,8 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
+ brick_req.name = path;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
++ brick_req.dict.dict_val = NULL;
++ brick_req.dict.dict_len = 0;
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
+index 1d2cd1a..f930e0a 100644
+--- a/glusterfsd/src/glusterfsd-mgmt.c
++++ b/glusterfsd/src/glusterfsd-mgmt.c
+@@ -50,13 +50,16 @@ int
+ emancipate(glusterfs_ctx_t *ctx, int ret);
+ int
+ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum);
++ char *volfile_id, char *checksum,
++ dict_t *dict);
+ int
+ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+- gf_volfile_t *volfile_obj, char *checksum);
++ gf_volfile_t *volfile_obj, char *checksum,
++ dict_t *dict);
+ int
+ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum);
++ char *volfile_id, char *checksum,
++ dict_t *dict);
+ int
+ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj);
+
+@@ -75,7 +78,8 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+ }
+
+ int
+-mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id)
++mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id,
++ dict_t *dict)
+ {
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+@@ -145,11 +149,11 @@ mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id)
+ * the volfile
+ */
+ ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id,
+- sha256_hash);
++ sha256_hash, dict);
+ goto unlock;
+ }
+ ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj,
+- sha256_hash);
++ sha256_hash, dict);
+ if (ret < 0) {
+ gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!");
+ }
+@@ -387,6 +391,8 @@ err:
+ UNLOCK(&ctx->volfile_lock);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
++ if (xlator_req.dict.dict_val)
++ free(xlator_req.dict.dict_val);
+ free(xlator_req.name);
+ xlator_req.name = NULL;
+ return 0;
+@@ -561,6 +567,8 @@ out:
+
+ free(xlator_req.name);
+ free(xlator_req.input.input_val);
++ if (xlator_req.dict.dict_val)
++ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ if (dict)
+@@ -982,6 +990,8 @@ out:
+ if (input)
+ dict_unref(input);
+ free(xlator_req.input.input_val); /*malloced by xdr*/
++ if (xlator_req.dict.dict_val)
++ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name);
+@@ -1062,6 +1072,8 @@ glusterfs_handle_attach(rpcsvc_request_t *req)
+ out:
+ UNLOCK(&ctx->volfile_lock);
+ }
++ if (xlator_req.dict.dict_val)
++ free(xlator_req.dict.dict_val);
+ free(xlator_req.input.input_val);
+ free(xlator_req.name);
+
+@@ -1077,6 +1089,7 @@ glusterfs_handle_svc_attach(rpcsvc_request_t *req)
+ };
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
++ dict_t *dict = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+@@ -1091,20 +1104,41 @@ glusterfs_handle_svc_attach(rpcsvc_request_t *req)
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
++
+ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41,
+ "received attach "
+ "request for volfile-id=%s",
+ xlator_req.name);
++
++ dict = dict_new();
++ if (!dict) {
++ ret = -1;
++ errno = ENOMEM;
++ goto out;
++ }
++
++ ret = dict_unserialize(xlator_req.dict.dict_val, xlator_req.dict.dict_len,
++ &dict);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42,
++ "failed to unserialize xdata to dictionary");
++ goto out;
++ }
++ dict->extra_stdfree = xlator_req.dict.dict_val;
++
+ ret = 0;
+
+ if (ctx->active) {
+ ret = mgmt_process_volfile(xlator_req.input.input_val,
+- xlator_req.input.input_len, xlator_req.name);
++ xlator_req.input.input_len, xlator_req.name,
++ dict);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42,
+ "got attach for %s but no active graph", xlator_req.name);
+ }
+ out:
++ if (dict)
++ dict_unref(dict);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.name)
+@@ -1241,6 +1275,8 @@ out:
+ GF_FREE(filepath);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
++ if (xlator_req.dict.dict_val)
++ free(xlator_req.dict.dict_val);
+
+ return ret;
+ }
+@@ -1313,6 +1349,8 @@ out:
+ if (dict)
+ dict_unref(dict);
+ free(xlator_req.input.input_val); // malloced by xdr
++ if (xlator_req.dict.dict_val)
++ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+@@ -1461,6 +1499,8 @@ out:
+ if (output)
+ dict_unref(output);
+ free(brick_req.input.input_val);
++ if (brick_req.dict.dict_val)
++ free(brick_req.dict.dict_val);
+ free(brick_req.name);
+ GF_FREE(xname);
+ GF_FREE(msg);
+@@ -1654,6 +1694,8 @@ out:
+ if (dict)
+ dict_unref(dict);
+ free(node_req.input.input_val);
++ if (node_req.dict.dict_val)
++ free(node_req.dict.dict_val);
+ GF_FREE(msg);
+ GF_FREE(rsp.output.output_val);
+ GF_FREE(node_name);
+@@ -1757,6 +1799,8 @@ glusterfs_handle_nfs_profile(rpcsvc_request_t *req)
+
+ out:
+ free(nfs_req.input.input_val);
++ if (nfs_req.dict.dict_val)
++ free(nfs_req.dict.dict_val);
+ if (dict)
+ dict_unref(dict);
+ if (output)
+@@ -1835,6 +1879,8 @@ out:
+ if (dict)
+ dict_unref(dict);
+ free(xlator_req.input.input_val); // malloced by xdr
++ if (xlator_req.dict.dict_val)
++ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+@@ -1963,7 +2009,8 @@ out:
+ if (dict)
+ dict_unref(dict);
+ free(brick_req.input.input_val);
+-
++ if (brick_req.dict.dict_val)
++ free(brick_req.dict.dict_val);
+ gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+ }
+@@ -2213,7 +2260,8 @@ volfile:
+ size = rsp.op_ret;
+ volfile_id = frame->local;
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+- ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id);
++ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id,
++ dict);
+ goto post_graph_mgmt;
+ }
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 9ec2365..b6327b8 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -744,7 +744,7 @@ typedef struct {
+ char vol_id[NAME_MAX + 1];
+ struct list_head volfile_list;
+ glusterfs_graph_t *graph;
+-
++ FILE *pidfp;
+ } gf_volfile_t;
+
+ glusterfs_ctx_t *
+diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h
+index ea2aa60..7e0eebb 100644
+--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h
++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h
+@@ -111,6 +111,7 @@ GLFS_MSGID(
+ LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG,
+ LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE,
+ LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED,
+- LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED);
++ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED,
++ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED);
+
+ #endif /* !_LG_MESSAGES_H_ */
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 172dc61..05f76bf 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1467,6 +1467,21 @@ out:
+ }
+
+ int
++glusterfs_svc_mux_pidfile_cleanup(gf_volfile_t *volfile_obj)
++{
++ if (!volfile_obj || !volfile_obj->pidfp)
++ return 0;
++
++ gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", volfile_obj->vol_id);
++
++ lockf(fileno(volfile_obj->pidfp), F_ULOCK, 0);
++ fclose(volfile_obj->pidfp);
++ volfile_obj->pidfp = NULL;
++
++ return 0;
++}
++
++int
+ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+ {
+ xlator_t *last_xl = NULL;
+@@ -1502,6 +1517,7 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+
+ list_del_init(&volfile_obj->volfile_list);
+ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
++ glusterfs_svc_mux_pidfile_cleanup(volfile_obj);
+ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+ parent_graph->xl_count -= graph->xl_count;
+ parent_graph->leaf_count -= graph->leaf_count;
+@@ -1531,8 +1547,126 @@ out:
+ }
+
+ int
++glusterfs_svc_mux_pidfile_setup(gf_volfile_t *volfile_obj, const char *pid_file)
++{
++ int ret = -1;
++ FILE *pidfp = NULL;
++
++ if (!pid_file || !volfile_obj)
++ goto out;
++
++ if (volfile_obj->pidfp) {
++ ret = 0;
++ goto out;
++ }
++ pidfp = fopen(pid_file, "a+");
++ if (!pidfp) {
++ goto out;
++ }
++ volfile_obj->pidfp = pidfp;
++
++ ret = lockf(fileno(pidfp), F_TLOCK, 0);
++ if (ret) {
++ ret = 0;
++ goto out;
++ }
++out:
++ return ret;
++}
++
++int
++glusterfs_svc_mux_pidfile_update(gf_volfile_t *volfile_obj,
++ const char *pid_file, pid_t pid)
++{
++ int ret = 0;
++ FILE *pidfp = NULL;
++ int old_pid;
++
++ if (!volfile_obj->pidfp) {
++ ret = glusterfs_svc_mux_pidfile_setup(volfile_obj, pid_file);
++ if (ret == -1)
++ goto out;
++ }
++ pidfp = volfile_obj->pidfp;
++ ret = fscanf(pidfp, "%d", &old_pid);
++ if (ret <= 0) {
++ goto update;
++ }
++ if (old_pid == pid) {
++ ret = 0;
++ goto out;
++ } else {
++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
++ "Old pid=%d found in pidfile %s. Cleaning the old pid and "
++ "Updating new pid=%d",
++ old_pid, pid_file, pid);
++ }
++update:
++ ret = sys_ftruncate(fileno(pidfp), 0);
++ if (ret) {
++ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
++ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
++ "pidfile %s truncation failed", pid_file);
++ goto out;
++ }
++
++ ret = fprintf(pidfp, "%d\n", pid);
++ if (ret <= 0) {
++ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
++ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
++ pid_file);
++ goto out;
++ }
++
++ ret = fflush(pidfp);
++ if (ret) {
++ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
++ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
++ pid_file);
++ goto out;
++ }
++out:
++ return ret;
++}
++
++int
++glusterfs_update_mux_pid(dict_t *dict, gf_volfile_t *volfile_obj)
++{
++ char *file = NULL;
++ int ret = -1;
++
++ GF_VALIDATE_OR_GOTO("graph", dict, out);
++ GF_VALIDATE_OR_GOTO("graph", volfile_obj, out);
++
++ ret = dict_get_str(dict, "pidfile", &file);
++ if (ret < 0) {
++ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
++ "Failed to get pidfile from dict for volfile_id=%s",
++ volfile_obj->vol_id);
++ }
++
++ ret = glusterfs_svc_mux_pidfile_update(volfile_obj, file, getpid());
++ if (ret < 0) {
++ ret = -1;
++ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
++ "Failed to update "
++ "the pidfile for volfile_id=%s",
++ volfile_obj->vol_id);
++
++ goto out;
++ }
++
++ if (ret == 1)
++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
++ "PID %d updated in pidfile=%s", getpid(), file);
++ ret = 0;
++out:
++ return ret;
++}
++int
+ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum)
++ char *volfile_id, char *checksum,
++ dict_t *dict)
+ {
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *parent_graph = NULL;
+@@ -1615,18 +1749,25 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ ret = -1;
+ goto out;
+ }
++ volfile_obj->pidfp = NULL;
++ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
++ volfile_id);
++
++ if (strcmp(ctx->cmd_args.process_name, "glustershd") == 0) {
++ ret = glusterfs_update_mux_pid(dict, volfile_obj);
++ if (ret == -1) {
++ goto out;
++ }
++ }
+
+ graph->used = 1;
+ parent_graph->id++;
+ list_add(&graph->list, &ctx->graphs);
+ INIT_LIST_HEAD(&volfile_obj->volfile_list);
+ volfile_obj->graph = graph;
+- snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
+- volfile_id);
+ memcpy(volfile_obj->volfile_checksum, checksum,
+ sizeof(volfile_obj->volfile_checksum));
+ list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list);
+-
+ gf_log_dump_graph(fp, graph);
+ graph = NULL;
+
+@@ -1654,7 +1795,8 @@ out:
+
+ int
+ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+- gf_volfile_t *volfile_obj, char *checksum)
++ gf_volfile_t *volfile_obj, char *checksum,
++ dict_t *dict)
+ {
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+@@ -1703,7 +1845,7 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ }
+ volfile_obj = NULL;
+ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id,
+- checksum);
++ checksum, dict);
+ goto out;
+ }
+
+diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x
+index 9b36d34..02ebec2 100644
+--- a/rpc/xdr/src/glusterd1-xdr.x
++++ b/rpc/xdr/src/glusterd1-xdr.x
+@@ -132,6 +132,7 @@ struct gd1_mgmt_brick_op_req {
+ string name<>;
+ int op;
+ opaque input<>;
++ opaque dict<>;
+ } ;
+
+ struct gd1_mgmt_brick_op_rsp {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index af8a8a4..cc1f1df 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -5423,6 +5423,8 @@ glusterd_print_client_details(FILE *fp, dict_t *dict,
+
+ brick_req->op = GLUSTERD_BRICK_STATUS;
+ brick_req->name = "";
++ brick_req->dict.dict_val = NULL;
++ brick_req->dict.dict_len = 0;
+
+ ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
+ brickinfo->path);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+index 1ba58c3..86dec82 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+@@ -203,7 +203,7 @@ out:
+
+ size_t
+ build_volfile_path(char *volume_id, char *path, size_t path_len,
+- char *trusted_str)
++ char *trusted_str, dict_t *dict)
+ {
+ struct stat stbuf = {
+ 0,
+@@ -340,11 +340,19 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
+
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+ if (ret == -1) {
+- gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++ "Couldn't find volinfo for volid=%s", volid_ptr);
+ goto out;
+ }
+
+ glusterd_svc_build_shd_volfile_path(volinfo, path, path_len);
++
++ ret = glusterd_svc_set_shd_pidfile(volinfo, dict);
++ if (ret == -1) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
++ "Couldn't set pidfile in dict for volid=%s", volid_ptr);
++ goto out;
++ }
+ ret = 0;
+ goto out;
+ }
+@@ -919,6 +927,7 @@ __server_getspec(rpcsvc_request_t *req)
+ char addrstr[RPCSVC_PEER_STRLEN] = {0};
+ peer_info_t *peerinfo = NULL;
+ xlator_t *this = NULL;
++ dict_t *dict = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -971,6 +980,12 @@ __server_getspec(rpcsvc_request_t *req)
+ goto fail;
+ }
+
++ dict = dict_new();
++ if (!dict) {
++ ret = -ENOMEM;
++ goto fail;
++ }
++
+ trans = req->trans;
+ /* addrstr will be empty for cli socket connections */
+ ret = rpcsvc_transport_peername(trans, (char *)&addrstr, sizeof(addrstr));
+@@ -989,12 +1004,26 @@ __server_getspec(rpcsvc_request_t *req)
+ */
+ if (strlen(addrstr) == 0 || gf_is_local_addr(addrstr)) {
+ ret = build_volfile_path(volume, filename, sizeof(filename),
+- TRUSTED_PREFIX);
++ TRUSTED_PREFIX, dict);
+ } else {
+- ret = build_volfile_path(volume, filename, sizeof(filename), NULL);
++ ret = build_volfile_path(volume, filename, sizeof(filename), NULL,
++ dict);
+ }
+
+ if (ret == 0) {
++ if (dict->count > 0) {
++ ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val,
++ &rsp.xdata.xdata_len);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
++ "Failed to serialize dict "
++ "to request buffer");
++ goto fail;
++ }
++ dict->extra_free = rsp.xdata.xdata_val;
++ }
++
+ /* to allocate the proper buffer to hold the file data */
+ ret = sys_stat(filename, &stbuf);
+ if (ret < 0) {
+@@ -1036,7 +1065,6 @@ __server_getspec(rpcsvc_request_t *req)
+ goto fail;
+ }
+ }
+-
+ /* convert to XDR */
+ fail:
+ if (spec_fd >= 0)
+@@ -1056,6 +1084,10 @@ fail:
+ (xdrproc_t)xdr_gf_getspec_rsp);
+ free(args.key); // malloced by xdr
+ free(rsp.spec);
++
++ if (dict)
++ dict_unref(dict);
++
+ if (args.xdata.xdata_val)
+ free(args.xdata.xdata_val);
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 9ea695e..454877b 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -655,6 +655,8 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
+ break;
+ }
+
++ brick_req->dict.dict_len = 0;
++ brick_req->dict.dict_val = NULL;
+ ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
+ if (ret)
+@@ -723,6 +725,8 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
+ goto out;
+ }
+
++ brick_req->dict.dict_len = 0;
++ brick_req->dict.dict_val = NULL;
+ ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+index 57ceda9..5661e39 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+@@ -126,3 +126,28 @@ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
+ out:
+ return;
+ }
++
++int
++glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict)
++{
++ int ret = -1;
++ glusterd_svc_t *svc = NULL;
++ xlator_t *this = NULL;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
++ GF_VALIDATE_OR_GOTO(this->name, dict, out);
++
++ svc = &(volinfo->shd.svc);
++
++ ret = dict_set_dynstr_with_alloc(dict, "pidfile", svc->proc.pidfile);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
++ "Failed to set pidfile %s in dict", svc->proc.pidfile);
++ goto out;
++ }
++ ret = 0;
++out:
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+index 59466ec..1f0984b 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+@@ -36,4 +36,7 @@ glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
+ int
+ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo);
+
++int
++glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict);
++
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 8ad90a9..590169f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -258,14 +258,20 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ gf_boolean_t shd_restart = _gf_false;
+
+ conf = THIS->private;
+- volinfo = data;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
++ volinfo = data;
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ if (volinfo)
+ glusterd_volinfo_ref(volinfo);
+
++ if (volinfo->is_snap_volume) {
++        /* healing of a snap volume is not supported yet */
++ ret = 0;
++ goto out;
++ }
++
+ while (conf->restart_shd) {
+ synclock_unlock(&conf->big_lock);
+ sleep(2);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index 400826f..e106111 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -519,7 +519,7 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+ /* Take first entry from the process */
+ parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
+ mux_svc);
+- sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
++ glusterd_copy_file(parent_svc->proc.pidfile, svc->proc.pidfile);
+ mux_conn = &parent_svc->conn;
+ if (volinfo)
+ volinfo->shd.attached = _gf_true;
+@@ -623,12 +623,9 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_shdsvc_t *shd = NULL;
+ glusterd_svc_t *svc = frame->cookie;
+- glusterd_svc_t *parent_svc = NULL;
+- glusterd_svc_proc_t *mux_proc = NULL;
+ glusterd_conf_t *conf = NULL;
+ int *flag = (int *)frame->local;
+ xlator_t *this = THIS;
+- int pid = -1;
+ int ret = -1;
+ gf_getspec_rsp rsp = {
+ 0,
+@@ -679,27 +676,7 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ }
+
+ if (rsp.op_ret == 0) {
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- if (!strcmp(svc->name, "glustershd")) {
+- mux_proc = svc->svc_proc;
+- if (mux_proc &&
+- !gf_is_service_running(svc->proc.pidfile, &pid)) {
+- /*
+- * When svc's are restarting, there is a chance that the
+- * attached svc might not have updated it's pid. Because
+- * it was at connection stage. So in that case, we need
+- * to retry the pid file copy.
+- */
+- parent_svc = cds_list_entry(mux_proc->svcs.next,
+- glusterd_svc_t, mux_svc);
+- if (parent_svc)
+- sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
+- }
+- }
+- svc->online = _gf_true;
+- }
+- pthread_mutex_unlock(&conf->attach_lock);
++ svc->online = _gf_true;
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "svc %s of volume %s attached successfully to pid %d", svc->name,
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+@@ -726,7 +703,7 @@ out:
+
+ extern size_t
+ build_volfile_path(char *volume_id, char *path, size_t path_len,
+- char *trusted_str);
++ char *trusted_str, dict_t *dict);
+
+ int
+ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+@@ -751,6 +728,7 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+ gd1_mgmt_brick_op_req brick_req;
++ dict_t *dict = NULL;
+ void *req = &brick_req;
+ void *errlbl = &&err;
+ struct rpc_clnt_connection *conn;
+@@ -776,6 +754,8 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ brick_req.name = volfile_id;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
++ brick_req.dict.dict_val = NULL;
++ brick_req.dict.dict_len = 0;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+@@ -783,7 +763,13 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ }
+
+ if (op == GLUSTERD_SVC_ATTACH) {
+- (void)build_volfile_path(volfile_id, path, sizeof(path), NULL);
++ dict = dict_new();
++ if (!dict) {
++ ret = -ENOMEM;
++ goto *errlbl;
++ }
++
++ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL, dict);
+
+ ret = sys_stat(path, &stbuf);
+ if (ret < 0) {
+@@ -818,6 +804,18 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ ret = -EIO;
+ goto *errlbl;
+ }
++ if (dict->count > 0) {
++ ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val,
++ &brick_req.dict.dict_len);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0,
++ GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
++ "Failed to serialize dict "
++ "to request buffer");
++ goto *errlbl;
++ }
++ dict->extra_free = brick_req.dict.dict_val;
++ }
+
+ frame->cookie = svc;
+ frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int);
+@@ -862,6 +860,8 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ GF_ATOMIC_INC(conf->blockers);
+ ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
++ if (dict)
++ dict_unref(dict);
+ GF_FREE(volfile_content);
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+@@ -874,6 +874,9 @@ maybe_free_iobuf:
+ iobuf_unref(iobuf);
+ }
+ err:
++ if (dict)
++ dict_unref(dict);
++
+ GF_FREE(volfile_content);
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 618d8bc..a8098df 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -143,6 +143,8 @@ gd_brick_op_req_free(gd1_mgmt_brick_op_req *req)
+ if (!req)
+ return;
+
++ if (req->dict.dict_val)
++ GF_FREE(req->dict.dict_val);
+ GF_FREE(req->input.input_val);
+ GF_FREE(req);
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 4c487d0..2eb5116 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -5914,6 +5914,8 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path,
+ brick_req.name = path;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
++ brick_req.dict.dict_val = NULL;
++ brick_req.dict.dict_len = 0;
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+@@ -5977,7 +5979,7 @@ err:
+
+ extern size_t
+ build_volfile_path(char *volume_id, char *path, size_t path_len,
+- char *trusted_str);
++ char *trusted_str, dict_t *dict);
+
+ static int
+ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
+@@ -6022,7 +6024,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
+ goto out;
+ }
+
+- (void)build_volfile_path(full_id, path, sizeof(path), NULL);
++ (void)build_volfile_path(full_id, path, sizeof(path), NULL, NULL);
+
+ for (tries = 15; tries > 0; --tries) {
+ rpc = rpc_clnt_ref(other_brick->rpc);
+--
+1.8.3.1
+
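The pidfile handling added above takes a non-blocking lockf() lock on the per-volume pidfile, reads any stored pid, and rewrites the file only when the pid differs. The same pattern in Python, with fcntl.lockf standing in for the C lockf(F_TLOCK) call (a sketch under those assumptions, not the shipped implementation):

    import fcntl
    import os

    def update_pidfile(path, pid):
        fp = open(path, 'a+')      # read/append so an old pid can be read back
        try:
            # Non-blocking exclusive lock, like lockf(fd, F_TLOCK, 0) in C.
            fcntl.lockf(fp.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            fp.close()
            return None            # another process owns the pidfile
        fp.seek(0)
        if fp.read().strip() != str(pid):
            fp.truncate(0)         # drop the stale pid before writing ours
            fp.write('%d\n' % pid)
            fp.flush()
        return fp                  # keep open: closing would release the lock

    pidfp = update_pidfile('/tmp/demo-shd.pid', os.getpid())

Note the handle is kept open for the daemon's lifetime, mirroring the pidfp field added to gf_volfile_t; the lock doubles as a liveness check for whoever reads the file next.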
diff --git a/0228-locks-enable-notify-contention-by-default.patch b/0228-locks-enable-notify-contention-by-default.patch
new file mode 100644
index 0000000..310cd8b
--- /dev/null
+++ b/0228-locks-enable-notify-contention-by-default.patch
@@ -0,0 +1,39 @@
+From 21fe2ef700e76c8b7be40f21d3a4fb6b96eafaf0 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Thu, 6 Jun 2019 08:12:34 +0200
+Subject: [PATCH 228/255] locks: enable notify-contention by default
+
+This patch enables the lock contention notification by default.
+
+Upstream patch:
+> Change-Id: I10131b026a7cb09fc7c93e1e6c8549988c1d7751
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22828
+> BUG: 1717754
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: I10131b026a7cb09fc7c93e1e6c8549988c1d7751
+Fixes: bz#1720488
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174655
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/locks/src/posix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index adb0df5..9db5ac6 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -4796,7 +4796,7 @@ struct volume_options options[] = {
+ "be used in conjunction w/ revocation-clear-all."},
+ {.key = {"notify-contention"},
+ .type = GF_OPTION_TYPE_BOOL,
+- .default_value = "no",
++ .default_value = "yes",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .tags = {"locks", "contention"},
+--
+1.8.3.1
+
diff --git a/0229-glusterd-Show-the-correct-brick-status-in-get-state.patch b/0229-glusterd-Show-the-correct-brick-status-in-get-state.patch
new file mode 100644
index 0000000..112c02e
--- /dev/null
+++ b/0229-glusterd-Show-the-correct-brick-status-in-get-state.patch
@@ -0,0 +1,113 @@
+From 4fc0a77db5b9760fa5c00d3803c6d11a28a00b74 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Wed, 3 Jul 2019 15:22:38 +0530
+Subject: [PATCH 229/255] glusterd: Show the correct brick status in get-state
+
+Problem: get-state does not show the correct brick status if the brick
+         status is not Started; it always shows Started if any value
+         is set in brickinfo->status.
+
+Solution: Check the value of brickinfo->status to show the correct
+          status in get-state.
+
+> Change-Id: I12a79619024c2cf59f338220d144f2f034059b3b
+> fixes: bz#1726906
+> (Cherry pick from commit af989db23d1db00e087f2b9d3dfc43b13ef17153)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22983/)
+
+BUG: 1726991
+Change-Id: I12a79619024c2cf59f338220d144f2f034059b3b
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175355
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 7 +++++--
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 28 ++++++++++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-utils.h | 4 ++++
+ 3 files changed, 37 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index cc1f1df..94e1be5 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -5589,7 +5589,9 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+ char vol_status_str[STATUS_STRLEN] = {
+ 0,
+ };
+-
++ char brick_status_str[STATUS_STRLEN] = {
++ 0,
++ };
+ this = THIS;
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+
+@@ -5852,8 +5854,9 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+ brickinfo->rdma_port);
+ fprintf(fp, "Volume%d.Brick%d.port_registered: %d\n", count_bkp,
+ count, brickinfo->port_registered);
++ glusterd_brick_get_status_str(brickinfo, brick_status_str);
+ fprintf(fp, "Volume%d.Brick%d.status: %s\n", count_bkp, count,
+- brickinfo->status ? "Started" : "Stopped");
++ brick_status_str);
+
+ /*FIXME: This is a hacky way of figuring out whether a
+ * brick belongs to the hot or cold tier */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 2eb5116..3bdfd49 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -13898,6 +13898,34 @@ out:
+ return ret;
+ }
+
++void
++glusterd_brick_get_status_str(glusterd_brickinfo_t *brickinfo, char *status_str)
++{
++ GF_VALIDATE_OR_GOTO(THIS->name, brickinfo, out);
++ GF_VALIDATE_OR_GOTO(THIS->name, status_str, out);
++
++ switch (brickinfo->status) {
++ case GF_BRICK_STOPPED:
++ sprintf(status_str, "%s", "Stopped");
++ break;
++ case GF_BRICK_STARTED:
++ sprintf(status_str, "%s", "Started");
++ break;
++ case GF_BRICK_STARTING:
++ sprintf(status_str, "%s", "Starting");
++ break;
++ case GF_BRICK_STOPPING:
++ sprintf(status_str, "%s", "Stopping");
++ break;
++ default:
++ sprintf(status_str, "%s", "None");
++ break;
++ }
++
++out:
++ return;
++}
++
+ int
+ glusterd_volume_get_transport_type_str(glusterd_volinfo_t *volinfo,
+ char *transport_type_str)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index 6ad8062..5c6a453 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -781,6 +781,10 @@ glusterd_volume_get_type_str(glusterd_volinfo_t *volinfo, char **vol_type_str);
+ int
+ glusterd_volume_get_status_str(glusterd_volinfo_t *volinfo, char *status_str);
+
++void
++glusterd_brick_get_status_str(glusterd_brickinfo_t *brickinfo,
++ char *status_str);
++
+ int
+ glusterd_volume_get_transport_type_str(glusterd_volinfo_t *volinfo,
+ char *transport_type_str);
+--
+1.8.3.1
+
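The helper added above replaces a boolean check with an explicit mapping from the brick status enum to its display string. The equivalent lookup expressed in Python (the numeric values below are illustrative stand-ins for the gf_brick_status_t constants):

    # Illustrative stand-ins for the gf_brick_status_t enum values.
    GF_BRICK_STOPPED, GF_BRICK_STARTED, GF_BRICK_STARTING, GF_BRICK_STOPPING = range(4)

    _STATUS_STR = {
        GF_BRICK_STOPPED: 'Stopped',
        GF_BRICK_STARTED: 'Started',
        GF_BRICK_STARTING: 'Starting',
        GF_BRICK_STOPPING: 'Stopping',
    }

    def brick_status_str(status):
        # Unknown values fall back to "None", matching the C default case,
        # instead of collapsing every non-zero value to "Started".
        return _STATUS_STR.get(status, 'None')

    assert brick_status_str(GF_BRICK_STOPPING) == 'Stopping'  # old code said "Started"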
diff --git a/0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch b/0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch
new file mode 100644
index 0000000..a9847ed
--- /dev/null
+++ b/0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch
@@ -0,0 +1,893 @@
+From 308fe0d81dbef9f84bb1ad8e7309e3ffc28d6394 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:37:29 +0530
+Subject: [PATCH 230/255] Revert "glusterd/svc: update pid of mux volumes from
+ the shd process"
+
+This reverts commit b0815b8a84a07d17a1215c55afc38888ee9fc37c.
+Label : DOWNSTREAM ONLY
+
+BUG: 1471742
+Change-Id: Iab11c686565e9a9c852f2b7c2d236fa1a348f96a
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175940
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfsd/src/gf_attach.c | 2 -
+ glusterfsd/src/glusterfsd-mgmt.c | 66 ++-------
+ libglusterfs/src/glusterfs/glusterfs.h | 2 +-
+ libglusterfs/src/glusterfs/libglusterfs-messages.h | 3 +-
+ libglusterfs/src/graph.c | 154 +--------------------
+ rpc/xdr/src/glusterd1-xdr.x | 1 -
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 2 -
+ xlators/mgmt/glusterd/src/glusterd-handshake.c | 42 +-----
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 -
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 25 ----
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 3 -
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 57 ++++----
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 -
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 6 +-
+ 15 files changed, 52 insertions(+), 325 deletions(-)
+
+diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
+index 1bff854..6293b9b 100644
+--- a/glusterfsd/src/gf_attach.c
++++ b/glusterfsd/src/gf_attach.c
+@@ -65,8 +65,6 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
+ brick_req.name = path;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
+- brick_req.dict.dict_val = NULL;
+- brick_req.dict.dict_len = 0;
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
+index f930e0a..1d2cd1a 100644
+--- a/glusterfsd/src/glusterfsd-mgmt.c
++++ b/glusterfsd/src/glusterfsd-mgmt.c
+@@ -50,16 +50,13 @@ int
+ emancipate(glusterfs_ctx_t *ctx, int ret);
+ int
+ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum,
+- dict_t *dict);
++ char *volfile_id, char *checksum);
+ int
+ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+- gf_volfile_t *volfile_obj, char *checksum,
+- dict_t *dict);
++ gf_volfile_t *volfile_obj, char *checksum);
+ int
+ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum,
+- dict_t *dict);
++ char *volfile_id, char *checksum);
+ int
+ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj);
+
+@@ -78,8 +75,7 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+ }
+
+ int
+-mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id,
+- dict_t *dict)
++mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id)
+ {
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+@@ -149,11 +145,11 @@ mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id,
+ * the volfile
+ */
+ ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id,
+- sha256_hash, dict);
++ sha256_hash);
+ goto unlock;
+ }
+ ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj,
+- sha256_hash, dict);
++ sha256_hash);
+ if (ret < 0) {
+ gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!");
+ }
+@@ -391,8 +387,6 @@ err:
+ UNLOCK(&ctx->volfile_lock);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+- if (xlator_req.dict.dict_val)
+- free(xlator_req.dict.dict_val);
+ free(xlator_req.name);
+ xlator_req.name = NULL;
+ return 0;
+@@ -567,8 +561,6 @@ out:
+
+ free(xlator_req.name);
+ free(xlator_req.input.input_val);
+- if (xlator_req.dict.dict_val)
+- free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ if (dict)
+@@ -990,8 +982,6 @@ out:
+ if (input)
+ dict_unref(input);
+ free(xlator_req.input.input_val); /*malloced by xdr*/
+- if (xlator_req.dict.dict_val)
+- free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name);
+@@ -1072,8 +1062,6 @@ glusterfs_handle_attach(rpcsvc_request_t *req)
+ out:
+ UNLOCK(&ctx->volfile_lock);
+ }
+- if (xlator_req.dict.dict_val)
+- free(xlator_req.dict.dict_val);
+ free(xlator_req.input.input_val);
+ free(xlator_req.name);
+
+@@ -1089,7 +1077,6 @@ glusterfs_handle_svc_attach(rpcsvc_request_t *req)
+ };
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+- dict_t *dict = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+@@ -1104,41 +1091,20 @@ glusterfs_handle_svc_attach(rpcsvc_request_t *req)
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+-
+ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41,
+ "received attach "
+ "request for volfile-id=%s",
+ xlator_req.name);
+-
+- dict = dict_new();
+- if (!dict) {
+- ret = -1;
+- errno = ENOMEM;
+- goto out;
+- }
+-
+- ret = dict_unserialize(xlator_req.dict.dict_val, xlator_req.dict.dict_len,
+- &dict);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42,
+- "failed to unserialize xdata to dictionary");
+- goto out;
+- }
+- dict->extra_stdfree = xlator_req.dict.dict_val;
+-
+ ret = 0;
+
+ if (ctx->active) {
+ ret = mgmt_process_volfile(xlator_req.input.input_val,
+- xlator_req.input.input_len, xlator_req.name,
+- dict);
++ xlator_req.input.input_len, xlator_req.name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42,
+ "got attach for %s but no active graph", xlator_req.name);
+ }
+ out:
+- if (dict)
+- dict_unref(dict);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.name)
+@@ -1275,8 +1241,6 @@ out:
+ GF_FREE(filepath);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+- if (xlator_req.dict.dict_val)
+- free(xlator_req.dict.dict_val);
+
+ return ret;
+ }
+@@ -1349,8 +1313,6 @@ out:
+ if (dict)
+ dict_unref(dict);
+ free(xlator_req.input.input_val); // malloced by xdr
+- if (xlator_req.dict.dict_val)
+- free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+@@ -1499,8 +1461,6 @@ out:
+ if (output)
+ dict_unref(output);
+ free(brick_req.input.input_val);
+- if (brick_req.dict.dict_val)
+- free(brick_req.dict.dict_val);
+ free(brick_req.name);
+ GF_FREE(xname);
+ GF_FREE(msg);
+@@ -1694,8 +1654,6 @@ out:
+ if (dict)
+ dict_unref(dict);
+ free(node_req.input.input_val);
+- if (node_req.dict.dict_val)
+- free(node_req.dict.dict_val);
+ GF_FREE(msg);
+ GF_FREE(rsp.output.output_val);
+ GF_FREE(node_name);
+@@ -1799,8 +1757,6 @@ glusterfs_handle_nfs_profile(rpcsvc_request_t *req)
+
+ out:
+ free(nfs_req.input.input_val);
+- if (nfs_req.dict.dict_val)
+- free(nfs_req.dict.dict_val);
+ if (dict)
+ dict_unref(dict);
+ if (output)
+@@ -1879,8 +1835,6 @@ out:
+ if (dict)
+ dict_unref(dict);
+ free(xlator_req.input.input_val); // malloced by xdr
+- if (xlator_req.dict.dict_val)
+- free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+@@ -2009,8 +1963,7 @@ out:
+ if (dict)
+ dict_unref(dict);
+ free(brick_req.input.input_val);
+- if (brick_req.dict.dict_val)
+- free(brick_req.dict.dict_val);
++
+ gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+ }
+@@ -2260,8 +2213,7 @@ volfile:
+ size = rsp.op_ret;
+ volfile_id = frame->local;
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+- ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id,
+- dict);
++ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id);
+ goto post_graph_mgmt;
+ }
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index b6327b8..9ec2365 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -744,7 +744,7 @@ typedef struct {
+ char vol_id[NAME_MAX + 1];
+ struct list_head volfile_list;
+ glusterfs_graph_t *graph;
+- FILE *pidfp;
++
+ } gf_volfile_t;
+
+ glusterfs_ctx_t *
+diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h
+index 7e0eebb..ea2aa60 100644
+--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h
++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h
+@@ -111,7 +111,6 @@ GLFS_MSGID(
+ LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG,
+ LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE,
+ LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED,
+- LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED,
+- LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED);
++ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED);
+
+ #endif /* !_LG_MESSAGES_H_ */
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 05f76bf..172dc61 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1467,21 +1467,6 @@ out:
+ }
+
+ int
+-glusterfs_svc_mux_pidfile_cleanup(gf_volfile_t *volfile_obj)
+-{
+- if (!volfile_obj || !volfile_obj->pidfp)
+- return 0;
+-
+- gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", volfile_obj->vol_id);
+-
+- lockf(fileno(volfile_obj->pidfp), F_ULOCK, 0);
+- fclose(volfile_obj->pidfp);
+- volfile_obj->pidfp = NULL;
+-
+- return 0;
+-}
+-
+-int
+ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+ {
+ xlator_t *last_xl = NULL;
+@@ -1517,7 +1502,6 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+
+ list_del_init(&volfile_obj->volfile_list);
+ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
+- glusterfs_svc_mux_pidfile_cleanup(volfile_obj);
+ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+ parent_graph->xl_count -= graph->xl_count;
+ parent_graph->leaf_count -= graph->leaf_count;
+@@ -1547,126 +1531,8 @@ out:
+ }
+
+ int
+-glusterfs_svc_mux_pidfile_setup(gf_volfile_t *volfile_obj, const char *pid_file)
+-{
+- int ret = -1;
+- FILE *pidfp = NULL;
+-
+- if (!pid_file || !volfile_obj)
+- goto out;
+-
+- if (volfile_obj->pidfp) {
+- ret = 0;
+- goto out;
+- }
+- pidfp = fopen(pid_file, "a+");
+- if (!pidfp) {
+- goto out;
+- }
+- volfile_obj->pidfp = pidfp;
+-
+- ret = lockf(fileno(pidfp), F_TLOCK, 0);
+- if (ret) {
+- ret = 0;
+- goto out;
+- }
+-out:
+- return ret;
+-}
+-
+-int
+-glusterfs_svc_mux_pidfile_update(gf_volfile_t *volfile_obj,
+- const char *pid_file, pid_t pid)
+-{
+- int ret = 0;
+- FILE *pidfp = NULL;
+- int old_pid;
+-
+- if (!volfile_obj->pidfp) {
+- ret = glusterfs_svc_mux_pidfile_setup(volfile_obj, pid_file);
+- if (ret == -1)
+- goto out;
+- }
+- pidfp = volfile_obj->pidfp;
+- ret = fscanf(pidfp, "%d", &old_pid);
+- if (ret <= 0) {
+- goto update;
+- }
+- if (old_pid == pid) {
+- ret = 0;
+- goto out;
+- } else {
+- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+- "Old pid=%d found in pidfile %s. Cleaning the old pid and "
+- "Updating new pid=%d",
+- old_pid, pid_file, pid);
+- }
+-update:
+- ret = sys_ftruncate(fileno(pidfp), 0);
+- if (ret) {
+- gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+- LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+- "pidfile %s truncation failed", pid_file);
+- goto out;
+- }
+-
+- ret = fprintf(pidfp, "%d\n", pid);
+- if (ret <= 0) {
+- gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+- LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
+- pid_file);
+- goto out;
+- }
+-
+- ret = fflush(pidfp);
+- if (ret) {
+- gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+- LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
+- pid_file);
+- goto out;
+- }
+-out:
+- return ret;
+-}
+-
+-int
+-glusterfs_update_mux_pid(dict_t *dict, gf_volfile_t *volfile_obj)
+-{
+- char *file = NULL;
+- int ret = -1;
+-
+- GF_VALIDATE_OR_GOTO("graph", dict, out);
+- GF_VALIDATE_OR_GOTO("graph", volfile_obj, out);
+-
+- ret = dict_get_str(dict, "pidfile", &file);
+- if (ret < 0) {
+- gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+- "Failed to get pidfile from dict for volfile_id=%s",
+- volfile_obj->vol_id);
+- }
+-
+- ret = glusterfs_svc_mux_pidfile_update(volfile_obj, file, getpid());
+- if (ret < 0) {
+- ret = -1;
+- gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+- "Failed to update "
+- "the pidfile for volfile_id=%s",
+- volfile_obj->vol_id);
+-
+- goto out;
+- }
+-
+- if (ret == 1)
+- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+- "PID %d updated in pidfile=%s", getpid(), file);
+- ret = 0;
+-out:
+- return ret;
+-}
+-int
+ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum,
+- dict_t *dict)
++ char *volfile_id, char *checksum)
+ {
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *parent_graph = NULL;
+@@ -1749,25 +1615,18 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ ret = -1;
+ goto out;
+ }
+- volfile_obj->pidfp = NULL;
+- snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
+- volfile_id);
+-
+- if (strcmp(ctx->cmd_args.process_name, "glustershd") == 0) {
+- ret = glusterfs_update_mux_pid(dict, volfile_obj);
+- if (ret == -1) {
+- goto out;
+- }
+- }
+
+ graph->used = 1;
+ parent_graph->id++;
+ list_add(&graph->list, &ctx->graphs);
+ INIT_LIST_HEAD(&volfile_obj->volfile_list);
+ volfile_obj->graph = graph;
++ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
++ volfile_id);
+ memcpy(volfile_obj->volfile_checksum, checksum,
+ sizeof(volfile_obj->volfile_checksum));
+ list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list);
++
+ gf_log_dump_graph(fp, graph);
+ graph = NULL;
+
+@@ -1795,8 +1654,7 @@ out:
+
+ int
+ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+- gf_volfile_t *volfile_obj, char *checksum,
+- dict_t *dict)
++ gf_volfile_t *volfile_obj, char *checksum)
+ {
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+@@ -1845,7 +1703,7 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ }
+ volfile_obj = NULL;
+ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id,
+- checksum, dict);
++ checksum);
+ goto out;
+ }
+
+diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x
+index 02ebec2..9b36d34 100644
+--- a/rpc/xdr/src/glusterd1-xdr.x
++++ b/rpc/xdr/src/glusterd1-xdr.x
+@@ -132,7 +132,6 @@ struct gd1_mgmt_brick_op_req {
+ string name<>;
+ int op;
+ opaque input<>;
+- opaque dict<>;
+ } ;
+
+ struct gd1_mgmt_brick_op_rsp {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 94e1be5..ac788a0 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -5423,8 +5423,6 @@ glusterd_print_client_details(FILE *fp, dict_t *dict,
+
+ brick_req->op = GLUSTERD_BRICK_STATUS;
+ brick_req->name = "";
+- brick_req->dict.dict_val = NULL;
+- brick_req->dict.dict_len = 0;
+
+ ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
+ brickinfo->path);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+index 86dec82..1ba58c3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+@@ -203,7 +203,7 @@ out:
+
+ size_t
+ build_volfile_path(char *volume_id, char *path, size_t path_len,
+- char *trusted_str, dict_t *dict)
++ char *trusted_str)
+ {
+ struct stat stbuf = {
+ 0,
+@@ -340,19 +340,11 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
+
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+ if (ret == -1) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+- "Couldn't find volinfo for volid=%s", volid_ptr);
++ gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
+ goto out;
+ }
+
+ glusterd_svc_build_shd_volfile_path(volinfo, path, path_len);
+-
+- ret = glusterd_svc_set_shd_pidfile(volinfo, dict);
+- if (ret == -1) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+- "Couldn't set pidfile in dict for volid=%s", volid_ptr);
+- goto out;
+- }
+ ret = 0;
+ goto out;
+ }
+@@ -927,7 +919,6 @@ __server_getspec(rpcsvc_request_t *req)
+ char addrstr[RPCSVC_PEER_STRLEN] = {0};
+ peer_info_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+- dict_t *dict = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -980,12 +971,6 @@ __server_getspec(rpcsvc_request_t *req)
+ goto fail;
+ }
+
+- dict = dict_new();
+- if (!dict) {
+- ret = -ENOMEM;
+- goto fail;
+- }
+-
+ trans = req->trans;
+ /* addrstr will be empty for cli socket connections */
+ ret = rpcsvc_transport_peername(trans, (char *)&addrstr, sizeof(addrstr));
+@@ -1004,26 +989,12 @@ __server_getspec(rpcsvc_request_t *req)
+ */
+ if (strlen(addrstr) == 0 || gf_is_local_addr(addrstr)) {
+ ret = build_volfile_path(volume, filename, sizeof(filename),
+- TRUSTED_PREFIX, dict);
++ TRUSTED_PREFIX);
+ } else {
+- ret = build_volfile_path(volume, filename, sizeof(filename), NULL,
+- dict);
++ ret = build_volfile_path(volume, filename, sizeof(filename), NULL);
+ }
+
+ if (ret == 0) {
+- if (dict->count > 0) {
+- ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val,
+- &rsp.xdata.xdata_len);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0,
+- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+- "Failed to serialize dict "
+- "to request buffer");
+- goto fail;
+- }
+- dict->extra_free = rsp.xdata.xdata_val;
+- }
+-
+ /* to allocate the proper buffer to hold the file data */
+ ret = sys_stat(filename, &stbuf);
+ if (ret < 0) {
+@@ -1065,6 +1036,7 @@ __server_getspec(rpcsvc_request_t *req)
+ goto fail;
+ }
+ }
++
+ /* convert to XDR */
+ fail:
+ if (spec_fd >= 0)
+@@ -1084,10 +1056,6 @@ fail:
+ (xdrproc_t)xdr_gf_getspec_rsp);
+ free(args.key); // malloced by xdr
+ free(rsp.spec);
+-
+- if (dict)
+- dict_unref(dict);
+-
+ if (args.xdata.xdata_val)
+ free(args.xdata.xdata_val);
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 454877b..9ea695e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -655,8 +655,6 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
+ break;
+ }
+
+- brick_req->dict.dict_len = 0;
+- brick_req->dict.dict_val = NULL;
+ ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
+ if (ret)
+@@ -725,8 +723,6 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
+ goto out;
+ }
+
+- brick_req->dict.dict_len = 0;
+- brick_req->dict.dict_val = NULL;
+ ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+index 5661e39..57ceda9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+@@ -126,28 +126,3 @@ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
+ out:
+ return;
+ }
+-
+-int
+-glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict)
+-{
+- int ret = -1;
+- glusterd_svc_t *svc = NULL;
+- xlator_t *this = NULL;
+-
+- this = THIS;
+- GF_VALIDATE_OR_GOTO("glusterd", this, out);
+- GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+- GF_VALIDATE_OR_GOTO(this->name, dict, out);
+-
+- svc = &(volinfo->shd.svc);
+-
+- ret = dict_set_dynstr_with_alloc(dict, "pidfile", svc->proc.pidfile);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+- "Failed to set pidfile %s in dict", svc->proc.pidfile);
+- goto out;
+- }
+- ret = 0;
+-out:
+- return ret;
+-}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+index 1f0984b..59466ec 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+@@ -36,7 +36,4 @@ glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
+ int
+ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo);
+
+-int
+-glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict);
+-
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 590169f..8ad90a9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -258,20 +258,14 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ gf_boolean_t shd_restart = _gf_false;
+
+ conf = THIS->private;
++ volinfo = data;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+- volinfo = data;
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ if (volinfo)
+ glusterd_volinfo_ref(volinfo);
+
+- if (volinfo->is_snap_volume) {
+- /* healing of a snap volume is not supported yet*/
+- ret = 0;
+- goto out;
+- }
+-
+ while (conf->restart_shd) {
+ synclock_unlock(&conf->big_lock);
+ sleep(2);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index e106111..400826f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -519,7 +519,7 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+ /* Take first entry from the process */
+ parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
+ mux_svc);
+- glusterd_copy_file(parent_svc->proc.pidfile, svc->proc.pidfile);
++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
+ mux_conn = &parent_svc->conn;
+ if (volinfo)
+ volinfo->shd.attached = _gf_true;
+@@ -623,9 +623,12 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_shdsvc_t *shd = NULL;
+ glusterd_svc_t *svc = frame->cookie;
++ glusterd_svc_t *parent_svc = NULL;
++ glusterd_svc_proc_t *mux_proc = NULL;
+ glusterd_conf_t *conf = NULL;
+ int *flag = (int *)frame->local;
+ xlator_t *this = THIS;
++ int pid = -1;
+ int ret = -1;
+ gf_getspec_rsp rsp = {
+ 0,
+@@ -676,7 +679,27 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ }
+
+ if (rsp.op_ret == 0) {
+- svc->online = _gf_true;
++ pthread_mutex_lock(&conf->attach_lock);
++ {
++ if (!strcmp(svc->name, "glustershd")) {
++ mux_proc = svc->svc_proc;
++ if (mux_proc &&
++ !gf_is_service_running(svc->proc.pidfile, &pid)) {
++ /*
++ * When svcs are restarting, there is a chance that the
++ * attached svc might not have updated its pid because it
++ * was still at the connection stage. In that case, we need
++ * to retry the pid file copy.
++ */
++ parent_svc = cds_list_entry(mux_proc->svcs.next,
++ glusterd_svc_t, mux_svc);
++ if (parent_svc)
++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
++ }
++ }
++ svc->online = _gf_true;
++ }
++ pthread_mutex_unlock(&conf->attach_lock);
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "svc %s of volume %s attached successfully to pid %d", svc->name,
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+@@ -703,7 +726,7 @@ out:
+
+ extern size_t
+ build_volfile_path(char *volume_id, char *path, size_t path_len,
+- char *trusted_str, dict_t *dict);
++ char *trusted_str);
+
+ int
+ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+@@ -728,7 +751,6 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+ gd1_mgmt_brick_op_req brick_req;
+- dict_t *dict = NULL;
+ void *req = &brick_req;
+ void *errlbl = &&err;
+ struct rpc_clnt_connection *conn;
+@@ -754,8 +776,6 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ brick_req.name = volfile_id;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
+- brick_req.dict.dict_val = NULL;
+- brick_req.dict.dict_len = 0;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+@@ -763,13 +783,7 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ }
+
+ if (op == GLUSTERD_SVC_ATTACH) {
+- dict = dict_new();
+- if (!dict) {
+- ret = -ENOMEM;
+- goto *errlbl;
+- }
+-
+- (void)build_volfile_path(volfile_id, path, sizeof(path), NULL, dict);
++ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL);
+
+ ret = sys_stat(path, &stbuf);
+ if (ret < 0) {
+@@ -804,18 +818,6 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ ret = -EIO;
+ goto *errlbl;
+ }
+- if (dict->count > 0) {
+- ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val,
+- &brick_req.dict.dict_len);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0,
+- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+- "Failed to serialize dict "
+- "to request buffer");
+- goto *errlbl;
+- }
+- dict->extra_free = brick_req.dict.dict_val;
+- }
+
+ frame->cookie = svc;
+ frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int);
+@@ -860,8 +862,6 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ GF_ATOMIC_INC(conf->blockers);
+ ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
+- if (dict)
+- dict_unref(dict);
+ GF_FREE(volfile_content);
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+@@ -874,9 +874,6 @@ maybe_free_iobuf:
+ iobuf_unref(iobuf);
+ }
+ err:
+- if (dict)
+- dict_unref(dict);
+-
+ GF_FREE(volfile_content);
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index a8098df..618d8bc 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -143,8 +143,6 @@ gd_brick_op_req_free(gd1_mgmt_brick_op_req *req)
+ if (!req)
+ return;
+
+- if (req->dict.dict_val)
+- GF_FREE(req->dict.dict_val);
+ GF_FREE(req->input.input_val);
+ GF_FREE(req);
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 3bdfd49..4525ec7 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -5914,8 +5914,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path,
+ brick_req.name = path;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
+- brick_req.dict.dict_val = NULL;
+- brick_req.dict.dict_len = 0;
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+@@ -5979,7 +5977,7 @@ err:
+
+ extern size_t
+ build_volfile_path(char *volume_id, char *path, size_t path_len,
+- char *trusted_str, dict_t *dict);
++ char *trusted_str);
+
+ static int
+ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
+@@ -6024,7 +6022,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
+ goto out;
+ }
+
+- (void)build_volfile_path(full_id, path, sizeof(path), NULL, NULL);
++ (void)build_volfile_path(full_id, path, sizeof(path), NULL);
+
+ for (tries = 15; tries > 0; --tries) {
+ rpc = rpc_clnt_ref(other_brick->rpc);
+--
+1.8.3.1
+
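Note on the patch above: with the dict-based pidfile exchange reverted, the
attach callback falls back to sharing the parent shd's pidfile via a hard
link, retrying the link when the attached service has not yet recorded a
running pid. A minimal sketch of that retry path in C, assuming the
glusterd_svc_t / glusterd_svc_proc_t fields and helpers named in the hunks
above (an illustration of the logic, not the exact tree state):

    /* Sketch: re-link the parent's pidfile for a muxed glustershd service
     * whose pid is not yet recorded (e.g. the attach raced with a restart).
     * Locking and list layout follow the hunks above; names are assumed. */
    static void
    shd_attach_retry_pidfile(glusterd_svc_t *svc, glusterd_conf_t *conf)
    {
        glusterd_svc_proc_t *mux_proc = NULL;
        glusterd_svc_t *parent_svc = NULL;
        int pid = -1;

        pthread_mutex_lock(&conf->attach_lock);
        {
            mux_proc = svc->svc_proc;
            if (mux_proc && !gf_is_service_running(svc->proc.pidfile, &pid)) {
                /* The first entry in the mux process owns the live pidfile;
                 * hard-link it so this svc shares the same pid on disk. */
                parent_svc = cds_list_entry(mux_proc->svcs.next,
                                            glusterd_svc_t, mux_svc);
                if (parent_svc)
                    sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
            }
            svc->online = _gf_true;
        }
        pthread_mutex_unlock(&conf->attach_lock);
    }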
diff --git a/0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch b/0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch
new file mode 100644
index 0000000..3b794c9
--- /dev/null
+++ b/0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch
@@ -0,0 +1,180 @@
+From 21f376939f03f91214218c485e7d3a2848dae4b2 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:43:44 +0530
+Subject: [PATCH 231/255] Revert "graph/shd: Use top down approach while
+ cleaning xlator"
+
+This reverts commit b963fa8bb71963127147d33bf609f439dd5bd107.
+
+Label : DOWNSTREAM ONLY
+
+BUG: 1471742
+Change-Id: Ifb8056395c5988cf7c484891bea052f5415bf9da
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175941
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/graph.c | 10 +---------
+ xlators/features/bit-rot/src/stub/bit-rot-stub.c | 1 -
+ xlators/features/changelog/src/changelog.c | 1 -
+ xlators/features/cloudsync/src/cloudsync.c | 4 +---
+ xlators/features/index/src/index.c | 1 -
+ xlators/features/quiesce/src/quiesce.c | 1 -
+ xlators/features/read-only/src/worm.c | 1 -
+ xlators/features/sdfs/src/sdfs.c | 1 -
+ xlators/features/selinux/src/selinux.c | 2 --
+ xlators/features/trash/src/trash.c | 1 -
+ 10 files changed, 2 insertions(+), 21 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 172dc61..5b95fd6 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1193,14 +1193,6 @@ glusterfs_graph_fini(glusterfs_graph_t *graph)
+ if (trav->init_succeeded) {
+ trav->cleanup_starting = 1;
+ trav->fini(trav);
+- if (trav->local_pool) {
+- mem_pool_destroy(trav->local_pool);
+- trav->local_pool = NULL;
+- }
+- if (trav->itable) {
+- inode_table_destroy(trav->itable);
+- trav->itable = NULL;
+- }
+ trav->init_succeeded = 0;
+ }
+ trav = trav->next;
+@@ -1402,7 +1394,7 @@ glusterfs_graph_cleanup(void *arg)
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+- glusterfs_graph_fini(graph);
++ glusterfs_graph_deactivate(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+index 03446be..3f48a4b 100644
+--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+@@ -185,7 +185,6 @@ cleanup_lock:
+ pthread_mutex_destroy(&priv->lock);
+ free_mempool:
+ mem_pool_destroy(priv->local_pool);
+- priv->local_pool = NULL;
+ free_priv:
+ GF_FREE(priv);
+ this->private = NULL;
+diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
+index 2862d1e..d9025f3 100644
+--- a/xlators/features/changelog/src/changelog.c
++++ b/xlators/features/changelog/src/changelog.c
+@@ -2790,7 +2790,6 @@ cleanup_options:
+ changelog_freeup_options(this, priv);
+ cleanup_mempool:
+ mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
+ cleanup_priv:
+ GF_FREE(priv);
+ error_return:
+diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
+index 0ad987e..26e512c 100644
+--- a/xlators/features/cloudsync/src/cloudsync.c
++++ b/xlators/features/cloudsync/src/cloudsync.c
+@@ -200,10 +200,8 @@ cs_init(xlator_t *this)
+
+ out:
+ if (ret == -1) {
+- if (this->local_pool) {
++ if (this->local_pool)
+ mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
+- }
+
+ cs_cleanup_private(priv);
+
+diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
+index 4ece7ff..2f2a6d0 100644
+--- a/xlators/features/index/src/index.c
++++ b/xlators/features/index/src/index.c
+@@ -2478,7 +2478,6 @@ out:
+ GF_FREE(priv);
+ this->private = NULL;
+ mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
+ }
+
+ if (attr_inited)
+diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
+index 06f58c9..bfd1116 100644
+--- a/xlators/features/quiesce/src/quiesce.c
++++ b/xlators/features/quiesce/src/quiesce.c
+@@ -2536,7 +2536,6 @@ fini(xlator_t *this)
+ this->private = NULL;
+
+ mem_pool_destroy(priv->local_pool);
+- priv->local_pool = NULL;
+ LOCK_DESTROY(&priv->lock);
+ GF_FREE(priv);
+ out:
+diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
+index 7d13180..24196f8 100644
+--- a/xlators/features/read-only/src/worm.c
++++ b/xlators/features/read-only/src/worm.c
+@@ -569,7 +569,6 @@ fini(xlator_t *this)
+ mem_put(priv);
+ this->private = NULL;
+ mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
+ out:
+ return;
+ }
+diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c
+index 164c632..f0247fd 100644
+--- a/xlators/features/sdfs/src/sdfs.c
++++ b/xlators/features/sdfs/src/sdfs.c
+@@ -1429,7 +1429,6 @@ void
+ fini(xlator_t *this)
+ {
+ mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
+ return;
+ }
+
+diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c
+index ce5fc90..58b4c5d 100644
+--- a/xlators/features/selinux/src/selinux.c
++++ b/xlators/features/selinux/src/selinux.c
+@@ -256,7 +256,6 @@ out:
+ GF_FREE(priv);
+ }
+ mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
+ }
+ return ret;
+ }
+@@ -285,7 +284,6 @@ fini(xlator_t *this)
+ GF_FREE(priv);
+
+ mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
+
+ return;
+ }
+diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
+index eb5007b..d668436 100644
+--- a/xlators/features/trash/src/trash.c
++++ b/xlators/features/trash/src/trash.c
+@@ -2523,7 +2523,6 @@ out:
+ GF_FREE(priv);
+ }
+ mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
+ }
+ return ret;
+ }
+--
+1.8.3.1
+
diff --git a/0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch b/0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch
new file mode 100644
index 0000000..b2a8f4c
--- /dev/null
+++ b/0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch
@@ -0,0 +1,228 @@
+From 3ddf12d0710e048878fcf8786d05efe18710c74c Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Fri, 12 Jul 2019 16:44:20 +0530
+Subject: [PATCH 232/255] cluster/afr: Fix incorrect reporting of gfid & type
+ mismatch
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22908/
+
+Problems:
+1. When checking for a type and gfid mismatch, if the type or gfid
+is unknown because the gfid handle and the gfid xattr are missing,
+it will be reported as a type or gfid mismatch and the heal will
+not complete.
+
+2. If the source selected during entry heal has a null gfid, that
+same gfid will be sent to afr_lookup_and_heal_gfid(). When this
+function tries to assign the gfid on the bricks where it does not
+exist, it considers that same null gfid and tries to assign it on
+those bricks. This will fail in posix_gfid_set() since the gfid
+sent is null.
+
+Fix:
+If the gfid sent to afr_lookup_and_heal_gfid() is null, choose a
+valid gfid before proceeding to assign the gfid on the bricks
+where it is missing.
+
+In afr_selfheal_detect_gfid_and_type_mismatch(), do not report
+type/gfid mismatch if the type/gfid is unknown or not set.
+
+Change-Id: Icdb4967c09a48e0a3a64ce4948d5fb0a06d7a7af
+fixes: bz#1715447
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175966
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bug-1722507-type-mismatch-error-handling.t | 116 +++++++++++++++++++++
+ xlators/cluster/afr/src/afr-self-heal-common.c | 12 ++-
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 13 +++
+ 3 files changed, 139 insertions(+), 2 deletions(-)
+ create mode 100644 tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
+
+diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
+new file mode 100644
+index 0000000..0aeaaaf
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
+@@ -0,0 +1,116 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++## Start and create a volume
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume start $V0;
++TEST $CLI volume set $V0 cluster.heal-timeout 5
++TEST $CLI volume heal $V0 disable
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST mkdir $M0/dir
++
++##########################################################################################
++# The GFID link file and the GFID are missing on one brick and all the bricks are being blamed.
++
++TEST touch $M0/dir/file
++#TEST kill_brick $V0 $H0 $B0/$V0"1"
++
++#B0 and B2 must blame B1
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++
++# Add entry to xattrop dir to trigger index heal.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
++ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++EXPECT "^1$" get_pending_heal_count $V0
++
++# Remove the gfid xattr and the link file on one brick.
++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++
++# Launch heal
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++
++# Wait for 2 seconds to force posix to consider this a valid file that is
++# just missing its gfid.
++sleep 2
++TEST $CLI volume heal $V0
++
++# Heal should not fail, as the file is only missing the gfid xattr and the
++# link file, which is not actually a gfid or type mismatch.
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++rm -f $M0/dir/file
++
++
++###########################################################################################
++# The GFID link file and the GFID are missing on two bricks and all the bricks are being blamed.
++
++TEST $CLI volume heal $V0 disable
++TEST touch $M0/dir/file
++#TEST kill_brick $V0 $H0 $B0/$V0"1"
++
++#B0 and B2 must blame B1
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++
++# Add entry to xattrop dir to trigger index heal.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
++ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++EXPECT "^1$" get_pending_heal_count $V0
++
++# Remove the gfid xattr and the link file on two bricks.
++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
++TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++
++# Launch heal
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++
++# Wait for 2 seconds to force posix to consider this a valid file that is
++# just missing its gfid.
++sleep 2
++TEST $CLI volume heal $V0
++
++# Heal should not fail, as the file is only missing the gfid xattr and the
++# link file, which is not actually a gfid or type mismatch.
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++
++cleanup
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 5157e7d..b38085a 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -55,7 +55,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ /* case (a) above. */
+- if (replies[i].valid && replies[i].op_ret == 0) {
++ if (replies[i].valid && replies[i].op_ret == 0 &&
++ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+@@ -63,7 +64,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
+ /* case (b) above. */
+ if (i == source)
+ continue;
+- if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
++ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
++ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+@@ -77,6 +79,12 @@ heal:
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
++
++ if (gf_uuid_is_null(gfid) &&
++ !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
++ replies[i].poststat.ia_type == ia_type)
++ gfid = replies[i].poststat.ia_gfid;
++
+ if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
+ replies[i].poststat.ia_type != ia_type)
+ continue;
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index a6890fa..e07b521 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -246,6 +246,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
+ if (replies[i].op_ret != 0)
+ continue;
+
++ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
++ continue;
++
++ if (replies[i].poststat.ia_type == IA_INVAL)
++ continue;
++
++ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
++ src_idx = i;
++ ia_type = replies[src_idx].poststat.ia_type;
++ gfid = &replies[src_idx].poststat.ia_gfid;
++ continue;
++ }
++
+ if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
+ (ia_type == replies[i].poststat.ia_type)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+--
+1.8.3.1
+
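Note on the patch above: the guards added in afr reduce to one rule — a
reply whose gfid or type is unknown can neither be chosen as the source nor
be reported as a mismatch. A minimal sketch of that selection loop in C,
assuming the replies[] layout referenced in the hunks above (illustrative
only; names outside the hunks are assumed):

    /* Sketch: pick the first reply with a known gfid and type as the
     * comparison source; skip replies with unknown gfid/type so they are
     * never flagged as mismatches. Mirrors the guards in the patch above. */
    static int
    pick_known_source(struct afr_reply *replies, int child_count,
                      ia_type_t *ia_type, uuid_t gfid)
    {
        int i = 0;
        int src_idx = -1;

        for (i = 0; i < child_count; i++) {
            if (!replies[i].valid || replies[i].op_ret != 0)
                continue;
            if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
                continue; /* gfid unknown: not a source, not a mismatch */
            if (replies[i].poststat.ia_type == IA_INVAL)
                continue; /* type unknown: not a source, not a mismatch */
            if (src_idx == -1) {
                src_idx = i; /* first known reply becomes the source */
                *ia_type = replies[i].poststat.ia_type;
                gf_uuid_copy(gfid, replies[i].poststat.ia_gfid);
                continue;
            }
            /* ... compare replies[i] against (gfid, *ia_type) here and
             * report only genuine mismatches, as in the hunk above ... */
        }
        return src_idx;
    }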
diff --git a/0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch b/0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch
new file mode 100644
index 0000000..d8e6933
--- /dev/null
+++ b/0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch
@@ -0,0 +1,78 @@
+From 5c85ce7363b658bc8fa643742626109efe3ade0c Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:44:04 +0530
+Subject: [PATCH 233/255] Revert "graph/shd: Use glusterfs_graph_deactivate to
+ free the xl rec"
+
+This reverts commit 8cc6d8af00303c445b94715c92fe9e3e01edb867.
+
+BUG: 1471742
+Change-Id: Ib90fe89b85f4143db29702338decec76c83872bc
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175942
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/graph.c | 2 +-
+ libglusterfs/src/xlator.c | 9 +--------
+ xlators/features/shard/src/shard.c | 3 ---
+ 3 files changed, 2 insertions(+), 12 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 5b95fd6..27d9335 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1394,7 +1394,7 @@ glusterfs_graph_cleanup(void *arg)
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+- glusterfs_graph_deactivate(graph);
++ glusterfs_graph_fini(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index d9d3441..71e1ed4 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -659,7 +659,6 @@ xlator_fini_rec(xlator_t *xl)
+ trav = trav->next;
+ }
+
+- xl->cleanup_starting = 1;
+ if (xl->init_succeeded) {
+ if (xl->fini) {
+ old_THIS = THIS;
+@@ -667,14 +666,8 @@ xlator_fini_rec(xlator_t *xl)
+
+ xl->fini(xl);
+
+- if (xl->local_pool) {
++ if (xl->local_pool)
+ mem_pool_destroy(xl->local_pool);
+- xl->local_pool = NULL;
+- }
+- if (xl->itable) {
+- inode_table_destroy(xl->itable);
+- xl->itable = NULL;
+- }
+
+ THIS = old_THIS;
+ } else {
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 31c7eec..b248767 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -6785,9 +6785,6 @@ fini(xlator_t *this)
+
+ GF_VALIDATE_OR_GOTO("shard", this, out);
+
+- /*Itable was not created by shard, hence setting to NULL.*/
+- this->itable = NULL;
+-
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+
+--
+1.8.3.1
+
diff --git a/0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch b/0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch
new file mode 100644
index 0000000..790d9d1
--- /dev/null
+++ b/0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch
@@ -0,0 +1,220 @@
+From feeee9a35c1219b2077ea07b6fd80976960bd181 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:44:42 +0530
+Subject: [PATCH 234/255] Revert "glusterd/shd: Change shd logfile to a unique
+ name"
+
+This reverts commit 541e1400ecaec5fea0f56e8ca18f00c229906d8a.
+
+BUG: 1471742
+Change-Id: I7e0371d77db6897981f7364c04d4b9b523b865ba
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175943
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 12 ++++++++
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 6 ++++
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ++++-----
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 34 +++++-----------------
+ xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 4 +--
+ xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 4 ---
+ 6 files changed, 34 insertions(+), 40 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+index 57ceda9..9196758 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+@@ -75,6 +75,18 @@ glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+ }
+
+ void
++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len)
++{
++ snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname);
++}
++
++void
++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len)
++{
++ snprintf(logfile, len, "%s/shd.log", logdir);
++}
++
++void
+ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
+ {
+ glusterd_svc_proc_t *svc_proc = NULL;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+index 59466ec..c70702c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+@@ -27,6 +27,12 @@ glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+ void
++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len);
++
++void
++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len);
++
++void
+ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd);
+
+ int
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 8ad90a9..dbe2560 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -90,8 +90,8 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+ glusterd_svc_create_rundir(rundir);
+
+- glusterd_svc_build_logfile_path(shd_svc_name, DEFAULT_LOG_FILE_DIRECTORY,
+- logfile, sizeof(logfile));
++ glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir));
++ glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile));
+
+ /* Initialize the connection mgmt */
+ if (mux_conn && mux_svc->rpc) {
+@@ -104,7 +104,7 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ if (ret < 0)
+ goto out;
+ } else {
+- ret = mkdir_p(DEFAULT_LOG_FILE_DIRECTORY, 0755, _gf_true);
++ ret = mkdir_p(logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Unable to create logdir %s", logdir);
+@@ -460,7 +460,6 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+ return -1;
+
+ glusterd_volinfo_ref(volinfo);
+-
+ if (!svc->inited) {
+ ret = glusterd_shd_svc_mux_init(volinfo, svc);
+ if (ret)
+@@ -472,11 +471,12 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+ /* Unref will happen from glusterd_svc_attach_cbk */
+ ret = glusterd_attach_svc(svc, volinfo, flags);
+ if (ret) {
++ glusterd_volinfo_unref(volinfo);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+- "Failed to attach shd svc(volume=%s) to pid=%d",
++ "Failed to attach shd svc(volume=%s) to pid=%d. Starting"
++ "a new process",
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+- glusterd_shd_svcproc_cleanup(&volinfo->shd);
+- glusterd_volinfo_unref(volinfo);
++ ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags);
+ }
+ goto out;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index 400826f..a6e662f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -469,9 +469,6 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+ glusterd_conf_t *conf = NULL;
+ glusterd_svc_t *parent_svc = NULL;
+ int pid = -1;
+- char pidfile[PATH_MAX] = {
+- 0,
+- };
+
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+@@ -481,26 +478,8 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+- if (svc->inited && !glusterd_proc_is_running(&(svc->proc))) {
+- /* This is the case when shd process was abnormally killed */
+- pthread_mutex_unlock(&conf->attach_lock);
+- glusterd_shd_svcproc_cleanup(&volinfo->shd);
+- pthread_mutex_lock(&conf->attach_lock);
+- }
+-
+ if (!svc->inited) {
+- glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
+- ret = snprintf(svc->proc.name, sizeof(svc->proc.name), "%s",
+- "glustershd");
+- if (ret < 0)
+- goto unlock;
+-
+- ret = snprintf(svc->proc.pidfile, sizeof(svc->proc.pidfile), "%s",
+- pidfile);
+- if (ret < 0)
+- goto unlock;
+-
+- if (gf_is_service_running(pidfile, &pid)) {
++ if (gf_is_service_running(svc->proc.pidfile, &pid)) {
+ /* Just connect is required, but we don't know what happens
+ * during the disconnect. So better to reattach.
+ */
+@@ -508,10 +487,10 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+ }
+
+ if (!mux_proc) {
+- if (pid != -1 && sys_access(pidfile, R_OK) == 0) {
++ if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) {
+ /* stale pid file, stop and unlink it */
+ glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE);
+- glusterd_unlink_file(pidfile);
++ glusterd_unlink_file(svc->proc.pidfile);
+ }
+ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD);
+ }
+@@ -705,10 +684,11 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+- "svc %s of volume %s failed to attach to pid %d", svc->name,
+- volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++ "svc %s of volume %s failed to "
++ "attach to pid %d. Starting a new process",
++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ if (!strcmp(svc->name, "glustershd")) {
+- glusterd_shd_svcproc_cleanup(&volinfo->shd);
++ glusterd_recover_shd_attach_failure(volinfo, svc, *flag);
+ }
+ }
+ out:
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+index fa316a6..f32dafc 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+@@ -33,14 +33,14 @@ glusterd_svc_create_rundir(char *rundir)
+ return ret;
+ }
+
+-void
++static void
+ glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile,
+ size_t len)
+ {
+ snprintf(logfile, len, "%s/%s.log", logdir, server);
+ }
+
+-void
++static void
+ glusterd_svc_build_volfileid_path(char *server, char *volfileid, size_t len)
+ {
+ snprintf(volfileid, len, "gluster/%s", server);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+index 5a5466a..fbc5225 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+@@ -74,10 +74,6 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile,
+ size_t len);
+
+ void
+-glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile,
+- size_t len);
+-
+-void
+ glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len);
+
+ void
+--
+1.8.3.1
+
diff --git a/0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch b/0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch
new file mode 100644
index 0000000..67348f6
--- /dev/null
+++ b/0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch
@@ -0,0 +1,38 @@
+From b2040d8404e0ac44742cb903e3c8da2c832b2925 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:45:11 +0530
+Subject: [PATCH 235/255] Revert "glusterd/svc: Stop stale process using the
+ glusterd_proc_stop"
+
+This reverts commit fe9159ee42f0f67b01e6a495df8105ea0f66738d.
+
+BUG: 1471742
+Change-Id: Id5ac0d21319724141ad9bcb9b66435803ebe5f47
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175944
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index a6e662f..6a3ca52 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -488,9 +488,9 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+
+ if (!mux_proc) {
+ if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) {
+- /* stale pid file, stop and unlink it */
+- glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE);
+- glusterd_unlink_file(svc->proc.pidfile);
++ /* stale pid file, unlink it. */
++ kill(pid, SIGTERM);
++ sys_unlink(svc->proc.pidfile);
+ }
+ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD);
+ }
+--
+1.8.3.1
+
diff --git a/0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch b/0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch
new file mode 100644
index 0000000..e33c7dd
--- /dev/null
+++ b/0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch
@@ -0,0 +1,35 @@
+From 030b5681d47268c591a72035d5a2419234bd1f5f Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:44:55 +0530
+Subject: [PATCH 236/255] Revert "shd/mux: Fix race between mux_proc unlink and
+ stop"
+
+This reverts commit e386fb4f4baf834e6a8fc25cc2fbbb17eb0a7a56.
+
+BUG: 1471742
+Change-Id: I6c52835981389fc5bfeb43483feb581ad8507990
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175945
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index dbe2560..d81d760 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -694,9 +694,6 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+ gf_is_service_running(svc->proc.pidfile, &pid);
+ cds_list_del_init(&svc->mux_svc);
+ empty = cds_list_empty(&svc_proc->svcs);
+- if (empty) {
+- cds_list_del_init(&svc_proc->svc_proc_list);
+- }
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ if (empty) {
+--
+1.8.3.1
+
diff --git a/0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch b/0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch
new file mode 100644
index 0000000..6c88d6a
--- /dev/null
+++ b/0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch
@@ -0,0 +1,227 @@
+From f0c3af09fd919e3646aae2821b0d6bfe4e2fd89c Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:45:58 +0530
+Subject: [PATCH 237/255] Revert "ec/fini: Fix race between xlator cleanup and
+ on going async fop"
+
+This reverts commit 9fd966aa6879ac9867381629f82eca24b950d731.
+
+BUG: 1471742
+Change-Id: I557ec138174b01d8b8f8d090acd34c179e2c632d
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175946
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 10 ----------
+ xlators/cluster/ec/src/ec-common.h | 2 --
+ xlators/cluster/ec/src/ec-data.c | 4 +---
+ xlators/cluster/ec/src/ec-heal.c | 17 ++---------------
+ xlators/cluster/ec/src/ec-types.h | 1 -
+ xlators/cluster/ec/src/ec.c | 37 ++++++++++++-------------------------
+ 6 files changed, 15 insertions(+), 56 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 35c2256..e2e582f 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -2956,13 +2956,3 @@ ec_manager(ec_fop_data_t *fop, int32_t error)
+
+ __ec_manager(fop, error);
+ }
+-
+-gf_boolean_t
+-__ec_is_last_fop(ec_t *ec)
+-{
+- if ((list_empty(&ec->pending_fops)) &&
+- (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
+- return _gf_true;
+- }
+- return _gf_false;
+-}
+diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
+index bf6c97d..e948342 100644
+--- a/xlators/cluster/ec/src/ec-common.h
++++ b/xlators/cluster/ec/src/ec-common.h
+@@ -204,6 +204,4 @@ void
+ ec_reset_entry_healing(ec_fop_data_t *fop);
+ char *
+ ec_msg_str(ec_fop_data_t *fop);
+-gf_boolean_t
+-__ec_is_last_fop(ec_t *ec);
+ #endif /* __EC_COMMON_H__ */
+diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
+index 8d2d9a1..6ef9340 100644
+--- a/xlators/cluster/ec/src/ec-data.c
++++ b/xlators/cluster/ec/src/ec-data.c
+@@ -202,13 +202,11 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
+ {
+ ec_t *ec = fop->xl->private;
+
+- *notify = _gf_false;
+-
+ if (!list_empty(&fop->pending_list)) {
+ LOCK(&ec->lock);
+ {
+ list_del_init(&fop->pending_list);
+- *notify = __ec_is_last_fop(ec);
++ *notify = list_empty(&ec->pending_fops);
+ }
+ UNLOCK(&ec->lock);
+ }
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 237fea2..8844c29 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2814,20 +2814,8 @@ int
+ ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
+ {
+ ec_t *ec = opaque;
+- gf_boolean_t last_fop = _gf_false;
+
+- if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+- LOCK(&ec->lock);
+- {
+- last_fop = __ec_is_last_fop(ec);
+- }
+- UNLOCK(&ec->lock);
+- }
+ gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
+-
+- if (last_fop)
+- ec_pending_fops_completed(ec);
+-
+ return 0;
+ }
+
+@@ -2881,15 +2869,14 @@ ec_launch_replace_heal(ec_t *ec)
+ {
+ int ret = -1;
+
++ if (!ec)
++ return ret;
+ ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
+ ec_replace_heal_done, NULL, ec);
+-
+ if (ret < 0) {
+ gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
+ ret);
+- ec_replace_heal_done(-1, NULL, ec);
+ }
+-
+ return ret;
+ }
+
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index 4dbf4a3..1c295c0 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -643,7 +643,6 @@ struct _ec {
+ uintptr_t xl_notify; /* Bit flag representing
+ notification for bricks. */
+ uintptr_t node_mask;
+- gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */
+ xlator_t **xl_list;
+ gf_lock_t lock;
+ gf_timer_t *timer;
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index f0d58c0..df5912c 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -355,7 +355,6 @@ ec_notify_cbk(void *data)
+ ec_t *ec = data;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
+- gf_boolean_t launch_heal = _gf_false;
+
+ LOCK(&ec->lock);
+ {
+@@ -385,11 +384,6 @@ ec_notify_cbk(void *data)
+ * still bricks DOWN, they will be healed when they
+ * come up. */
+ ec_up(ec->xl, ec);
+-
+- if (ec->shd.iamshd && !ec->shutdown) {
+- launch_heal = _gf_true;
+- GF_ATOMIC_INC(ec->async_fop_count);
+- }
+ }
+
+ propagate = _gf_true;
+@@ -397,12 +391,13 @@ ec_notify_cbk(void *data)
+ unlock:
+ UNLOCK(&ec->lock);
+
+- if (launch_heal) {
+- /* We have just brought the volume UP, so we trigger
+- * a self-heal check on the root directory. */
+- ec_launch_replace_heal(ec);
+- }
+ if (propagate) {
++ if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
++ /* We have just brought the volume UP, so we trigger
++ * a self-heal check on the root directory. */
++ ec_launch_replace_heal(ec);
++ }
++
+ default_notify(ec->xl, event, NULL);
+ }
+ }
+@@ -430,7 +425,7 @@ ec_disable_delays(ec_t *ec)
+ {
+ ec->shutdown = _gf_true;
+
+- return __ec_is_last_fop(ec);
++ return list_empty(&ec->pending_fops);
+ }
+
+ void
+@@ -608,10 +603,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ if (event == GF_EVENT_CHILD_UP) {
+ /* We need to trigger a selfheal if a brick changes
+ * to UP state. */
+- if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
+- !ec->shutdown) {
+- needs_shd_check = _gf_true;
+- }
++ needs_shd_check = ec_set_up_state(ec, mask, mask);
+ } else if (event == GF_EVENT_CHILD_DOWN) {
+ ec_set_up_state(ec, mask, 0);
+ }
+@@ -641,21 +633,17 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ }
+ } else {
+ propagate = _gf_false;
+- needs_shd_check = _gf_false;
+- }
+-
+- if (needs_shd_check) {
+- GF_ATOMIC_INC(ec->async_fop_count);
+ }
+ }
+ unlock:
+ UNLOCK(&ec->lock);
+
+ done:
+- if (needs_shd_check) {
+- ec_launch_replace_heal(ec);
+- }
+ if (propagate) {
++ if (needs_shd_check && ec->shd.iamshd) {
++ ec_launch_replace_heal(ec);
++ }
++
+ error = default_notify(this, event, data);
+ }
+
+@@ -717,7 +705,6 @@ init(xlator_t *this)
+ ec->xl = this;
+ LOCK_INIT(&ec->lock);
+
+- GF_ATOMIC_INIT(ec->async_fop_count, 0);
+ INIT_LIST_HEAD(&ec->pending_fops);
+ INIT_LIST_HEAD(&ec->heal_waiting);
+ INIT_LIST_HEAD(&ec->healing);
+--
+1.8.3.1
+
diff --git a/0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch b/0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch
new file mode 100644
index 0000000..0514cd1
--- /dev/null
+++ b/0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch
@@ -0,0 +1,47 @@
+From 96072cea4da1c2ba5bd87307f20b3ee2cbe6f63d Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:46:10 +0530
+Subject: [PATCH 238/255] Revert "xlator/log: Add more logging in
+ xlator_is_cleanup_starting"
+
+This reverts commit 9b94397a5a735910fab2a29670146a1feb6d890e.
+
+BUG: 1471742
+Change-Id: Icc3f0c8741ed780e265202adbb009063f657c7f7
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175947
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/xlator.c | 12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index 71e1ed4..fbfbbe2 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -1494,18 +1494,12 @@ xlator_is_cleanup_starting(xlator_t *this)
+ glusterfs_graph_t *graph = NULL;
+ xlator_t *xl = NULL;
+
+- if (!this) {
+- gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+- "xlator object is null, returning false");
++ if (!this)
+ goto out;
+- }
+-
+ graph = this->graph;
+- if (!graph) {
+- gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+- "Graph is not set for xlator %s", this->name);
++
++ if (!graph)
+ goto out;
+- }
+
+ xl = graph->first;
+ if (xl && xl->cleanup_starting)
+--
+1.8.3.1
+
diff --git a/0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch b/0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch
new file mode 100644
index 0000000..f36c997
--- /dev/null
+++ b/0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch
@@ -0,0 +1,128 @@
+From ad40c0783e84e5e54a83aeb20a52f720cc881b0c Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:46:22 +0530
+Subject: [PATCH 239/255] Revert "ec/fini: Fix race with ec_fini and ec_notify"
+
+This reverts commit 998d9b8b5e271f407e1c654c34f45f0db36abc71.
+
+BUG: 1471742
+Change-Id: Ifccb8a22d9ef96c22b32dcb4b82bf4d21cf85484
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175948
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/xlator.h | 3 ---
+ libglusterfs/src/libglusterfs.sym | 1 -
+ libglusterfs/src/xlator.c | 21 ---------------------
+ xlators/cluster/ec/src/ec-heal.c | 4 ----
+ xlators/cluster/ec/src/ec-heald.c | 6 ------
+ xlators/cluster/ec/src/ec.c | 3 ---
+ 6 files changed, 38 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index 09e463e..8998976 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -1092,7 +1092,4 @@ gluster_graph_take_reference(xlator_t *tree);
+
+ gf_boolean_t
+ mgmt_is_multiplexed_daemon(char *name);
+-
+-gf_boolean_t
+-xlator_is_cleanup_starting(xlator_t *this);
+ #endif /* _XLATOR_H */
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index 86215d2..05f93b4 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1160,4 +1160,3 @@ glusterfs_process_svc_attach_volfp
+ glusterfs_mux_volfile_reconfigure
+ glusterfs_process_svc_detach
+ mgmt_is_multiplexed_daemon
+-xlator_is_cleanup_starting
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index fbfbbe2..022c3ed 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -1486,24 +1486,3 @@ mgmt_is_multiplexed_daemon(char *name)
+ }
+ return _gf_false;
+ }
+-
+-gf_boolean_t
+-xlator_is_cleanup_starting(xlator_t *this)
+-{
+- gf_boolean_t cleanup = _gf_false;
+- glusterfs_graph_t *graph = NULL;
+- xlator_t *xl = NULL;
+-
+- if (!this)
+- goto out;
+- graph = this->graph;
+-
+- if (!graph)
+- goto out;
+-
+- xl = graph->first;
+- if (xl && xl->cleanup_starting)
+- cleanup = _gf_true;
+-out:
+- return cleanup;
+-}
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 8844c29..2fa1f11 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2855,10 +2855,6 @@ ec_replace_brick_heal_wrap(void *opaque)
+ itable = ec->xl->itable;
+ else
+ goto out;
+-
+- if (xlator_is_cleanup_starting(ec->xl))
+- goto out;
+-
+ ret = ec_replace_heal(ec, itable->root);
+ out:
+ return ret;
+diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
+index 91512d7..edf5e11 100644
+--- a/xlators/cluster/ec/src/ec-heald.c
++++ b/xlators/cluster/ec/src/ec-heald.c
+@@ -444,9 +444,6 @@ unlock:
+ int
+ ec_shd_full_healer_spawn(xlator_t *this, int subvol)
+ {
+- if (xlator_is_cleanup_starting(this))
+- return -1;
+-
+ return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
+ ec_shd_full_healer);
+ }
+@@ -454,9 +451,6 @@ ec_shd_full_healer_spawn(xlator_t *this, int subvol)
+ int
+ ec_shd_index_healer_spawn(xlator_t *this, int subvol)
+ {
+- if (xlator_is_cleanup_starting(this))
+- return -1;
+-
+ return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
+ ec_shd_index_healer);
+ }
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index df5912c..264582a 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -486,9 +486,6 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
+ {
+ uintptr_t current_state = 0;
+
+- if (xlator_is_cleanup_starting(ec->xl))
+- return _gf_false;
+-
+ if ((ec->xl_notify & index_mask) == 0) {
+ ec->xl_notify |= index_mask;
+ ec->xl_notify_count++;
+--
+1.8.3.1
+
diff --git a/0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch b/0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch
new file mode 100644
index 0000000..54ef75e
--- /dev/null
+++ b/0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch
@@ -0,0 +1,54 @@
+From 9b3adb28207681f49ea97fc2c473634ff0f73db6 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:46:35 +0530
+Subject: [PATCH 240/255] Revert "glusterd/shd: Optimize the glustershd manager
+ to send reconfigure"
+
+This reverts commit 321080e55f0ae97115a9542ba5de8494e7610860.
+
+BUG: 1471742
+Change-Id: I5fa84baa3c3e72ca8eb605c7f1fafb53c68859f9
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175949
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/graph.c | 1 +
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 9 ++++-----
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 27d9335..18fb2d9 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1497,6 +1497,7 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+ parent_graph->xl_count -= graph->xl_count;
+ parent_graph->leaf_count -= graph->leaf_count;
++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
+ parent_graph->id++;
+ ret = 0;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index d81d760..981cc87 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -311,11 +311,10 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ */
+ ret = svc->stop(svc, SIGTERM);
+ } else if (volinfo) {
+- if (volinfo->status != GLUSTERD_STATUS_STARTED) {
+- ret = svc->stop(svc, SIGTERM);
+- if (ret)
+- goto out;
+- }
++ ret = svc->stop(svc, SIGTERM);
++ if (ret)
++ goto out;
++
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = svc->start(svc, flags);
+ if (ret)
+--
+1.8.3.1
+
diff --git a/0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch b/0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch
new file mode 100644
index 0000000..965fcfe
--- /dev/null
+++ b/0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch
@@ -0,0 +1,82 @@
+From 066189add979d2e4c74463592e5021bd060d5a51 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:46:47 +0530
+Subject: [PATCH 241/255] Revert "glusterd/svc: glusterd_svcs_stop should call
+ individual wrapper function"
+
+This reverts commit 79fff98f9ca5f815cf0227312b9a997d555dad29.
+
+BUG: 1471742
+Change-Id: I258040ed9be6bc3b4498c76ed51d59258c55acff
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175950
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 12 ++----------
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 10 +++++-----
+ 2 files changed, 7 insertions(+), 15 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 981cc87..75f9a07 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -656,18 +656,10 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+ int pid = -1;
+
+ conf = THIS->private;
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ svc_proc = svc->svc_proc;
+- if (!svc_proc) {
+- /*
+- * This can happen when stop was called on a volume that is not shd
+- * compatible.
+- */
+- gf_msg_debug("glusterd", 0, "svc_proc is null, ie shd already stopped");
+- ret = 0;
+- goto out;
+- }
++ GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out);
++ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+
+ /* Get volinfo->shd from svc object */
+ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index 6a3ca52..f7be394 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -86,25 +86,25 @@ glusterd_svcs_stop(glusterd_volinfo_t *volinfo)
+ priv = this->private;
+ GF_ASSERT(priv);
+
+- ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
++ ret = glusterd_svc_stop(&(priv->nfs_svc), SIGKILL);
+ if (ret)
+ goto out;
+
+- ret = priv->quotad_svc.stop(&(priv->quotad_svc), SIGTERM);
++ ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM);
+ if (ret)
+ goto out;
+
+ if (volinfo) {
+- ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
++ ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT);
+ if (ret)
+ goto out;
+ }
+
+- ret = priv->bitd_svc.stop(&(priv->bitd_svc), SIGTERM);
++ ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM);
+ if (ret)
+ goto out;
+
+- ret = priv->scrub_svc.stop(&(priv->scrub_svc), SIGTERM);
++ ret = glusterd_svc_stop(&(priv->scrub_svc), SIGTERM);
+ out:
+ return ret;
+ }
+--
+1.8.3.1
+
diff --git a/0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch b/0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch
new file mode 100644
index 0000000..2174063
--- /dev/null
+++ b/0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch
@@ -0,0 +1,427 @@
+From 48f7be493588fdf5e99dff0c3b91327e07da05f3 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:48:34 +0530
+Subject: [PATCH 242/255] Revert "tests/shd: Add test coverage for shd mux"
+
+This reverts commit b7f832288d2d2e57231d90765afc049ad7cb2f9d.
+
+BUG: 1471742
+Change-Id: Ifccac5150f07b98006714e43c77c5a4b1fd38cb8
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175951
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/glusterd-restart-shd-mux.t | 96 ---------------------
+ tests/basic/shd-mux.t | 149 ---------------------------------
+ tests/basic/volume-scale-shd-mux.t | 112 -------------------------
+ tests/volume.rc | 15 ----
+ 4 files changed, 372 deletions(-)
+ delete mode 100644 tests/basic/glusterd-restart-shd-mux.t
+ delete mode 100644 tests/basic/shd-mux.t
+ delete mode 100644 tests/basic/volume-scale-shd-mux.t
+
+diff --git a/tests/basic/glusterd-restart-shd-mux.t b/tests/basic/glusterd-restart-shd-mux.t
+deleted file mode 100644
+index a50af9d..0000000
+--- a/tests/basic/glusterd-restart-shd-mux.t
++++ /dev/null
+@@ -1,96 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../include.rc
+-. $(dirname $0)/../volume.rc
+-
+-cleanup;
+-
+-TESTS_EXPECTED_IN_LOOP=20
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+-TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+-TEST $CLI volume set $V0 cluster.eager-lock off
+-TEST $CLI volume set $V0 performance.flush-behind off
+-TEST $CLI volume start $V0
+-
+-for i in $(seq 1 3); do
+- TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+- TEST $CLI volume start ${V0}_afr$i
+- TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+- TEST $CLI volume start ${V0}_ec$i
+-done
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-
+-#Stop the glusterd
+-TEST pkill glusterd
+-#Only stopping glusterd, so there will be one shd
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" shd_count
+-TEST glusterd
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+-#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-shd_pid=$(get_shd_mux_pid $V0)
+-for i in $(seq 1 3); do
+- afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+- EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+- ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+- EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+-done
+-
+-#Reboot a node scenario
+-TEST pkill gluster
+-#Only stopped glusterd, so there will be one shd
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+-
+-TEST glusterd
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-
+-#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+-#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-shd_pid=$(get_shd_mux_pid $V0)
+-for i in $(seq 1 3); do
+- afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+- EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+- ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+- EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+-done
+-
+-for i in $(seq 1 3); do
+- TEST $CLI volume stop ${V0}_afr$i
+- TEST $CLI volume stop ${V0}_ec$i
+-done
+-
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+-
+-TEST kill_brick $V0 $H0 $B0/${V0}0
+-TEST kill_brick $V0 $H0 $B0/${V0}3
+-
+-TEST touch $M0/foo{1..100}
+-
+-EXPECT_WITHIN $HEAL_TIMEOUT "^204$" get_pending_heal_count $V0
+-
+-TEST $CLI volume start ${V0} force
+-
+-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+-
+-TEST rm -rf $M0/*
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+-
+-
+-TEST $CLI volume stop ${V0}
+-TEST $CLI volume delete ${V0}
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^0$" shd_count
+-
+-cleanup
+diff --git a/tests/basic/shd-mux.t b/tests/basic/shd-mux.t
+deleted file mode 100644
+index e42a34a..0000000
+--- a/tests/basic/shd-mux.t
++++ /dev/null
+@@ -1,149 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../include.rc
+-. $(dirname $0)/../volume.rc
+-
+-cleanup;
+-
+-TESTS_EXPECTED_IN_LOOP=16
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+-TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+-TEST $CLI volume set $V0 cluster.eager-lock off
+-TEST $CLI volume set $V0 performance.flush-behind off
+-TEST $CLI volume start $V0
+-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+-
+-shd_pid=$(get_shd_mux_pid $V0)
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-#Create a one more volume
+-TEST $CLI volume create ${V0}_1 replica 3 $H0:$B0/${V0}_1{0,1,2,3,4,5}
+-TEST $CLI volume start ${V0}_1
+-
+-#Check whether the shd has multiplexed or not
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_1
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+-
+-TEST $CLI volume set ${V0}_1 cluster.background-self-heal-count 0
+-TEST $CLI volume set ${V0}_1 cluster.eager-lock off
+-TEST $CLI volume set ${V0}_1 performance.flush-behind off
+-TEST $GFS --volfile-id=/${V0}_1 --volfile-server=$H0 $M1
+-
+-TEST kill_brick $V0 $H0 $B0/${V0}0
+-TEST kill_brick $V0 $H0 $B0/${V0}4
+-TEST kill_brick ${V0}_1 $H0 $B0/${V0}_10
+-TEST kill_brick ${V0}_1 $H0 $B0/${V0}_14
+-
+-TEST touch $M0/foo{1..100}
+-TEST touch $M1/foo{1..100}
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count ${V0}_1
+-
+-TEST $CLI volume start ${V0} force
+-TEST $CLI volume start ${V0}_1 force
+-
+-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_1
+-
+-TEST rm -rf $M0/*
+-TEST rm -rf $M1/*
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+-
+-#Stop the volume
+-TEST $CLI volume stop ${V0}_1
+-TEST $CLI volume delete ${V0}_1
+-
+-#Check the stop succeeded and detached the volume with out restarting it
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+-
+-#Check the thread count become to earlier number after stopping
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-
+-#Now create a ec volume and check mux works
+-TEST $CLI volume create ${V0}_2 disperse 6 redundancy 2 $H0:$B0/${V0}_2{0,1,2,3,4,5}
+-TEST $CLI volume start ${V0}_2
+-
+-#Check whether the shd has multiplexed or not
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_2
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+-
+-TEST $CLI volume set ${V0}_2 cluster.background-self-heal-count 0
+-TEST $CLI volume set ${V0}_2 cluster.eager-lock off
+-TEST $CLI volume set ${V0}_2 performance.flush-behind off
+-TEST $GFS --volfile-id=/${V0}_2 --volfile-server=$H0 $M1
+-
+-TEST kill_brick $V0 $H0 $B0/${V0}0
+-TEST kill_brick $V0 $H0 $B0/${V0}4
+-TEST kill_brick ${V0}_2 $H0 $B0/${V0}_20
+-TEST kill_brick ${V0}_2 $H0 $B0/${V0}_22
+-
+-TEST touch $M0/foo{1..100}
+-TEST touch $M1/foo{1..100}
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^404$" get_pending_heal_count ${V0}_2
+-
+-TEST $CLI volume start ${V0} force
+-TEST $CLI volume start ${V0}_2 force
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+-
+-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_2
+-
+-TEST rm -rf $M0/*
+-TEST rm -rf $M1/*
+-
+-
+-#Stop the volume
+-TEST $CLI volume stop ${V0}_2
+-TEST $CLI volume delete ${V0}_2
+-
+-#Check the stop succeeded and detached the volume with out restarting it
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+-
+-#Check the thread count become to zero for ec related threads
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+-#Check the thread count become to earlier number after stopping
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-for i in $(seq 1 3); do
+- TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+- TEST $CLI volume start ${V0}_afr$i
+- TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+- TEST $CLI volume start ${V0}_ec$i
+-done
+-
+-#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+-#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-#Delete the volumes
+-for i in $(seq 1 3); do
+- TEST $CLI volume stop ${V0}_afr$i
+- TEST $CLI volume stop ${V0}_ec$i
+- TEST $CLI volume delete ${V0}_afr$i
+- TEST $CLI volume delete ${V0}_ec$i
+-done
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-TEST $CLI volume stop ${V0}
+-TEST $CLI volume delete ${V0}
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+-
+-cleanup
+diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t
+deleted file mode 100644
+index dd9cf83..0000000
+--- a/tests/basic/volume-scale-shd-mux.t
++++ /dev/null
+@@ -1,112 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../include.rc
+-. $(dirname $0)/../volume.rc
+-
+-cleanup;
+-
+-TESTS_EXPECTED_IN_LOOP=6
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+-TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+-TEST $CLI volume set $V0 cluster.eager-lock off
+-TEST $CLI volume set $V0 performance.flush-behind off
+-TEST $CLI volume start $V0
+-
+-for i in $(seq 1 2); do
+- TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+- TEST $CLI volume start ${V0}_afr$i
+- TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+- TEST $CLI volume start ${V0}_ec$i
+-done
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+-#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{6,7,8};
+-#Check the thread count become to number of volumes*number of afr subvolume plus 3 additional threads from newly added bricks (3*6+3=21)
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-#Remove the brick and check the detach is successful
+-$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5};
+-#Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18)
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+-
+-#Remove the brick and check the detach is successful
+-$CLI volume remove-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5} force
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+-
+-
+-for i in $(seq 1 2); do
+- TEST $CLI volume stop ${V0}_afr$i
+- TEST $CLI volume stop ${V0}_ec$i
+-done
+-
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+-
+-TEST kill_brick $V0 $H0 $B0/${V0}0
+-TEST kill_brick $V0 $H0 $B0/${V0}4
+-
+-TEST touch $M0/foo{1..100}
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+-
+-TEST $CLI volume start ${V0} force
+-
+-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+-
+-TEST rm -rf $M0/*
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+-shd_pid=$(get_shd_mux_pid $V0)
+-TEST $CLI volume create ${V0}_distribute1 $H0:$B0/${V0}_distribute10
+-TEST $CLI volume start ${V0}_distribute1
+-
+-#Creating a non-replicate/non-ec volume should not have any effect in shd
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-EXPECT "^${shd_pid}$" get_shd_mux_pid $V0
+-
+-TEST mkdir $B0/add/
+-#Now convert the distributed volume to replicate
+-TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3}
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-#scale down the volume
+-TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+-
+-TEST $CLI volume stop ${V0}
+-TEST $CLI volume delete ${V0}
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+-
+-TEST rm -rf $B0/add/
+-TEST mkdir $B0/add/
+-#Now convert the distributed volume back to replicate and make sure that a new shd is spawned
+-TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3};
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+-EXPECT_WITHIN $HEAL_TIMEOUT "^3$" number_healer_threads_shd ${V0}_distribute1 "__afr_shd_healer_wait"
+-
+-#Now convert the replica volume to distribute again and make sure the shd is now stopped
+-TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+-TEST rm -rf $B0/add/
+-
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+-
+-cleanup
+diff --git a/tests/volume.rc b/tests/volume.rc
+index 6a78c37..022d972 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -913,21 +913,6 @@ function volgen_check_ancestry {
+ fi
+ }
+
+-function get_shd_mux_pid {
+- local volume=$1
+- pid=`$CLI volume status $volume shd | awk '/Self-heal/{print $8}'`
+- echo $pid
+-}
+-
+-function shd_count {
+- ps aux | grep "glustershd" | grep -v grep | wc -l
+-}
+-
+-function number_healer_threads_shd {
+- local pid=$(get_shd_mux_pid $1)
+- pstack $pid | grep $2 | wc -l
+-}
+-
+ function get_mtime {
+ local time=$(get-mdata-xattr -m $1)
+ if [ $time == "-1" ];
+--
+1.8.3.1
+
diff --git a/0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch b/0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch
new file mode 100644
index 0000000..9e918d7
--- /dev/null
+++ b/0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch
@@ -0,0 +1,154 @@
+From 4d65506ddfa0245dcaa13b14ca13b2ea762df37d Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:48:51 +0530
+Subject: [PATCH 243/255] Revert "glusterfsd/cleanup: Protect graph object
+ under a lock"
+
+This reverts commit 11b64d494c52004002f900888694d20ef8af6df6.
+
+BUG: 1471742
+Change-Id: I2717207d87ad213722de33c24e451502ed4aff48
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175952
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/graph.c | 58 ++++++++++---------------
+ libglusterfs/src/statedump.c | 16 ++-----
+ tests/bugs/glusterd/optimized-basic-testcases.t | 4 +-
+ 3 files changed, 28 insertions(+), 50 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 18fb2d9..4c8b02d 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1392,12 +1392,8 @@ glusterfs_graph_cleanup(void *arg)
+ }
+ pthread_mutex_unlock(&ctx->notify_lock);
+
+- pthread_mutex_lock(&ctx->cleanup_lock);
+- {
+- glusterfs_graph_fini(graph);
+- glusterfs_graph_destroy(graph);
+- }
+- pthread_mutex_unlock(&ctx->cleanup_lock);
++ glusterfs_graph_fini(graph);
++ glusterfs_graph_destroy(graph);
+ out:
+ return NULL;
+ }
+@@ -1472,37 +1468,31 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+
+ if (!ctx || !ctx->active || !volfile_obj)
+ goto out;
++ parent_graph = ctx->active;
++ graph = volfile_obj->graph;
++ if (!graph)
++ goto out;
++ if (graph->first)
++ xl = graph->first;
+
+- pthread_mutex_lock(&ctx->cleanup_lock);
+- {
+- parent_graph = ctx->active;
+- graph = volfile_obj->graph;
+- if (!graph)
+- goto unlock;
+- if (graph->first)
+- xl = graph->first;
+-
+- last_xl = graph->last_xl;
+- if (last_xl)
+- last_xl->next = NULL;
+- if (!xl || xl->cleanup_starting)
+- goto unlock;
++ last_xl = graph->last_xl;
++ if (last_xl)
++ last_xl->next = NULL;
++ if (!xl || xl->cleanup_starting)
++ goto out;
+
+- xl->cleanup_starting = 1;
+- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
+- "detaching child %s", volfile_obj->vol_id);
++ xl->cleanup_starting = 1;
++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
++ "detaching child %s", volfile_obj->vol_id);
+
+- list_del_init(&volfile_obj->volfile_list);
+- glusterfs_mux_xlator_unlink(parent_graph->top, xl);
+- parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+- parent_graph->xl_count -= graph->xl_count;
+- parent_graph->leaf_count -= graph->leaf_count;
+- default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
+- parent_graph->id++;
+- ret = 0;
+- }
+-unlock:
+- pthread_mutex_unlock(&ctx->cleanup_lock);
++ list_del_init(&volfile_obj->volfile_list);
++ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
++ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
++ parent_graph->xl_count -= graph->xl_count;
++ parent_graph->leaf_count -= graph->leaf_count;
++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
++ parent_graph->id++;
++ ret = 0;
+ out:
+ if (!ret) {
+ list_del_init(&volfile_obj->volfile_list);
+diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
+index 0d58f8f..0cf80c0 100644
+--- a/libglusterfs/src/statedump.c
++++ b/libglusterfs/src/statedump.c
+@@ -805,17 +805,11 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+ int brick_count = 0;
+ int len = 0;
+
++ gf_proc_dump_lock();
++
+ if (!ctx)
+ goto out;
+
+- /*
+- * Multiplexed daemons can change the active graph when attach/detach
+- * is called. So this has to be protected with the cleanup lock.
+- */
+- if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
+- pthread_mutex_lock(&ctx->cleanup_lock);
+- gf_proc_dump_lock();
+-
+ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) &&
+ (ctx && ctx->active)) {
+ top = ctx->active->first;
+@@ -929,11 +923,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+ out:
+ GF_FREE(dump_options.dump_path);
+ dump_options.dump_path = NULL;
+- if (ctx) {
+- gf_proc_dump_unlock();
+- if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
+- pthread_mutex_unlock(&ctx->cleanup_lock);
+- }
++ gf_proc_dump_unlock();
+
+ return;
+ }
+diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
+index 110f1b9..d700b5e 100644
+--- a/tests/bugs/glusterd/optimized-basic-testcases.t
++++ b/tests/bugs/glusterd/optimized-basic-testcases.t
+@@ -289,9 +289,7 @@ mkdir -p /xyz/var/lib/glusterd/abc
+ TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc
+ EXPECT 'Created' volinfo_field "test" 'Status';
+
+-#While taking a statedump, there is a TRY_LOCK on call_frame, which might may cause
+-#failure. So Adding a EXPECT_WITHIN
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" generate_statedump_and_check_for_glusterd_info
++EXPECT "1" generate_statedump_and_check_for_glusterd_info
+
+ cleanup_statedump `pidof glusterd`
+ cleanup
+--
+1.8.3.1
+
diff --git a/0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch b/0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch
new file mode 100644
index 0000000..0888021
--- /dev/null
+++ b/0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch
@@ -0,0 +1,292 @@
+From f6d967cd70ff41a0f93c54d50128c468e9d5dea9 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:49:21 +0530
+Subject: [PATCH 244/255] Revert "ec/shd: Cleanup self heal daemon resources
+ during ec fini"
+
+This reverts commit edc238e40060773f5f5fd59fcdad8ae27d65749f.
+
+BUG: 1471742
+Change-Id: If6cb5941b964f005454a21a67938b354ef1a2037
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175953
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/syncop-utils.c | 2 -
+ xlators/cluster/afr/src/afr-self-heald.c | 5 ---
+ xlators/cluster/ec/src/ec-heald.c | 77 +++++---------------------------
+ xlators/cluster/ec/src/ec-heald.h | 3 --
+ xlators/cluster/ec/src/ec-messages.h | 3 +-
+ xlators/cluster/ec/src/ec.c | 47 -------------------
+ 6 files changed, 13 insertions(+), 124 deletions(-)
+
+diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
+index 4167db4..b842142 100644
+--- a/libglusterfs/src/syncop-utils.c
++++ b/libglusterfs/src/syncop-utils.c
+@@ -354,8 +354,6 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+
+ if (frame) {
+ this = frame->this;
+- } else {
+- this = THIS;
+ }
+
+ /*For this functionality to be implemented in general, we need
+diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
+index 522fe5d..8bc4720 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.c
++++ b/xlators/cluster/afr/src/afr-self-heald.c
+@@ -524,11 +524,6 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+-
+- if (this->cleanup_starting) {
+- return -ENOTCONN;
+- }
+-
+ if (!priv->shd.enabled)
+ return -EBUSY;
+
+diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
+index edf5e11..cba111a 100644
+--- a/xlators/cluster/ec/src/ec-heald.c
++++ b/xlators/cluster/ec/src/ec-heald.c
+@@ -71,11 +71,6 @@ disabled_loop:
+ break;
+ }
+
+- if (ec->shutdown) {
+- healer->running = _gf_false;
+- return -1;
+- }
+-
+ ret = healer->rerun;
+ healer->rerun = 0;
+
+@@ -246,11 +241,9 @@ ec_shd_index_sweep(struct subvol_healer *healer)
+ goto out;
+ }
+
+- _mask_cancellation();
+ ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ healer, ec_shd_index_heal, xdata,
+ ec->shd.max_threads, ec->shd.wait_qlength);
+- _unmask_cancellation();
+ out:
+ if (xdata)
+ dict_unref(xdata);
+@@ -270,11 +263,6 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ int ret = 0;
+
+ ec = this->private;
+-
+- if (this->cleanup_starting) {
+- return -ENOTCONN;
+- }
+-
+ if (ec->xl_up_count <= ec->fragments) {
+ return -ENOTCONN;
+ }
+@@ -317,15 +305,11 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
+ {
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+- int ret = -1;
+
+ ec = healer->this->private;
+ loc.inode = inode;
+- _mask_cancellation();
+- ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+- _unmask_cancellation();
+- return ret;
++ return syncop_ftw(ec->xl_list[healer->subvol], &loc,
++ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ }
+
+ void *
+@@ -333,16 +317,13 @@ ec_shd_index_healer(void *data)
+ {
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+- int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+ ec_t *ec = this->private;
+
+ for (;;) {
+- run = ec_shd_healer_wait(healer);
+- if (run == -1)
+- break;
++ ec_shd_healer_wait(healer);
+
+ if (ec->xl_up_count > ec->fragments) {
+ gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
+@@ -371,12 +352,16 @@ ec_shd_full_healer(void *data)
+
+ rootloc.inode = this->itable->root;
+ for (;;) {
+- run = ec_shd_healer_wait(healer);
+- if (run < 0) {
+- break;
+- } else if (run == 0) {
+- continue;
++ pthread_mutex_lock(&healer->mutex);
++ {
++ run = __ec_shd_healer_wait(healer);
++ if (!run)
++ healer->running = _gf_false;
+ }
++ pthread_mutex_unlock(&healer->mutex);
++
++ if (!run)
++ break;
+
+ if (ec->xl_up_count > ec->fragments) {
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
+@@ -577,41 +562,3 @@ out:
+ dict_del(output, this->name);
+ return ret;
+ }
+-
+-void
+-ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+-{
+- if (!healer)
+- return;
+-
+- pthread_cond_destroy(&healer->cond);
+- pthread_mutex_destroy(&healer->mutex);
+-}
+-
+-void
+-ec_selfheal_daemon_fini(xlator_t *this)
+-{
+- struct subvol_healer *healer = NULL;
+- ec_self_heald_t *shd = NULL;
+- ec_t *priv = NULL;
+- int i = 0;
+-
+- priv = this->private;
+- if (!priv)
+- return;
+-
+- shd = &priv->shd;
+- if (!shd->iamshd)
+- return;
+-
+- for (i = 0; i < priv->nodes; i++) {
+- healer = &shd->index_healers[i];
+- ec_destroy_healer_object(this, healer);
+-
+- healer = &shd->full_healers[i];
+- ec_destroy_healer_object(this, healer);
+- }
+-
+- GF_FREE(shd->index_healers);
+- GF_FREE(shd->full_healers);
+-}
+diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
+index 8184cf4..2eda2a7 100644
+--- a/xlators/cluster/ec/src/ec-heald.h
++++ b/xlators/cluster/ec/src/ec-heald.h
+@@ -24,7 +24,4 @@ ec_selfheal_daemon_init(xlator_t *this);
+ void
+ ec_shd_index_healer_wake(ec_t *ec);
+
+-void
+-ec_selfheal_daemon_fini(xlator_t *this);
+-
+ #endif /* __EC_HEALD_H__ */
+diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
+index ce299bb..7c28808 100644
+--- a/xlators/cluster/ec/src/ec-messages.h
++++ b/xlators/cluster/ec/src/ec-messages.h
+@@ -55,7 +55,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
+ EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
+ EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
+ EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
+- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+- EC_MSG_THREAD_CLEANUP_FAILED);
++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+
+ #endif /* !_EC_MESSAGES_H_ */
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 264582a..3c8013e 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -429,51 +429,6 @@ ec_disable_delays(ec_t *ec)
+ }
+
+ void
+-ec_cleanup_healer_object(ec_t *ec)
+-{
+- struct subvol_healer *healer = NULL;
+- ec_self_heald_t *shd = NULL;
+- void *res = NULL;
+- int i = 0;
+- gf_boolean_t is_join = _gf_false;
+-
+- shd = &ec->shd;
+- if (!shd->iamshd)
+- return;
+-
+- for (i = 0; i < ec->nodes; i++) {
+- healer = &shd->index_healers[i];
+- pthread_mutex_lock(&healer->mutex);
+- {
+- healer->rerun = 1;
+- if (healer->running) {
+- pthread_cond_signal(&healer->cond);
+- is_join = _gf_true;
+- }
+- }
+- pthread_mutex_unlock(&healer->mutex);
+- if (is_join) {
+- pthread_join(healer->thread, &res);
+- is_join = _gf_false;
+- }
+-
+- healer = &shd->full_healers[i];
+- pthread_mutex_lock(&healer->mutex);
+- {
+- healer->rerun = 1;
+- if (healer->running) {
+- pthread_cond_signal(&healer->cond);
+- is_join = _gf_true;
+- }
+- }
+- pthread_mutex_unlock(&healer->mutex);
+- if (is_join) {
+- pthread_join(healer->thread, &res);
+- is_join = _gf_false;
+- }
+- }
+-}
+-void
+ ec_pending_fops_completed(ec_t *ec)
+ {
+ if (ec->shutdown) {
+@@ -589,7 +544,6 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ /* If there aren't pending fops running after we have waken up
+ * them, we immediately propagate the notification. */
+ propagate = ec_disable_delays(ec);
+- ec_cleanup_healer_object(ec);
+ goto unlock;
+ }
+
+@@ -805,7 +759,6 @@ failed:
+ void
+ fini(xlator_t *this)
+ {
+- ec_selfheal_daemon_fini(this);
+ __ec_destroy_private(this);
+ }
+
+--
+1.8.3.1
+
diff --git a/0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch b/0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch
new file mode 100644
index 0000000..6495e38
--- /dev/null
+++ b/0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch
@@ -0,0 +1,151 @@
+From 022701465f3e642cdb7942995647615baa266a35 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:49:31 +0530
+Subject: [PATCH 245/255] Revert "shd/glusterd: Serialize shd manager to
+ prevent race condition"
+
+This reverts commit 646292b4f73bf1b506d034b85787f794963d7196.
+
+BUG: 1471742
+Change-Id: Ie21fbe18965d8bdea81f4276b57960a27a4db89d
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175954
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../serialize-shd-manager-glusterd-restart.t | 54 ----------------------
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ------
+ xlators/mgmt/glusterd/src/glusterd.c | 1 -
+ xlators/mgmt/glusterd/src/glusterd.h | 3 --
+ 4 files changed, 72 deletions(-)
+ delete mode 100644 tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
+
+diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
+deleted file mode 100644
+index 3a27c2a..0000000
+--- a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
++++ /dev/null
+@@ -1,54 +0,0 @@
+-#! /bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../cluster.rc
+-
+-function check_peers {
+-count=`$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l`
+-echo $count
+-}
+-
+-function check_shd {
+-ps aux | grep $1 | grep glustershd | wc -l
+-}
+-
+-cleanup
+-
+-
+-TEST launch_cluster 6
+-
+-TESTS_EXPECTED_IN_LOOP=25
+-for i in $(seq 2 6); do
+- hostname="H$i"
+- TEST $CLI_1 peer probe ${!hostname}
+-done
+-
+-
+-EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers;
+-for i in $(seq 1 5); do
+-
+- TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i
+- TEST $CLI_1 volume start ${V0}_$i force
+-
+-done
+-
+-#kill a node
+-TEST kill_node 3
+-
+-TEST $glusterd_3;
+-EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers
+-
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3
+-
+-for i in $(seq 1 5); do
+-
+- TEST $CLI_1 volume stop ${V0}_$i
+- TEST $CLI_1 volume delete ${V0}_$i
+-
+-done
+-
+-for i in $(seq 1 6); do
+- hostname="H$i"
+- EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname}
+-done
+-cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 75f9a07..a9eab42 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -254,26 +254,14 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ {
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+- glusterd_conf_t *conf = NULL;
+- gf_boolean_t shd_restart = _gf_false;
+
+- conf = THIS->private;
+ volinfo = data;
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ if (volinfo)
+ glusterd_volinfo_ref(volinfo);
+
+- while (conf->restart_shd) {
+- synclock_unlock(&conf->big_lock);
+- sleep(2);
+- synclock_lock(&conf->big_lock);
+- }
+- conf->restart_shd = _gf_true;
+- shd_restart = _gf_true;
+-
+ ret = glusterd_shdsvc_create_volfile(volinfo);
+ if (ret)
+ goto out;
+@@ -322,8 +310,6 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ }
+ }
+ out:
+- if (shd_restart)
+- conf->restart_shd = _gf_false;
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+ if (ret)
+diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
+index 6d7dd4a..c0973cb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.c
++++ b/xlators/mgmt/glusterd/src/glusterd.c
+@@ -1819,7 +1819,6 @@ init(xlator_t *this)
+ conf->rpc = rpc;
+ conf->uds_rpc = uds_rpc;
+ conf->gfs_mgmt = &gd_brick_prog;
+- conf->restart_shd = _gf_false;
+ this->private = conf;
+ /* conf->workdir and conf->rundir are smaller than PATH_MAX; gcc's
+ * snprintf checking will throw an error here if sprintf is used.
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 7d07d33..0fbc9dd 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -222,9 +222,6 @@ typedef struct {
+ gf_atomic_t blockers;
+ uint32_t mgmt_v3_lock_timeout;
+ gf_boolean_t restart_bricks;
+- gf_boolean_t restart_shd; /* This flag prevents running two shd manager
+- simultaneously
+- */
+ pthread_mutex_t attach_lock; /* Lock can be per process or a common one */
+ pthread_mutex_t volume_lock; /* We release the big_lock from lot of places
+ which might lead the modification of volinfo
+--
+1.8.3.1
+
diff --git a/0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch b/0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch
new file mode 100644
index 0000000..bbc780c
--- /dev/null
+++ b/0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch
@@ -0,0 +1,53 @@
+From bc5e3967864d6f6ea22deb22ba72aedca8367797 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:49:41 +0530
+Subject: [PATCH 246/255] Revert "glusterd/shd: Keep a ref on volinfo until
+ attach rpc execute cbk"
+
+This reverts commit c429d3c63601e6ea15af76aa684c30bbeb746467.
+
+BUG: 1471742
+Change-Id: I614e8bdbcc5111dbf407aba047e7d2284bef8ac8
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175955
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 ---
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 4 ----
+ 2 files changed, 7 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index a9eab42..19eca9f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -452,11 +452,8 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+ }
+
+ if (shd->attached) {
+- glusterd_volinfo_ref(volinfo);
+- /* Unref will happen from glusterd_svc_attach_cbk */
+ ret = glusterd_attach_svc(svc, volinfo, flags);
+ if (ret) {
+- glusterd_volinfo_unref(volinfo);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to attach shd svc(volume=%s) to pid=%d. Starting"
+ "a new process",
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index f7be394..02945b1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -695,10 +695,6 @@ out:
+ if (flag) {
+ GF_FREE(flag);
+ }
+-
+- if (volinfo)
+- glusterd_volinfo_unref(volinfo);
+-
+ GF_ATOMIC_DEC(conf->blockers);
+ STACK_DESTROY(frame->root);
+ return 0;
+--
+1.8.3.1
+
diff --git a/0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch b/0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch
new file mode 100644
index 0000000..dc17d72
--- /dev/null
+++ b/0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch
@@ -0,0 +1,144 @@
+From 33d59c74169192b4ba89abc915d8d785bc450fbb Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:49:54 +0530
+Subject: [PATCH 247/255] Revert "afr/shd: Cleanup self heal daemon resources
+ during afr fini"
+
+This reverts commit faaaa3452ceec6afcc18cffc9beca3fe19841cce.
+
+BUG: 1471742
+Change-Id: Id4a22ab45b89872684830f866ec4b589fca50a90
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175956
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/syncop-utils.c | 8 -----
+ xlators/cluster/afr/src/afr-self-heald.c | 2 --
+ xlators/cluster/afr/src/afr.c | 57 --------------------------------
+ 3 files changed, 67 deletions(-)
+
+diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
+index b842142..be03527 100644
+--- a/libglusterfs/src/syncop-utils.c
++++ b/libglusterfs/src/syncop-utils.c
+@@ -350,11 +350,6 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+ gf_boolean_t cond_init = _gf_false;
+ gf_boolean_t mut_init = _gf_false;
+ gf_dirent_t entries;
+- xlator_t *this = NULL;
+-
+- if (frame) {
+- this = frame->this;
+- }
+
+ /*For this functionality to be implemented in general, we need
+ * synccond_t infra which doesn't block the executing thread. Until then
+@@ -402,9 +397,6 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+
+ list_for_each_entry_safe(entry, tmp, &entries.list, list)
+ {
+- if (this && this->cleanup_starting)
+- goto out;
+-
+ list_del_init(&entry->list);
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
+ gf_dirent_entry_free(entry);
+diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
+index 8bc4720..7eb1207 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.c
++++ b/xlators/cluster/afr/src/afr-self-heald.c
+@@ -373,7 +373,6 @@ afr_shd_sweep_prepare(struct subvol_healer *healer)
+
+ time(&event->start_time);
+ event->end_time = 0;
+- _mask_cancellation();
+ }
+
+ void
+@@ -395,7 +394,6 @@ afr_shd_sweep_done(struct subvol_healer *healer)
+
+ if (eh_save_history(shd->statistics[healer->subvol], history) < 0)
+ GF_FREE(history);
+- _unmask_cancellation();
+ }
+
+ int
+diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
+index a0a7551..33258a0 100644
+--- a/xlators/cluster/afr/src/afr.c
++++ b/xlators/cluster/afr/src/afr.c
+@@ -611,70 +611,13 @@ init(xlator_t *this)
+ out:
+ return ret;
+ }
+-void
+-afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+-{
+- int ret = -1;
+-
+- if (!healer)
+- return;
+-
+- if (healer->running) {
+- /*
+- * If there are any resources to cleanup, We need
+- * to do that gracefully using pthread_cleanup_push
+- */
+- ret = gf_thread_cleanup_xint(healer->thread);
+- if (ret)
+- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+- "Failed to clean up healer threads.");
+- healer->thread = 0;
+- }
+- pthread_cond_destroy(&healer->cond);
+- pthread_mutex_destroy(&healer->mutex);
+-}
+-
+-void
+-afr_selfheal_daemon_fini(xlator_t *this)
+-{
+- struct subvol_healer *healer = NULL;
+- afr_self_heald_t *shd = NULL;
+- afr_private_t *priv = NULL;
+- int i = 0;
+-
+- priv = this->private;
+- if (!priv)
+- return;
+-
+- shd = &priv->shd;
+- if (!shd->iamshd)
+- return;
+-
+- for (i = 0; i < priv->child_count; i++) {
+- healer = &shd->index_healers[i];
+- afr_destroy_healer_object(this, healer);
+
+- healer = &shd->full_healers[i];
+- afr_destroy_healer_object(this, healer);
+-
+- if (shd->statistics[i])
+- eh_destroy(shd->statistics[i]);
+- }
+- GF_FREE(shd->index_healers);
+- GF_FREE(shd->full_healers);
+- GF_FREE(shd->statistics);
+- if (shd->split_brain)
+- eh_destroy(shd->split_brain);
+-}
+ void
+ fini(xlator_t *this)
+ {
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+-
+- afr_selfheal_daemon_fini(this);
+-
+ LOCK(&priv->lock);
+ if (priv->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+--
+1.8.3.1
+
diff --git a/0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch b/0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch
new file mode 100644
index 0000000..d721a82
--- /dev/null
+++ b/0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch
@@ -0,0 +1,151 @@
+From 469cb9e16d46f075caf609ddcb12a7c02d73ce8b Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:50:24 +0530
+Subject: [PATCH 248/255] Revert "shd/mux: Fix coverity issues introduced by
+ shd mux patch"
+
+This reverts commit 0021a4bbc9af2bfe28d4a79f76c3cd33f23dd118.
+
+BUG: 1471742
+Change-Id: I0749328412ed3cc7ae5d64baea7a90b63b489a08
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175957
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/graph.c | 21 ++++++++-------------
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 6 ------
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 24 +++++++-----------------
+ 3 files changed, 15 insertions(+), 36 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 4c8b02d..a492dd8 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1470,9 +1470,7 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+ goto out;
+ parent_graph = ctx->active;
+ graph = volfile_obj->graph;
+- if (!graph)
+- goto out;
+- if (graph->first)
++ if (graph && graph->first)
+ xl = graph->first;
+
+ last_xl = graph->last_xl;
+@@ -1593,10 +1591,12 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ parent_graph->leaf_count += graph->leaf_count;
+ parent_graph->id++;
+
+- volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+ if (!volfile_obj) {
+- ret = -1;
+- goto out;
++ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
++ if (!volfile_obj) {
++ ret = -1;
++ goto out;
++ }
+ }
+
+ graph->used = 1;
+@@ -1641,7 +1641,6 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ {
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+- char vol_id[NAME_MAX + 1];
+
+ int ret = -1;
+
+@@ -1673,9 +1672,6 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+- ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id);
+- if (ret < 0)
+- goto out;
+ ret = glusterfs_process_svc_detach(ctx, volfile_obj);
+ if (ret) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL,
+@@ -1684,9 +1680,8 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ "old graph. Aborting the reconfiguration operation");
+ goto out;
+ }
+- volfile_obj = NULL;
+- ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id,
+- checksum);
++ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp,
++ volfile_obj->vol_id, checksum);
+ goto out;
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 19eca9f..4789843 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -101,8 +101,6 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc);
+ ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s",
+ mux_conn->sockpath);
+- if (ret < 0)
+- goto out;
+ } else {
+ ret = mkdir_p(logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+@@ -675,10 +673,6 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+ glusterd_volinfo_ref(volinfo);
+ svc_proc->data = volinfo;
+ ret = glusterd_svc_stop(svc, sig);
+- if (ret) {
+- glusterd_volinfo_unref(volinfo);
+- goto out;
+- }
+ }
+ if (!empty && pid != -1) {
+ ret = glusterd_detach_svc(svc, volinfo, sig);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index 02945b1..e42703c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -411,14 +411,9 @@ __gf_find_compatible_svc(gd_node_type daemon)
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+
+- switch (daemon) {
+- case GD_NODE_SHD: {
+- svc_procs = &conf->shd_procs;
+- if (!svc_procs)
+- goto out;
+- } break;
+- default:
+- /* Add support for other client daemons here */
++ if (daemon == GD_NODE_SHD) {
++ svc_procs = &conf->shd_procs;
++ if (!svc_procs)
+ goto out;
+ }
+
+@@ -545,16 +540,11 @@ __gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid)
+ if (!conf)
+ return NULL;
+
+- switch (daemon) {
+- case GD_NODE_SHD: {
+- svc_procs = &conf->shd_procs;
+- if (!svc_procs)
+- return NULL;
+- } break;
+- default:
+- /* Add support for other client daemons here */
++ if (daemon == GD_NODE_SHD) {
++ svc_procs = &conf->shd_procs;
++ if (!svc_proc)
+ return NULL;
+- }
++ } /* Can be moved to switch when mux is implemented for other daemon; */
+
+ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+ {
+--
+1.8.3.1
+
diff --git a/0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch b/0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch
new file mode 100644
index 0000000..5547257
--- /dev/null
+++ b/0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch
@@ -0,0 +1,95 @@
+From 1864a4f382f3031915e8126440a1561035487e49 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:53:20 +0530
+Subject: [PATCH 249/255] Revert "client/fini: return fini after rpc cleanup"
+
+This reverts commit d79cb2cdff6fe8d962c9ac095a7541ddf500302b.
+
+BUG: 1471742
+Change-Id: I15e6544d47fb7b6002c3b44de3fe0b2a13c84f51
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175958
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/protocol/client/src/client.c | 25 +++++--------------------
+ xlators/protocol/client/src/client.h | 6 ------
+ 2 files changed, 5 insertions(+), 26 deletions(-)
+
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index 95e4be5..532ef35 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -49,12 +49,11 @@ client_fini_complete(xlator_t *this)
+ if (!conf->destroy)
+ return 0;
+
+- pthread_mutex_lock(&conf->lock);
+- {
+- conf->fini_completed = _gf_true;
+- pthread_cond_broadcast(&conf->fini_complete_cond);
+- }
+- pthread_mutex_unlock(&conf->lock);
++ this->private = NULL;
++
++ pthread_spin_destroy(&conf->fd_lock);
++ pthread_mutex_destroy(&conf->lock);
++ GF_FREE(conf);
+
+ out:
+ return 0;
+@@ -2730,7 +2729,6 @@ init(xlator_t *this)
+ goto out;
+
+ pthread_mutex_init(&conf->lock, NULL);
+- pthread_cond_init(&conf->fini_complete_cond, NULL);
+ pthread_spin_init(&conf->fd_lock, 0);
+ INIT_LIST_HEAD(&conf->saved_fds);
+
+@@ -2789,7 +2787,6 @@ fini(xlator_t *this)
+ if (!conf)
+ return;
+
+- conf->fini_completed = _gf_false;
+ conf->destroy = 1;
+ if (conf->rpc) {
+ /* cleanup the saved-frames before last unref */
+@@ -2797,18 +2794,6 @@ fini(xlator_t *this)
+ rpc_clnt_unref(conf->rpc);
+ }
+
+- pthread_mutex_lock(&conf->lock);
+- {
+- while (!conf->fini_completed)
+- pthread_cond_wait(&conf->fini_complete_cond, &conf->lock);
+- }
+- pthread_mutex_unlock(&conf->lock);
+-
+- pthread_spin_destroy(&conf->fd_lock);
+- pthread_mutex_destroy(&conf->lock);
+- pthread_cond_destroy(&conf->fini_complete_cond);
+- GF_FREE(conf);
+-
+ /* Saved Fds */
+ /* TODO: */
+
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index 8dcd72f..f12fa61 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -235,12 +235,6 @@ typedef struct clnt_conf {
+ * up, disconnects can be
+ * logged
+ */
+-
+- gf_boolean_t old_protocol; /* used only for old-protocol testing */
+- pthread_cond_t fini_complete_cond; /* Used to wait till we finsh the fini
+- compltely, ie client_fini_complete
+- to return*/
+- gf_boolean_t fini_completed;
+ } clnt_conf_t;
+
+ typedef struct _client_fd_ctx {
+--
+1.8.3.1
+
diff --git a/0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch b/0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch
new file mode 100644
index 0000000..637a16a
--- /dev/null
+++ b/0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch
@@ -0,0 +1,4572 @@
+From ec629963d61c3ec084c95366eec5ee3a976b1213 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 11 Jul 2019 12:57:45 +0530
+Subject: [PATCH 250/255] Revert "mgmt/shd: Implement multiplexing in self heal
+ daemon"
+
+This reverts commit 2cede2b87fb3e3e0673be9cf67e7d6eec3f7879c.
+
+BUG: 1471742
+Change-Id: I3830d9189dfdb567a44935aa97dc963f4594dfdb
+fixes: bz#1471742
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175959
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfsd/src/glusterfsd-messages.h | 2 +-
+ glusterfsd/src/glusterfsd-mgmt.c | 238 +------
+ glusterfsd/src/glusterfsd.c | 20 +-
+ libglusterfs/src/defaults-tmpl.c | 19 +-
+ libglusterfs/src/glusterfs/glusterfs.h | 7 -
+ libglusterfs/src/glusterfs/libglusterfs-messages.h | 4 +-
+ libglusterfs/src/glusterfs/xlator.h | 3 -
+ libglusterfs/src/graph.c | 451 -------------
+ libglusterfs/src/graph.y | 3 -
+ libglusterfs/src/libglusterfs.sym | 5 -
+ libglusterfs/src/statedump.c | 3 +-
+ libglusterfs/src/xlator.c | 16 -
+ rpc/rpc-lib/src/protocol-common.h | 2 -
+ tests/basic/glusterd/heald.t | 49 +-
+ .../reset-brick-and-daemons-follow-quorum.t | 8 +-
+ tests/volume.rc | 6 +-
+ xlators/mgmt/glusterd/src/Makefile.am | 6 +-
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 42 --
+ xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 11 +-
+ xlators/mgmt/glusterd/src/glusterd-handshake.c | 21 -
+ xlators/mgmt/glusterd/src/glusterd-mem-types.h | 1 -
+ xlators/mgmt/glusterd/src/glusterd-messages.h | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 84 +--
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 140 ----
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 45 --
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 540 ++--------------
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.h | 17 +-
+ xlators/mgmt/glusterd/src/glusterd-sm.c | 12 +-
+ xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-statedump.c | 3 +
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 715 +--------------------
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.h | 40 +-
+ xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 246 ++-----
+ xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 27 -
+ xlators/mgmt/glusterd/src/glusterd-tier.c | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-tierd-svc.c | 4 +-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 137 +---
+ xlators/mgmt/glusterd/src/glusterd-utils.h | 4 -
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 60 +-
+ xlators/mgmt/glusterd/src/glusterd-volgen.h | 11 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd.c | 12 +-
+ xlators/mgmt/glusterd/src/glusterd.h | 30 +-
+ xlators/protocol/client/src/client.c | 31 +-
+ 47 files changed, 292 insertions(+), 2810 deletions(-)
+ delete mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+ delete mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+
+diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h
+index 280624c..ce6c8ca 100644
+--- a/glusterfsd/src/glusterfsd-messages.h
++++ b/glusterfsd/src/glusterfsd-messages.h
+@@ -36,6 +36,6 @@ GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3,
+ glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33,
+ glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36,
+ glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39,
+- glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42, glusterfsd_msg_43);
++ glusterfsd_msg_40);
+
+ #endif /* !_GLUSTERFSD_MESSAGES_H_ */
+diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
+index 1d2cd1a..15acc10 100644
+--- a/glusterfsd/src/glusterfsd-mgmt.c
++++ b/glusterfsd/src/glusterfsd-mgmt.c
+@@ -48,20 +48,7 @@ int
+ glusterfs_graph_unknown_options(glusterfs_graph_t *graph);
+ int
+ emancipate(glusterfs_ctx_t *ctx, int ret);
+-int
+-glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum);
+-int
+-glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+- gf_volfile_t *volfile_obj, char *checksum);
+-int
+-glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum);
+-int
+-glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj);
+
+-gf_boolean_t
+-mgmt_is_multiplexed_daemon(char *name);
+ int
+ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+ {
+@@ -75,96 +62,6 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+ }
+
+ int
+-mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id)
+-{
+- glusterfs_ctx_t *ctx = NULL;
+- int ret = 0;
+- FILE *tmpfp = NULL;
+- gf_volfile_t *volfile_obj = NULL;
+- gf_volfile_t *volfile_tmp = NULL;
+- char sha256_hash[SHA256_DIGEST_LENGTH] = {
+- 0,
+- };
+- int tmp_fd = -1;
+- char template[] = "/tmp/glfs.volfile.XXXXXX";
+-
+- glusterfs_compute_sha256((const unsigned char *)volfile, size, sha256_hash);
+- ctx = THIS->ctx;
+- LOCK(&ctx->volfile_lock);
+- {
+- list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+- {
+- if (!strcmp(volfile_id, volfile_obj->vol_id)) {
+- if (!memcmp(sha256_hash, volfile_obj->volfile_checksum,
+- sizeof(volfile_obj->volfile_checksum))) {
+- UNLOCK(&ctx->volfile_lock);
+- gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_40,
+- "No change in volfile, continuing");
+- goto out;
+- }
+- volfile_tmp = volfile_obj;
+- break;
+- }
+- }
+-
+- /* coverity[secure_temp] mkstemp uses 0600 as the mode */
+- tmp_fd = mkstemp(template);
+- if (-1 == tmp_fd) {
+- UNLOCK(&ctx->volfile_lock);
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
+- "Unable to create temporary file: %s", template);
+- ret = -1;
+- goto out;
+- }
+-
+- /* Calling unlink so that when the file is closed or program
+- * terminates the temporary file is deleted.
+- */
+- ret = sys_unlink(template);
+- if (ret < 0) {
+- gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
+- "Unable to delete temporary file: %s", template);
+- ret = 0;
+- }
+-
+- tmpfp = fdopen(tmp_fd, "w+b");
+- if (!tmpfp) {
+- ret = -1;
+- goto unlock;
+- }
+-
+- fwrite(volfile, size, 1, tmpfp);
+- fflush(tmpfp);
+- if (ferror(tmpfp)) {
+- ret = -1;
+- goto unlock;
+- }
+-
+- if (!volfile_tmp) {
+- /* There is no checksum in the list, which means we simply attach
+- * the volfile
+- */
+- ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id,
+- sha256_hash);
+- goto unlock;
+- }
+- ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj,
+- sha256_hash);
+- if (ret < 0) {
+- gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!");
+- }
+- }
+-unlock:
+- UNLOCK(&ctx->volfile_lock);
+-out:
+- if (tmpfp)
+- fclose(tmpfp);
+- else if (tmp_fd != -1)
+- sys_close(tmp_fd);
+- return ret;
+-}
+-
+-int
+ mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data)
+ {
+ return 0;
+@@ -1069,110 +966,6 @@ glusterfs_handle_attach(rpcsvc_request_t *req)
+ }
+
+ int
+-glusterfs_handle_svc_attach(rpcsvc_request_t *req)
+-{
+- int32_t ret = -1;
+- gd1_mgmt_brick_op_req xlator_req = {
+- 0,
+- };
+- xlator_t *this = NULL;
+- glusterfs_ctx_t *ctx = NULL;
+-
+- GF_ASSERT(req);
+- this = THIS;
+- GF_ASSERT(this);
+-
+- ctx = this->ctx;
+- ret = xdr_to_generic(req->msg[0], &xlator_req,
+- (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+-
+- if (ret < 0) {
+- /*failed to decode msg;*/
+- req->rpc_err = GARBAGE_ARGS;
+- goto out;
+- }
+- gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41,
+- "received attach "
+- "request for volfile-id=%s",
+- xlator_req.name);
+- ret = 0;
+-
+- if (ctx->active) {
+- ret = mgmt_process_volfile(xlator_req.input.input_val,
+- xlator_req.input.input_len, xlator_req.name);
+- } else {
+- gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42,
+- "got attach for %s but no active graph", xlator_req.name);
+- }
+-out:
+- if (xlator_req.input.input_val)
+- free(xlator_req.input.input_val);
+- if (xlator_req.name)
+- free(xlator_req.name);
+- glusterfs_translator_info_response_send(req, ret, NULL, NULL);
+- return 0;
+-}
+-
+-int
+-glusterfs_handle_svc_detach(rpcsvc_request_t *req)
+-{
+- gd1_mgmt_brick_op_req xlator_req = {
+- 0,
+- };
+- ssize_t ret;
+- glusterfs_ctx_t *ctx = NULL;
+- gf_volfile_t *volfile_obj = NULL;
+- gf_volfile_t *volfile_tmp = NULL;
+-
+- ret = xdr_to_generic(req->msg[0], &xlator_req,
+- (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+- if (ret < 0) {
+- req->rpc_err = GARBAGE_ARGS;
+- return -1;
+- }
+- ctx = glusterfsd_ctx;
+-
+- LOCK(&ctx->volfile_lock);
+- {
+- list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+- {
+- if (!strcmp(xlator_req.name, volfile_obj->vol_id)) {
+- volfile_tmp = volfile_obj;
+- break;
+- }
+- }
+-
+- if (!volfile_tmp) {
+- UNLOCK(&ctx->volfile_lock);
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_41,
+- "can't detach %s - not found", xlator_req.name);
+- /*
+- * Used to be -ENOENT. However, the caller asked us to
+- * make sure it's down and if it's already down that's
+- * good enough.
+- */
+- ret = 0;
+- goto out;
+- }
+- ret = glusterfs_process_svc_detach(ctx, volfile_tmp);
+- if (ret) {
+- UNLOCK(&ctx->volfile_lock);
+- gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, glusterfsd_msg_41,
+- "Could not detach "
+- "old graph. Aborting the reconfiguration operation");
+- goto out;
+- }
+- }
+- UNLOCK(&ctx->volfile_lock);
+-out:
+- glusterfs_terminate_response_send(req, ret);
+- free(xlator_req.name);
+- xlator_req.name = NULL;
+-
+- return 0;
+-}
+-
+-int
+ glusterfs_handle_dump_metrics(rpcsvc_request_t *req)
+ {
+ int32_t ret = -1;
+@@ -2056,13 +1849,6 @@ rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = {
+
+ [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", GLUSTERD_DUMP_METRICS,
+ glusterfs_handle_dump_metrics, NULL, 0, DRC_NA},
+-
+- [GLUSTERD_SVC_ATTACH] = {"ATTACH CLIENT", GLUSTERD_SVC_ATTACH,
+- glusterfs_handle_svc_attach, NULL, 0, DRC_NA},
+-
+- [GLUSTERD_SVC_DETACH] = {"DETACH CLIENT", GLUSTERD_SVC_DETACH,
+- glusterfs_handle_svc_detach, NULL, 0, DRC_NA},
+-
+ };
+
+ struct rpcsvc_program glusterfs_mop_prog = {
+@@ -2210,17 +1996,14 @@ mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ }
+
+ volfile:
++ ret = 0;
+ size = rsp.op_ret;
+- volfile_id = frame->local;
+- if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+- ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id);
+- goto post_graph_mgmt;
+- }
+
+- ret = 0;
+ glusterfs_compute_sha256((const unsigned char *)rsp.spec, size,
+ sha256_hash);
+
++ volfile_id = frame->local;
++
+ LOCK(&ctx->volfile_lock);
+ {
+ locked = 1;
+@@ -2322,7 +2105,6 @@ volfile:
+ }
+
+ INIT_LIST_HEAD(&volfile_tmp->volfile_list);
+- volfile_tmp->graph = ctx->active;
+ list_add(&volfile_tmp->volfile_list, &ctx->volfile_list);
+ snprintf(volfile_tmp->vol_id, sizeof(volfile_tmp->vol_id), "%s",
+ volfile_id);
+@@ -2334,7 +2116,6 @@ volfile:
+
+ locked = 0;
+
+-post_graph_mgmt:
+ if (!is_mgmt_rpc_reconnect) {
+ need_emancipate = 1;
+ glusterfs_mgmt_pmap_signin(ctx);
+@@ -2488,21 +2269,10 @@ glusterfs_volfile_fetch(glusterfs_ctx_t *ctx)
+ {
+ xlator_t *server_xl = NULL;
+ xlator_list_t *trav;
+- gf_volfile_t *volfile_obj = NULL;
+- int ret = 0;
++ int ret;
+
+ LOCK(&ctx->volfile_lock);
+ {
+- if (ctx->active &&
+- mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+- list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+- {
+- ret |= glusterfs_volfile_fetch_one(ctx, volfile_obj->vol_id);
+- }
+- UNLOCK(&ctx->volfile_lock);
+- return ret;
+- }
+-
+ if (ctx->active) {
+ server_xl = ctx->active->first;
+ if (strcmp(server_xl->type, "protocol/server") != 0) {
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 2172af4..5b5e996 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -2593,6 +2593,24 @@ out:
+ #endif
+
+ int
++glusterfs_graph_fini(glusterfs_graph_t *graph)
++{
++ xlator_t *trav = NULL;
++
++ trav = graph->first;
++
++ while (trav) {
++ if (trav->init_succeeded) {
++ trav->fini(trav);
++ trav->init_succeeded = 0;
++ }
++ trav = trav->next;
++ }
++
++ return 0;
++}
++
++int
+ glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp)
+ {
+ glusterfs_graph_t *graph = NULL;
+@@ -2791,7 +2809,7 @@ main(int argc, char *argv[])
+
+ /* set brick_mux mode only for server process */
+ if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) {
+- gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43,
++ gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_40,
+ "command line argument --brick-mux is valid only for brick "
+ "process");
+ goto out;
+diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
+index 82e7f78..5bf64e8 100644
+--- a/libglusterfs/src/defaults-tmpl.c
++++ b/libglusterfs/src/defaults-tmpl.c
+@@ -127,12 +127,6 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
+ GF_UNUSED int ret = 0;
+ xlator_t *victim = data;
+
+- glusterfs_graph_t *graph = NULL;
+-
+- GF_VALIDATE_OR_GOTO("notify", this, out);
+- graph = this->graph;
+- GF_VALIDATE_OR_GOTO(this->name, graph, out);
+-
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ case GF_EVENT_PARENT_DOWN: {
+@@ -165,17 +159,6 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
+ xlator_notify(parent->xlator, event, this, NULL);
+ parent = parent->next;
+ }
+-
+- if (event == GF_EVENT_CHILD_DOWN &&
+- !(this->ctx && this->ctx->master) && (graph->top == this)) {
+- /* Make sure this is not a daemon with master xlator */
+- pthread_mutex_lock(&graph->mutex);
+- {
+- graph->used = 0;
+- pthread_cond_broadcast(&graph->child_down_cond);
+- }
+- pthread_mutex_unlock(&graph->mutex);
+- }
+ } break;
+ case GF_EVENT_UPCALL: {
+ xlator_list_t *parent = this->parents;
+@@ -222,7 +205,7 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
+ * nothing to do with readability.
+ */
+ }
+-out:
++
+ return 0;
+ }
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 9ec2365..2cedf1a 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -597,10 +597,6 @@ struct _glusterfs_graph {
+ int used; /* Should be set when fuse gets
+ first CHILD_UP */
+ uint32_t volfile_checksum;
+- void *last_xl; /* Stores the last xl of the graph, as of now only populated
+- in client multiplexed code path */
+- pthread_mutex_t mutex;
+- pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
+ };
+ typedef struct _glusterfs_graph glusterfs_graph_t;
+
+@@ -743,7 +739,6 @@ typedef struct {
+ char volfile_checksum[SHA256_DIGEST_LENGTH];
+ char vol_id[NAME_MAX + 1];
+ struct list_head volfile_list;
+- glusterfs_graph_t *graph;
+
+ } gf_volfile_t;
+
+@@ -827,6 +822,4 @@ gf_free_mig_locks(lock_migration_info_t *locks);
+
+ int
+ glusterfs_read_secure_access_file(void);
+-int
+-glusterfs_graph_fini(glusterfs_graph_t *graph);
+ #endif /* _GLUSTERFS_H */
+diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h
+index ea2aa60..1b72f6d 100644
+--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h
++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h
+@@ -109,8 +109,6 @@ GLFS_MSGID(
+ LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST,
+ LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED,
+ LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG,
+- LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE,
+- LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED,
+- LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED);
++ LG_MSG_XXH64_TO_GFID_FAILED);
+
+ #endif /* !_LG_MESSAGES_H_ */
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index 8998976..b78daad 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -1089,7 +1089,4 @@ handle_default_options(xlator_t *xl, dict_t *options);
+
+ void
+ gluster_graph_take_reference(xlator_t *tree);
+-
+-gf_boolean_t
+-mgmt_is_multiplexed_daemon(char *name);
+ #endif /* _XLATOR_H */
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index a492dd8..bb5e67a 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -114,53 +114,6 @@ out:
+ return cert_depth;
+ }
+
+-xlator_t *
+-glusterfs_get_last_xlator(glusterfs_graph_t *graph)
+-{
+- xlator_t *trav = graph->first;
+- if (!trav)
+- return NULL;
+-
+- while (trav->next)
+- trav = trav->next;
+-
+- return trav;
+-}
+-
+-xlator_t *
+-glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl)
+-{
+- xlator_list_t *unlink = NULL;
+- xlator_list_t *prev = NULL;
+- xlator_list_t **tmp = NULL;
+- xlator_t *next_child = NULL;
+- xlator_t *xl = NULL;
+-
+- for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) {
+- if ((*tmp)->xlator == cxl) {
+- unlink = *tmp;
+- *tmp = (*tmp)->next;
+- if (*tmp)
+- next_child = (*tmp)->xlator;
+- break;
+- }
+- prev = *tmp;
+- }
+-
+- if (!prev)
+- xl = pxl;
+- else if (prev->xlator)
+- xl = prev->xlator->graph->last_xl;
+-
+- if (xl)
+- xl->next = next_child;
+- if (next_child)
+- next_child->prev = xl;
+-
+- GF_FREE(unlink);
+- return next_child;
+-}
+-
+ int
+ glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl)
+ {
+@@ -1139,8 +1092,6 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph)
+ ret = xlator_tree_free_memacct(graph->first);
+
+ list_del_init(&graph->list);
+- pthread_mutex_destroy(&graph->mutex);
+- pthread_cond_destroy(&graph->child_down_cond);
+ GF_FREE(graph);
+
+ return ret;
+@@ -1183,25 +1134,6 @@ out:
+ }
+
+ int
+-glusterfs_graph_fini(glusterfs_graph_t *graph)
+-{
+- xlator_t *trav = NULL;
+-
+- trav = graph->first;
+-
+- while (trav) {
+- if (trav->init_succeeded) {
+- trav->cleanup_starting = 1;
+- trav->fini(trav);
+- trav->init_succeeded = 0;
+- }
+- trav = trav->next;
+- }
+-
+- return 0;
+-}
+-
+-int
+ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
+ glusterfs_graph_t **newgraph)
+ {
+@@ -1324,386 +1256,3 @@ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
+
+ return 0;
+ }
+-int
+-glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx,
+- glusterfs_graph_t *parent_graph)
+-{
+- if (parent_graph) {
+- if (parent_graph->first) {
+- xlator_destroy(parent_graph->first);
+- }
+- ctx->active = NULL;
+- GF_FREE(parent_graph);
+- parent_graph = NULL;
+- }
+- return 0;
+-}
+-
+-void *
+-glusterfs_graph_cleanup(void *arg)
+-{
+- glusterfs_graph_t *graph = NULL;
+- glusterfs_ctx_t *ctx = THIS->ctx;
+- int ret = -1;
+- graph = arg;
+-
+- if (!graph)
+- return NULL;
+-
+- /* To destroy the graph, first send a GF_EVENT_PARENT_DOWN.
+- * Then wait for GF_EVENT_CHILD_DOWN to arrive on the top
+- * xl. Once we have the GF_EVENT_CHILD_DOWN event, proceed
+- * to fini.
+- *
+- * During fini call, this will take a last unref on rpc and
+- * rpc_transport_object.
+- */
+- if (graph->first)
+- default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first);
+-
+- ret = pthread_mutex_lock(&graph->mutex);
+- if (ret != 0) {
+- gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
+- "Failed to aquire a lock");
+- goto out;
+- }
+- /* check and wait for CHILD_DOWN for top xlator*/
+- while (graph->used) {
+- ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex);
+- if (ret != 0)
+- gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED,
+- "cond wait failed ");
+- }
+-
+- ret = pthread_mutex_unlock(&graph->mutex);
+- if (ret != 0) {
+- gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
+- "Failed to release a lock");
+- }
+-
+- /* Though we got a child down on the top xlator, we have to wait until
+- * all the notifiers exit, because there should not be any threads
+- * still accessing xl variables.
+- */
+- pthread_mutex_lock(&ctx->notify_lock);
+- {
+- while (ctx->notifying)
+- pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock);
+- }
+- pthread_mutex_unlock(&ctx->notify_lock);
+-
+- glusterfs_graph_fini(graph);
+- glusterfs_graph_destroy(graph);
+-out:
+- return NULL;
+-}
+-
+-glusterfs_graph_t *
+-glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name,
+- char *type)
+-{
+- glusterfs_graph_t *parent_graph = NULL;
+- xlator_t *ixl = NULL;
+- int ret = -1;
+- parent_graph = GF_CALLOC(1, sizeof(*parent_graph),
+- gf_common_mt_glusterfs_graph_t);
+- if (!parent_graph)
+- goto out;
+-
+- INIT_LIST_HEAD(&parent_graph->list);
+-
+- ctx->active = parent_graph;
+- ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t);
+- if (!ixl)
+- goto out;
+-
+- ixl->ctx = ctx;
+- ixl->graph = parent_graph;
+- ixl->options = dict_new();
+- if (!ixl->options)
+- goto out;
+-
+- ixl->name = gf_strdup(name);
+- if (!ixl->name)
+- goto out;
+-
+- ixl->is_autoloaded = 1;
+-
+- if (xlator_set_type(ixl, type) == -1) {
+- gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+- "%s (%s) set type failed", name, type);
+- goto out;
+- }
+-
+- glusterfs_graph_set_first(parent_graph, ixl);
+- parent_graph->top = ixl;
+- ixl = NULL;
+-
+- gettimeofday(&parent_graph->dob, NULL);
+- fill_uuid(parent_graph->graph_uuid, 128);
+- parent_graph->id = ctx->graph_id++;
+- ret = 0;
+-out:
+- if (ixl)
+- xlator_destroy(ixl);
+-
+- if (ret) {
+- glusterfs_muxsvc_cleanup_parent(ctx, parent_graph);
+- parent_graph = NULL;
+- }
+- return parent_graph;
+-}
+-
+-int
+-glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+-{
+- xlator_t *last_xl = NULL;
+- glusterfs_graph_t *graph = NULL;
+- glusterfs_graph_t *parent_graph = NULL;
+- pthread_t clean_graph = {
+- 0,
+- };
+- int ret = -1;
+- xlator_t *xl = NULL;
+-
+- if (!ctx || !ctx->active || !volfile_obj)
+- goto out;
+- parent_graph = ctx->active;
+- graph = volfile_obj->graph;
+- if (graph && graph->first)
+- xl = graph->first;
+-
+- last_xl = graph->last_xl;
+- if (last_xl)
+- last_xl->next = NULL;
+- if (!xl || xl->cleanup_starting)
+- goto out;
+-
+- xl->cleanup_starting = 1;
+- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
+- "detaching child %s", volfile_obj->vol_id);
+-
+- list_del_init(&volfile_obj->volfile_list);
+- glusterfs_mux_xlator_unlink(parent_graph->top, xl);
+- parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+- parent_graph->xl_count -= graph->xl_count;
+- parent_graph->leaf_count -= graph->leaf_count;
+- default_notify(xl, GF_EVENT_PARENT_DOWN, xl);
+- parent_graph->id++;
+- ret = 0;
+-out:
+- if (!ret) {
+- list_del_init(&volfile_obj->volfile_list);
+- if (graph) {
+- ret = gf_thread_create_detached(
+- &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean");
+- if (ret) {
+- gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
+- LG_MSG_GRAPH_CLEANUP_FAILED,
+- "%s failed to create clean "
+- "up thread",
+- volfile_obj->vol_id);
+- ret = 0;
+- }
+- }
+- GF_FREE(volfile_obj);
+- }
+- return ret;
+-}
+-
+-int
+-glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum)
+-{
+- glusterfs_graph_t *graph = NULL;
+- glusterfs_graph_t *parent_graph = NULL;
+- glusterfs_graph_t *clean_graph = NULL;
+- int ret = -1;
+- xlator_t *xl = NULL;
+- xlator_t *last_xl = NULL;
+- gf_volfile_t *volfile_obj = NULL;
+- pthread_t thread_id = {
+- 0,
+- };
+-
+- if (!ctx)
+- goto out;
+- parent_graph = ctx->active;
+- graph = glusterfs_graph_construct(fp);
+- if (!graph) {
+- gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+- "failed to construct the graph");
+- goto out;
+- }
+- graph->last_xl = glusterfs_get_last_xlator(graph);
+-
+- for (xl = graph->first; xl; xl = xl->next) {
+- if (strcmp(xl->type, "mount/fuse") == 0) {
+- gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL,
+- LG_MSG_GRAPH_ATTACH_FAILED,
+- "fuse xlator cannot be specified in volume file");
+- goto out;
+- }
+- }
+-
+- graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph));
+- xl = graph->first;
+- /* TODO memory leaks everywhere need to free graph in case of error */
+- if (glusterfs_graph_prepare(graph, ctx, xl->name)) {
+- gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+- "failed to prepare graph for xlator %s", xl->name);
+- ret = -1;
+- goto out;
+- } else if (glusterfs_graph_init(graph)) {
+- gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+- "failed to initialize graph for xlator %s", xl->name);
+- ret = -1;
+- goto out;
+- } else if (glusterfs_graph_parent_up(graph)) {
+- gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+- "failed to link the graphs for xlator %s ", xl->name);
+- ret = -1;
+- goto out;
+- }
+-
+- if (!parent_graph) {
+- parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd",
+- "debug/io-stats");
+- if (!parent_graph)
+- goto out;
+- ((xlator_t *)parent_graph->top)->next = xl;
+- clean_graph = parent_graph;
+- } else {
+- last_xl = parent_graph->last_xl;
+- if (last_xl)
+- last_xl->next = xl;
+- xl->prev = last_xl;
+- }
+- parent_graph->last_xl = graph->last_xl;
+-
+- ret = glusterfs_xlator_link(parent_graph->top, xl);
+- if (ret) {
+- gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+- "parent up notification failed");
+- goto out;
+- }
+- parent_graph->xl_count += graph->xl_count;
+- parent_graph->leaf_count += graph->leaf_count;
+- parent_graph->id++;
+-
+- if (!volfile_obj) {
+- volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+- if (!volfile_obj) {
+- ret = -1;
+- goto out;
+- }
+- }
+-
+- graph->used = 1;
+- parent_graph->id++;
+- list_add(&graph->list, &ctx->graphs);
+- INIT_LIST_HEAD(&volfile_obj->volfile_list);
+- volfile_obj->graph = graph;
+- snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
+- volfile_id);
+- memcpy(volfile_obj->volfile_checksum, checksum,
+- sizeof(volfile_obj->volfile_checksum));
+- list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list);
+-
+- gf_log_dump_graph(fp, graph);
+- graph = NULL;
+-
+- ret = 0;
+-out:
+- if (ret) {
+- if (graph) {
+- gluster_graph_take_reference(graph->first);
+- ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup,
+- graph, "graph_clean");
+- if (ret) {
+- gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
+- LG_MSG_GRAPH_CLEANUP_FAILED,
+- "%s failed to create clean "
+- "up thread",
+- volfile_id);
+- ret = 0;
+- }
+- }
+- if (clean_graph)
+- glusterfs_muxsvc_cleanup_parent(ctx, clean_graph);
+- }
+- return ret;
+-}
+-
+-int
+-glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+- gf_volfile_t *volfile_obj, char *checksum)
+-{
+- glusterfs_graph_t *oldvolfile_graph = NULL;
+- glusterfs_graph_t *newvolfile_graph = NULL;
+-
+- int ret = -1;
+-
+- if (!ctx) {
+- gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+- "ctx is NULL");
+- goto out;
+- }
+-
+- /* Change the message id */
+- if (!volfile_obj) {
+- gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+- "failed to get volfile object");
+- goto out;
+- }
+-
+- oldvolfile_graph = volfile_obj->graph;
+- if (!oldvolfile_graph) {
+- goto out;
+- }
+-
+- newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
+-
+- if (!newvolfile_graph) {
+- goto out;
+- }
+- newvolfile_graph->last_xl = glusterfs_get_last_xlator(newvolfile_graph);
+-
+- glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first);
+-
+- if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+- ret = glusterfs_process_svc_detach(ctx, volfile_obj);
+- if (ret) {
+- gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL,
+- LG_MSG_GRAPH_CLEANUP_FAILED,
+- "Could not detach "
+- "old graph. Aborting the reconfiguration operation");
+- goto out;
+- }
+- ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp,
+- volfile_obj->vol_id, checksum);
+- goto out;
+- }
+-
+- gf_msg_debug("glusterfsd-mgmt", 0,
+- "Only options have changed in the"
+- " new graph");
+-
+- ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
+- if (ret) {
+- gf_msg_debug("glusterfsd-mgmt", 0,
+- "Could not reconfigure "
+- "new options in old graph");
+- goto out;
+- }
+- memcpy(volfile_obj->volfile_checksum, checksum,
+- sizeof(volfile_obj->volfile_checksum));
+-
+- ret = 0;
+-out:
+-
+- if (newvolfile_graph)
+- glusterfs_graph_destroy(newvolfile_graph);
+-
+- return ret;
+-}
+diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y
+index c60ff38..5b92985 100644
+--- a/libglusterfs/src/graph.y
++++ b/libglusterfs/src/graph.y
+@@ -542,9 +542,6 @@ glusterfs_graph_new ()
+
+ INIT_LIST_HEAD (&graph->list);
+
+- pthread_mutex_init(&graph->mutex, NULL);
+- pthread_cond_init(&graph->child_down_cond, NULL);
+-
+ gettimeofday (&graph->dob, NULL);
+
+ return graph;
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index 05f93b4..4dca7de 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1155,8 +1155,3 @@ gf_changelog_register_generic
+ gf_gfid_generate_from_xxh64
+ find_xlator_option_in_cmd_args_t
+ gf_d_type_from_ia_type
+-glusterfs_graph_fini
+-glusterfs_process_svc_attach_volfp
+-glusterfs_mux_volfile_reconfigure
+-glusterfs_process_svc_detach
+-mgmt_is_multiplexed_daemon
+diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
+index 0cf80c0..d18b50f 100644
+--- a/libglusterfs/src/statedump.c
++++ b/libglusterfs/src/statedump.c
+@@ -810,8 +810,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
+ if (!ctx)
+ goto out;
+
+- if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) &&
+- (ctx && ctx->active)) {
++ if (ctx && ctx->active) {
+ top = ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ brick_count++;
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index 022c3ed..6bd4f09 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -1470,19 +1470,3 @@ gluster_graph_take_reference(xlator_t *tree)
+ }
+ return;
+ }
+-
+-gf_boolean_t
+-mgmt_is_multiplexed_daemon(char *name)
+-{
+- const char *mux_daemons[] = {"glustershd", NULL};
+- int i;
+-
+- if (!name)
+- return _gf_false;
+-
+- for (i = 0; mux_daemons[i]; i++) {
+- if (!strcmp(name, mux_daemons[i]))
+- return _gf_true;
+- }
+- return _gf_false;
+-}
+diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
+index 7275d75..779878f 100644
+--- a/rpc/rpc-lib/src/protocol-common.h
++++ b/rpc/rpc-lib/src/protocol-common.h
+@@ -245,8 +245,6 @@ enum glusterd_brick_procnum {
+ GLUSTERD_NODE_BITROT,
+ GLUSTERD_BRICK_ATTACH,
+ GLUSTERD_DUMP_METRICS,
+- GLUSTERD_SVC_ATTACH,
+- GLUSTERD_SVC_DETACH,
+ GLUSTERD_BRICK_MAXVALUE,
+ };
+
+diff --git a/tests/basic/glusterd/heald.t b/tests/basic/glusterd/heald.t
+index 7dae3c3..ca112ad 100644
+--- a/tests/basic/glusterd/heald.t
++++ b/tests/basic/glusterd/heald.t
+@@ -7,16 +7,11 @@
+ # Covers enable/disable at the moment. Will be enhanced later to include
+ # the other commands as well.
+
+-function is_pid_running {
+- local pid=$1
+- num=`ps auxww | grep glustershd | grep $pid | grep -v grep | wc -l`
+- echo $num
+-}
+-
+ cleanup;
+ TEST glusterd
+ TEST pidof glusterd
+
++volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol"
+ #Commands should fail when volume doesn't exist
+ TEST ! $CLI volume heal non-existent-volume enable
+ TEST ! $CLI volume heal non-existent-volume disable
+@@ -25,55 +20,51 @@ TEST ! $CLI volume heal non-existent-volume disable
+ # volumes
+ TEST $CLI volume create dist $H0:$B0/dist
+ TEST $CLI volume start dist
+-TEST "[ -z $(get_shd_process_pid dist)]"
++TEST "[ -z $(get_shd_process_pid)]"
+ TEST ! $CLI volume heal dist enable
+ TEST ! $CLI volume heal dist disable
+
+ # Commands should work on replicate/disperse volume.
+ TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
+-TEST "[ -z $(get_shd_process_pid r2)]"
++TEST "[ -z $(get_shd_process_pid)]"
+ TEST $CLI volume start r2
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal r2 enable
+ EXPECT "enable" volume_option r2 "cluster.self-heal-daemon"
+-volfiler2=$(gluster system:: getwd)"/vols/r2/r2-shd.vol"
+-EXPECT "enable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
+-pid=$( get_shd_process_pid r2 )
++EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal r2 disable
+ EXPECT "disable" volume_option r2 "cluster.self-heal-daemon"
+-EXPECT "disable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
+-EXPECT "1" is_pid_running $pid
++EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+
+ # Commands should work on disperse volume.
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2
+ TEST $CLI volume start ec2
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal ec2 enable
+ EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+-volfileec2=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol"
+-EXPECT "enable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
+-pid=$(get_shd_process_pid ec2)
++EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal ec2 disable
+ EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+-EXPECT "disable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
+-EXPECT "1" is_pid_running $pid
++EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+
+ #Check that shd graph is rewritten correctly on volume stop/start
+-EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
+-
+-EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+ TEST $CLI volume stop r2
+-EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+ TEST $CLI volume stop ec2
+ # When both the volumes are stopped, the glustershd volfile is not modified;
+ # just the process is stopped
+-TEST "[ -z $(get_shd_process_pid dist) ]"
+-TEST "[ -z $(get_shd_process_pid ec2) ]"
++TEST "[ -z $(get_shd_process_pid) ]"
+
+ TEST $CLI volume start r2
+-EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
++EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+
+ TEST $CLI volume set r2 self-heal-daemon on
+ TEST $CLI volume set r2 cluster.self-heal-daemon off
+diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
+index e6e65c4..cdb1a33 100644
+--- a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
++++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
+@@ -55,9 +55,9 @@ TEST kill_glusterd 1
+ #Bring back 1st glusterd
+ TEST $glusterd_1
+
+-# We need to wait till PROCESS_UP_TIMEOUT and then check shd service started
+-#on node 2, because once glusterd regains quorum, it will restart all volume
+-#level daemons
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
++# We need to wait till PROCESS_UP_TIMEOUT and then check shd service does not
++# come up on node 2
++sleep $PROCESS_UP_TIMEOUT
++EXPECT "N" shd_up_status_2
+
+ cleanup;
+diff --git a/tests/volume.rc b/tests/volume.rc
+index 022d972..76a8fd4 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -237,13 +237,11 @@ function ec_child_up_count_shd {
+ }
+
+ function get_shd_process_pid {
+- local vol=$1
+- ps auxww | grep "process-name\ glustershd" | awk '{print $2}' | head -1
++ ps auxww | grep glusterfs | grep -E "glustershd/glustershd.pid" | awk '{print $2}' | head -1
+ }
+
+ function generate_shd_statedump {
+- local vol=$1
+- generate_statedump $(get_shd_process_pid $vol)
++ generate_statedump $(get_shd_process_pid)
+ }
+
+ function generate_nfs_statedump {
+diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
+index 11ae189..5fe5156 100644
+--- a/xlators/mgmt/glusterd/src/Makefile.am
++++ b/xlators/mgmt/glusterd/src/Makefile.am
+@@ -18,12 +18,11 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \
+ glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \
+ glusterd-snapshot-utils.c glusterd-conn-mgmt.c \
+- glusterd-proc-mgmt.c glusterd-svc-mgmt.c \
++ glusterd-proc-mgmt.c glusterd-svc-mgmt.c glusterd-shd-svc.c \
+ glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \
+ glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \
+ glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \
+ glusterd-reset-brick.c glusterd-tierd-svc.c glusterd-tierd-svc-helper.c \
+- glusterd-shd-svc.c glusterd-shd-svc-helper.c \
+ glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c
+
+
+@@ -39,12 +38,11 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
+ glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \
+ glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \
+ glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \
+- glusterd-svc-mgmt.h glusterd-nfs-svc.h \
++ glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \
+ glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \
+ glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \
+ glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \
+ glusterd-tierd-svc.h glusterd-tierd-svc-helper.h \
+- glusterd-shd-svc.h glusterd-shd-svc-helper.h \
+ glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \
+ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+index 042a805..ad9a572 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+@@ -2863,7 +2863,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
+ }
+
+ if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) {
+- ret = glusterd_svcs_reconfigure(volinfo);
++ ret = glusterd_svcs_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL,
+ "Unable to reconfigure NFS-Server");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+index 16eefa1..c6d7a00 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+@@ -138,45 +138,3 @@ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
+ glusterd_set_socket_filepath(sockfilepath, socketpath, len);
+ return 0;
+ }
+-
+-int
+-__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+- rpc_clnt_event_t event, void *data)
+-{
+- glusterd_conf_t *conf = THIS->private;
+- glusterd_svc_proc_t *mux_proc = mydata;
+- int ret = -1;
+-
+- /* Silently ignoring this error, exactly like the current
+- * implementation */
+- if (!mux_proc)
+- return 0;
+-
+- if (event == RPC_CLNT_DESTROY) {
+- /* RPC_CLNT_DESTROY will only be called after mux_proc is detached from the
+- * list. So it is safe to call without lock. Processing
+- * RPC_CLNT_DESTROY under a lock will lead to deadlock.
+- */
+- if (mux_proc->data) {
+- glusterd_volinfo_unref(mux_proc->data);
+- mux_proc->data = NULL;
+- }
+- GF_FREE(mux_proc);
+- ret = 0;
+- } else {
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- ret = mux_proc->notify(mux_proc, event);
+- }
+- pthread_mutex_unlock(&conf->attach_lock);
+- }
+- return ret;
+-}
+-
+-int
+-glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+- rpc_clnt_event_t event, void *data)
+-{
+- return glusterd_big_locked_notify(rpc, mydata, event, data,
+- __glusterd_muxsvc_conn_common_notify);
+-}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
+index d1c4607..602c0ba 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
+@@ -43,11 +43,9 @@ glusterd_conn_disconnect(glusterd_conn_t *conn);
+ int
+ glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
+-int
+-glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+- rpc_clnt_event_t event, void *data);
+
+ int32_t
+ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
+ int len);
++
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+index b01fd4d..f9c8617 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+@@ -370,7 +370,6 @@ int
+ glusterd_gfproxydsvc_restart()
+ {
+ glusterd_volinfo_t *volinfo = NULL;
+- glusterd_volinfo_t *tmp = NULL;
+ int ret = -1;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+@@ -381,7 +380,7 @@ glusterd_gfproxydsvc_restart()
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+- cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
++ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ /* Start per volume gfproxyd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index ac788a0..cb2666b 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -5940,11 +5940,6 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+
+ GF_FREE(rebal_data);
+
+- fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count,
+- volinfo->shd.svc.online ? "Online" : "Offline");
+- fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count,
+- volinfo->shd.svc.inited ? "True" : "False");
+-
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ ret = glusterd_volume_get_hot_tier_type_str(volinfo,
+ &hot_tier_type_str);
+@@ -6014,6 +6009,12 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+
+ fprintf(fp, "\n[Services]\n");
+
++ if (priv->shd_svc.inited) {
++ fprintf(fp, "svc%d.name: %s\n", ++count, priv->shd_svc.name);
++ fprintf(fp, "svc%d.online_status: %s\n\n", count,
++ priv->shd_svc.online ? "Online" : "Offline");
++ }
++
+ if (priv->nfs_svc.inited) {
+ fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name);
+ fprintf(fp, "svc%d.online_status: %s\n\n", count,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+index 1ba58c3..5599a63 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+@@ -30,7 +30,6 @@
+ #include "rpcsvc.h"
+ #include "rpc-common-xdr.h"
+ #include "glusterd-gfproxyd-svc-helper.h"
+-#include "glusterd-shd-svc-helper.h"
+
+ extern struct rpc_clnt_program gd_peer_prog;
+ extern struct rpc_clnt_program gd_mgmt_prog;
+@@ -329,26 +328,6 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
+ goto out;
+ }
+
+- volid_ptr = strstr(volume_id, "shd/");
+- if (volid_ptr) {
+- volid_ptr = strchr(volid_ptr, '/');
+- if (!volid_ptr) {
+- ret = -1;
+- goto out;
+- }
+- volid_ptr++;
+-
+- ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+- if (ret == -1) {
+- gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
+- goto out;
+- }
+-
+- glusterd_svc_build_shd_volfile_path(volinfo, path, path_len);
+- ret = 0;
+- goto out;
+- }
+-
+ volid_ptr = strstr(volume_id, "/snaps/");
+ if (volid_ptr) {
+ ret = get_snap_volname_and_volinfo(volid_ptr, &volname, &volinfo);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+index 17052ce..7a784db 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
++++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+@@ -51,7 +51,6 @@ typedef enum gf_gld_mem_types_ {
+ gf_gld_mt_missed_snapinfo_t,
+ gf_gld_mt_snap_create_args_t,
+ gf_gld_mt_glusterd_brick_proc_t,
+- gf_gld_mt_glusterd_svc_proc_t,
+ gf_gld_mt_end,
+ } gf_gld_mem_types_t;
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
+index 424e15f..c7b3ca8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
+@@ -298,8 +298,6 @@ GLFS_MSGID(
+ GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE,
+ GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL,
+ GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED,
+- GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL,
+- GD_MSG_SHD_START_FAIL, GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL,
+- GD_MSG_ATTACH_INFO, GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL);
++ GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL);
+
+ #endif /* !_GLUSTERD_MESSAGES_H_ */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 9ea695e..0d29de2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -44,7 +44,6 @@
+ #include "glusterd-snapshot-utils.h"
+ #include "glusterd-svc-mgmt.h"
+ #include "glusterd-svc-helper.h"
+-#include "glusterd-shd-svc-helper.h"
+ #include "glusterd-shd-svc.h"
+ #include "glusterd-nfs-svc.h"
+ #include "glusterd-quotad-svc.h"
+@@ -2225,11 +2224,6 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
+ if (ret)
+ goto out;
+
+- svc = &(volinfo->shd.svc);
+- ret = svc->reconfigure(volinfo);
+- if (ret)
+- goto out;
+-
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+@@ -2244,7 +2238,7 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+- ret = glusterd_svcs_reconfigure(volinfo);
++ ret = glusterd_svcs_reconfigure();
+ if (ret)
+ goto out;
+ }
+@@ -2700,11 +2694,6 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
+ if (ret)
+ goto out;
+
+- svc = &(volinfo->shd.svc);
+- ret = svc->reconfigure(volinfo);
+- if (ret)
+- goto out;
+-
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+@@ -2718,7 +2707,7 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
+ }
+ }
+ if (svcs_reconfigure) {
+- ret = glusterd_svcs_reconfigure(NULL);
++ ret = glusterd_svcs_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart "
+@@ -3103,11 +3092,6 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ if (ret)
+ goto out;
+
+- svc = &(volinfo->shd.svc);
+- ret = svc->reconfigure(volinfo);
+- if (ret)
+- goto out;
+-
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+@@ -3123,7 +3107,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+- ret = glusterd_svcs_reconfigure(volinfo);
++ ret = glusterd_svcs_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart services");
+@@ -3156,11 +3140,6 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ if (ret)
+ goto out;
+
+- svc = &(volinfo->shd.svc);
+- ret = svc->reconfigure(volinfo);
+- if (ret)
+- goto out;
+-
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+@@ -3176,7 +3155,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+- ret = glusterd_svcs_reconfigure(volinfo);
++ ret = glusterd_svcs_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart services");
+@@ -3383,7 +3362,7 @@ glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+- ret = glusterd_svcs_reconfigure(volinfo);
++ ret = glusterd_svcs_reconfigure();
+ if (ret)
+ goto out;
+ }
+@@ -3666,6 +3645,14 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ other_count++;
+ node_count++;
+
++ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
++ ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, 0,
++ vol_opts);
++ if (ret)
++ goto out;
++ other_count++;
++ node_count++;
++
+ } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
+ ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0,
+ vol_opts);
+@@ -3699,12 +3686,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ goto out;
+ other_count++;
+ node_count++;
+- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+- ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index);
+- if (ret)
+- goto out;
+- other_count++;
+- node_count++;
+ } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
+ if (ret)
+@@ -3767,19 +3748,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ node_count++;
+ }
+
+- if (glusterd_is_shd_compatible_volume(volinfo)) {
+- shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
+- if (shd_enabled) {
+- ret = glusterd_add_shd_to_dict(volinfo, rsp_dict,
+- other_index);
+- if (ret)
+- goto out;
+- other_count++;
+- other_index++;
+- node_count++;
+- }
+- }
+-
+ nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
+ _gf_false);
+ if (!nfs_disabled) {
+@@ -3792,6 +3760,18 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ node_count++;
+ }
+
++ if (glusterd_is_shd_compatible_volume(volinfo))
++ shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
++ if (shd_enabled) {
++ ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict,
++ other_index, vol_opts);
++ if (ret)
++ goto out;
++ other_count++;
++ node_count++;
++ other_index++;
++ }
++
+ if (glusterd_is_volume_quota_enabled(volinfo)) {
+ ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict,
+ other_index, vol_opts);
+@@ -6904,18 +6884,16 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+- glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+- svc = &(volinfo->shd.svc);
+
+ switch (heal_op) {
+ case GF_SHD_OP_INDEX_SUMMARY:
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+- if (!svc->online) {
++ if (!priv->shd_svc.online) {
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
+ "Received "
+@@ -6936,7 +6914,7 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
+ break;
+
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+- if (!svc->online) {
++ if (!priv->shd_svc.online) {
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
+ "Received "
+@@ -7071,7 +7049,7 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr,
+ ret = -1;
+ goto out;
+ } else {
+- pending_node->node = &(volinfo->shd.svc);
++ pending_node->node = &(priv->shd_svc);
+ pending_node->type = GD_NODE_SHD;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+@@ -7205,7 +7183,6 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
+ glusterd_pending_node_t *pending_node = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+- glusterd_svc_t *svc = NULL;
+
+ GF_ASSERT(dict);
+
+@@ -7301,8 +7278,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
+
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+- svc = &(volinfo->shd.svc);
+- if (!svc->online) {
++ if (!priv->shd_svc.online) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED,
+ "Self-heal daemon is not running");
+@@ -7314,7 +7290,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
+ ret = -1;
+ goto out;
+ }
+- pending_node->node = svc;
++ pending_node->node = &(priv->shd_svc);
+ pending_node->type = GD_NODE_SHD;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+deleted file mode 100644
+index 9196758..0000000
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
++++ /dev/null
+@@ -1,140 +0,0 @@
+-/*
+- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+- This file is part of GlusterFS.
+-
+- This file is licensed to you under your choice of the GNU Lesser
+- General Public License, version 3 or any later version (LGPLv3 or
+- later), or the GNU General Public License, version 2 (GPLv2), in all
+- cases as published by the Free Software Foundation.
+-*/
+-
+-#include "glusterd.h"
+-#include "glusterd-utils.h"
+-#include "glusterd-shd-svc-helper.h"
+-#include "glusterd-messages.h"
+-#include "glusterd-volgen.h"
+-
+-void
+-glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
+- int path_len)
+-{
+- char sockfilepath[PATH_MAX] = {
+- 0,
+- };
+- char rundir[PATH_MAX] = {
+- 0,
+- };
+- int32_t len = 0;
+- glusterd_conf_t *priv = THIS->private;
+-
+- if (!priv)
+- return;
+-
+- GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+- len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir,
+- uuid_utoa(MY_UUID));
+- if ((len < 0) || (len >= sizeof(sockfilepath))) {
+- sockfilepath[0] = 0;
+- }
+-
+- glusterd_set_socket_filepath(sockfilepath, path, path_len);
+-}
+-
+-void
+-glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+- int path_len)
+-{
+- char rundir[PATH_MAX] = {
+- 0,
+- };
+- glusterd_conf_t *priv = THIS->private;
+-
+- if (!priv)
+- return;
+-
+- GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+-
+- snprintf(path, path_len, "%s/%s-shd.pid", rundir, volinfo->volname);
+-}
+-
+-void
+-glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+- int path_len)
+-{
+- char workdir[PATH_MAX] = {
+- 0,
+- };
+- glusterd_conf_t *priv = THIS->private;
+-
+- if (!priv)
+- return;
+-
+- GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv);
+-
+- snprintf(path, path_len, "%s/%s-shd.vol", workdir, volinfo->volname);
+-}
+-
+-void
+-glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len)
+-{
+- snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname);
+-}
+-
+-void
+-glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len)
+-{
+- snprintf(logfile, len, "%s/shd.log", logdir);
+-}
+-
+-void
+-glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
+-{
+- glusterd_svc_proc_t *svc_proc = NULL;
+- glusterd_svc_t *svc = NULL;
+- glusterd_conf_t *conf = NULL;
+- gf_boolean_t need_unref = _gf_false;
+- rpc_clnt_t *rpc = NULL;
+-
+- conf = THIS->private;
+- if (!conf)
+- return;
+-
+- GF_VALIDATE_OR_GOTO(THIS->name, conf, out);
+- GF_VALIDATE_OR_GOTO(THIS->name, shd, out);
+-
+- svc = &shd->svc;
+- shd->attached = _gf_false;
+-
+- if (svc->conn.rpc) {
+- rpc_clnt_unref(svc->conn.rpc);
+- svc->conn.rpc = NULL;
+- }
+-
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- svc_proc = svc->svc_proc;
+- svc->svc_proc = NULL;
+- svc->inited = _gf_false;
+- cds_list_del_init(&svc->mux_svc);
+- glusterd_unlink_file(svc->proc.pidfile);
+-
+- if (svc_proc && cds_list_empty(&svc_proc->svcs)) {
+- cds_list_del_init(&svc_proc->svc_proc_list);
+- /* We cannot free the svc_proc list from here, because
+- * if there are pending events on the rpc, they will
+- * try to access the corresponding svc_proc; unrefing the
+- * rpc request and then cleaning up the memory is carried
+- * out from the notify function upon RPC_CLNT_DESTROY.
+- */
+- need_unref = _gf_true;
+- rpc = svc_proc->rpc;
+- svc_proc->rpc = NULL;
+- }
+- }
+- pthread_mutex_unlock(&conf->attach_lock);
+- /*rpc unref has to be performed outside the lock*/
+- if (need_unref && rpc)
+- rpc_clnt_unref(rpc);
+-out:
+- return;
+-}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+deleted file mode 100644
+index c70702c..0000000
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
++++ /dev/null
+@@ -1,45 +0,0 @@
+-/*
+- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+- This file is part of GlusterFS.
+-
+- This file is licensed to you under your choice of the GNU Lesser
+- General Public License, version 3 or any later version (LGPLv3 or
+- later), or the GNU General Public License, version 2 (GPLv2), in all
+- cases as published by the Free Software Foundation.
+-*/
+-
+-#ifndef _GLUSTERD_SHD_SVC_HELPER_H_
+-#define _GLUSTERD_SHD_SVC_HELPER_H_
+-
+-#include "glusterd.h"
+-#include "glusterd-svc-mgmt.h"
+-
+-void
+-glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
+- int path_len);
+-
+-void
+-glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+- int path_len);
+-
+-void
+-glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+- int path_len);
+-
+-void
+-glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len);
+-
+-void
+-glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len);
+-
+-void
+-glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd);
+-
+-int
+-glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
+- glusterd_svc_t *svc, int flags);
+-
+-int
+-glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo);
+-
+-#endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+index 4789843..f5379b0 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+@@ -13,10 +13,9 @@
+ #include "glusterd.h"
+ #include "glusterd-utils.h"
+ #include "glusterd-volgen.h"
++#include "glusterd-svc-mgmt.h"
+ #include "glusterd-shd-svc.h"
+-#include "glusterd-shd-svc-helper.h"
+ #include "glusterd-svc-helper.h"
+-#include "glusterd-store.h"
+
+ #define GD_SHD_PROCESS_NAME "--process-name"
+ char *shd_svc_name = "glustershd";
+@@ -24,145 +23,27 @@ char *shd_svc_name = "glustershd";
+ void
+ glusterd_shdsvc_build(glusterd_svc_t *svc)
+ {
+- int ret = -1;
+- ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
+- if (ret < 0)
+- return;
+-
+- CDS_INIT_LIST_HEAD(&svc->mux_svc);
+ svc->manager = glusterd_shdsvc_manager;
+ svc->start = glusterd_shdsvc_start;
+- svc->stop = glusterd_shdsvc_stop;
+- svc->reconfigure = glusterd_shdsvc_reconfigure;
++ svc->stop = glusterd_svc_stop;
+ }
+
+ int
+-glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+- glusterd_svc_proc_t *mux_svc)
++glusterd_shdsvc_init(glusterd_svc_t *svc)
+ {
+- int ret = -1;
+- char rundir[PATH_MAX] = {
+- 0,
+- };
+- char sockpath[PATH_MAX] = {
+- 0,
+- };
+- char pidfile[PATH_MAX] = {
+- 0,
+- };
+- char volfile[PATH_MAX] = {
+- 0,
+- };
+- char logdir[PATH_MAX] = {
+- 0,
+- };
+- char logfile[PATH_MAX] = {
+- 0,
+- };
+- char volfileid[256] = {0};
+- glusterd_svc_t *svc = NULL;
+- glusterd_volinfo_t *volinfo = NULL;
+- glusterd_conf_t *priv = NULL;
+- glusterd_muxsvc_conn_notify_t notify = NULL;
+- xlator_t *this = NULL;
+- char *volfileserver = NULL;
+- int32_t len = 0;
+-
+- this = THIS;
+- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+-
+- priv = this->private;
+- GF_VALIDATE_OR_GOTO(this->name, priv, out);
+-
+- volinfo = data;
+- GF_VALIDATE_OR_GOTO(this->name, data, out);
+- GF_VALIDATE_OR_GOTO(this->name, mux_svc, out);
+-
+- svc = &(volinfo->shd.svc);
+-
+- ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
+- if (ret < 0)
+- goto out;
+-
+- notify = glusterd_muxsvc_common_rpc_notify;
+- glusterd_store_perform_node_state_store(volinfo);
+-
+- GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+- glusterd_svc_create_rundir(rundir);
+-
+- glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir));
+- glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile));
+-
+- /* Initialize the connection mgmt */
+- if (mux_conn && mux_svc->rpc) {
+- /* multiplexed svc */
+- svc->conn.frame_timeout = mux_conn->frame_timeout;
+- /* This will be unrefed from glusterd_shd_svcproc_cleanup */
+- svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc);
+- ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s",
+- mux_conn->sockpath);
+- } else {
+- ret = mkdir_p(logdir, 0755, _gf_true);
+- if ((ret == -1) && (EEXIST != errno)) {
+- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+- "Unable to create logdir %s", logdir);
+- goto out;
+- }
+-
+- glusterd_svc_build_shd_socket_filepath(volinfo, sockpath,
+- sizeof(sockpath));
+- ret = glusterd_muxsvc_conn_init(&(svc->conn), mux_svc, sockpath, 600,
+- notify);
+- if (ret)
+- goto out;
+- /* This will be unrefed when the last svc is detached from the list */
+- if (!mux_svc->rpc)
+- mux_svc->rpc = rpc_clnt_ref(svc->conn.rpc);
+- }
+-
+- /* Initialize the process mgmt */
+- glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
+- glusterd_svc_build_shd_volfile_path(volinfo, volfile, PATH_MAX);
+- len = snprintf(volfileid, sizeof(volfileid), "shd/%s", volinfo->volname);
+- if ((len < 0) || (len >= sizeof(volfileid))) {
+- ret = -1;
+- goto out;
+- }
+-
+- if (dict_get_strn(this->options, "transport.socket.bind-address",
+- SLEN("transport.socket.bind-address"),
+- &volfileserver) != 0) {
+- volfileserver = "localhost";
+- }
+- ret = glusterd_proc_init(&(svc->proc), shd_svc_name, pidfile, logdir,
+- logfile, volfile, volfileid, volfileserver);
+- if (ret)
+- goto out;
+-
+-out:
+- gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+- return ret;
++ return glusterd_svc_init(svc, shd_svc_name);
+ }
+
+-int
+-glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
++static int
++glusterd_shdsvc_create_volfile()
+ {
+ char filepath[PATH_MAX] = {
+ 0,
+ };
+-
+ int ret = -1;
++ glusterd_conf_t *conf = THIS->private;
+ dict_t *mod_dict = NULL;
+
+- glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX);
+- if (!glusterd_is_shd_compatible_volume(volinfo)) {
+- /* If the volfile exists, delete it. This happens when we
+- * change from replica/ec to distribute.
+- */
+- (void)glusterd_unlink_file(filepath);
+- ret = 0;
+- goto out;
+- }
+ mod_dict = dict_new();
+ if (!mod_dict)
+ goto out;
+@@ -183,7 +64,9 @@ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
+ if (ret)
+ goto out;
+
+- ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict);
++ glusterd_svc_build_volfile_path(shd_svc_name, conf->workdir, filepath,
++ sizeof(filepath));
++ ret = glusterd_create_global_volfile(build_shd_graph, filepath, mod_dict);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfile");
+@@ -198,89 +81,26 @@ out:
+ return ret;
+ }
+
+-gf_boolean_t
+-glusterd_svcs_shd_compatible_volumes_stopped(glusterd_svc_t *svc)
+-{
+- glusterd_svc_proc_t *svc_proc = NULL;
+- glusterd_shdsvc_t *shd = NULL;
+- glusterd_svc_t *temp_svc = NULL;
+- glusterd_volinfo_t *volinfo = NULL;
+- gf_boolean_t comp = _gf_false;
+- glusterd_conf_t *conf = THIS->private;
+-
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- svc_proc = svc->svc_proc;
+- if (!svc_proc)
+- goto unlock;
+- cds_list_for_each_entry(temp_svc, &svc_proc->svcs, mux_svc)
+- {
+- /* Get volinfo->shd from svc object */
+- shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+- if (!shd) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+- "Failed to get shd object "
+- "from shd service");
+- goto unlock;
+- }
+-
+- /* Get volinfo from shd */
+- volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+- if (!volinfo) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+- "Failed to get volinfo from "
+- "from shd");
+- goto unlock;
+- }
+- if (!glusterd_is_shd_compatible_volume(volinfo))
+- continue;
+- if (volinfo->status == GLUSTERD_STATUS_STARTED)
+- goto unlock;
+- }
+- comp = _gf_true;
+- }
+-unlock:
+- pthread_mutex_unlock(&conf->attach_lock);
+-out:
+- return comp;
+-}
+-
+ int
+ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ {
+- int ret = -1;
++ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+
+- volinfo = data;
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+- GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+-
+- if (volinfo)
+- glusterd_volinfo_ref(volinfo);
+-
+- ret = glusterd_shdsvc_create_volfile(volinfo);
+- if (ret)
+- goto out;
+-
+- if (!glusterd_is_shd_compatible_volume(volinfo)) {
+- ret = 0;
+- if (svc->inited) {
+- /* This means glusterd was running for this volume and now
+- * it was converted to a non-shd volume. So just stop the shd
+- */
+- ret = svc->stop(svc, SIGTERM);
++ if (!svc->inited) {
++ ret = glusterd_shdsvc_init(svc);
++ if (ret) {
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
++ "Failed to init shd "
++ "service");
++ goto out;
++ } else {
++ svc->inited = _gf_true;
++ gf_msg_debug(THIS->name, 0, "shd service initialized");
+ }
+- goto out;
+ }
+
+- ret = glusterd_shd_svc_mux_init(volinfo, svc);
+- if (ret) {
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
+- "Failed to init shd service");
+- goto out;
+- }
++ volinfo = data;
+
+ /* If all the volumes are stopped or all shd compatible volumes
+ * are stopped then stop the service if:
+@@ -290,26 +110,31 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+ * - volinfo is NULL or
+ * - volinfo is present and volume is shd compatible
+ */
+- if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) {
+- /* TODO
+- * Take a lock and detach all svc's to stop the process
+- * also reset the init flag
+- */
+- ret = svc->stop(svc, SIGTERM);
+- } else if (volinfo) {
+- ret = svc->stop(svc, SIGTERM);
+- if (ret)
+- goto out;
++ if (glusterd_are_all_volumes_stopped() ||
++ glusterd_all_shd_compatible_volumes_stopped()) {
++ if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) {
++ ret = svc->stop(svc, SIGTERM);
++ }
++ } else {
++ if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) {
++ ret = glusterd_shdsvc_create_volfile();
++ if (ret)
++ goto out;
++
++ ret = svc->stop(svc, SIGTERM);
++ if (ret)
++ goto out;
+
+- if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = svc->start(svc, flags);
+ if (ret)
+ goto out;
++
++ ret = glusterd_conn_connect(&(svc->conn));
++ if (ret)
++ goto out;
+ }
+ }
+ out:
+- if (volinfo)
+- glusterd_volinfo_unref(volinfo);
+ if (ret)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+@@ -318,7 +143,7 @@ out:
+ }
+
+ int
+-glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
++glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+ {
+ int ret = -1;
+ char glusterd_uuid_option[PATH_MAX] = {0};
+@@ -363,136 +188,31 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
+ goto out;
+
+ ret = glusterd_svc_start(svc, flags, cmdline);
+- if (ret)
+- goto out;
+
+- ret = glusterd_conn_connect(&(svc->conn));
+ out:
+ if (cmdline)
+ dict_unref(cmdline);
+- return ret;
+-}
+
+-int
+-glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
+- glusterd_svc_t *svc, int flags)
+-{
+- int ret = -1;
+- glusterd_svc_proc_t *mux_proc = NULL;
+- glusterd_conf_t *conf = NULL;
+-
+- conf = THIS->private;
+-
+- if (!conf || !volinfo || !svc)
+- return -1;
+- glusterd_shd_svcproc_cleanup(&volinfo->shd);
+- mux_proc = glusterd_svcprocess_new();
+- if (!mux_proc) {
+- return -1;
+- }
+- ret = glusterd_shdsvc_init(volinfo, NULL, mux_proc);
+- if (ret)
+- return -1;
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs);
+- svc->svc_proc = mux_proc;
+- cds_list_del_init(&svc->mux_svc);
+- cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs);
+- }
+- pthread_mutex_unlock(&conf->attach_lock);
+-
+- ret = glusterd_new_shd_svc_start(svc, flags);
+- if (!ret) {
+- volinfo->shd.attached = _gf_true;
+- }
+- return ret;
+-}
+-
+-int
+-glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+-{
+- int ret = -1;
+- glusterd_shdsvc_t *shd = NULL;
+- glusterd_volinfo_t *volinfo = NULL;
+- glusterd_conf_t *conf = NULL;
+-
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+- conf = THIS->private;
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+-
+- /* Get volinfo->shd from svc object */
+- shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+- if (!shd) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+- "Failed to get shd object "
+- "from shd service");
+- return -1;
+- }
+-
+- /* Get volinfo from shd */
+- volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+- if (!volinfo) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+- "Failed to get volinfo from "
+- "from shd");
+- return -1;
+- }
+-
+- if (volinfo->status != GLUSTERD_STATUS_STARTED)
+- return -1;
+-
+- glusterd_volinfo_ref(volinfo);
+- if (!svc->inited) {
+- ret = glusterd_shd_svc_mux_init(volinfo, svc);
+- if (ret)
+- goto out;
+- }
+-
+- if (shd->attached) {
+- ret = glusterd_attach_svc(svc, volinfo, flags);
+- if (ret) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+- "Failed to attach shd svc(volume=%s) to pid=%d. Starting"
+- "a new process",
+- volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+- ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags);
+- }
+- goto out;
+- }
+- ret = glusterd_new_shd_svc_start(svc, flags);
+- if (!ret) {
+- shd->attached = _gf_true;
+- }
+-out:
+- if (volinfo)
+- glusterd_volinfo_unref(volinfo);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+ }
+
+ int
+-glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
++glusterd_shdsvc_reconfigure()
+ {
+ int ret = -1;
+ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
+ gf_boolean_t identical = _gf_false;
+- dict_t *mod_dict = NULL;
+- glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+- if (!volinfo) {
+- /* reconfigure will be called separately*/
+- ret = 0;
+- goto out;
+- }
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+- glusterd_volinfo_ref(volinfo);
+- svc = &(volinfo->shd.svc);
+- if (glusterd_svcs_shd_compatible_volumes_stopped(svc))
++ if (glusterd_all_shd_compatible_volumes_stopped())
+ goto manager;
+
+ /*
+@@ -500,42 +220,8 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+-
+- if (!glusterd_is_shd_compatible_volume(volinfo)) {
+- if (svc->inited)
+- goto manager;
+-
+- /* Nothing to do if not shd compatible */
+- ret = 0;
+- goto out;
+- }
+- mod_dict = dict_new();
+- if (!mod_dict)
+- goto out;
+-
+- ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
+- if (ret)
+- goto out;
+-
+- ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
+- if (ret)
+- goto out;
+-
+- ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
+- if (ret)
+- goto out;
+-
+- ret = dict_set_int32(mod_dict, "graph-check", 1);
+- if (ret)
+- goto out;
+-
+- ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
+- if (ret)
+- goto out;
+-
+- ret = glusterd_volume_svc_check_volfile_identical(
+- "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
+- &identical);
++ ret = glusterd_svc_check_volfile_identical(priv->shd_svc.name,
++ build_shd_graph, &identical);
+ if (ret)
+ goto out;
+
+@@ -550,9 +236,8 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+- ret = glusterd_volume_svc_check_topology_identical(
+- "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
+- &identical);
++ ret = glusterd_svc_check_topology_identical(priv->shd_svc.name,
++ build_shd_graph, &identical);
+ if (ret)
+ goto out;
+
+@@ -560,7 +245,7 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
+ * options to shd volfile, so that shd will be reconfigured.
+ */
+ if (identical) {
+- ret = glusterd_shdsvc_create_volfile(volinfo);
++ ret = glusterd_shdsvc_create_volfile();
+ if (ret == 0) { /* Only if above PASSES */
+ ret = glusterd_fetchspec_notify(THIS);
+ }
+@@ -568,129 +253,12 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
+ }
+ manager:
+ /*
+- * shd volfile's topology has been changed. volfile needs
+- * to be RECONFIGURED to ACT on the changed volfile.
++ * shd volfile's topology has been changed. shd server needs
++ * to be RESTARTED to ACT on the changed volfile.
+ */
+- ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
++ ret = priv->shd_svc.manager(&(priv->shd_svc), NULL, PROC_START_NO_WAIT);
+
+ out:
+- if (volinfo)
+- glusterd_volinfo_unref(volinfo);
+- if (mod_dict)
+- dict_unref(mod_dict);
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+ }
+-
+-int
+-glusterd_shdsvc_restart()
+-{
+- glusterd_volinfo_t *volinfo = NULL;
+- glusterd_volinfo_t *tmp = NULL;
+- int ret = -1;
+- xlator_t *this = THIS;
+- glusterd_conf_t *conf = NULL;
+- glusterd_svc_t *svc = NULL;
+-
+- GF_VALIDATE_OR_GOTO("glusterd", this, out);
+-
+- conf = this->private;
+- GF_VALIDATE_OR_GOTO(this->name, conf, out);
+-
+- pthread_mutex_lock(&conf->volume_lock);
+- cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
+- {
+- glusterd_volinfo_ref(volinfo);
+- pthread_mutex_unlock(&conf->volume_lock);
+- /* Start per volume shd svc */
+- if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+- svc = &(volinfo->shd.svc);
+- ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHD_START_FAIL,
+- "Couldn't start shd for "
+- "vol: %s on restart",
+- volinfo->volname);
+- gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s",
+- volinfo->volname, svc->name);
+- glusterd_volinfo_unref(volinfo);
+- goto out;
+- }
+- }
+- glusterd_volinfo_unref(volinfo);
+- pthread_mutex_lock(&conf->volume_lock);
+- }
+- pthread_mutex_unlock(&conf->volume_lock);
+-out:
+- return ret;
+-}
+-
+-int
+-glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+-{
+- int ret = -1;
+- glusterd_svc_proc_t *svc_proc = NULL;
+- glusterd_shdsvc_t *shd = NULL;
+- glusterd_volinfo_t *volinfo = NULL;
+- gf_boolean_t empty = _gf_false;
+- glusterd_conf_t *conf = NULL;
+- int pid = -1;
+-
+- conf = THIS->private;
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+- svc_proc = svc->svc_proc;
+- GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out);
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+-
+- /* Get volinfo->shd from svc object */
+- shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+- if (!shd) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+- "Failed to get shd object "
+- "from shd service");
+- return -1;
+- }
+-
+- /* Get volinfo from shd */
+- volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+- if (!volinfo) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+- "Failed to get volinfo from "
+- "from shd");
+- return -1;
+- }
+-
+- glusterd_volinfo_ref(volinfo);
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- gf_is_service_running(svc->proc.pidfile, &pid);
+- cds_list_del_init(&svc->mux_svc);
+- empty = cds_list_empty(&svc_proc->svcs);
+- }
+- pthread_mutex_unlock(&conf->attach_lock);
+- if (empty) {
+- /* Unref will happen when destroying the connection */
+- glusterd_volinfo_ref(volinfo);
+- svc_proc->data = volinfo;
+- ret = glusterd_svc_stop(svc, sig);
+- }
+- if (!empty && pid != -1) {
+- ret = glusterd_detach_svc(svc, volinfo, sig);
+- if (ret)
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+- "shd service is failed to detach volume %s from pid %d",
+- volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+- else
+- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS,
+- "Shd service is detached for volume %s from pid %d",
+- volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+- }
+- svc->online = _gf_false;
+- (void)glusterd_unlink_file((char *)svc->proc.pidfile);
+- glusterd_shd_svcproc_cleanup(shd);
+- ret = 0;
+- glusterd_volinfo_unref(volinfo);
+-out:
+- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+- return ret;
+-}
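The replacement manager installed by this hunk reduces to a small decision: stop the global shd when no compatible volume is running, otherwise rebuild the volfile and restart the daemon so it picks up the new graph. A simplified, runnable sketch of that control flow; all helpers here are hypothetical stand-ins for the real glusterd functions:

#include <stdio.h>

static int all_shd_volumes_stopped(void) { return 0; }
static int rebuild_volfile(void) { puts("volfile rebuilt"); return 0; }
static int svc_stop(void)    { puts("shd stopped");   return 0; }
static int svc_start(void)   { puts("shd started");   return 0; }
static int svc_connect(void) { puts("rpc connected"); return 0; }

static int shd_manage(void)
{
    if (all_shd_volumes_stopped())
        return svc_stop();      /* nothing left to heal */

    if (rebuild_volfile())      /* regenerate the shd volfile first */
        return -1;
    if (svc_stop())             /* restart to pick up the new graph */
        return -1;
    if (svc_start())
        return -1;
    return svc_connect();       /* reconnect glusterd <-> shd RPC */
}

int main(void) { return shd_manage(); }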
+diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
+index 55b409f..775a9d4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
+@@ -12,20 +12,12 @@
+ #define _GLUSTERD_SHD_SVC_H_
+
+ #include "glusterd-svc-mgmt.h"
+-#include "glusterd.h"
+-
+-typedef struct glusterd_shdsvc_ glusterd_shdsvc_t;
+-struct glusterd_shdsvc_ {
+- glusterd_svc_t svc;
+- gf_boolean_t attached;
+-};
+
+ void
+ glusterd_shdsvc_build(glusterd_svc_t *svc);
+
+ int
+-glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+- glusterd_svc_proc_t *svc_proc);
++glusterd_shdsvc_init(glusterd_svc_t *svc);
+
+ int
+ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags);
+@@ -35,11 +27,4 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags);
+
+ int
+ glusterd_shdsvc_reconfigure();
+-
+-int
+-glusterd_shdsvc_restart();
+-
+-int
+-glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig);
+-
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
+index 943b1c6..54a7bd1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
+@@ -748,16 +748,6 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv)
+ }
+ }
+
+- if (glusterd_is_shd_compatible_volume(volinfo)) {
+- svc = &(volinfo->shd.svc);
+- ret = svc->stop(svc, SIGTERM);
+- if (ret) {
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+- "Failed "
+- "to stop shd daemon service");
+- }
+- }
+-
+ if (glusterd_is_gfproxyd_enabled(volinfo)) {
+ svc = &(volinfo->gfproxyd.svc);
+ ret = svc->stop(svc, SIGTERM);
+@@ -785,7 +775,7 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv)
+ }
+
+ /*Reconfigure all daemon services upon peer detach*/
+- ret = glusterd_svcs_reconfigure(NULL);
++ ret = glusterd_svcs_reconfigure();
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "Failed to reconfigure all daemon services.");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
+index 1da4076..56bab07 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
+@@ -366,7 +366,6 @@ int
+ glusterd_snapdsvc_restart()
+ {
+ glusterd_volinfo_t *volinfo = NULL;
+- glusterd_volinfo_t *tmp = NULL;
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+@@ -377,7 +376,7 @@ glusterd_snapdsvc_restart()
+ conf = this->private;
+ GF_ASSERT(conf);
+
+- cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
++ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ /* Start per volume snapd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
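The switch from cds_list_for_each_entry_safe back to cds_list_for_each_entry appears safe here because the loop body no longer unrefs or removes entries; the _safe variants exist only to survive deletion of the current node. A toy illustration with a plain singly linked list (not the urcu cds_list glusterd uses):

#include <stdlib.h>

struct node { int v; struct node *next; };

/* Read-only walk: a single cursor is enough. */
int sum(struct node *head)
{
    int s = 0;
    for (struct node *n = head; n; n = n->next)
        s += n->v;
    return s;
}

/* Destructive walk: must capture the successor before freeing the
 * current node, which is exactly what the _safe macros do for you. */
void destroy(struct node *head)
{
    struct node *n = head, *tmp;
    while (n) {
        tmp = n->next; /* save successor before free() */
        free(n);
        n = tmp;
    }
}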
+diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c
+index 69d4cf4..f5ecde7 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-statedump.c
++++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c
+@@ -202,6 +202,9 @@ glusterd_dump_priv(xlator_t *this)
+ gf_proc_dump_build_key(key, "glusterd", "ping-timeout");
+ gf_proc_dump_write(key, "%d", priv->ping_timeout);
+
++ gf_proc_dump_build_key(key, "glusterd", "shd.online");
++ gf_proc_dump_write(key, "%d", priv->shd_svc.online);
++
+ gf_proc_dump_build_key(key, "glusterd", "nfs.online");
+ gf_proc_dump_write(key, "%d", priv->nfs_svc.online);
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index e42703c..ca19a75 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -7,7 +7,6 @@
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+-#include <signal.h>
+
+ #include <glusterfs/globals.h>
+ #include <glusterfs/run.h>
+@@ -21,14 +20,12 @@
+ #include "glusterd-bitd-svc.h"
+ #include "glusterd-tierd-svc.h"
+ #include "glusterd-tierd-svc-helper.h"
+-#include "glusterd-shd-svc-helper.h"
+ #include "glusterd-scrub-svc.h"
+ #include "glusterd-svc-helper.h"
+ #include <glusterfs/syscall.h>
+-#include "glusterd-snapshot-utils.h"
+
+ int
+-glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo)
++glusterd_svcs_reconfigure()
+ {
+ int ret = 0;
+ xlator_t *this = THIS;
+@@ -46,11 +43,9 @@ glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo)
+ goto out;
+
+ svc_name = "self-heald";
+- if (volinfo) {
+- ret = glusterd_shdsvc_reconfigure(volinfo);
+- if (ret)
+- goto out;
+- }
++ ret = glusterd_shdsvc_reconfigure();
++ if (ret)
++ goto out;
+
+ if (conf->op_version == GD_OP_VERSION_MIN)
+ goto out;
+@@ -74,7 +69,7 @@ out:
+ }
+
+ int
+-glusterd_svcs_stop(glusterd_volinfo_t *volinfo)
++glusterd_svcs_stop()
+ {
+ int ret = 0;
+ xlator_t *this = NULL;
+@@ -90,15 +85,13 @@ glusterd_svcs_stop(glusterd_volinfo_t *volinfo)
+ if (ret)
+ goto out;
+
+- ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM);
++ ret = glusterd_svc_stop(&(priv->shd_svc), SIGTERM);
+ if (ret)
+ goto out;
+
+- if (volinfo) {
+- ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT);
+- if (ret)
+- goto out;
+- }
++ ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM);
++ if (ret)
++ goto out;
+
+ ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM);
+ if (ret)
+@@ -128,6 +121,12 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo)
+ if (ret)
+ goto out;
+
++ ret = conf->shd_svc.manager(&(conf->shd_svc), volinfo, PROC_START_NO_WAIT);
++ if (ret == -EINVAL)
++ ret = 0;
++ if (ret)
++ goto out;
++
+ if (conf->op_version == GD_OP_VERSION_MIN)
+ goto out;
+
+@@ -144,15 +143,6 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo)
+ if (ret)
+ goto out;
+
+- if (volinfo) {
+- ret = volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
+- PROC_START_NO_WAIT);
+- if (ret == -EINVAL)
+- ret = 0;
+- if (ret)
+- goto out;
+- }
+-
+ ret = conf->scrub_svc.manager(&(conf->scrub_svc), NULL, PROC_START_NO_WAIT);
+ if (ret == -EINVAL)
+ ret = 0;
+@@ -279,678 +269,3 @@ out:
+ GF_FREE(tmpvol);
+ return ret;
+ }
+-
+-int
+-glusterd_volume_svc_check_volfile_identical(
+- char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo,
+- glusterd_vol_graph_builder_t builder, gf_boolean_t *identical)
+-{
+- char orgvol[PATH_MAX] = {
+- 0,
+- };
+- char *tmpvol = NULL;
+- xlator_t *this = NULL;
+- int ret = -1;
+- int need_unlink = 0;
+- int tmp_fd = -1;
+-
+- this = THIS;
+-
+- GF_VALIDATE_OR_GOTO(this->name, this, out);
+- GF_VALIDATE_OR_GOTO(this->name, identical, out);
+-
+- /* This builds the volfile for volume-level daemons */
+- glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol,
+- sizeof(orgvol));
+-
+- ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+- if (ret < 0) {
+- goto out;
+- }
+-
+- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+- tmp_fd = mkstemp(tmpvol);
+- if (tmp_fd < 0) {
+- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+- "Unable to create temp file"
+- " %s:(%s)",
+- tmpvol, strerror(errno));
+- ret = -1;
+- goto out;
+- }
+-
+- need_unlink = 1;
+-
+- ret = builder(volinfo, tmpvol, mode_dict);
+- if (ret)
+- goto out;
+-
+- ret = glusterd_check_files_identical(orgvol, tmpvol, identical);
+-out:
+- if (need_unlink)
+- sys_unlink(tmpvol);
+-
+- if (tmpvol != NULL)
+- GF_FREE(tmpvol);
+-
+- if (tmp_fd >= 0)
+- sys_close(tmp_fd);
+-
+- return ret;
+-}
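The identical-volfile check above follows a generate-then-compare pattern: render the candidate volfile into a mkstemp() file, compare it with the deployed one, and always unlink the temporary. A condensed sketch of the same shape; files_identical() is a hypothetical stand-in for glusterd_check_files_identical(), and the comparison body is deliberately a toy:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Toy stand-in: the real helper compares file contents. */
static int files_identical(const char *a, const char *b, int *same)
{
    *same = (strcmp(a, b) == 0);
    return 0;
}

int volfile_unchanged(const char *orgvol,
                      int (*builder)(const char *path), int *same)
{
    char tmpl[] = "/tmp/gvolfile-XXXXXX";
    int fd = mkstemp(tmpl); /* created with 0600 permissions */
    int ret = -1;

    if (fd < 0)
        return -1;

    if (builder(tmpl) == 0) /* render candidate volfile into tmpl */
        ret = files_identical(orgvol, tmpl, same);

    close(fd);
    unlink(tmpl);           /* always remove the candidate file */
    return ret;
}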
+-
+-int
+-glusterd_volume_svc_check_topology_identical(
+- char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo,
+- glusterd_vol_graph_builder_t builder, gf_boolean_t *identical)
+-{
+- char orgvol[PATH_MAX] = {
+- 0,
+- };
+- char *tmpvol = NULL;
+- glusterd_conf_t *conf = NULL;
+- xlator_t *this = THIS;
+- int ret = -1;
+- int tmpclean = 0;
+- int tmpfd = -1;
+-
+- if ((!identical) || (!this) || (!this->private))
+- goto out;
+-
+- conf = this->private;
+- GF_VALIDATE_OR_GOTO(this->name, conf, out);
+-
+- /* This builds the volfile for volume-level daemons */
+- glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol,
+- sizeof(orgvol));
+- /* Create the temporary volfile */
+- ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+- if (ret < 0) {
+- goto out;
+- }
+-
+- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+- tmpfd = mkstemp(tmpvol);
+- if (tmpfd < 0) {
+- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+- "Unable to create temp file"
+- " %s:(%s)",
+- tmpvol, strerror(errno));
+- ret = -1;
+- goto out;
+- }
+-
+- tmpclean = 1; /* SET the flag to unlink() tmpfile */
+-
+- ret = builder(volinfo, tmpvol, mode_dict);
+- if (ret)
+- goto out;
+-
+- /* Compare the topology of volfiles */
+- ret = glusterd_check_topology_identical(orgvol, tmpvol, identical);
+-out:
+- if (tmpfd >= 0)
+- sys_close(tmpfd);
+- if (tmpclean)
+- sys_unlink(tmpvol);
+- if (tmpvol != NULL)
+- GF_FREE(tmpvol);
+- return ret;
+-}
+-
+-void *
+-__gf_find_compatible_svc(gd_node_type daemon)
+-{
+- glusterd_svc_proc_t *svc_proc = NULL;
+- glusterd_svc_proc_t *return_proc = NULL;
+- glusterd_svc_t *parent_svc = NULL;
+- struct cds_list_head *svc_procs = NULL;
+- glusterd_conf_t *conf = NULL;
+- int pid = -1;
+-
+- conf = THIS->private;
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+-
+- if (daemon == GD_NODE_SHD) {
+- svc_procs = &conf->shd_procs;
+- if (!svc_procs)
+- goto out;
+- }
+-
+- cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+- {
+- parent_svc = cds_list_entry(svc_proc->svcs.next, glusterd_svc_t,
+- mux_svc);
+- if (!return_proc)
+- return_proc = svc_proc;
+-
+- /* If there is an already running shd daemon, select it. Otherwise
+- * select the first one.
+- */
+- if (parent_svc && gf_is_service_running(parent_svc->proc.pidfile, &pid))
+- return (void *)svc_proc;
+- /*
+- * Logic to select one process goes here. Currently there is only one
+- * shd_proc, so we select the first one.
+- */
+- }
+-out:
+- return return_proc;
+-}
+-
+-glusterd_svc_proc_t *
+-glusterd_svcprocess_new()
+-{
+- glusterd_svc_proc_t *new_svcprocess = NULL;
+-
+- new_svcprocess = GF_CALLOC(1, sizeof(*new_svcprocess),
+- gf_gld_mt_glusterd_svc_proc_t);
+-
+- if (!new_svcprocess)
+- return NULL;
+-
+- CDS_INIT_LIST_HEAD(&new_svcprocess->svc_proc_list);
+- CDS_INIT_LIST_HEAD(&new_svcprocess->svcs);
+- new_svcprocess->notify = glusterd_muxsvc_common_rpc_notify;
+- return new_svcprocess;
+-}
+-
+-int
+-glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+-{
+- int ret = -1;
+- glusterd_svc_proc_t *mux_proc = NULL;
+- glusterd_conn_t *mux_conn = NULL;
+- glusterd_conf_t *conf = NULL;
+- glusterd_svc_t *parent_svc = NULL;
+- int pid = -1;
+-
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+- GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+- conf = THIS->private;
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+-
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- if (!svc->inited) {
+- if (gf_is_service_running(svc->proc.pidfile, &pid)) {
+- /* Only a connect is required, but we don't know what
+- * happened during the disconnect, so it is safer to reattach.
+- */
+- mux_proc = __gf_find_compatible_svc_from_pid(GD_NODE_SHD, pid);
+- }
+-
+- if (!mux_proc) {
+- if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) {
+- /* stale pid file, unlink it. */
+- kill(pid, SIGTERM);
+- sys_unlink(svc->proc.pidfile);
+- }
+- mux_proc = __gf_find_compatible_svc(GD_NODE_SHD);
+- }
+- if (mux_proc) {
+- /* Take first entry from the process */
+- parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
+- mux_svc);
+- sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
+- mux_conn = &parent_svc->conn;
+- if (volinfo)
+- volinfo->shd.attached = _gf_true;
+- } else {
+- mux_proc = glusterd_svcprocess_new();
+- if (!mux_proc) {
+- ret = -1;
+- goto unlock;
+- }
+- cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs);
+- }
+- svc->svc_proc = mux_proc;
+- cds_list_del_init(&svc->mux_svc);
+- cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs);
+- ret = glusterd_shdsvc_init(volinfo, mux_conn, mux_proc);
+- if (ret) {
+- pthread_mutex_unlock(&conf->attach_lock);
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
+- "Failed to init shd "
+- "service");
+- goto out;
+- }
+- gf_msg_debug(THIS->name, 0, "shd service initialized");
+- svc->inited = _gf_true;
+- }
+- ret = 0;
+- }
+-unlock:
+- pthread_mutex_unlock(&conf->attach_lock);
+-out:
+- return ret;
+-}
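glusterd_shd_svc_mux_init() above picks a host process in three steps: reuse the process a stale pidfile points at, otherwise attach to any running shd process, otherwise create a fresh one. A sketch of that decision tree with hypothetical registry helpers (the real code walks conf->shd_procs under attach_lock):

#include <stddef.h>

typedef struct proc { int pid; } proc_t;

/* Hypothetical stand-ins for the shd_procs list walks. */
static proc_t *find_proc_by_pid(int pid) { (void)pid; return NULL; }
static proc_t *find_any_running_proc(void) { return NULL; }
static proc_t *new_proc(void) { static proc_t p; return &p; }
static int read_pidfile(const char *pidfile) { (void)pidfile; return -1; }

proc_t *pick_host_process(const char *pidfile)
{
    int pid = read_pidfile(pidfile);
    proc_t *host = NULL;

    if (pid > 0)                 /* previously attached: reuse it   */
        host = find_proc_by_pid(pid);
    if (!host)                   /* else any running shd process    */
        host = find_any_running_proc();
    if (!host)                   /* else create a fresh svc process */
        host = new_proc();
    return host;
}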
+-
+-void *
+-__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid)
+-{
+- glusterd_svc_proc_t *svc_proc = NULL;
+- struct cds_list_head *svc_procs = NULL;
+- glusterd_svc_t *svc = NULL;
+- pid_t mux_pid = -1;
+- glusterd_conf_t *conf = NULL;
+-
+- conf = THIS->private;
+- if (!conf)
+- return NULL;
+-
+- if (daemon == GD_NODE_SHD) {
+- svc_procs = &conf->shd_procs;
+- if (!svc_procs)
+- return NULL;
+- } /* Can be moved to a switch when mux is implemented for other daemons. */
+-
+- cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+- {
+- cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc)
+- {
+- if (gf_is_service_running(svc->proc.pidfile, &mux_pid)) {
+- if (mux_pid == pid) {
+- /*TODO
+- * inefficient loop, but at the moment, there is only
+- * one shd.
+- */
+- return svc_proc;
+- }
+- }
+- }
+- }
+- return NULL;
+-}
+-
+-static int32_t
+-my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
+-{
+- call_frame_t *frame = v_frame;
+- xlator_t *this = NULL;
+- glusterd_conf_t *conf = NULL;
+-
+- GF_VALIDATE_OR_GOTO("glusterd", frame, out);
+- this = frame->this;
+- GF_VALIDATE_OR_GOTO("glusterd", this, out);
+- conf = this->private;
+- GF_VALIDATE_OR_GOTO(this->name, conf, out);
+-
+- GF_ATOMIC_DEC(conf->blockers);
+-
+- STACK_DESTROY(frame->root);
+-out:
+- return 0;
+-}
+-
+-static int32_t
+-glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+- void *v_frame)
+-{
+- call_frame_t *frame = v_frame;
+- glusterd_volinfo_t *volinfo = NULL;
+- glusterd_shdsvc_t *shd = NULL;
+- glusterd_svc_t *svc = frame->cookie;
+- glusterd_svc_t *parent_svc = NULL;
+- glusterd_svc_proc_t *mux_proc = NULL;
+- glusterd_conf_t *conf = NULL;
+- int *flag = (int *)frame->local;
+- xlator_t *this = THIS;
+- int pid = -1;
+- int ret = -1;
+- gf_getspec_rsp rsp = {
+- 0,
+- };
+-
+- GF_VALIDATE_OR_GOTO("glusterd", this, out);
+- conf = this->private;
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+- GF_VALIDATE_OR_GOTO("glusterd", frame, out);
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+-
+- frame->local = NULL;
+- frame->cookie = NULL;
+-
+- if (!strcmp(svc->name, "glustershd")) {
+- /* Get volinfo->shd from svc object */
+- shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+- if (!shd) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+- "Failed to get shd object "
+- "from shd service");
+- goto out;
+- }
+-
+- /* Get volinfo from shd */
+- volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+- if (!volinfo) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+- "Failed to get volinfo from "
+- "from shd");
+- goto out;
+- }
+- }
+-
+- if (!iov) {
+- gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+- "iov is NULL");
+- ret = -1;
+- goto out;
+- }
+-
+- ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+- if (ret < 0) {
+- gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+- "XDR decoding error");
+- ret = -1;
+- goto out;
+- }
+-
+- if (rsp.op_ret == 0) {
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- if (!strcmp(svc->name, "glustershd")) {
+- mux_proc = svc->svc_proc;
+- if (mux_proc &&
+- !gf_is_service_running(svc->proc.pidfile, &pid)) {
+- /*
+- * When svcs are restarting, there is a chance that the
+- * attached svc might not have updated its pid because
+- * it was still at the connection stage. In that case, we
+- * need to retry the pid file copy.
+- */
+- parent_svc = cds_list_entry(mux_proc->svcs.next,
+- glusterd_svc_t, mux_svc);
+- if (parent_svc)
+- sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
+- }
+- }
+- svc->online = _gf_true;
+- }
+- pthread_mutex_unlock(&conf->attach_lock);
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+- "svc %s of volume %s attached successfully to pid %d", svc->name,
+- volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+- } else {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+- "svc %s of volume %s failed to "
+- "attach to pid %d. Starting a new process",
+- svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+- if (!strcmp(svc->name, "glustershd")) {
+- glusterd_recover_shd_attach_failure(volinfo, svc, *flag);
+- }
+- }
+-out:
+- if (flag) {
+- GF_FREE(flag);
+- }
+- GF_ATOMIC_DEC(conf->blockers);
+- STACK_DESTROY(frame->root);
+- return 0;
+-}
+-
+-extern size_t
+-build_volfile_path(char *volume_id, char *path, size_t path_len,
+- char *trusted_str);
+-
+-int
+-__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+- struct rpc_clnt *rpc, char *volfile_id,
+- int op)
+-{
+- int ret = -1;
+- struct iobuf *iobuf = NULL;
+- struct iobref *iobref = NULL;
+- struct iovec iov = {
+- 0,
+- };
+- char path[PATH_MAX] = {
+- '\0',
+- };
+- struct stat stbuf = {
+- 0,
+- };
+- int32_t spec_fd = -1;
+- size_t file_len = -1;
+- char *volfile_content = NULL;
+- ssize_t req_size = 0;
+- call_frame_t *frame = NULL;
+- gd1_mgmt_brick_op_req brick_req;
+- void *req = &brick_req;
+- void *errlbl = &&err;
+- struct rpc_clnt_connection *conn;
+- xlator_t *this = THIS;
+- glusterd_conf_t *conf = THIS->private;
+- extern struct rpc_clnt_program gd_brick_prog;
+- fop_cbk_fn_t cbkfn = my_callback;
+-
+- if (!rpc) {
+- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PARAM_NULL,
+- "called with null rpc");
+- return -1;
+- }
+-
+- conn = &rpc->conn;
+- if (!conn->connected || conn->disconnected) {
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED,
+- "not connected yet");
+- return -1;
+- }
+-
+- brick_req.op = op;
+- brick_req.name = volfile_id;
+- brick_req.input.input_val = NULL;
+- brick_req.input.input_len = 0;
+-
+- frame = create_frame(this, this->ctx->pool);
+- if (!frame) {
+- goto *errlbl;
+- }
+-
+- if (op == GLUSTERD_SVC_ATTACH) {
+- (void)build_volfile_path(volfile_id, path, sizeof(path), NULL);
+-
+- ret = sys_stat(path, &stbuf);
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+- "Unable to stat %s (%s)", path, strerror(errno));
+- ret = -EINVAL;
+- goto *errlbl;
+- }
+-
+- file_len = stbuf.st_size;
+- volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char);
+- if (!volfile_content) {
+- ret = -ENOMEM;
+- goto *errlbl;
+- }
+- spec_fd = open(path, O_RDONLY);
+- if (spec_fd < 0) {
+- gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL,
+- "failed to read volfile %s", path);
+- ret = -EIO;
+- goto *errlbl;
+- }
+- ret = sys_read(spec_fd, volfile_content, file_len);
+- if (ret == file_len) {
+- brick_req.input.input_val = volfile_content;
+- brick_req.input.input_len = file_len;
+- } else {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+- "read failed on path %s. File size=%" GF_PRI_SIZET
+- "read size=%d",
+- path, file_len, ret);
+- ret = -EIO;
+- goto *errlbl;
+- }
+-
+- frame->cookie = svc;
+- frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int);
+- *((int *)frame->local) = flags;
+- cbkfn = glusterd_svc_attach_cbk;
+- }
+-
+- req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+- iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+- if (!iobuf) {
+- goto *errlbl;
+- }
+- errlbl = &&maybe_free_iobuf;
+-
+- iov.iov_base = iobuf->ptr;
+- iov.iov_len = iobuf_pagesize(iobuf);
+-
+- iobref = iobref_new();
+- if (!iobref) {
+- goto *errlbl;
+- }
+- errlbl = &&free_iobref;
+-
+- iobref_add(iobref, iobuf);
+- /*
+- * Drop our reference to the iobuf. The iobref should already have
+- * one after iobref_add, so when we unref that we'll free the iobuf as
+- * well. This allows us to pass just the iobref as frame->local.
+- */
+- iobuf_unref(iobuf);
+- /* Set the pointer to null so we don't free it on a later error. */
+- iobuf = NULL;
+-
+- /* Create the xdr payload */
+- ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+- if (ret == -1) {
+- goto *errlbl;
+- }
+- iov.iov_len = ret;
+-
+- /* Send the msg */
+- GF_ATOMIC_INC(conf->blockers);
+- ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
+- iobref, frame, NULL, 0, NULL, 0, NULL);
+- GF_FREE(volfile_content);
+- if (spec_fd >= 0)
+- sys_close(spec_fd);
+- return ret;
+-
+-free_iobref:
+- iobref_unref(iobref);
+-maybe_free_iobuf:
+- if (iobuf) {
+- iobuf_unref(iobuf);
+- }
+-err:
+- GF_FREE(volfile_content);
+- if (spec_fd >= 0)
+- sys_close(spec_fd);
+- if (frame)
+- STACK_DESTROY(frame->root);
+- return -1;
+-}
+-
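__glusterd_send_svc_configure_req() above drives its error unwinding with GCC's labels-as-values extension: errlbl is advanced as each resource is acquired, so a single goto *errlbl always releases exactly what is currently held. A toy version of the idiom (a GCC/Clang extension, not ISO C):

#include <stdlib.h>

int acquire_two(void **a, void **b)
{
    void *errlbl = &&err;

    *a = malloc(16);
    if (!*a)
        goto *errlbl;
    errlbl = &&free_a; /* from here on, unwinding must free *a */

    *b = malloc(16);
    if (!*b)
        goto *errlbl;
    return 0;

free_a:
    free(*a);
err:
    return -1;
}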
+-int
+-glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags)
+-{
+- glusterd_conf_t *conf = THIS->private;
+- int ret = -1;
+- int tries;
+- rpc_clnt_t *rpc = NULL;
+-
+- GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+- GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+-
+- gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_ATTACH_INFO,
+- "adding svc %s (volume=%s) to existing "
+- "process with pid %d",
+- svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+-
+- rpc = rpc_clnt_ref(svc->conn.rpc);
+- for (tries = 15; tries > 0; --tries) {
+- if (rpc) {
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- ret = __glusterd_send_svc_configure_req(
+- svc, flags, rpc, svc->proc.volfileid, GLUSTERD_SVC_ATTACH);
+- }
+- pthread_mutex_unlock(&conf->attach_lock);
+- if (!ret) {
+- volinfo->shd.attached = _gf_true;
+- goto out;
+- }
+- }
+- /*
+- * It might not actually be safe to manipulate the lock
+- * like this, but if we don't then the connection can
+- * never actually complete and retries are useless.
+- * Unfortunately, all of the alternatives (e.g. doing
+- * all of this in a separate thread) are much more
+- * complicated and risky.
+- * TBD: see if there's a better way
+- */
+- synclock_unlock(&conf->big_lock);
+- sleep(1);
+- synclock_lock(&conf->big_lock);
+- }
+- ret = -1;
+- gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL,
+- "attach failed for %s(volume=%s)", svc->name, volinfo->volname);
+-out:
+- if (rpc)
+- rpc_clnt_unref(rpc);
+- return ret;
+-}
+-
+-int
+-glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig)
+-{
+- glusterd_conf_t *conf = THIS->private;
+- int ret = -1;
+- int tries;
+- rpc_clnt_t *rpc = NULL;
+-
+- GF_VALIDATE_OR_GOTO(THIS->name, conf, out);
+- GF_VALIDATE_OR_GOTO(THIS->name, svc, out);
+- GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
+-
+- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DETACH_INFO,
+- "removing svc %s (volume=%s) from existing "
+- "process with pid %d",
+- svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+-
+- rpc = rpc_clnt_ref(svc->conn.rpc);
+- for (tries = 15; tries > 0; --tries) {
+- if (rpc) {
+- /* For detach there are no flags, and we are not using sig. */
+- pthread_mutex_lock(&conf->attach_lock);
+- {
+- ret = __glusterd_send_svc_configure_req(svc, 0, svc->conn.rpc,
+- svc->proc.volfileid,
+- GLUSTERD_SVC_DETACH);
+- }
+- pthread_mutex_unlock(&conf->attach_lock);
+- if (!ret) {
+- goto out;
+- }
+- }
+- /*
+- * It might not actually be safe to manipulate the lock
+- * like this, but if we don't then the connection can
+- * never actually complete and retries are useless.
+- * Unfortunately, all of the alternatives (e.g. doing
+- * all of this in a separate thread) are much more
+- * complicated and risky.
+- * TBD: see if there's a better way
+- */
+- synclock_unlock(&conf->big_lock);
+- sleep(1);
+- synclock_lock(&conf->big_lock);
+- }
+- ret = -1;
+- gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_DETACH_FAIL,
+- "detach failed for %s(volume=%s)", svc->name, volinfo->volname);
+-out:
+- if (rpc)
+- rpc_clnt_unref(rpc);
+- return ret;
+-}
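Both attach and detach above retry up to 15 times, dropping the coarse big_lock across each sleep so the RPC connection they are waiting on can actually make progress. A compilable sketch of that loop; the lock and send helpers are hypothetical stand-ins for synclock_lock/synclock_unlock and __glusterd_send_svc_configure_req():

#include <unistd.h>

static void big_lock(void)   { /* acquire the coarse glusterd lock */ }
static void big_unlock(void) { /* release it */ }
static int try_send_attach(void) { return -1; /* pretend not ready */ }

int attach_with_retries(void)
{
    for (int tries = 15; tries > 0; --tries) {
        if (try_send_attach() == 0)
            return 0;
        /* Holding the lock across the sleep would stall the very
         * connection we are waiting for, so release it first. */
        big_unlock();
        sleep(1);
        big_lock();
    }
    return -1; /* give up after 15 one-second attempts */
}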
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
+index 5def246..cc98e78 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
+@@ -16,10 +16,10 @@
+ #include "glusterd-volgen.h"
+
+ int
+-glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo);
++glusterd_svcs_reconfigure();
+
+ int
+-glusterd_svcs_stop(glusterd_volinfo_t *vol);
++glusterd_svcs_stop();
+
+ int
+ glusterd_svcs_manager(glusterd_volinfo_t *volinfo);
+@@ -41,41 +41,5 @@ int
+ glusterd_svc_check_tier_topology_identical(char *svc_name,
+ glusterd_volinfo_t *volinfo,
+ gf_boolean_t *identical);
+-int
+-glusterd_volume_svc_check_volfile_identical(char *svc_name, dict_t *mode_dict,
+- glusterd_volinfo_t *volinfo,
+- glusterd_vol_graph_builder_t,
+- gf_boolean_t *identical);
+-int
+-glusterd_volume_svc_check_topology_identical(char *svc_name, dict_t *mode_dict,
+- glusterd_volinfo_t *volinfo,
+- glusterd_vol_graph_builder_t,
+- gf_boolean_t *identical);
+-void
+-glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
+- char *volfile, size_t len);
+-void *
+-__gf_find_compatible_svc(gd_node_type daemon);
+-
+-glusterd_svc_proc_t *
+-glusterd_svcprocess_new();
+-
+-int
+-glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc);
+-
+-void *
+-__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid);
+-
+-int
+-glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo,
+- int flags);
+-
+-int
+-glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig);
+-
+-int
+-__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flag,
+- struct rpc_clnt *rpc, char *volfile_id,
+- int op);
+
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+index f32dafc..4cd4cea 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+@@ -18,7 +18,6 @@
+ #include "glusterd-conn-mgmt.h"
+ #include "glusterd-messages.h"
+ #include <glusterfs/syscall.h>
+-#include "glusterd-shd-svc-helper.h"
+
+ int
+ glusterd_svc_create_rundir(char *rundir)
+@@ -168,75 +167,68 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
+ GF_ASSERT(this);
+
+ priv = this->private;
+- GF_VALIDATE_OR_GOTO("glusterd", priv, out);
+- GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+-
+- pthread_mutex_lock(&priv->attach_lock);
+- {
+- if (glusterd_proc_is_running(&(svc->proc))) {
+- ret = 0;
+- goto unlock;
+- }
++ GF_ASSERT(priv);
+
+- ret = sys_access(svc->proc.volfile, F_OK);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND,
+- "Volfile %s is not present", svc->proc.volfile);
+- goto unlock;
+- }
++ if (glusterd_proc_is_running(&(svc->proc))) {
++ ret = 0;
++ goto out;
++ }
+
+- runinit(&runner);
++ ret = sys_access(svc->proc.volfile, F_OK);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND,
++ "Volfile %s is not present", svc->proc.volfile);
++ goto out;
++ }
+
+- if (this->ctx->cmd_args.valgrind) {
+- len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
+- svc->proc.logfile, svc->name);
+- if ((len < 0) || (len >= PATH_MAX)) {
+- ret = -1;
+- goto unlock;
+- }
++ runinit(&runner);
+
+- runner_add_args(&runner, "valgrind", "--leak-check=full",
+- "--trace-children=yes", "--track-origins=yes",
+- NULL);
+- runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
++ if (this->ctx->cmd_args.valgrind) {
++ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
++ svc->proc.logfile, svc->name);
++ if ((len < 0) || (len >= PATH_MAX)) {
++ ret = -1;
++ goto out;
+ }
+
+- runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s",
+- svc->proc.volfileserver, "--volfile-id",
+- svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
+- svc->proc.logfile, "-S", svc->conn.sockpath, NULL);
++ runner_add_args(&runner, "valgrind", "--leak-check=full",
++ "--trace-children=yes", "--track-origins=yes", NULL);
++ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
++ }
+
+- if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
+- SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
+- &localtime_logging) == 0) {
+- if (strcmp(localtime_logging, "enable") == 0)
+- runner_add_arg(&runner, "--localtime-logging");
+- }
+- if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY,
+- SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY),
+- &log_level) == 0) {
+- snprintf(daemon_log_level, 30, "--log-level=%s", log_level);
+- runner_add_arg(&runner, daemon_log_level);
+- }
++ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s",
++ svc->proc.volfileserver, "--volfile-id",
++ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
++ svc->proc.logfile, "-S", svc->conn.sockpath, NULL);
++
++ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
++ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
++ &localtime_logging) == 0) {
++ if (strcmp(localtime_logging, "enable") == 0)
++ runner_add_arg(&runner, "--localtime-logging");
++ }
++ if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY,
++ SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), &log_level) == 0) {
++ snprintf(daemon_log_level, 30, "--log-level=%s", log_level);
++ runner_add_arg(&runner, daemon_log_level);
++ }
+
+- if (cmdline)
+- dict_foreach(cmdline, svc_add_args, (void *)&runner);
++ if (cmdline)
++ dict_foreach(cmdline, svc_add_args, (void *)&runner);
+
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS,
+- "Starting %s service", svc->name);
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS,
++ "Starting %s service", svc->name);
+
+- if (flags == PROC_START_NO_WAIT) {
+- ret = runner_run_nowait(&runner);
+- } else {
+- synclock_unlock(&priv->big_lock);
+- {
+- ret = runner_run(&runner);
+- }
+- synclock_lock(&priv->big_lock);
++ if (flags == PROC_START_NO_WAIT) {
++ ret = runner_run_nowait(&runner);
++ } else {
++ synclock_unlock(&priv->big_lock);
++ {
++ ret = runner_run(&runner);
+ }
++ synclock_lock(&priv->big_lock);
+ }
+-unlock:
+- pthread_mutex_unlock(&priv->attach_lock);
++
+ out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+@@ -289,8 +281,7 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile,
+
+ glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir));
+
+- if (!strcmp(server, "quotad"))
+- /*quotad has different volfile name*/
++ if (!strcmp(server, "quotad")) /*quotad has different volfile name*/
+ snprintf(volfile, len, "%s/%s.vol", dir, server);
+ else
+ snprintf(volfile, len, "%s/%s-server.vol", dir, server);
+@@ -375,138 +366,3 @@ glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event)
+
+ return ret;
+ }
+-
+-void
+-glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
+- char *volfile, size_t len)
+-{
+- GF_ASSERT(len == PATH_MAX);
+-
+- if (!strcmp(server, "glustershd")) {
+- glusterd_svc_build_shd_volfile_path(vol, volfile, len);
+- }
+-}
+-
+-int
+-glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *mux_proc,
+- rpc_clnt_event_t event)
+-{
+- int ret = 0;
+- glusterd_svc_t *svc = NULL;
+- glusterd_svc_t *tmp = NULL;
+- xlator_t *this = NULL;
+- gf_boolean_t need_logging = _gf_false;
+-
+- this = THIS;
+- GF_ASSERT(this);
+-
+- if (!mux_proc) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+- "Failed to get the svc proc data");
+- return -1;
+- }
+-
+- /* Currently this function is used only for the shd svc; if it is
+- * used for another svc, change the glustershd reference. We can get
+- * the svc name from any of the attached svcs.
+- */
+- switch (event) {
+- case RPC_CLNT_CONNECT:
+- gf_msg_debug(this->name, 0,
+- "glustershd has connected with glusterd.");
+- gf_event(EVENT_SVC_CONNECTED, "svc_name=glustershd");
+- cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
+- {
+- if (svc->online)
+- continue;
+- svc->online = _gf_true;
+- }
+- break;
+-
+- case RPC_CLNT_DISCONNECT:
+- cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
+- {
+- if (svc->online) {
+- if (!need_logging)
+- need_logging = _gf_true;
+- svc->online = _gf_false;
+- }
+- }
+- if (need_logging) {
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED,
+- "glustershd has disconnected from glusterd.");
+- gf_event(EVENT_SVC_DISCONNECTED, "svc_name=glustershd");
+- }
+- break;
+-
+- default:
+- gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+- break;
+- }
+-
+- return ret;
+-}
+-
+-int
+-glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
+- char *sockpath, int frame_timeout,
+- glusterd_muxsvc_conn_notify_t notify)
+-{
+- int ret = -1;
+- dict_t *options = NULL;
+- struct rpc_clnt *rpc = NULL;
+- xlator_t *this = THIS;
+- glusterd_svc_t *svc = NULL;
+-
+- options = dict_new();
+- if (!this || !options)
+- goto out;
+-
+- svc = cds_list_entry(conn, glusterd_svc_t, conn);
+- if (!svc) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+- "Failed to get the service");
+- goto out;
+- }
+-
+- ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout);
+- if (ret)
+- goto out;
+-
+- ret = dict_set_int32n(options, "transport.socket.ignore-enoent",
+- SLEN("transport.socket.ignore-enoent"), 1);
+- if (ret)
+- goto out;
+-
+- /* @options is free'd by rpc_transport when destroyed */
+- rpc = rpc_clnt_new(options, this, (char *)svc->name, 16);
+- if (!rpc) {
+- ret = -1;
+- goto out;
+- }
+-
+- ret = rpc_clnt_register_notify(rpc, glusterd_muxsvc_conn_common_notify,
+- mux_proc);
+- if (ret)
+- goto out;
+-
+- ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
+- if (ret < 0)
+- goto out;
+- else
+- ret = 0;
+-
+- conn->frame_timeout = frame_timeout;
+- conn->rpc = rpc;
+- mux_proc->notify = notify;
+-out:
+- if (options)
+- dict_unref(options);
+- if (ret) {
+- if (rpc) {
+- rpc_clnt_unref(rpc);
+- rpc = NULL;
+- }
+- }
+- return ret;
+-}
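glusterd_svc_start() assembles the daemon command line through the runner_* framework, optionally prefixing a valgrind wrapper when glusterd itself was started under valgrind. A rough equivalent that just renders the argv into one string; paths and flags are illustrative, and the truncation check mirrors the surrounding code:

#include <stdio.h>

/* Returns 0 on success, -1 on truncation or error. */
int build_cmdline(char *buf, size_t len, const char *volfileserver,
                  const char *volfileid, const char *pidfile,
                  const char *logfile, const char *sockpath,
                  int use_valgrind)
{
    int n = snprintf(buf, len,
                     "%s/usr/sbin/glusterfs -s %s --volfile-id %s"
                     " -p %s -l %s -S %s",
                     use_valgrind ? "valgrind --leak-check=full " : "",
                     volfileserver, volfileid, pidfile, logfile,
                     sockpath);

    return (n < 0 || (size_t)n >= len) ? -1 : 0;
}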
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+index fbc5225..c850bfd 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+@@ -13,12 +13,9 @@
+
+ #include "glusterd-proc-mgmt.h"
+ #include "glusterd-conn-mgmt.h"
+-#include "glusterd-rcu.h"
+
+ struct glusterd_svc_;
+-
+ typedef struct glusterd_svc_ glusterd_svc_t;
+-typedef struct glusterd_svc_proc_ glusterd_svc_proc_t;
+
+ typedef void (*glusterd_svc_build_t)(glusterd_svc_t *svc);
+
+@@ -28,17 +25,6 @@ typedef int (*glusterd_svc_start_t)(glusterd_svc_t *svc, int flags);
+ typedef int (*glusterd_svc_stop_t)(glusterd_svc_t *svc, int sig);
+ typedef int (*glusterd_svc_reconfigure_t)(void *data);
+
+-typedef int (*glusterd_muxsvc_conn_notify_t)(glusterd_svc_proc_t *mux_proc,
+- rpc_clnt_event_t event);
+-
+-struct glusterd_svc_proc_ {
+- struct cds_list_head svc_proc_list;
+- struct cds_list_head svcs;
+- glusterd_muxsvc_conn_notify_t notify;
+- rpc_clnt_t *rpc;
+- void *data;
+-};
+-
+ struct glusterd_svc_ {
+ char name[NAME_MAX];
+ glusterd_conn_t conn;
+@@ -49,8 +35,6 @@ struct glusterd_svc_ {
+ gf_boolean_t online;
+ gf_boolean_t inited;
+ glusterd_svc_reconfigure_t reconfigure;
+- glusterd_svc_proc_t *svc_proc;
+- struct cds_list_head mux_svc;
+ };
+
+ int
+@@ -85,15 +69,4 @@ glusterd_svc_reconfigure(int (*create_volfile)());
+ int
+ glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event);
+
+-int
+-glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *conn,
+- rpc_clnt_event_t event);
+-
+-int
+-glusterd_proc_get_pid(glusterd_proc_t *proc);
+-
+-int
+-glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
+- char *sockpath, int frame_timeout,
+- glusterd_muxsvc_conn_notify_t notify);
+ #endif
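glusterd_svc_t is effectively a small vtable: each glusterd_*svc_build() binds per-service callbacks onto the common struct, and generic code later calls svc->start and svc->stop without knowing the concrete service. A self-contained miniature of the style, with hypothetical names:

#include <stdio.h>

typedef struct svc svc_t;
typedef int (*svc_start_t)(svc_t *svc, int flags);
typedef int (*svc_stop_t)(svc_t *svc, int sig);

struct svc {
    const char *name;
    svc_start_t start;
    svc_stop_t stop;
};

static int generic_stop(svc_t *svc, int sig)
{
    printf("stopping %s with signal %d\n", svc->name, sig);
    return 0;
}

static int shd_start(svc_t *svc, int flags)
{
    printf("starting %s (flags=%d)\n", svc->name, flags);
    return 0;
}

/* analogous to glusterd_shdsvc_build() binding the callbacks */
void shd_build(svc_t *svc)
{
    svc->name = "glustershd";
    svc->start = shd_start;
    svc->stop = generic_stop; /* shared implementation */
}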
+diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c
+index 23a9592..4dc0d44 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-tier.c
++++ b/xlators/mgmt/glusterd/src/glusterd-tier.c
+@@ -27,7 +27,6 @@
+ #include "glusterd-messages.h"
+ #include "glusterd-mgmt.h"
+ #include "glusterd-syncop.h"
+-#include "glusterd-shd-svc-helper.h"
+
+ #include <sys/wait.h>
+ #include <dlfcn.h>
+@@ -616,7 +615,7 @@ glusterd_op_remove_tier_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+
+ if (cmd == GF_DEFRAG_CMD_DETACH_START &&
+ volinfo->status == GLUSTERD_STATUS_STARTED) {
+- ret = glusterd_svcs_reconfigure(volinfo);
++ ret = glusterd_svcs_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL,
+ "Unable to reconfigure NFS-Server");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
+index ab463f1..04ceec5 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
+@@ -83,6 +83,7 @@ glusterd_tierdsvc_init(void *data)
+ goto out;
+
+ notify = glusterd_svc_common_rpc_notify;
++ glusterd_store_perform_node_state_store(volinfo);
+
+ volinfo->type = GF_CLUSTER_TYPE_TIER;
+
+@@ -394,7 +395,6 @@ int
+ glusterd_tierdsvc_restart()
+ {
+ glusterd_volinfo_t *volinfo = NULL;
+- glusterd_volinfo_t *tmp = NULL;
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+@@ -405,7 +405,7 @@ glusterd_tierdsvc_restart()
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+- cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
++ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ /* Start per volume tierd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED &&
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 4525ec7..2aa975b 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -61,7 +61,6 @@
+ #include "glusterd-server-quorum.h"
+ #include <glusterfs/quota-common-utils.h>
+ #include <glusterfs/common-utils.h>
+-#include "glusterd-shd-svc-helper.h"
+
+ #include "xdr-generic.h"
+ #include <sys/resource.h>
+@@ -625,17 +624,13 @@ glusterd_volinfo_t *
+ glusterd_volinfo_unref(glusterd_volinfo_t *volinfo)
+ {
+ int refcnt = -1;
+- glusterd_conf_t *conf = THIS->private;
+
+- pthread_mutex_lock(&conf->volume_lock);
++ pthread_mutex_lock(&volinfo->reflock);
+ {
+- pthread_mutex_lock(&volinfo->reflock);
+- {
+- refcnt = --volinfo->refcnt;
+- }
+- pthread_mutex_unlock(&volinfo->reflock);
++ refcnt = --volinfo->refcnt;
+ }
+- pthread_mutex_unlock(&conf->volume_lock);
++ pthread_mutex_unlock(&volinfo->reflock);
++
+ if (!refcnt) {
+ glusterd_volinfo_delete(volinfo);
+ return NULL;
+@@ -707,7 +702,6 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo)
+ glusterd_snapdsvc_build(&new_volinfo->snapd.svc);
+ glusterd_tierdsvc_build(&new_volinfo->tierd.svc);
+ glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc);
+- glusterd_shdsvc_build(&new_volinfo->shd.svc);
+
+ pthread_mutex_init(&new_volinfo->reflock, NULL);
+ *volinfo = glusterd_volinfo_ref(new_volinfo);
+@@ -1073,11 +1067,11 @@ glusterd_volinfo_delete(glusterd_volinfo_t *volinfo)
+ gf_store_handle_destroy(volinfo->snapd.handle);
+
+ glusterd_auth_cleanup(volinfo);
+- glusterd_shd_svcproc_cleanup(&volinfo->shd);
+
+ pthread_mutex_destroy(&volinfo->reflock);
+ GF_FREE(volinfo);
+ ret = 0;
++
+ out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
+@@ -3929,7 +3923,6 @@ glusterd_spawn_daemons(void *opaque)
+ ret = glusterd_snapdsvc_restart();
+ ret = glusterd_tierdsvc_restart();
+ ret = glusterd_gfproxydsvc_restart();
+- ret = glusterd_shdsvc_restart();
+ return ret;
+ }
+
+@@ -4880,9 +4873,6 @@ glusterd_delete_stale_volume(glusterd_volinfo_t *stale_volinfo,
+ svc = &(stale_volinfo->snapd.svc);
+ (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT);
+ }
+- svc = &(stale_volinfo->shd.svc);
+- (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT);
+-
+ (void)glusterd_volinfo_remove(stale_volinfo);
+
+ return 0;
+@@ -4997,15 +4987,6 @@ glusterd_import_friend_volume(dict_t *peer_data, int count)
+ glusterd_volinfo_unref(old_volinfo);
+ }
+
+- ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
+- "Failed to store "
+- "volinfo for volume %s",
+- new_volinfo->volname);
+- goto out;
+- }
+-
+ if (glusterd_is_volume_started(new_volinfo)) {
+ (void)glusterd_start_bricks(new_volinfo);
+ if (glusterd_is_snapd_enabled(new_volinfo)) {
+@@ -5014,10 +4995,15 @@ glusterd_import_friend_volume(dict_t *peer_data, int count)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+ }
+ }
+- svc = &(new_volinfo->shd.svc);
+- if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) {
+- gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+- }
++ }
++
++ ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
++ "Failed to store "
++ "volinfo for volume %s",
++ new_volinfo->volname);
++ goto out;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services(new_volinfo);
+@@ -5521,7 +5507,9 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count,
+ glusterd_svc_build_pidfile_path(server, priv->rundir, pidfile,
+ sizeof(pidfile));
+
+- if (strcmp(server, priv->nfs_svc.name) == 0)
++ if (strcmp(server, priv->shd_svc.name) == 0)
++ svc = &(priv->shd_svc);
++ else if (strcmp(server, priv->nfs_svc.name) == 0)
+ svc = &(priv->nfs_svc);
+ else if (strcmp(server, priv->quotad_svc.name) == 0)
+ svc = &(priv->quotad_svc);
+@@ -5552,6 +5540,9 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count,
+ if (!strcmp(server, priv->nfs_svc.name))
+ ret = dict_set_nstrn(dict, key, keylen, "NFS Server",
+ SLEN("NFS Server"));
++ else if (!strcmp(server, priv->shd_svc.name))
++ ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon",
++ SLEN("Self-heal Daemon"));
+ else if (!strcmp(server, priv->quotad_svc.name))
+ ret = dict_set_nstrn(dict, key, keylen, "Quota Daemon",
+ SLEN("Quota Daemon"));
+@@ -9115,21 +9106,6 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid)
+ "to stop snapd daemon service");
+ }
+ }
+-
+- if (glusterd_is_shd_compatible_volume(volinfo)) {
+- /*
+- * Sending stop request for all volumes. So it is fine
+- * to send stop for mux shd
+- */
+- svc = &(volinfo->shd.svc);
+- ret = svc->stop(svc, SIGTERM);
+- if (ret) {
+- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+- "Failed "
+- "to stop shd daemon service");
+- }
+- }
+-
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ svc = &(volinfo->tierd.svc);
+ ret = svc->stop(svc, SIGTERM);
+@@ -9155,7 +9131,7 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid)
+ }
+
+ /* Reconfigure all daemon services upon peer detach */
+- ret = glusterd_svcs_reconfigure(NULL);
++ ret = glusterd_svcs_reconfigure();
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "Failed to reconfigure all daemon services.");
+@@ -14746,74 +14722,3 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo)
+ return _gf_true;
+ return _gf_false;
+ }
+-
+-int32_t
+-glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
+- int32_t count)
+-{
+- int ret = -1;
+- int32_t pid = -1;
+- int32_t brick_online = -1;
+- char key[64] = {0};
+- int keylen;
+- char *pidfile = NULL;
+- xlator_t *this = NULL;
+- char *uuid_str = NULL;
+-
+- this = THIS;
+- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+-
+- GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+- GF_VALIDATE_OR_GOTO(this->name, dict, out);
+-
+- keylen = snprintf(key, sizeof(key), "brick%d.hostname", count);
+- ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon",
+- SLEN("Self-heal Daemon"));
+- if (ret)
+- goto out;
+-
+- keylen = snprintf(key, sizeof(key), "brick%d.path", count);
+- uuid_str = gf_strdup(uuid_utoa(MY_UUID));
+- if (!uuid_str) {
+- ret = -1;
+- goto out;
+- }
+- ret = dict_set_dynstrn(dict, key, keylen, uuid_str);
+- if (ret)
+- goto out;
+- uuid_str = NULL;
+-
+- /* shd doesn't have a port. but the cli needs a port key with
+- * a zero value to parse.
+- * */
+-
+- keylen = snprintf(key, sizeof(key), "brick%d.port", count);
+- ret = dict_set_int32n(dict, key, keylen, 0);
+- if (ret)
+- goto out;
+-
+- pidfile = volinfo->shd.svc.proc.pidfile;
+-
+- brick_online = gf_is_service_running(pidfile, &pid);
+-
+- /* If shd is not running, then don't print the pid */
+- if (!brick_online)
+- pid = -1;
+- keylen = snprintf(key, sizeof(key), "brick%d.pid", count);
+- ret = dict_set_int32n(dict, key, keylen, pid);
+- if (ret)
+- goto out;
+-
+- keylen = snprintf(key, sizeof(key), "brick%d.status", count);
+- ret = dict_set_int32n(dict, key, keylen, brick_online);
+-
+-out:
+- if (uuid_str)
+- GF_FREE(uuid_str);
+- if (ret)
+- gf_msg(this ? this->name : "glusterd", GF_LOG_ERROR, 0,
+- GD_MSG_DICT_SET_FAILED,
+- "Returning %d. adding values to dict failed", ret);
+-
+- return ret;
+-}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index 5c6a453..ead16b2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -881,8 +881,4 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo);
+
+ char *
+ search_brick_path_from_proc(pid_t brick_pid, char *brickpath);
+-
+-int32_t
+-glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
+- int32_t count);
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 8b58d40..5e0214e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -36,7 +36,6 @@
+ #include "glusterd-svc-mgmt.h"
+ #include "glusterd-svc-helper.h"
+ #include "glusterd-snapd-svc-helper.h"
+-#include "glusterd-shd-svc-helper.h"
+ #include "glusterd-gfproxyd-svc-helper.h"
+
+ struct gd_validate_reconf_opts {
+@@ -4865,7 +4864,7 @@ volgen_get_shd_key(int type)
+ static int
+ volgen_set_shd_key_enable(dict_t *set_dict, const int type)
+ {
+- int ret = 0;
++ int ret = -1;
+
+ switch (type) {
+ case GF_CLUSTER_TYPE_REPLICATE:
+@@ -5156,15 +5155,24 @@ out:
+ static int
+ build_shd_volume_graph(xlator_t *this, volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, dict_t *mod_dict,
+- dict_t *set_dict, gf_boolean_t graph_check)
++ dict_t *set_dict, gf_boolean_t graph_check,
++ gf_boolean_t *valid_config)
+ {
+ volgen_graph_t cgraph = {0};
+ int ret = 0;
+ int clusters = -1;
+
++ if (!graph_check && (volinfo->status != GLUSTERD_STATUS_STARTED))
++ goto out;
++
+ if (!glusterd_is_shd_compatible_volume(volinfo))
+ goto out;
+
++    /* The shd graph is valid only when at least one
++ * replica/disperse volume is present
++ */
++ *valid_config = _gf_true;
++
+ ret = prepare_shd_volume_options(volinfo, mod_dict, set_dict);
+ if (ret)
+ goto out;
+@@ -5194,16 +5202,19 @@ out:
+ }
+
+ int
+-build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph,
+- dict_t *mod_dict)
++build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict)
+ {
++ glusterd_volinfo_t *voliter = NULL;
+ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
+ dict_t *set_dict = NULL;
+ int ret = 0;
++ gf_boolean_t valid_config = _gf_false;
+ xlator_t *iostxl = NULL;
+ gf_boolean_t graph_check = _gf_false;
+
+ this = THIS;
++ priv = this->private;
+
+ set_dict = dict_new();
+ if (!set_dict) {
+@@ -5213,18 +5224,26 @@ build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph,
+
+ if (mod_dict)
+ graph_check = dict_get_str_boolean(mod_dict, "graph-check", 0);
+- iostxl = volgen_graph_add_as(graph, "debug/io-stats", volinfo->volname);
++ iostxl = volgen_graph_add_as(graph, "debug/io-stats", "glustershd");
+ if (!iostxl) {
+ ret = -1;
+ goto out;
+ }
+
+- ret = build_shd_volume_graph(this, graph, volinfo, mod_dict, set_dict,
+- graph_check);
++ cds_list_for_each_entry(voliter, &priv->volumes, vol_list)
++ {
++ ret = build_shd_volume_graph(this, graph, voliter, mod_dict, set_dict,
++ graph_check, &valid_config);
++ ret = dict_reset(set_dict);
++ if (ret)
++ goto out;
++ }
+
+ out:
+ if (set_dict)
+ dict_unref(set_dict);
++ if (!valid_config)
++ ret = -EINVAL;
+ return ret;
+ }
+
+@@ -6541,10 +6560,6 @@ glusterd_create_volfiles(glusterd_volinfo_t *volinfo)
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "Could not generate gfproxy volfiles");
+
+- ret = glusterd_shdsvc_create_volfile(volinfo);
+- if (ret)
+- gf_log(this->name, GF_LOG_ERROR, "Could not generate shd volfiles");
+-
+ dict_del_sizen(volinfo->dict, "skip-CLIOT");
+
+ out:
+@@ -6625,7 +6640,7 @@ validate_shdopts(glusterd_volinfo_t *volinfo, dict_t *val_dict,
+ ret = dict_set_int32_sizen(val_dict, "graph-check", 1);
+ if (ret)
+ goto out;
+- ret = build_shd_graph(volinfo, &graph, val_dict);
++ ret = build_shd_graph(&graph, val_dict);
+ if (!ret)
+ ret = graph_reconf_validateopt(&graph.graph, op_errstr);
+
+@@ -7002,22 +7017,3 @@ gd_is_boolean_option(char *key)
+
+ return _gf_false;
+ }
+-
+-int
+-glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename,
+- dict_t *mode_dict)
+-{
+- int ret = -1;
+- volgen_graph_t graph = {
+- 0,
+- };
+-
+- graph.type = GF_SHD;
+- ret = build_shd_graph(volinfo, &graph, mode_dict);
+- if (!ret)
+- ret = volgen_write_volfile(&graph, filename);
+-
+- volgen_graph_free(&graph);
+-
+- return ret;
+-}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h
+index 897d8fa..f9fc068 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.h
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h
+@@ -66,7 +66,6 @@ typedef enum {
+ GF_REBALANCED = 1,
+ GF_QUOTAD,
+ GF_SNAPD,
+- GF_SHD,
+ } glusterd_graph_type_t;
+
+ struct volgen_graph {
+@@ -78,8 +77,6 @@ typedef struct volgen_graph volgen_graph_t;
+
+ typedef int (*glusterd_graph_builder_t)(volgen_graph_t *graph,
+ dict_t *mod_dict);
+-typedef int (*glusterd_vol_graph_builder_t)(glusterd_volinfo_t *,
+- char *filename, dict_t *mod_dict);
+
+ #define COMPLETE_OPTION(key, completion, ret) \
+ do { \
+@@ -204,8 +201,7 @@ void
+ glusterd_get_shd_filepath(char *filename);
+
+ int
+-build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph,
+- dict_t *mod_dict);
++build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict);
+
+ int
+ build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict);
+@@ -317,9 +313,4 @@ glusterd_generate_gfproxyd_volfile(glusterd_volinfo_t *volinfo);
+
+ int
+ glusterd_build_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *filename);
+-
+-int
+-glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename,
+- dict_t *mode_dict);
+-
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 4c3ad50..1ea8ba6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -1940,7 +1940,7 @@ static int
+ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ dict_t *dict, char **op_errstr)
+ {
+- glusterd_svc_t *svc = NULL;
++ glusterd_conf_t *priv = NULL;
+ gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ char msg[2408] = {
+@@ -1950,6 +1950,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ "Self-heal daemon is not running. "
+ "Check self-heal daemon log file.";
+
++ priv = this->private;
+ ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"),
+ (int32_t *)&heal_op);
+ if (ret) {
+@@ -1958,7 +1959,6 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ goto out;
+ }
+
+- svc = &(volinfo->shd.svc);
+ switch (heal_op) {
+ case GF_SHD_OP_INVALID:
+ case GF_SHD_OP_HEAL_ENABLE: /* This op should be handled in volume-set*/
+@@ -1988,7 +1988,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ goto out;
+ }
+
+- if (!svc->online) {
++ if (!priv->shd_svc.online) {
+ ret = -1;
+ *op_errstr = gf_strdup(offline_msg);
+ goto out;
+@@ -2009,7 +2009,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ goto out;
+ }
+
+- if (!svc->online) {
++ if (!priv->shd_svc.online) {
+ ret = -1;
+ *op_errstr = gf_strdup(offline_msg);
+ goto out;
+diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
+index c0973cb..d360312 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.c
++++ b/xlators/mgmt/glusterd/src/glusterd.c
+@@ -1537,6 +1537,14 @@ init(xlator_t *this)
+ exit(1);
+ }
+
++ ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_GLUSTERSHD_RUN_DIR);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED,
++ "Unable to create "
++ "glustershd running directory");
++ exit(1);
++ }
++
+ ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_NFS_RUN_DIR);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED,
+@@ -1811,9 +1819,6 @@ init(xlator_t *this)
+ CDS_INIT_LIST_HEAD(&conf->snapshots);
+ CDS_INIT_LIST_HEAD(&conf->missed_snaps_list);
+ CDS_INIT_LIST_HEAD(&conf->brick_procs);
+- CDS_INIT_LIST_HEAD(&conf->shd_procs);
+- pthread_mutex_init(&conf->attach_lock, NULL);
+- pthread_mutex_init(&conf->volume_lock, NULL);
+
+ pthread_mutex_init(&conf->mutex, NULL);
+ conf->rpc = rpc;
+@@ -1894,6 +1899,7 @@ init(xlator_t *this)
+ glusterd_mgmt_v3_lock_timer_init();
+ glusterd_txn_opinfo_dict_init();
+
++ glusterd_shdsvc_build(&conf->shd_svc);
+ glusterd_nfssvc_build(&conf->nfs_svc);
+ glusterd_quotadsvc_build(&conf->quotad_svc);
+ glusterd_bitdsvc_build(&conf->bitd_svc);
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 0fbc9dd..2be005c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -28,7 +28,6 @@
+ #include "glusterd-sm.h"
+ #include "glusterd-snapd-svc.h"
+ #include "glusterd-tierd-svc.h"
+-#include "glusterd-shd-svc.h"
+ #include "glusterd-bitd-svc.h"
+ #include "glusterd1-xdr.h"
+ #include "protocol-common.h"
+@@ -173,6 +172,7 @@ typedef struct {
+ char workdir[VALID_GLUSTERD_PATHMAX];
+ char rundir[VALID_GLUSTERD_PATHMAX];
+ rpcsvc_t *rpc;
++ glusterd_svc_t shd_svc;
+ glusterd_svc_t nfs_svc;
+ glusterd_svc_t bitd_svc;
+ glusterd_svc_t scrub_svc;
+@@ -181,7 +181,6 @@ typedef struct {
+ struct cds_list_head volumes;
+ struct cds_list_head snapshots; /*List of snap volumes */
+ struct cds_list_head brick_procs; /* List of brick processes */
+- struct cds_list_head shd_procs; /* List of shd processes */
+ pthread_mutex_t xprt_lock;
+ struct list_head xprt_list;
+ pthread_mutex_t import_volumes;
+@@ -222,11 +221,6 @@ typedef struct {
+ gf_atomic_t blockers;
+ uint32_t mgmt_v3_lock_timeout;
+ gf_boolean_t restart_bricks;
+- pthread_mutex_t attach_lock; /* Lock can be per process or a common one */
+- pthread_mutex_t volume_lock; /* We release the big_lock from lot of places
+- which might lead the modification of volinfo
+- list.
+- */
+ gf_atomic_t thread_count;
+ } glusterd_conf_t;
+
+@@ -519,7 +513,6 @@ struct glusterd_volinfo_ {
+
+ glusterd_snapdsvc_t snapd;
+ glusterd_tierdsvc_t tierd;
+- glusterd_shdsvc_t shd;
+ glusterd_gfproxydsvc_t gfproxyd;
+ int32_t quota_xattr_version;
+ gf_boolean_t stage_deleted; /* volume has passed staging
+@@ -646,6 +639,7 @@ typedef enum {
+ #define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps"
+ #define GLUSTERD_BITD_RUN_DIR "/bitd"
+ #define GLUSTERD_SCRUB_RUN_DIR "/scrub"
++#define GLUSTERD_GLUSTERSHD_RUN_DIR "/glustershd"
+ #define GLUSTERD_NFS_RUN_DIR "/nfs"
+ #define GLUSTERD_QUOTAD_RUN_DIR "/quotad"
+ #define GLUSTER_SHARED_STORAGE_BRICK_DIR GLUSTERD_DEFAULT_WORKDIR "/ss_brick"
+@@ -701,26 +695,6 @@ typedef ssize_t (*gd_serialize_t)(struct iovec outmsg, void *args);
+ } \
+ } while (0)
+
+-#define GLUSTERD_GET_SHD_RUNDIR(path, volinfo, priv) \
+- do { \
+- int32_t _shd_dir_len; \
+- _shd_dir_len = snprintf(path, PATH_MAX, "%s/shd/%s", priv->rundir, \
+- volinfo->volname); \
+- if ((_shd_dir_len < 0) || (_shd_dir_len >= PATH_MAX)) { \
+- path[0] = 0; \
+- } \
+- } while (0)
+-
+-#define GLUSTERD_GET_SHD_PID_FILE(path, volinfo, priv) \
+- do { \
+- int32_t _shd_pid_len; \
+- _shd_pid_len = snprintf(path, PATH_MAX, "%s/shd/%s-shd.pid", \
+- priv->rundir, volinfo->volname); \
+- if ((_shd_pid_len < 0) || (_shd_pid_len >= PATH_MAX)) { \
+- path[0] = 0; \
+- } \
+- } while (0)
+-
+ #define GLUSTERD_GET_VOLUME_PID_DIR(path, volinfo, priv) \
+ do { \
+ int32_t _vol_pid_len; \
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index 532ef35..e156d4d 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -46,6 +46,7 @@ client_fini_complete(xlator_t *this)
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ clnt_conf_t *conf = this->private;
++
+ if (!conf->destroy)
+ return 0;
+
+@@ -68,11 +69,6 @@ client_notify_dispatch_uniq(xlator_t *this, int32_t event, void *data, ...)
+ return 0;
+
+ return client_notify_dispatch(this, event, data);
+-
+- /* Please avoid any code that access xlator object here
+- * Because for a child down event, once we do the signal
+- * we will start cleanup.
+- */
+ }
+
+ int
+@@ -109,11 +105,6 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...)
+ }
+ pthread_mutex_unlock(&ctx->notify_lock);
+
+- /* Please avoid any code that access xlator object here
+- * Because for a child down event, once we do the signal
+- * we will start cleanup.
+- */
+-
+ return ret;
+ }
+
+@@ -2287,7 +2278,6 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ {
+ xlator_t *this = NULL;
+ clnt_conf_t *conf = NULL;
+- gf_boolean_t is_parent_down = _gf_false;
+ int ret = 0;
+
+ this = mydata;
+@@ -2351,19 +2341,6 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ if (conf->portmap_err_logged)
+ conf->disconnect_err_logged = 1;
+ }
+- /*
+- * Once we complete the child down notification,
+- * There is a chance that the graph might get freed,
+- * So it is not safe to access any xlator contens
+- * So here we are checking whether the parent is down
+- * or not.
+- */
+- pthread_mutex_lock(&conf->lock);
+- {
+- is_parent_down = conf->parent_down;
+- }
+- pthread_mutex_unlock(&conf->lock);
+-
+ /* If the CHILD_DOWN event goes to parent xlator
+ multiple times, the logic of parent xlator notify
+ may get screwed up.. (eg. CHILD_MODIFIED event in
+@@ -2371,12 +2348,6 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ to parent are genuine */
+ ret = client_notify_dispatch_uniq(this, GF_EVENT_CHILD_DOWN,
+ NULL);
+- if (is_parent_down) {
+- /* If parent is down, then there should not be any
+- * operation after a child down.
+- */
+- goto out;
+- }
+ if (ret)
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ PC_MSG_CHILD_DOWN_NOTIFY_FAILED,
+--
+1.8.3.1
+
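
The glusterd_volinfo_unref() hunk reverted above goes back to plain per-object reference counting: only the volume's own reflock is taken, with no global volume_lock nested around it. A minimal sketch of that pattern in C follows; the types are simplified hypothetical stand-ins, not the actual glusterd structures:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct volinfo {
        pthread_mutex_t reflock; /* protects refcnt only */
        int refcnt;
    } volinfo_t;

    static volinfo_t *
    volinfo_new(void)
    {
        volinfo_t *v = calloc(1, sizeof(*v));
        if (v) {
            pthread_mutex_init(&v->reflock, NULL);
            v->refcnt = 1;
        }
        return v;
    }

    /* Drop one reference; destroy the object when the count reaches
     * zero. Only the per-object lock is taken, so this path imposes
     * no ordering constraint against any global lock. */
    static volinfo_t *
    volinfo_unref(volinfo_t *v)
    {
        int refcnt;

        pthread_mutex_lock(&v->reflock);
        refcnt = --v->refcnt;
        pthread_mutex_unlock(&v->reflock);

        if (refcnt == 0) {
            pthread_mutex_destroy(&v->reflock);
            free(v);
            return NULL;
        }
        return v;
    }

    int
    main(void)
    {
        volinfo_t *v = volinfo_new();
        v = volinfo_unref(v); /* refcnt 1 -> 0: freed, returns NULL */
        printf("after unref: %p\n", (void *)v);
        return 0;
    }

Keeping the critical section down to the bare refcount decrement is what lets the revert drop the extra lock without changing behavior.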
diff --git a/0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch b/0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch
new file mode 100644
index 0000000..028f92c
--- /dev/null
+++ b/0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch
@@ -0,0 +1,57 @@
+From 7a04fb9999f5d25c17f5593eed5e98d0f5a1932d Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Mon, 15 Jul 2019 14:30:52 +0530
+Subject: [PATCH 251/255] tests: Fix
+ bug-1717819-metadata-split-brain-detection.t failure
+
+<Backport of: https://review.gluster.org/#/c/glusterfs/+/23043/>
+
+Problem:
+tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t fails
+intermittently in test cases #49 & #50, which compare the user-set
+xattr values after enabling heal. We are not waiting for the heal to
+complete before comparing those values, which might lead those tests
+to fail.
+
+Fix:
+Wait till the HEAL-TIMEOUT before comparing the xattr values.
+Also check that the shd comes up and that the bricks connect to the shd
+process in another case.
+
+Change-Id: I0021c2d5d251111c695e2bf18c63e8189e456114
+fixes: bz#1704562
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176071
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
+index 94b8bf3..76d1f21 100644
+--- a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
++++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
+@@ -76,6 +76,10 @@ EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+ # Launch heal
+ TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+ TEST $CLI volume heal $V0
+ EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+@@ -117,6 +121,8 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+ B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+ B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+--
+1.8.3.1
+
diff --git a/0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch b/0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch
new file mode 100644
index 0000000..b722dff
--- /dev/null
+++ b/0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch
@@ -0,0 +1,63 @@
+From 5a35a996257d6aaa7fa55ff1e1aac407dd4824fe Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Fri, 12 Jul 2019 16:28:04 +0530
+Subject: [PATCH 252/255] glusterd: do not mark skip_locking as true for
+ geo-rep operations
+
+We need to send the commit request to peers for geo-rep
+operations even though they are no-volname operations. In the
+commit phase the peers try to set the txn_opinfo, which will fail
+because it is a no-volname operation where we don't require a
+commit phase. We mark skip_locking as true for no-volname
+operations, but we have to make an exception for geo-rep
+operations so that they can set the txn_opinfo in the commit
+phase.
+
+Please refer to detailed RCA at the bug: 1729463
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23034/
+
+>fixes: bz#1729463
+>Change-Id: I9f2478b12a281f6e052035c0563c40543493a3fc
+>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+Change-Id: I9f2478b12a281f6e052035c0563c40543493a3fc
+BUG: 1727785
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176032
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Shwetha Acharya <sacharya@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index cb2666b..2e73c98 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -1078,7 +1078,11 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req)
+
+ /* In cases where there is no volname, the receivers won't have a
+ * transaction opinfo created, as for those operations, the locking
+- * phase where the transaction opinfos are created, won't be called. */
++ * phase where the transaction opinfos are created, won't be called.
++     * skip_locking will be true for all such transactions and we clear
++ * the txn_opinfo after the staging phase, except for geo-replication
++ * operations where we need to access txn_opinfo in the later phases also.
++ */
+ ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No transaction's opinfo set");
+@@ -1087,7 +1091,8 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req)
+ glusterd_txn_opinfo_init(&txn_op_info, &state, &op_req.op,
+ req_ctx->dict, req);
+
+- txn_op_info.skip_locking = _gf_true;
++ if (req_ctx->op != GD_OP_GSYNC_SET)
++ txn_op_info.skip_locking = _gf_true;
+ ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+--
+1.8.3.1
+
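
The hunk above encodes a single rule: no-volname transactions skip locking unless the operation is geo-rep (gsync set), which still needs its txn_opinfo in the commit phase. A hedged sketch of that decision in isolation — the op codes and types below are hypothetical stand-ins, not glusterd's real definitions:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for glusterd's GD_OP_* codes. */
    typedef enum { OP_STATUS_ALL, OP_GSYNC_SET } op_t;

    typedef struct {
        bool skip_locking; /* when true, txn_opinfo is cleared after staging */
    } txn_opinfo_t;

    /* No-volname ops normally skip locking; geo-rep (gsync set) is the
     * exception because it still reads its txn_opinfo in later phases. */
    static void
    init_no_volname_txn(txn_opinfo_t *info, op_t op)
    {
        info->skip_locking = (op != OP_GSYNC_SET);
    }

    int
    main(void)
    {
        txn_opinfo_t status, gsync;

        init_no_volname_txn(&status, OP_STATUS_ALL);
        init_no_volname_txn(&gsync, OP_GSYNC_SET);
        printf("status: skip_locking=%d, gsync: skip_locking=%d\n",
               status.skip_locking, gsync.skip_locking);
        return 0;
    }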
diff --git a/0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch b/0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch
new file mode 100644
index 0000000..d313482
--- /dev/null
+++ b/0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch
@@ -0,0 +1,246 @@
+From ea7f11b989896d76b8d091d26bc0241bce9413f8 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Thu, 4 Jul 2019 13:21:33 +0200
+Subject: [PATCH 253/255] core: fix deadlock between statedump and
+ fd_anonymous()
+
+There exists a deadlock between statedump generation and the
+fd_anonymous() function, because they acquire the inode table lock and
+the inode lock in reverse order.
+
+This patch modifies fd_anonymous() so that it takes the inode lock only
+when it's really necessary, avoiding the deadlock.
+
+Upstream patch:
+> Change-Id: I24355447f0ea1b39e2546782ad07f0512cc381e7
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22995
+> BUG: 1727068
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: I24355447f0ea1b39e2546782ad07f0512cc381e7
+Fixes: bz#1722209
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176096
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/fd.c | 137 ++++++++++++++++++++++----------------------------
+ 1 file changed, 61 insertions(+), 76 deletions(-)
+
+diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
+index b8aac72..314546a 100644
+--- a/libglusterfs/src/fd.c
++++ b/libglusterfs/src/fd.c
+@@ -532,7 +532,7 @@ fd_unref(fd_t *fd)
+ return;
+ }
+
+-fd_t *
++static fd_t *
+ __fd_bind(fd_t *fd)
+ {
+ list_del_init(&fd->inode_list);
+@@ -562,9 +562,9 @@ fd_bind(fd_t *fd)
+ }
+
+ static fd_t *
+-__fd_create(inode_t *inode, uint64_t pid)
++fd_allocate(inode_t *inode, uint64_t pid)
+ {
+- fd_t *fd = NULL;
++ fd_t *fd;
+
+ if (inode == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+@@ -573,64 +573,67 @@ __fd_create(inode_t *inode, uint64_t pid)
+ }
+
+ fd = mem_get0(inode->table->fd_mem_pool);
+- if (!fd)
+- goto out;
++ if (fd == NULL) {
++ return NULL;
++ }
+
+ fd->xl_count = inode->table->xl->graph->xl_count + 1;
+
+ fd->_ctx = GF_CALLOC(1, (sizeof(struct _fd_ctx) * fd->xl_count),
+ gf_common_mt_fd_ctx);
+- if (!fd->_ctx)
+- goto free_fd;
++ if (fd->_ctx == NULL) {
++ goto failed;
++ }
+
+ fd->lk_ctx = fd_lk_ctx_create();
+- if (!fd->lk_ctx)
+- goto free_fd_ctx;
+-
+- fd->inode = inode_ref(inode);
+- fd->pid = pid;
+- INIT_LIST_HEAD(&fd->inode_list);
+-
+- LOCK_INIT(&fd->lock);
+-out:
+- return fd;
++ if (fd->lk_ctx != NULL) {
++ /* We need to take a reference from the inode, but we cannot do it
++ * here because this function can be called with the inode lock taken
++ * and inode_ref() takes the inode's table lock. This is the reverse
++ * of the logical lock acquisition order and can cause a deadlock. So
++ * we simply assign the inode here and we delefate the inode reference
++         * we simply assign the inode here and we delegate the inode reference
++ * inode lock is released). This is safe because the caller must hold
++ * a reference of the inode to use it, so it's guaranteed that the
++ * number of references won't reach 0 before the caller finishes.
++ *
++ * TODO: minimize use of locks in favor of atomic operations to avoid
++ * these dependencies. */
++ fd->inode = inode;
++ fd->pid = pid;
++ INIT_LIST_HEAD(&fd->inode_list);
++ LOCK_INIT(&fd->lock);
++ GF_ATOMIC_INIT(fd->refcount, 1);
++ return fd;
++ }
+
+-free_fd_ctx:
+ GF_FREE(fd->_ctx);
+-free_fd:
++
++failed:
+ mem_put(fd);
+
+ return NULL;
+ }
+
+ fd_t *
+-fd_create(inode_t *inode, pid_t pid)
++fd_create_uint64(inode_t *inode, uint64_t pid)
+ {
+- fd_t *fd = NULL;
+-
+- fd = __fd_create(inode, (uint64_t)pid);
+- if (!fd)
+- goto out;
++ fd_t *fd;
+
+- fd = fd_ref(fd);
++ fd = fd_allocate(inode, pid);
++ if (fd != NULL) {
++ /* fd_allocate() doesn't get a reference from the inode. We need to
++ * take it here in case of success. */
++ inode_ref(inode);
++ }
+
+-out:
+ return fd;
+ }
+
+ fd_t *
+-fd_create_uint64(inode_t *inode, uint64_t pid)
++fd_create(inode_t *inode, pid_t pid)
+ {
+- fd_t *fd = NULL;
+-
+- fd = __fd_create(inode, pid);
+- if (!fd)
+- goto out;
+-
+- fd = fd_ref(fd);
+-
+-out:
+- return fd;
++ return fd_create_uint64(inode, (uint64_t)pid);
+ }
+
+ static fd_t *
+@@ -719,10 +722,13 @@ __fd_lookup_anonymous(inode_t *inode, int32_t flags)
+ return fd;
+ }
+
+-static fd_t *
+-__fd_anonymous(inode_t *inode, int32_t flags)
++fd_t *
++fd_anonymous_with_flags(inode_t *inode, int32_t flags)
+ {
+ fd_t *fd = NULL;
++ bool ref = false;
++
++ LOCK(&inode->lock);
+
+ fd = __fd_lookup_anonymous(inode, flags);
+
+@@ -730,54 +736,33 @@ __fd_anonymous(inode_t *inode, int32_t flags)
+ __fd_lookup_anonymous(), so no need of one more fd_ref().
+ if (!fd); then both create and bind won't bump up the ref
+ count, so we have to call fd_ref() after bind. */
+- if (!fd) {
+- fd = __fd_create(inode, 0);
+-
+- if (!fd)
+- return NULL;
+-
+- fd->anonymous = _gf_true;
+- fd->flags = GF_ANON_FD_FLAGS | flags;
++ if (fd == NULL) {
++ fd = fd_allocate(inode, 0);
++ if (fd != NULL) {
++ fd->anonymous = _gf_true;
++ fd->flags = GF_ANON_FD_FLAGS | (flags & O_DIRECT);
+
+- __fd_bind(fd);
++ __fd_bind(fd);
+
+- __fd_ref(fd);
++ ref = true;
++ }
+ }
+
+- return fd;
+-}
+-
+-fd_t *
+-fd_anonymous(inode_t *inode)
+-{
+- fd_t *fd = NULL;
++ UNLOCK(&inode->lock);
+
+- LOCK(&inode->lock);
+- {
+- fd = __fd_anonymous(inode, GF_ANON_FD_FLAGS);
++ if (ref) {
++ /* fd_allocate() doesn't get a reference from the inode. We need to
++ * take it here in case of success. */
++ inode_ref(inode);
+ }
+- UNLOCK(&inode->lock);
+
+ return fd;
+ }
+
+ fd_t *
+-fd_anonymous_with_flags(inode_t *inode, int32_t flags)
++fd_anonymous(inode_t *inode)
+ {
+- fd_t *fd = NULL;
+-
+- if (flags & O_DIRECT)
+- flags = GF_ANON_FD_FLAGS | O_DIRECT;
+- else
+- flags = GF_ANON_FD_FLAGS;
+-
+- LOCK(&inode->lock);
+- {
+- fd = __fd_anonymous(inode, flags);
+- }
+- UNLOCK(&inode->lock);
+-
+- return fd;
++ return fd_anonymous_with_flags(inode, 0);
+ }
+
+ fd_t *
+--
+1.8.3.1
+
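
The fd.c change above fixes a classic lock-order inversion: fd_anonymous() held the inode lock and then needed the inode table lock (via inode_ref()), while statedump takes the table lock first and then inode locks. The fix pattern is to defer the work that needs the second lock until the first is released. A minimal sketch, using simplified hypothetical types rather than the real fd/inode structures:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct table {
        pthread_mutex_t lock; /* always taken first by other paths */
        int nrefs;
    } table_t;

    typedef struct inode {
        pthread_mutex_t lock; /* per-inode lock */
        table_t *table;
        void *anon_fd; /* cached anonymous fd, if any */
    } inode_t;

    /* Look up or create the cached object under inode->lock, but defer
     * the table-lock work (the reference) until inode->lock is
     * released. The table->lock -> inode->lock order used elsewhere
     * (such as statedump) is therefore never inverted. */
    static void *
    get_anon_fd(inode_t *inode)
    {
        void *fd;
        bool need_ref = false;

        pthread_mutex_lock(&inode->lock);
        fd = inode->anon_fd;
        if (fd == NULL) {
            fd = malloc(1); /* stand-in for the real fd allocation */
            inode->anon_fd = fd;
            need_ref = (fd != NULL);
        }
        pthread_mutex_unlock(&inode->lock);

        if (need_ref) {
            /* Safe now: inode->lock is no longer held. */
            pthread_mutex_lock(&inode->table->lock);
            inode->table->nrefs++;
            pthread_mutex_unlock(&inode->table->lock);
        }
        return fd;
    }

    int
    main(void)
    {
        table_t tbl = {PTHREAD_MUTEX_INITIALIZER, 0};
        inode_t ino = {PTHREAD_MUTEX_INITIALIZER, &tbl, NULL};

        printf("fd=%p table refs=%d\n", get_anon_fd(&ino), tbl.nrefs);
        free(ino.anon_fd);
        return 0;
    }

This mirrors what the patch does: fd_allocate() only assigns the inode, and the caller performs inode_ref() once the inode lock has been dropped.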
diff --git a/0254-Detach-iot_worker-to-release-its-resources.patch b/0254-Detach-iot_worker-to-release-its-resources.patch
new file mode 100644
index 0000000..6019436
--- /dev/null
+++ b/0254-Detach-iot_worker-to-release-its-resources.patch
@@ -0,0 +1,43 @@
+From 2bbb097d087bb5ef142775500708f11ccd31bac0 Mon Sep 17 00:00:00 2001
+From: Liguang Li <liguang.lee6@gmail.com>
+Date: Fri, 21 Jun 2019 12:18:58 +0800
+Subject: [PATCH 254/255] Detach iot_worker to release its resources
+
+When an iot_worker terminates, its resources are not reaped, which
+consumes a lot of memory.
+
+Detach iot_worker to automatically release its resources back to the
+system.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22918/
+
+>fixes: bz#1729107
+>Change-Id: I71fabb2940e76ad54dc56b4c41aeeead2644b8bb
+>Signed-off-by: Liguang Li <liguang.lee6@gmail.com>
+
+BUG:1729108
+Change-Id: I71fabb2940e76ad54dc56b4c41aeeead2644b8bb
+Signed-off-by: Liguang Li <liguang.lee6@gmail.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176106
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/performance/io-threads/src/io-threads.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
+index 060d923..1d96691 100644
+--- a/xlators/performance/io-threads/src/io-threads.c
++++ b/xlators/performance/io-threads/src/io-threads.c
+@@ -835,6 +835,7 @@ __iot_workers_scale(iot_conf_t *conf)
+ ret = gf_thread_create(&thread, &conf->w_attr, iot_worker, conf,
+ "iotwr%03hx", conf->curr_count & 0x3ff);
+ if (ret == 0) {
++ pthread_detach(thread);
+ conf->curr_count++;
+ gf_msg_debug(conf->this->name, 0,
+ "scaled threads to %d (queue_size=%d/%d)",
+--
+1.8.3.1
+
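
The one-line fix above relies on a standard pthreads rule: a thread that terminates without being joined or detached keeps its stack and bookkeeping allocated. A small self-contained sketch of the detached-worker pattern (generic code, not the io-threads worker itself):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static void *
    worker(void *arg)
    {
        /* ... serve queued requests, then exit when idle ... */
        printf("worker %ld exiting\n", (long)arg);
        return NULL;
    }

    int
    main(void)
    {
        pthread_t t;
        long i;

        for (i = 0; i < 4; i++) {
            if (pthread_create(&t, NULL, worker, (void *)i) == 0) {
                /* A detached thread's stack and bookkeeping are
                 * released automatically when it exits; nothing ever
                 * has to pthread_join() it, so short-lived workers
                 * cannot pile up as unreaped threads. */
                pthread_detach(t);
            }
        }
        sleep(1); /* crude: give the workers time to run before exit */
        return 0;
    }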
diff --git a/0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch b/0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch
new file mode 100644
index 0000000..89a8aaf
--- /dev/null
+++ b/0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch
@@ -0,0 +1,104 @@
+From 684a4949552164d3469329b3f959de4369d54faa Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Sun, 14 Jul 2019 08:06:11 +0530
+Subject: [PATCH 255/255] Revert "tier/shd/glusterd: with shd mux, the shd
+ volfile path have to be updated for tier-heald.t"
+
+This reverts commit 6e7d333625ecd9f7402c2e839338350fa86eaf45.
+
+Updates: bz#1471742
+Change-Id: I6c27634999f72b5bbb35d5d13cdebda7af072b01
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176017
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/tier/tier-heald.t | 35 +++++++++++++++--------------------
+ 1 file changed, 15 insertions(+), 20 deletions(-)
+
+diff --git a/tests/basic/tier/tier-heald.t b/tests/basic/tier/tier-heald.t
+index 0ec9e43..a8e634f 100644
+--- a/tests/basic/tier/tier-heald.t
++++ b/tests/basic/tier/tier-heald.t
+@@ -11,7 +11,7 @@ cleanup;
+ TEST glusterd
+ TEST pidof glusterd
+
+-r2_volfile=$(gluster system:: getwd)"/vols/r2/r2-shd.vol"
++volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol"
+
+ # Commands should fail when both tiers are not of distribute type.
+ # Glustershd shouldn't be running as long as there are no replicate/disperse
+@@ -34,56 +34,51 @@ TEST $CLI volume tier r2 attach $H0:$B0/r2_hot
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal r2 enable
+ EXPECT "enable" volume_option r2 "cluster.self-heal-daemon"
+-EXPECT "enable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal r2 disable
+ EXPECT "disable" volume_option r2 "cluster.self-heal-daemon"
+-EXPECT "disable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon
++EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ # Commands should work on disperse volume.
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2
+ TEST $CLI volume start ec2
+
+-ec2_volfile=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol"
+-
+ TEST $CLI volume tier ec2 attach replica 2 $H0:$B0/ec2_hot{1..4}
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal ec2 enable
+ EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+-EXPECT "enable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+ TEST $CLI volume heal ec2 disable
+ EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+-EXPECT "disable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon
++EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+
+ #Check that shd graph is rewritten correctly on volume stop/start
+-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
+-EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+ TEST $CLI volume stop r2
+-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
+-
+-# Has been commented as the validations after stop using volfile dont hold true.
+-#EXPECT "N" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+ TEST $CLI volume stop ec2
+ # When both the volumes are stopped glustershd volfile is not modified just the
+ # process is stopped
+ TEST "[ -z $(get_shd_process_pid) ]"
+
+ TEST $CLI volume start r2
+-# Has been commented as the validations after stop using volfile dont hold true.
+-#EXPECT "N" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
+-EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate
++EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+
+ TEST $CLI volume start ec2
+
+-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
+-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "Y" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate
+
+ TEST $CLI volume tier ec2 detach force
+
+-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse
+-EXPECT "N" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate
++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
++EXPECT "N" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate
+
+ TEST $CLI volume set r2 self-heal-daemon on
+ TEST $CLI volume set r2 cluster.self-heal-daemon off
+--
+1.8.3.1
+
diff --git a/0256-features-snapview-server-use-the-same-volfile-server.patch b/0256-features-snapview-server-use-the-same-volfile-server.patch
new file mode 100644
index 0000000..d410373
--- /dev/null
+++ b/0256-features-snapview-server-use-the-same-volfile-server.patch
@@ -0,0 +1,117 @@
+From f90df1167bc70c634ba33c181232321da6770709 Mon Sep 17 00:00:00 2001
+From: Raghavendra Bhat <raghavendra@redhat.com>
+Date: Tue, 25 Jun 2019 10:51:33 -0400
+Subject: [PATCH 256/261] features/snapview-server: use the same volfile server
+ for gfapi options
+
+The snapview-server xlator uses "localhost" as the volfile server while
+initializing the new glfs instance it creates to talk to a snapshot. While
+localhost is fine, it is better to use the same volfile server that was used
+to start the snapshot daemon containing the snapview-server xlator.
+
+Upstream Patch:
+>Change-Id: I4485d39b0e3d066f481adc6958ace53ea33237f7
+>fixes: bz#1725211
+>Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
+> patch: https://review.gluster.org/#/c/glusterfs/+/22974/
+
+BUG: 1722757
+Change-Id: I4485d39b0e3d066f481adc6958ace53ea33237f7
+Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175984
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../snapview-server/src/snapview-server-helpers.c | 44 ++++++++++++++++++++--
+ .../snapview-server/src/snapview-server-messages.h | 2 +-
+ 2 files changed, 42 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c
+index 5514a54..62c1dda 100644
+--- a/xlators/features/snapview-server/src/snapview-server-helpers.c
++++ b/xlators/features/snapview-server/src/snapview-server-helpers.c
+@@ -476,6 +476,7 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name,
+ char logfile[PATH_MAX] = {
+ 0,
+ };
++ char *volfile_server = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+@@ -512,14 +513,50 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name,
+ goto out;
+ }
+
+- ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
++ /*
++ * Before, localhost was used as the volfile server. But, with that
++ * method, accessing snapshots started giving ENOENT error if a
++ * specific bind address is mentioned in the glusterd volume file.
++ * Check the bug https://bugzilla.redhat.com/show_bug.cgi?id=1725211.
++ * So, the new method is tried below, where, snapview-server first
++ * uses the volfile server used by the snapd (obtained from the
++ * command line arguments saved in the global context of the process).
++ * If the volfile server in global context is NULL, then localhost
++ * is tried (like before).
++ */
++ if (this->ctx->cmd_args.volfile_server) {
++ volfile_server = gf_strdup(this->ctx->cmd_args.volfile_server);
++ if (!volfile_server) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
++ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
++ "failed to copy volfile server %s. ",
++ this->ctx->cmd_args.volfile_server);
++ ret = -1;
++ goto out;
++ }
++ } else {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
++ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
++ "volfile server is NULL in cmd args. "
++ "Trying with localhost");
++ volfile_server = gf_strdup("localhost");
++ if (!volfile_server) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
++ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
++ "failed to copy volfile server localhost.");
++ ret = -1;
++ goto out;
++ }
++ }
++
++ ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, local_errno,
+ SVS_MSG_SET_VOLFILE_SERVR_FAILED,
+ "setting the "
+- "volfile server for snap volume %s "
++ "volfile server %s for snap volume %s "
+ "failed",
+- dirent->name);
++ volfile_server, dirent->name);
+ goto out;
+ }
+
+@@ -561,6 +598,7 @@ out:
+ dirent->fs = fs;
+ }
+
++ GF_FREE(volfile_server);
+ return fs;
+ }
+
+diff --git a/xlators/features/snapview-server/src/snapview-server-messages.h b/xlators/features/snapview-server/src/snapview-server-messages.h
+index 8548015..f634ab5 100644
+--- a/xlators/features/snapview-server/src/snapview-server-messages.h
++++ b/xlators/features/snapview-server/src/snapview-server-messages.h
+@@ -49,6 +49,6 @@ GLFS_MSGID(SNAPVIEW_SERVER, SVS_MSG_NO_MEMORY, SVS_MSG_MEM_ACNT_FAILED,
+ SVS_MSG_CLOSEDIR_FAILED, SVS_MSG_CLOSE_FAILED,
+ SVS_MSG_GFID_GEN_FAILED, SVS_MSG_GLFS_NEW_FAILED,
+ SVS_MSG_SET_VOLFILE_SERVR_FAILED, SVS_MSG_SET_LOGGING_FAILED,
+- SVS_MSG_GLFS_INIT_FAILED);
++ SVS_MSG_VOLFILE_SERVER_GET_FAIL, SVS_MSG_GLFS_INIT_FAILED);
+
+ #endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */
+--
+1.8.3.1
+
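
For context, the calls touched by the patch above are the standard libgfapi bootstrap sequence. A hedged usage sketch — the volume name and host below are placeholders, and error handling is trimmed:

    #include <stdio.h>
    #include <glusterfs/api/glfs.h>

    int
    main(void)
    {
        /* "snapvol" and "server1" are placeholders, not real names. */
        glfs_t *fs = glfs_new("snapvol");
        if (!fs)
            return 1;

        /* The call the patch adjusts: point the instance at the same
         * volfile server the daemon itself was started with, instead
         * of hard-coding "localhost". */
        if (glfs_set_volfile_server(fs, "tcp", "server1", 24007) != 0 ||
            glfs_init(fs) != 0) {
            fprintf(stderr, "failed to initialize gfapi instance\n");
            glfs_fini(fs);
            return 1;
        }

        /* ... access the snapshot volume through glfs_* fops ... */

        glfs_fini(fs);
        return 0;
    }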
diff --git a/0257-geo-rep-Test-case-for-upgrading-config-file.patch b/0257-geo-rep-Test-case-for-upgrading-config-file.patch
new file mode 100644
index 0000000..ffe44f1
--- /dev/null
+++ b/0257-geo-rep-Test-case-for-upgrading-config-file.patch
@@ -0,0 +1,80 @@
+From ed6cd2b7674896c810fdd059e35a0d319aacb068 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Tue, 2 Jul 2019 15:00:25 +0530
+Subject: [PATCH 257/261] geo-rep: Test case for upgrading config file
+
+Added test case for the patch
+https://review.gluster.org/#/c/glusterfs/+/22894/4
+
+Also updated if else structure in gsyncdconfig.py to avoid
+repeated occurance of values in new configfile.
+
+>fixes: bz#1707731
+>Change-Id: If97e1d37ac52dbd17d47be6cb659fc5a3ccab6d7
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/22982/
+
+Bug: 1708064
+Change-Id: If97e1d37ac52dbd17d47be6cb659fc5a3ccab6d7
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176603
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/gsyncdconfig.py | 11 +++++------
+ tests/00-geo-rep/georep-basic-dr-rsync.t | 13 +++++++++++++
+ 2 files changed, 18 insertions(+), 6 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
+index 7edc582..1fc451f 100644
+--- a/geo-replication/syncdaemon/gsyncdconfig.py
++++ b/geo-replication/syncdaemon/gsyncdconfig.py
+@@ -353,15 +353,14 @@ def config_upgrade(config_file, ret):
+ new_value = "tarssh"
+ else:
+ new_value = "rsync"
+- config.set('vars', new_key, new_value)
+-
+- if key == "timeout":
++ config.set('vars', new_key, new_value)
++ elif key == "timeout":
+ new_key = "slave-timeout"
+ config.set('vars', new_key, value)
+-
+ #for changes like: ignore_deletes to ignore-deletes
+- new_key = key.replace("_", "-")
+- config.set('vars', new_key, value)
++ else:
++ new_key = key.replace("_", "-")
++ config.set('vars', new_key, value)
+
+ with open(config_file, 'w') as configfile:
+ config.write(configfile)
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
+index 428e9ed..b432635 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
+@@ -212,6 +212,19 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave
+ #Verify arequal for whole volume
+ EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
++#Test config upgrade BUG: 1707731
++config_file=$GLUSTERD_WORKDIR/geo-replication/${GMV0}_${SH0}_${GSV0}/gsyncd.conf
++cat >> $config_file<<EOL
++[peers ${GMV0} ${GSV0}]
++use_tarssh = true
++timeout = 1
++EOL
++TEST $GEOREP_CLI $master $slave stop
++TEST $GEOREP_CLI $master $slave start
++#verify that the config file is updated
++EXPECT "1" echo $(grep -Fc "vars" $config_file)
++EXPECT "1" echo $(grep -Fc "sync-method = tarssh" $config_file)
++EXPECT "1" echo $(grep -Fc "slave-timeout = 1" $config_file)
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave stop
+
+--
+1.8.3.1
+
diff --git a/0258-geo-rep-Fix-mount-broker-setup-issue.patch b/0258-geo-rep-Fix-mount-broker-setup-issue.patch
new file mode 100644
index 0000000..ba2a81f
--- /dev/null
+++ b/0258-geo-rep-Fix-mount-broker-setup-issue.patch
@@ -0,0 +1,53 @@
+From de24d64c2599e48a05e8792b845cfecc210a6fc5 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Mon, 22 Jul 2019 17:35:21 +0530
+Subject: [PATCH 258/261] geo-rep: Fix mount broker setup issue
+
+The patch [1] added validation in gverify.sh to check whether the
+gluster binary exists on the slave by executing gluster directly on the
+slave. But for non-root users, even though the gluster binary is
+present, its path is not found when executed via ssh. Hence validate
+the gluster binary using the bash builtin 'type' command.
+
+[1] https://review.gluster.org/19224
+
+Backport of:
+ > Patch: https://review.gluster.org/23089
+ > Change-Id: I93ca62c0c5b1e16263e586ddbbca8407d60ca126
+ > fixes: bz#1731920
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: I93ca62c0c5b1e16263e586ddbbca8407d60ca126
+BUG: 1720992
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176727
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/src/gverify.sh | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh
+index 7c88f9f..692c1d6 100755
+--- a/geo-replication/src/gverify.sh
++++ b/geo-replication/src/gverify.sh
+@@ -207,13 +207,13 @@ function main()
+ fi;
+
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+- err=$((ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "gluster --version") 2>&1)
++ ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "type -p gluster"
+ else
+- err=$((ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "gluster --version") 2>&1)
++ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "type -p gluster"
+ fi
+
+ if [ $? -ne 0 ]; then
+- echo "FORCE_BLOCKER|gluster command on $2@$3 failed. Error: $err" > $log_file
++ echo "FORCE_BLOCKER|gluster command not found on $3 for user $2." > $log_file
+ exit 1;
+ fi;
+
+--
+1.8.3.1
+
diff --git a/0259-gluster-block-tuning-perf-options.patch b/0259-gluster-block-tuning-perf-options.patch
new file mode 100644
index 0000000..a8ef9f4
--- /dev/null
+++ b/0259-gluster-block-tuning-perf-options.patch
@@ -0,0 +1,47 @@
+From 775a62906030e5b5dc60f17284a7d516ce4118f9 Mon Sep 17 00:00:00 2001
+From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+Date: Thu, 27 Jun 2019 13:18:32 +0530
+Subject: [PATCH 259/261] gluster-block: tuning perf options
+
+In the perf experiment run by Elvir with NVMe devices used for OCP (gluster),
+it was seen that read operations (read/randread) in particular benefited from
+these options.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22963/
+
+>Change-Id: Ibec4b96afd28e6f7e757b6ef203ccdbc0d9854d5
+>Fixes: bz#1727852
+>Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+
+Change-Id: Ibec4b96afd28e6f7e757b6ef203ccdbc0d9854d5
+BUG: 1708180
+Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176724
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ extras/group-gluster-block | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/extras/group-gluster-block b/extras/group-gluster-block
+index 56b406e..1e39801 100644
+--- a/extras/group-gluster-block
++++ b/extras/group-gluster-block
+@@ -5,6 +5,14 @@ performance.stat-prefetch=off
+ performance.open-behind=off
+ performance.readdir-ahead=off
+ performance.strict-o-direct=on
++performance.client-io-threads=on
++performance.io-thread-count=32
++performance.high-prio-threads=32
++performance.normal-prio-threads=32
++performance.low-prio-threads=32
++performance.least-prio-threads=4
++client.event-threads=8
++server.event-threads=8
+ network.remote-dio=disable
+ cluster.eager-lock=enable
+ cluster.quorum-type=auto
+--
+1.8.3.1
+
diff --git a/0260-ctime-Set-mdata-xattr-on-legacy-files.patch b/0260-ctime-Set-mdata-xattr-on-legacy-files.patch
new file mode 100644
index 0000000..f07fb21
--- /dev/null
+++ b/0260-ctime-Set-mdata-xattr-on-legacy-files.patch
@@ -0,0 +1,885 @@
+From fc0903de1f7565e06db9d41e6dfd62221a745d24 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Mon, 24 Jun 2019 13:06:49 +0530
+Subject: [PATCH 260/261] ctime: Set mdata xattr on legacy files
+
+Problem:
+Files which were created before ctime was enabled would not
+have the "trusted.glusterfs.mdata" xattr (which stores the time
+attributes). Upon fops which modify either ctime or mtime, the
+xattr gets created with the latest ctime, mtime and atime, which
+is incorrect. It should update only the corresponding time
+attribute and take the rest from the backend.
+
+Solution:
+Creating xattr with values from brick is not possible as
+each brick of replica set would have different times.
+So create the xattr upon successful lookup if the xattr
+is not created
+
+Note To Reviewers:
+The time attributes used to set xattr is got from successful
+lookup. Instead of sending the whole iatt over the wire via
+setxattr, a structure called mdata_iatt is sent. The mdata_iatt
+contains only time attributes.
+
+Backport of
+ > Patch: https://review.gluster.org/22936
+ > Change-Id: I5e535631ddef04195361ae0364336410a2895dd4
+ > fixes: bz#1593542
+
+Change-Id: I5e535631ddef04195361ae0364336410a2895dd4
+BUG: 1715422
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176725
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ libglusterfs/src/dict.c | 59 ++++++++++
+ libglusterfs/src/glusterfs/dict.h | 5 +
+ libglusterfs/src/glusterfs/glusterfs.h | 3 +
+ libglusterfs/src/glusterfs/iatt.h | 20 ++++
+ libglusterfs/src/libglusterfs.sym | 3 +
+ rpc/xdr/src/glusterfs-fops.x | 1 +
+ rpc/xdr/src/glusterfs3.h | 59 ++++++++++
+ rpc/xdr/src/glusterfs4-xdr.x | 12 ++
+ rpc/xdr/src/libgfxdr.sym | 3 +-
+ tests/basic/ctime/ctime-mdata-legacy-files.t | 83 +++++++++++++
+ xlators/features/utime/src/utime-messages.h | 3 +-
+ xlators/features/utime/src/utime.c | 154 ++++++++++++++++++++++---
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 17 +++
+ xlators/storage/posix/src/posix-messages.h | 3 +-
+ xlators/storage/posix/src/posix-metadata.c | 103 ++++++++++-------
+ xlators/storage/posix/src/posix-metadata.h | 4 +
+ 16 files changed, 475 insertions(+), 57 deletions(-)
+ create mode 100644 tests/basic/ctime/ctime-mdata-legacy-files.t
+
+diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
+index 6917df9..d8cdda4 100644
+--- a/libglusterfs/src/dict.c
++++ b/libglusterfs/src/dict.c
+@@ -124,6 +124,7 @@ int32_t
+ is_data_equal(data_t *one, data_t *two)
+ {
+ struct iatt *iatt1, *iatt2;
++ struct mdata_iatt *mdata_iatt1, *mdata_iatt2;
+
+ if (!one || !two || !one->data || !two->data) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+@@ -188,6 +189,24 @@ is_data_equal(data_t *one, data_t *two)
+ */
+ return 1;
+ }
++ if (one->data_type == GF_DATA_TYPE_MDATA) {
++ if ((one->len < sizeof(struct mdata_iatt)) ||
++ (two->len < sizeof(struct mdata_iatt))) {
++ return 0;
++ }
++ mdata_iatt1 = (struct mdata_iatt *)one->data;
++ mdata_iatt2 = (struct mdata_iatt *)two->data;
++
++ if (mdata_iatt1->ia_atime != mdata_iatt2->ia_atime ||
++ mdata_iatt1->ia_mtime != mdata_iatt2->ia_mtime ||
++ mdata_iatt1->ia_ctime != mdata_iatt2->ia_ctime ||
++ mdata_iatt1->ia_atime_nsec != mdata_iatt2->ia_atime_nsec ||
++ mdata_iatt1->ia_mtime_nsec != mdata_iatt2->ia_mtime_nsec ||
++ mdata_iatt1->ia_ctime_nsec != mdata_iatt2->ia_ctime_nsec) {
++ return 0;
++ }
++ return 1;
++ }
+
+ if (one->len != two->len)
+ return 0;
+@@ -1078,6 +1097,7 @@ static char *data_type_name[GF_DATA_TYPE_MAX] = {
+ [GF_DATA_TYPE_PTR] = "pointer",
+ [GF_DATA_TYPE_GFUUID] = "gf-uuid",
+ [GF_DATA_TYPE_IATT] = "iatt",
++ [GF_DATA_TYPE_MDATA] = "mdata",
+ };
+
+ int64_t
+@@ -2666,6 +2686,45 @@ err:
+ }
+
+ int
++dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata,
++ bool is_static)
++{
++ return dict_set_bin_common(this, key, mdata, sizeof(struct mdata_iatt),
++ is_static, GF_DATA_TYPE_MDATA);
++}
++
++int
++dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata)
++{
++ data_t *data = NULL;
++ int ret = -EINVAL;
++
++ if (!this || !key || !mdata) {
++ goto err;
++ }
++ ret = dict_get_with_ref(this, key, &data);
++ if (ret < 0) {
++ goto err;
++ }
++
++ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_MDATA, key, -EINVAL);
++ if (data->len < sizeof(struct mdata_iatt)) {
++ gf_msg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF,
++ "data value for '%s' is smaller than expected", key);
++ ret = -ENOBUFS;
++ goto err;
++ }
++
++ memcpy(mdata, data->data, min(data->len, sizeof(struct mdata_iatt)));
++
++err:
++ if (data)
++ data_unref(data);
++
++ return ret;
++}
++
++int
+ dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static)
+ {
+ return dict_set_bin_common(this, key, iatt, sizeof(struct iatt), is_static,
+diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h
+index 022f564..8239c7a 100644
+--- a/libglusterfs/src/glusterfs/dict.h
++++ b/libglusterfs/src/glusterfs/dict.h
+@@ -392,6 +392,11 @@ GF_MUST_CHECK int
+ dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static);
+ GF_MUST_CHECK int
+ dict_get_iatt(dict_t *this, char *key, struct iatt *iatt);
++GF_MUST_CHECK int
++dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata,
++ bool is_static);
++GF_MUST_CHECK int
++dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata);
+
+ void
+ dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain);
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 2cedf1a..79c93ae 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -229,6 +229,9 @@ enum gf_internal_fop_indicator {
+ #define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup"
+ #define QUOTA_READ_ONLY_KEY "trusted.glusterfs.quota.read-only"
+
++/* ctime related */
++#define CTIME_MDATA_XDATA_KEY "set-ctime-mdata"
++
+ /* afr related */
+ #define AFR_XATTR_PREFIX "trusted.afr"
+
+diff --git a/libglusterfs/src/glusterfs/iatt.h b/libglusterfs/src/glusterfs/iatt.h
+index bee7a0a..f03d68b 100644
+--- a/libglusterfs/src/glusterfs/iatt.h
++++ b/libglusterfs/src/glusterfs/iatt.h
+@@ -92,6 +92,15 @@ struct old_iatt {
+ uint32_t ia_ctime_nsec;
+ };
+
++struct mdata_iatt {
++ int64_t ia_atime; /* last access time */
++ int64_t ia_mtime; /* last modification time */
++ int64_t ia_ctime; /* last status change time */
++ uint32_t ia_atime_nsec;
++ uint32_t ia_mtime_nsec;
++ uint32_t ia_ctime_nsec;
++};
++
+ /* 64-bit mask for valid members in struct iatt. */
+ #define IATT_TYPE 0x0000000000000001U
+ #define IATT_MODE 0x0000000000000002U
+@@ -313,6 +322,17 @@ st_mode_from_ia(ia_prot_t prot, ia_type_t type)
+ return st_mode;
+ }
+
++static inline void
++iatt_to_mdata(struct mdata_iatt *mdata, struct iatt *iatt)
++{
++ mdata->ia_atime = iatt->ia_atime;
++ mdata->ia_atime_nsec = iatt->ia_atime_nsec;
++ mdata->ia_mtime = iatt->ia_mtime;
++ mdata->ia_mtime_nsec = iatt->ia_mtime_nsec;
++ mdata->ia_ctime = iatt->ia_ctime;
++ mdata->ia_ctime_nsec = iatt->ia_ctime_nsec;
++}
++
+ static inline int
+ iatt_from_stat(struct iatt *iatt, struct stat *stat)
+ {
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index 4dca7de..b161380 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -380,6 +380,7 @@ dict_get_bin
+ dict_get_double
+ dict_get_gfuuid
+ dict_get_iatt
++dict_get_mdata
+ dict_get_int16
+ dict_get_int32
+ dict_get_int32n
+@@ -417,6 +418,7 @@ dict_set_dynstrn
+ dict_set_dynstr_with_alloc
+ dict_set_gfuuid
+ dict_set_iatt
++dict_set_mdata
+ dict_set_int16
+ dict_set_int32
+ dict_set_int32n
+@@ -509,6 +511,7 @@ fop_lease_stub
+ fop_link_stub
+ fop_lk_stub
+ fop_log_level
++fop_lookup_cbk_stub
+ fop_lookup_stub
+ fop_mkdir_stub
+ fop_mknod_stub
+diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x
+index bacf0773..651f8de 100644
+--- a/rpc/xdr/src/glusterfs-fops.x
++++ b/rpc/xdr/src/glusterfs-fops.x
+@@ -245,5 +245,6 @@ enum gf_dict_data_type_t {
+ GF_DATA_TYPE_PTR,
+ GF_DATA_TYPE_GFUUID,
+ GF_DATA_TYPE_IATT,
++ GF_DATA_TYPE_MDATA,
+ GF_DATA_TYPE_MAX
+ };
+diff --git a/rpc/xdr/src/glusterfs3.h b/rpc/xdr/src/glusterfs3.h
+index 5521f4d..86b3a4c 100644
+--- a/rpc/xdr/src/glusterfs3.h
++++ b/rpc/xdr/src/glusterfs3.h
+@@ -585,6 +585,34 @@ out:
+ }
+
+ static inline void
++gfx_mdata_iatt_to_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt,
++ struct mdata_iatt *mdata_iatt)
++{
++ if (!mdata_iatt || !gf_mdata_iatt)
++ return;
++ mdata_iatt->ia_atime = gf_mdata_iatt->ia_atime;
++ mdata_iatt->ia_atime_nsec = gf_mdata_iatt->ia_atime_nsec;
++ mdata_iatt->ia_mtime = gf_mdata_iatt->ia_mtime;
++ mdata_iatt->ia_mtime_nsec = gf_mdata_iatt->ia_mtime_nsec;
++ mdata_iatt->ia_ctime = gf_mdata_iatt->ia_ctime;
++ mdata_iatt->ia_ctime_nsec = gf_mdata_iatt->ia_ctime_nsec;
++}
++
++static inline void
++gfx_mdata_iatt_from_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt,
++ struct mdata_iatt *mdata_iatt)
++{
++ if (!mdata_iatt || !gf_mdata_iatt)
++ return;
++ gf_mdata_iatt->ia_atime = mdata_iatt->ia_atime;
++ gf_mdata_iatt->ia_atime_nsec = mdata_iatt->ia_atime_nsec;
++ gf_mdata_iatt->ia_mtime = mdata_iatt->ia_mtime;
++ gf_mdata_iatt->ia_mtime_nsec = mdata_iatt->ia_mtime_nsec;
++ gf_mdata_iatt->ia_ctime = mdata_iatt->ia_ctime;
++ gf_mdata_iatt->ia_ctime_nsec = mdata_iatt->ia_ctime_nsec;
++}
++
++static inline void
+ gfx_stat_to_iattx(struct gfx_iattx *gf_stat, struct iatt *iatt)
+ {
+ if (!iatt || !gf_stat)
+@@ -721,6 +749,12 @@ dict_to_xdr(dict_t *this, gfx_dict *dict)
+ gfx_stat_from_iattx(&xpair->value.gfx_value_u.iatt,
+ (struct iatt *)dpair->value->data);
+ break;
++ case GF_DATA_TYPE_MDATA:
++ index++;
++ gfx_mdata_iatt_from_mdata_iatt(
++ &xpair->value.gfx_value_u.mdata_iatt,
++ (struct mdata_iatt *)dpair->value->data);
++ break;
+ case GF_DATA_TYPE_GFUUID:
+ index++;
+ memcpy(&xpair->value.gfx_value_u.uuid, dpair->value->data,
+@@ -787,6 +821,7 @@ xdr_to_dict(gfx_dict *dict, dict_t **to)
+ dict_t *this = NULL;
+ unsigned char *uuid = NULL;
+ struct iatt *iatt = NULL;
++ struct mdata_iatt *mdata_iatt = NULL;
+
+ if (!to || !dict)
+ goto out;
+@@ -854,6 +889,30 @@ xdr_to_dict(gfx_dict *dict, dict_t **to)
+ gfx_stat_to_iattx(&xpair->value.gfx_value_u.iatt, iatt);
+ ret = dict_set_iatt(this, key, iatt, false);
+ break;
++ case GF_DATA_TYPE_MDATA:
++ mdata_iatt = GF_CALLOC(1, sizeof(struct mdata_iatt),
++ gf_common_mt_char);
++ if (!mdata_iatt) {
++ errno = ENOMEM;
++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
++ "failed to allocate memory. key: %s", key);
++ ret = -1;
++ goto out;
++ }
++ gfx_mdata_iatt_to_mdata_iatt(
++ &xpair->value.gfx_value_u.mdata_iatt, mdata_iatt);
++ ret = dict_set_mdata(this, key, mdata_iatt, false);
++ if (ret != 0) {
++ GF_FREE(mdata_iatt);
++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM,
++ LG_MSG_DICT_SET_FAILED,
++ "failed to set the key (%s)"
++ " into dict",
++ key);
++ ret = -1;
++ goto out;
++ }
++ break;
+ case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
+ value = GF_MALLOC(xpair->value.gfx_value_u.other.other_len + 1,
+diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x
+index bec0872..6f92b70 100644
+--- a/rpc/xdr/src/glusterfs4-xdr.x
++++ b/rpc/xdr/src/glusterfs4-xdr.x
+@@ -46,6 +46,16 @@ struct gfx_iattx {
+ unsigned int mode; /* type of file and rwx mode */
+ };
+
++struct gfx_mdata_iatt {
++ hyper ia_atime; /* last access time */
++ hyper ia_mtime; /* last modification time */
++ hyper ia_ctime; /* last status change time */
++
++ unsigned int ia_atime_nsec;
++ unsigned int ia_mtime_nsec;
++ unsigned int ia_ctime_nsec;
++};
++
+ union gfx_value switch (gf_dict_data_type_t type) {
+ case GF_DATA_TYPE_INT:
+ hyper value_int;
+@@ -62,6 +72,8 @@ union gfx_value switch (gf_dict_data_type_t type) {
+ case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
+ opaque other<>;
++ case GF_DATA_TYPE_MDATA:
++ gfx_mdata_iatt mdata_iatt;
+ };
+
+ /* AUTH */
+diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym
+index 22cdf30..dd4ac85 100644
+--- a/rpc/xdr/src/libgfxdr.sym
++++ b/rpc/xdr/src/libgfxdr.sym
+@@ -251,6 +251,7 @@ xdr_to_write3args
+ xdr_vector_round_up
+ xdr_gfx_read_rsp
+ xdr_gfx_iattx
++xdr_gfx_mdata_iatt
+ xdr_gfx_value
+ xdr_gfx_dict_pair
+ xdr_gfx_dict
+@@ -344,4 +345,4 @@ xdr_compound_req_v2
+ xdr_gfx_compound_req
+ xdr_compound_rsp_v2
+ xdr_gfx_compound_rsp
+-xdr_gfx_copy_file_range_req
+\ No newline at end of file
++xdr_gfx_copy_file_range_req
+diff --git a/tests/basic/ctime/ctime-mdata-legacy-files.t b/tests/basic/ctime/ctime-mdata-legacy-files.t
+new file mode 100644
+index 0000000..2e782d5
+--- /dev/null
++++ b/tests/basic/ctime/ctime-mdata-legacy-files.t
+@@ -0,0 +1,83 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++cleanup;
++
++###############################################################################
++#Replica volume
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume start $V0
++
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
++
++#Disable ctime and create a file; it won't contain the "trusted.glusterfs.mdata" xattr
++TEST $CLI volume set $V0 ctime off
++
++TEST "mkdir $M0/DIR"
++TEST "echo hello_world > $M0/DIR/FILE"
++
++#Verify absence of xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE"
++
++#Enable ctime
++TEST $CLI volume set $V0 ctime on
++sleep 3
++TEST stat $M0/DIR/FILE
++
++#Verify presence of "trusted.glusterfs.mdata" xattr on backend
++#The lookup above should have created the xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE"
++
++###############################################################################
++#Disperse Volume
++
++TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2}
++TEST $CLI volume set $V1 performance.stat-prefetch off
++TEST $CLI volume start $V1
++
++TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M1;
++
++#Disable ctime and create a file; it won't contain the "trusted.glusterfs.mdata" xattr
++TEST $CLI volume set $V1 ctime off
++TEST "mkdir $M1/DIR"
++TEST "echo hello_world > $M1/DIR/FILE"
++
++#Verify absence of xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE"
++
++#Enable ctime
++TEST $CLI volume set $V1 ctime on
++sleep 3
++TEST stat $M1/DIR/FILE
++
++#Verify presence of "trusted.glusterfs.mdata" xattr on backend
++#The lookup above should have created the xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE"
++
++cleanup;
++###############################################################################
+diff --git a/xlators/features/utime/src/utime-messages.h b/xlators/features/utime/src/utime-messages.h
+index bac18ab..bd40265 100644
+--- a/xlators/features/utime/src/utime-messages.h
++++ b/xlators/features/utime/src/utime-messages.h
+@@ -23,6 +23,7 @@
+ * glfs-message-id.h.
+ */
+
+-GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY);
++GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY, UTIME_MSG_SET_MDATA_FAILED,
++ UTIME_MSG_DICT_SET_FAILED);
+
+ #endif /* __UTIME_MESSAGES_H__ */
+diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c
+index 877c751..2a986e7 100644
+--- a/xlators/features/utime/src/utime.c
++++ b/xlators/features/utime/src/utime.c
+@@ -9,8 +9,10 @@
+ */
+
+ #include "utime.h"
++#include "utime-helpers.h"
+ #include "utime-messages.h"
+ #include "utime-mem-types.h"
++#include <glusterfs/call-stub.h>
+
+ int32_t
+ gf_utime_invalidate(xlator_t *this, inode_t *inode)
+@@ -133,6 +135,124 @@ mem_acct_init(xlator_t *this)
+ }
+
+ int32_t
++gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int op_ret, int op_errno,
++ dict_t *xdata)
++{
++ /* Don't fail lookup if mdata setxattr fails */
++ if (op_ret) {
++ gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED,
++ "dict set of key for set-ctime-mdata failed");
++ }
++ call_resume(frame->local);
++ return 0;
++}
++
++int32_t
++gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, inode_t *inode,
++ struct iatt *stbuf, dict_t *xdata,
++ struct iatt *postparent)
++{
++ dict_t *dict = NULL;
++ struct mdata_iatt *mdata = NULL;
++ int ret = 0;
++ loc_t loc = {
++ 0,
++ };
++
++ if (!op_ret && dict_get(xdata, GF_XATTR_MDATA_KEY) == NULL) {
++ dict = dict_new();
++ if (!dict) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++ mdata = GF_MALLOC(sizeof(struct mdata_iatt), gf_common_mt_char);
++ if (mdata == NULL) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++ iatt_to_mdata(mdata, stbuf);
++ ret = dict_set_mdata(dict, CTIME_MDATA_XDATA_KEY, mdata, _gf_false);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY,
++ "dict set of key for set-ctime-mdata failed");
++ goto err;
++ }
++ frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk, op_ret,
++ op_errno, inode, stbuf, xdata,
++ postparent);
++ if (!frame->local) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY,
++ "lookup_cbk stub allocation failed");
++ goto stub_err;
++ }
++
++ loc.inode = inode_ref(inode);
++ gf_uuid_copy(loc.gfid, stbuf->ia_gfid);
++ STACK_WIND(frame, gf_utime_set_mdata_setxattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setxattr, &loc, dict, 0, NULL);
++
++ dict_unref(dict);
++ inode_unref(loc.inode);
++ return 0;
++ }
++
++ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
++ postparent);
++ return 0;
++
++err:
++ if (mdata) {
++ GF_FREE(mdata);
++ }
++stub_err:
++ if (dict) {
++ dict_unref(dict);
++ }
++ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
++ return 0;
++}
++
++int
++gf_utime_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
++{
++ int op_errno = -1;
++ int ret = -1;
++
++ VALIDATE_OR_GOTO(frame, err);
++ VALIDATE_OR_GOTO(this, err);
++ VALIDATE_OR_GOTO(loc, err);
++ VALIDATE_OR_GOTO(loc->inode, err);
++
++ xdata = xdata ? dict_ref(xdata) : dict_new();
++ if (!xdata) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ ret = dict_set_int8(xdata, GF_XATTR_MDATA_KEY, 1);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, -ret, UTIME_MSG_DICT_SET_FAILED,
++ "%s: Unable to set dict value for %s", loc->path,
++ GF_XATTR_MDATA_KEY);
++ op_errno = -ret;
++ goto free_dict;
++ }
++
++ STACK_WIND(frame, gf_utime_set_mdata_lookup_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->lookup, loc, xdata);
++ dict_unref(xdata);
++ return 0;
++
++free_dict:
++ dict_unref(xdata);
++err:
++ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
++ return 0;
++}
++
++int32_t
+ init(xlator_t *this)
+ {
+ utime_priv_t *utime = NULL;
+@@ -182,19 +302,27 @@ notify(xlator_t *this, int event, void *data, ...)
+ }
+
+ struct xlator_fops fops = {
+- /* TODO: Need to go through other fops and
+- * check if they modify time attributes
+- */
+- .rename = gf_utime_rename, .mknod = gf_utime_mknod,
+- .readv = gf_utime_readv, .fremovexattr = gf_utime_fremovexattr,
+- .open = gf_utime_open, .create = gf_utime_create,
+- .mkdir = gf_utime_mkdir, .writev = gf_utime_writev,
+- .rmdir = gf_utime_rmdir, .fallocate = gf_utime_fallocate,
+- .truncate = gf_utime_truncate, .symlink = gf_utime_symlink,
+- .zerofill = gf_utime_zerofill, .link = gf_utime_link,
+- .ftruncate = gf_utime_ftruncate, .unlink = gf_utime_unlink,
+- .setattr = gf_utime_setattr, .fsetattr = gf_utime_fsetattr,
+- .opendir = gf_utime_opendir, .removexattr = gf_utime_removexattr,
++ .rename = gf_utime_rename,
++ .mknod = gf_utime_mknod,
++ .readv = gf_utime_readv,
++ .fremovexattr = gf_utime_fremovexattr,
++ .open = gf_utime_open,
++ .create = gf_utime_create,
++ .mkdir = gf_utime_mkdir,
++ .writev = gf_utime_writev,
++ .rmdir = gf_utime_rmdir,
++ .fallocate = gf_utime_fallocate,
++ .truncate = gf_utime_truncate,
++ .symlink = gf_utime_symlink,
++ .zerofill = gf_utime_zerofill,
++ .link = gf_utime_link,
++ .ftruncate = gf_utime_ftruncate,
++ .unlink = gf_utime_unlink,
++ .setattr = gf_utime_setattr,
++ .fsetattr = gf_utime_fsetattr,
++ .opendir = gf_utime_opendir,
++ .removexattr = gf_utime_removexattr,
++ .lookup = gf_utime_lookup,
+ };
+ struct xlator_cbks cbks = {
+ .invalidate = gf_utime_invalidate,
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index ea3b69c..d22bbc2 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -2625,6 +2625,9 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ gf_cs_obj_state state = -1;
+ int i = 0;
+ int len;
++ struct mdata_iatt mdata_iatt = {
++ 0,
++ };
+
+ DECLARE_OLD_FS_ID_VAR;
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+@@ -2638,6 +2641,20 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
++ ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt);
++ if (ret == 0) {
++ /* This is initiated by lookup when ctime feature is enabled to create
++ * "trusted.glusterfs.mdata" xattr if not present. These are the files
++ * which were created when ctime feature is disabled.
++ */
++ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, &mdata_iatt,
++ &op_errno);
++ if (ret != 0) {
++ op_ret = -1;
++ }
++ goto out;
++ }
++
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
+index 3229275..15e23ff 100644
+--- a/xlators/storage/posix/src/posix-messages.h
++++ b/xlators/storage/posix/src/posix-messages.h
+@@ -68,6 +68,7 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED,
+ P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED,
+ P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED,
+ P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED,
+- P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED);
++ P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED,
++ P_MSG_NOMEM);
+
+ #endif /* !_GLUSTERD_MESSAGES_H_ */
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 5a5e6cd..647c0bb 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -245,6 +245,10 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ if (ret == -1 || !mdata) {
+ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
+ if (!mdata) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
++ "Could not allocate mdata. file: %s: gfid: %s",
++ real_path ? real_path : "null",
++ inode ? uuid_utoa(inode->gfid) : "null");
+ ret = -1;
+ goto out;
+ }
+@@ -262,18 +266,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ }
+ } else {
+ /* Failed to get mdata from disk, xattr missing.
+- * This happens on two cases.
+- * 1. File is created before ctime is enabled.
+- * 2. On new file creation.
+- *
+- * Do nothing, just return success. It is as
+- * good as ctime feature is not enabled for this
+- * file. For files created before ctime is enabled,
+- * time attributes gets updated into ctime structure
+- * once the metadata modification fop happens and
+- * time attributes become consistent eventually.
+- * For new files, it would obviously get updated
+- * before the fop completion.
++ * This happens when the file is created before
++ * ctime is enabled.
+ */
+ if (stbuf && op_errno != ENOENT) {
+ ret = 0;
+@@ -345,6 +339,54 @@ posix_compare_timespec(struct timespec *first, struct timespec *second)
+ return first->tv_sec - second->tv_sec;
+ }
+
++int
++posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
++ struct mdata_iatt *mdata_iatt, int *op_errno)
++{
++ posix_mdata_t *mdata = NULL;
++ int ret = 0;
++
++ GF_VALIDATE_OR_GOTO("posix", this, out);
++ GF_VALIDATE_OR_GOTO(this->name, inode, out);
++
++ LOCK(&inode->lock);
++ {
++ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
++ if (!mdata) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
++ "Could not allocate mdata. gfid: %s",
++ uuid_utoa(inode->gfid));
++ ret = -1;
++ *op_errno = ENOMEM;
++ goto unlock;
++ }
++
++ mdata->version = 1;
++ mdata->flags = 0;
++ mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
++ mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
++ mdata->atime.tv_sec = mdata_iatt->ia_atime;
++ mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
++ mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
++ mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
++
++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++
++ ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED,
++ "gfid: %s key:%s ", uuid_utoa(inode->gfid),
++ GF_XATTR_MDATA_KEY);
++ *op_errno = errno;
++ goto unlock;
++ }
++ }
++unlock:
++ UNLOCK(&inode->lock);
++out:
++ return ret;
++}
++
+ /* posix_set_mdata_xattr updates the posix_mdata_t based on the flag
+ * in inode context and stores it on disk
+ */
+@@ -372,6 +414,9 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ */
+ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
+ if (!mdata) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
++ "Could not allocate mdata. file: %s: gfid: %s",
++ real_path ? real_path : "null", uuid_utoa(inode->gfid));
+ ret = -1;
+ goto unlock;
+ }
+@@ -386,35 +431,11 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
+ } else {
+ /*
+- * This is the first time creating the time
+- * attr. This happens when you activate this
+- * feature, and the legacy file will not have
+- * any xattr set.
+- *
+- * New files will create extended attributes.
+- */
+-
+- /*
+- * TODO: This is wrong approach, because before
+- * creating fresh xattr, we should consult
+- * to all replica and/or distribution set.
+- *
+- * We should contact the time management
+- * xlators, and ask them to create an xattr.
+- */
+- /* We should not be relying on backend file's
+- * time attributes to load the initial ctime
+- * time attribute structure. This is incorrect
+- * as each replica set would have witnessed the
+- * file creation at different times.
+- *
+- * For new file creation, ctime, atime and mtime
+- * should be same, hence initiate the ctime
+- * structure with the time from the frame. But
+- * for the files which were created before ctime
+- * feature is enabled, this is not accurate but
+- * still fine as the times would get eventually
+- * accurate.
++ * This is the first time creating the time attr. This happens
++ * when you activate this feature. On this code path, only new
++ * files will create mdata xattr. The legacy files (files
++ * created before ctime enabled) will not have any xattr set.
++ * The xattr on legacy file will be set via lookup.
+ */
+
+ /* Don't create xattr with utimes/utimensat, only update if
+diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
+index 3416148..dc25e59 100644
+--- a/xlators/storage/posix/src/posix-metadata.h
++++ b/xlators/storage/posix/src/posix-metadata.h
+@@ -53,5 +53,9 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+ const char *real_path_in, int fd_in, inode_t *inode_in,
+ struct iatt *stbuf_in, const char *read_path_put,
+ int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
++int
++posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
++ struct mdata_iatt *mdata_iatt,
++ int *op_errno);
+
+ #endif /* _POSIX_METADATA_H */
+--
+1.8.3.1
+
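For orientation, the pieces added above compose as follows: the utime xlator converts the looked-up iatt into a mdata_iatt, stores it in a dict under CTIME_MDATA_XDATA_KEY, and winds a setxattr that posix intercepts. A minimal sketch of that dict round trip, assuming the glusterfs-internal headers introduced or extended by this patch; error handling is abbreviated:

    /* Sketch: ship time attributes through a dict, as utime does before
     * winding setxattr to posix. Not production code. */
    #include <stdbool.h>
    #include <glusterfs/dict.h>
    #include <glusterfs/glusterfs.h>
    #include <glusterfs/iatt.h>

    static int
    ship_mdata(struct iatt *stbuf)
    {
        struct mdata_iatt mdata = {0}, out = {0};
        dict_t *dict = dict_new();
        int ret = -1;

        if (!dict)
            return -1;
        iatt_to_mdata(&mdata, stbuf); /* copy only the time attributes */
        ret = dict_set_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata, true);
        if (ret == 0)
            /* on the brick side, posix_setxattr fetches it back: */
            ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &out);
        dict_unref(dict);
        return ret;
    }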
diff --git a/0261-features-utime-Fix-mem_put-crash.patch b/0261-features-utime-Fix-mem_put-crash.patch
new file mode 100644
index 0000000..2c3fe9e
--- /dev/null
+++ b/0261-features-utime-Fix-mem_put-crash.patch
@@ -0,0 +1,52 @@
+From 1aa175f353325775517daf1d48a19799e0cafc7a Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 22 Jul 2019 20:55:33 +0530
+Subject: [PATCH 261/261] features/utime: Fix mem_put crash
+
+Problem:
+When frame->local is not NULL, FRAME_DESTROY calls mem_put on it.
+Since the stub is already destroyed in call_resume(), this leads
+to a crash.
+
+Fix:
+Set frame->local to NULL before calling call_resume()
+
+Backport of:
+ > Patch: https://review.gluster.org/23091
+ > fixes: bz#1593542
+ > Change-Id: I0f8adf406f4cefdb89d7624ba7a9d9c2eedfb1de
+ > Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1715422
+Change-Id: I0f8adf406f4cefdb89d7624ba7a9d9c2eedfb1de
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/176726
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/features/utime/src/utime.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c
+index 2a986e7..e3a80b6 100644
+--- a/xlators/features/utime/src/utime.c
++++ b/xlators/features/utime/src/utime.c
+@@ -139,12 +139,14 @@ gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xdata)
+ {
++ call_stub_t *stub = frame->local;
+ /* Don't fail lookup if mdata setxattr fails */
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED,
+ "dict set of key for set-ctime-mdata failed");
+ }
+- call_resume(frame->local);
++ frame->local = NULL;
++ call_resume(stub);
+ return 0;
+ }
+
+--
+1.8.3.1
+
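The pattern behind this fix is worth spelling out: once a stub stored in frame->local is handed to call_resume(), call_resume() destroys it, so frame->local must be detached first or FRAME_DESTROY will later mem_put freed memory. A sketch of the safe shape, mirroring the callback patched above (the function name is illustrative):

    #include <glusterfs/call-stub.h>

    int32_t
    example_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                         int op_ret, int op_errno, dict_t *xdata)
    {
        call_stub_t *stub = frame->local;

        frame->local = NULL; /* keep FRAME_DESTROY from mem_put-ing the stub */
        call_resume(stub);   /* call_resume() frees the stub itself */
        return 0;
    }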
diff --git a/0262-glusterd-ctime-Disable-ctime-by-default.patch b/0262-glusterd-ctime-Disable-ctime-by-default.patch
new file mode 100644
index 0000000..95adf23
--- /dev/null
+++ b/0262-glusterd-ctime-Disable-ctime-by-default.patch
@@ -0,0 +1,78 @@
+From c44c9f2003b703d64a2a06c53f5a2b85e9dc7a11 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Wed, 31 Jul 2019 09:23:42 -0400
+Subject: [PATCH 262/262] glusterd/ctime: Disable ctime by default
+
+The ctime feature, in combination with gfid2path,
+causes a performance dip on rename workloads. Hence
+the feature is disabled by default.
+
+Change-Id: I280527eea4dc19bba39fb6a5e74760823a056dc9
+Label: DOWNSTREAM ONLY
+BUG: 1713890
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177421
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 10 +++++++---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ xlators/storage/posix/src/posix-common.c | 2 +-
+ 3 files changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 5e0214e..539e8a5 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4382,14 +4382,18 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ goto out;
+ }
+ }
+- /* a. ret will be -1 if features.ctime is not set in the volinfo->dict which
+- * means ctime should be loaded into the graph.
++ /* a. ret will be 0 (returned default) if features.ctime is not set
++ * in the volinfo->dict which means ctime should not be loaded into
++ * the graph. It is disabled by default.
+ * b. ret will be 1 if features.ctime is explicitly turned on through
+ * volume set and in that case ctime should be loaded into the graph.
+ * c. ret will be 0 if features.ctime is explicitly turned off and in that
+ * case ctime shouldn't be loaded into the graph.
+ */
+- ret = dict_get_str_boolean(set_dict, "features.ctime", -1);
++ ret = dict_get_str_boolean(set_dict, "features.ctime", 0);
++ if (ret == -1)
++ goto out;
++
+ if (conf->op_version >= GD_OP_VERSION_5_0 && ret) {
+ xl = volgen_graph_add(graph, "features/utime", volname);
+ if (!xl) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 7a83124..8ce338e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3680,7 +3680,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "features.ctime",
+ .voltype = "features/utime",
+ .validate_fn = validate_boolean,
+- .value = "on",
++ .value = "off",
+ .option = "!utime",
+ .op_version = GD_OP_VERSION_4_1_0,
+ .description = "enable/disable utime translator on the volume.",
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index bfe2cb0..d738692 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -1374,7 +1374,7 @@ struct volume_options posix_options[] = {
+ "SHA256 checksum. MD5 otherwise."},
+ {.key = {"ctime"},
+ .type = GF_OPTION_TYPE_BOOL,
+- .default_value = "on",
++ .default_value = "off",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .tags = {"ctime"},
+--
+1.8.3.1
+
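The whole behavior change hinges on the third argument of dict_get_str_boolean, which is the value returned when the key is absent. A short sketch of the three cases (a/b/c) described in the rewritten comment; set_dict and the return conventions are as used in the hunk above:

    #include <glusterfs/dict.h>

    static int
    should_load_ctime(dict_t *set_dict)
    {
        /* default 0: an unset "features.ctime" now means "do not load
         * the utime xlator"; only an explicit "on" returns 1 */
        int ret = dict_get_str_boolean(set_dict, "features.ctime", 0);

        if (ret < 0)
            return -1; /* present but not a parseable boolean */
        return ret;    /* 1 = explicitly on, 0 = off or unset */
    }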
diff --git a/0263-tests-fix-ctime-related-tests.patch b/0263-tests-fix-ctime-related-tests.patch
new file mode 100644
index 0000000..3c8fabb
--- /dev/null
+++ b/0263-tests-fix-ctime-related-tests.patch
@@ -0,0 +1,75 @@
+From 427dab431f7e8c4c8a01e9e9ed0892708a3d22d2 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 5 Aug 2019 08:33:29 +0530
+Subject: [PATCH 263/265] tests: fix ctime related tests
+
+With ctime now disabled by default, certain tests need to explicitly
+turn this option on to validate the functionality.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Id70310b4b09e36bf66756fea447186bb073b5604
+BUG: 1704562
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177704
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/ctime/ctime-noatime.t | 1 +
+ tests/basic/ctime/ctime-readdir.t | 1 +
+ tests/bugs/glusterd/bug-1696046.t | 8 +++++---
+ 3 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/tests/basic/ctime/ctime-noatime.t b/tests/basic/ctime/ctime-noatime.t
+index 609ccbd..a6c8d9c 100644
+--- a/tests/basic/ctime/ctime-noatime.t
++++ b/tests/basic/ctime/ctime-noatime.t
+@@ -20,6 +20,7 @@ function atime_compare {
+ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 ctime on
+ TEST $CLI volume set $V0 performance.stat-prefetch off
+ TEST $CLI volume set $V0 performance.read-ahead off
+ TEST $CLI volume set $V0 performance.quick-read off
+diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t
+index 4564fc1..fa069b3 100644
+--- a/tests/basic/ctime/ctime-readdir.t
++++ b/tests/basic/ctime/ctime-readdir.t
+@@ -9,6 +9,7 @@ TEST glusterd
+
+ TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
+ TEST $CLI volume set $V0 performance.stat-prefetch on
++TEST $CLI volume set $V0 ctime on
+ TEST $CLI volume set $V0 performance.readdir-ahead off
+ TEST $CLI volume start $V0;
+
+diff --git a/tests/bugs/glusterd/bug-1696046.t b/tests/bugs/glusterd/bug-1696046.t
+index e1c1eb2..f7992f5 100644
+--- a/tests/bugs/glusterd/bug-1696046.t
++++ b/tests/bugs/glusterd/bug-1696046.t
+@@ -22,6 +22,8 @@ TEST pidof glusterd;
+ TEST $CLI volume set all cluster.brick-multiplex on
+ TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+ TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{1,2,3};
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume set $V1 ctime on
+
+ ## Start volume and verify
+ TEST $CLI volume start $V0;
+@@ -64,9 +66,9 @@ TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG
+ # Do some operation
+ touch $M0/file1
+
+-# Check debug message debug message should be exist only for V0
+-# Server xlator is common in brick_mux so after enabling DEBUG log
+-# some debug message should be available for other xlators like posix
++# Check that debug messages exist only for V0. The server xlator is shared
++# under brick-mux, so after enabling DEBUG logging, some debug messages should
++# be available for other xlators like posix.
+
+ brick_log_file=$logdir"/bricks/"`brick-log-file-name`
+ nofdlog=$(cat $brick_log_file | grep file1 | grep -v server | wc -l)
+--
+1.8.3.1
+
diff --git a/0264-gfapi-Fix-deadlock-while-processing-upcall.patch b/0264-gfapi-Fix-deadlock-while-processing-upcall.patch
new file mode 100644
index 0000000..41ac9ee
--- /dev/null
+++ b/0264-gfapi-Fix-deadlock-while-processing-upcall.patch
@@ -0,0 +1,259 @@
+From 52dc121c412de9c1cc3058d782b949dc7b25dc3e Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Thu, 25 Jul 2019 12:56:12 +0530
+Subject: [PATCH 264/265] gfapi: Fix deadlock while processing upcall
+
+As mentioned in bug 1733166, there could be a potential deadlock
+while processing upcalls, depending on how each xlator chooses
+to act on them. The right way to fix such issues
+is to change the rpc callback communication process.
+- https://github.com/gluster/glusterfs/issues/697
+
+Until then, this makes changes in the gfapi layer to avoid any
+I/O processing in the callback path.
+
+This is backport of below mainline patch
+> https://review.gluster.org/#/c/glusterfs/+/23108/
+> bz#1733166
+> https://review.gluster.org/#/c/glusterfs/+/23107/ (release-6)
+
+Change-Id: I2079e95339e5d761d5060707f4555cfacab95c83
+fixes: bz#1733520
+Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177675
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/glfs-fops.c | 164 +++++++++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 131 insertions(+), 33 deletions(-)
+
+diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
+index 396f18c..e6adea5 100644
+--- a/api/src/glfs-fops.c
++++ b/api/src/glfs-fops.c
+@@ -34,7 +34,7 @@
+
+ struct upcall_syncop_args {
+ struct glfs *fs;
+- struct glfs_upcall *up_arg;
++ struct gf_upcall upcall_data;
+ };
+
+ #define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1)
+@@ -5716,8 +5716,28 @@ out:
+ static int
+ upcall_syncop_args_free(struct upcall_syncop_args *args)
+ {
+- if (args && args->up_arg)
+- GLFS_FREE(args->up_arg);
++ dict_t *dict = NULL;
++ struct gf_upcall *upcall_data = NULL;
++
++ if (args) {
++ upcall_data = &args->upcall_data;
++ switch (upcall_data->event_type) {
++ case GF_UPCALL_CACHE_INVALIDATION:
++ dict = ((struct gf_upcall_cache_invalidation *)(upcall_data
++ ->data))
++ ->dict;
++ break;
++ case GF_UPCALL_RECALL_LEASE:
++ dict = ((struct gf_upcall_recall_lease *)(upcall_data->data))
++ ->dict;
++ break;
++ }
++ if (dict)
++ dict_unref(dict);
++
++ GF_FREE(upcall_data->client_uid);
++ GF_FREE(upcall_data->data);
++ }
+ GF_FREE(args);
+ return 0;
+ }
+@@ -5727,14 +5747,7 @@ glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque)
+ {
+ struct upcall_syncop_args *args = opaque;
+
+- /* Here we not using upcall_syncop_args_free as application
+- * will be cleaning up the args->up_arg using glfs_free
+- * post processing upcall.
+- */
+- if (ret) {
+- upcall_syncop_args_free(args);
+- } else
+- GF_FREE(args);
++ (void)upcall_syncop_args_free(args);
+
+ return 0;
+ }
+@@ -5743,29 +5756,17 @@ static int
+ glfs_cbk_upcall_syncop(void *opaque)
+ {
+ struct upcall_syncop_args *args = opaque;
++ struct gf_upcall *upcall_data = NULL;
+ struct glfs_upcall *up_arg = NULL;
+ struct glfs *fs;
++ int ret = -1;
+
+ fs = args->fs;
+- up_arg = args->up_arg;
+-
+- if (fs->up_cbk && up_arg) {
+- (fs->up_cbk)(up_arg, fs->up_data);
+- return 0;
+- }
+-
+- return -1;
+-}
++ upcall_data = &args->upcall_data;
+
+-static struct upcall_syncop_args *
+-upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
+-{
+- struct upcall_syncop_args *args = NULL;
+- int ret = -1;
+- struct glfs_upcall *up_arg = NULL;
+-
+- if (!fs || !upcall_data)
++ if (!upcall_data) {
+ goto out;
++ }
+
+ up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall), glfs_release_upcall,
+ glfs_mt_upcall_entry_t);
+@@ -5795,6 +5796,8 @@ upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
+ if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) {
+ gf_msg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_INVALID_ENTRY,
+ "Upcall_EVENT_NULL received. Skipping it.");
++ ret = 0;
++ GLFS_FREE(up_arg);
+ goto out;
+ } else if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY,
+@@ -5802,6 +5805,85 @@ upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
+ goto out;
+ }
+
++ if (fs->up_cbk && up_arg)
++ (fs->up_cbk)(up_arg, fs->up_data);
++
++ /* application takes care of calling glfs_free on up_arg post
++ * their processing */
++
++out:
++ return ret;
++}
++
++static struct gf_upcall_cache_invalidation *
++gf_copy_cache_invalidation(struct gf_upcall_cache_invalidation *src)
++{
++ struct gf_upcall_cache_invalidation *dst = NULL;
++
++ if (!src)
++ goto out;
++
++ dst = GF_CALLOC(1, sizeof(struct gf_upcall_cache_invalidation),
++ glfs_mt_upcall_entry_t);
++
++ if (!dst) {
++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
++ "Upcall entry allocation failed.");
++ goto out;
++ }
++
++ dst->flags = src->flags;
++ dst->expire_time_attr = src->expire_time_attr;
++ dst->stat = src->stat;
++ dst->p_stat = src->p_stat;
++ dst->oldp_stat = src->oldp_stat;
++
++ if (src->dict)
++ dst->dict = dict_copy_with_ref(src->dict, NULL);
++
++ return dst;
++out:
++ return NULL;
++}
++
++static struct gf_upcall_recall_lease *
++gf_copy_recall_lease(struct gf_upcall_recall_lease *src)
++{
++ struct gf_upcall_recall_lease *dst = NULL;
++
++ if (!src)
++ goto out;
++
++ dst = GF_CALLOC(1, sizeof(struct gf_upcall_recall_lease),
++ glfs_mt_upcall_entry_t);
++
++ if (!dst) {
++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
++ "Upcall entry allocation failed.");
++ goto out;
++ }
++
++ dst->lease_type = src->lease_type;
++ memcpy(dst->tid, src->tid, 16);
++
++ if (src->dict)
++ dst->dict = dict_copy_with_ref(src->dict, NULL);
++
++ return dst;
++out:
++ return NULL;
++}
++
++static struct upcall_syncop_args *
++upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
++{
++ struct upcall_syncop_args *args = NULL;
++ int ret = -1;
++ struct gf_upcall *t_data = NULL;
++
++ if (!fs || !upcall_data)
++ goto out;
++
+ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args),
+ glfs_mt_upcall_entry_t);
+ if (!args) {
+@@ -5819,15 +5901,31 @@ upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
+ * notification without taking any lock/ref.
+ */
+ args->fs = fs;
+- args->up_arg = up_arg;
++ t_data = &(args->upcall_data);
++ t_data->client_uid = gf_strdup(upcall_data->client_uid);
+
+- /* application takes care of calling glfs_free on up_arg post
+- * their processing */
++ gf_uuid_copy(t_data->gfid, upcall_data->gfid);
++ t_data->event_type = upcall_data->event_type;
++
++ switch (t_data->event_type) {
++ case GF_UPCALL_CACHE_INVALIDATION:
++ t_data->data = gf_copy_cache_invalidation(
++ (struct gf_upcall_cache_invalidation *)upcall_data->data);
++ break;
++ case GF_UPCALL_RECALL_LEASE:
++ t_data->data = gf_copy_recall_lease(
++ (struct gf_upcall_recall_lease *)upcall_data->data);
++ break;
++ }
++
++ if (!t_data->data)
++ goto out;
+
+ return args;
+ out:
+- if (up_arg) {
+- GLFS_FREE(up_arg);
++ if (ret) {
++ GF_FREE(args->upcall_data.client_uid);
++ GF_FREE(args);
+ }
+
+ return NULL;
+--
+1.8.3.1
+
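The shape of the fix generalizes: deep-copy everything the notification needs (taking a ref on any embedded dict), then hand the copy to a synctask so no upcall processing happens in the rpc callback context itself. A hedged sketch of that offload pattern using glusterfs's syncop API; the payload struct and worker body here are illustrative, not the gfapi code:

    #include <glusterfs/mem-pool.h>
    #include <glusterfs/syncop.h>

    struct payload {
        struct glfs *fs;
        struct gf_upcall upcall_data; /* deep copy; owns its dict ref */
    };

    static int
    upcall_worker(void *opaque)
    {
        /* runs in synctask context, where blocking I/O is safe;
         * build the glfs_upcall and invoke fs->up_cbk here */
        return 0;
    }

    static int
    upcall_done(int ret, call_frame_t *frame, void *opaque)
    {
        /* always release the copy (real code also unrefs the dict,
         * as upcall_syncop_args_free above does) */
        GF_FREE(opaque);
        return 0;
    }

    /* from the rpc callback: copy first, then offload, e.g.
     *   synctask_new(env, upcall_worker, upcall_done, NULL, copy); */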
diff --git a/0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch b/0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch
new file mode 100644
index 0000000..33663f0
--- /dev/null
+++ b/0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch
@@ -0,0 +1,47 @@
+From 7455900798446681fea1a2693fac9b423ad9722a Mon Sep 17 00:00:00 2001
+From: Csaba Henk <csaba@redhat.com>
+Date: Tue, 9 Jul 2019 11:06:49 +0200
+Subject: [PATCH 265/265] fuse: add missing GF_FREE to fuse_interrupt
+
+Upstream:
+(Reviewed on https://review.gluster.org/c/glusterfs/+/23016)
+> Change-Id: Id7e003e4a53d0a0057c1c84e1cd704c80a6cb015
+> Fixes: bz#1728047
+> Signed-off-by: Csaba Henk <csaba@redhat.com>
+
+BUG: 1734423
+Change-Id: I50640bf9b56349ab9b07140bdce8a45a7d07ba7a
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177298
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mount/fuse/src/fuse-bridge.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index c05866b..1c946a2 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -661,7 +661,7 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg,
+ " failed to allocate timed message",
+ finh->unique, fii->unique);
+
+- return;
++ goto out;
+ }
+
+ dmsg->fuse_out_header.unique = finh->unique;
+@@ -673,6 +673,9 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg,
+
+ send_fuse_timed(this, dmsg);
+ }
++
++out:
++ GF_FREE(finh);
+ }
+
+ /*
+--
+1.8.3.1
+
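The fix converts an early return into a jump to a single exit label that releases finh on every path. As a general sketch of this cleanup idiom (the function and buffer below are illustrative, not the actual fuse code):

    #include <glusterfs/mem-pool.h>
    #include <glusterfs/mem-types.h>

    static void
    handle_request(void *finh)
    {
        char *dmsg = GF_MALLOC(64, gf_common_mt_char);

        if (!dmsg)
            goto out;   /* the failure path still reaches the free below */

        /* ... fill and send the reply ... */
        GF_FREE(dmsg);
    out:
        GF_FREE(finh);  /* previously leaked on the early-return path */
    }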
diff --git a/0266-geo-rep-Fix-mount-broker-setup-issue.patch b/0266-geo-rep-Fix-mount-broker-setup-issue.patch
new file mode 100644
index 0000000..bc4f84e
--- /dev/null
+++ b/0266-geo-rep-Fix-mount-broker-setup-issue.patch
@@ -0,0 +1,63 @@
+From cb9d0fa4bd2664556f0564406037f9fb7fb781a6 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Wed, 31 Jul 2019 15:40:55 +0530
+Subject: [PATCH 266/276] geo-rep: Fix mount broker setup issue
+
+Even the use of the builtin 'type' command, as in patch [1],
+causes issues if the argument in question is not part of the
+PATH environment variable for that user. This patch fixes the
+issue by sourcing /etc/profile, which was already being
+done in another part of the script.
+
+[1] https://review.gluster.org/23089
+
+Backport of:
+ > Patch: https://review.gluster.org/23136
+ > Change-Id: Iceb78835967ec6a4350983eec9af28398410c002
+ > fixes: bz#1734738
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: Iceb78835967ec6a4350983eec9af28398410c002
+BUG: 1734734
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177867
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/src/gverify.sh | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh
+index 692c1d6..f5f70d2 100755
+--- a/geo-replication/src/gverify.sh
++++ b/geo-replication/src/gverify.sh
+@@ -180,6 +180,8 @@ function main()
+ > $log_file
+
+ inet6=$7
++ local cmd_line
++ local ver
+
+ # Use FORCE_BLOCKER flag in the error message to differentiate
+ # between the errors which the force command should bypass
+@@ -206,13 +208,14 @@ function main()
+ exit 1;
+ fi;
+
++ cmd_line=$(cmd_slave);
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+- ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "type -p gluster"
++ ver=$(ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'")
+ else
+- ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "type -p gluster"
++ ver=$(ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'")
+ fi
+
+- if [ $? -ne 0 ]; then
++ if [ -z "$ver" ]; then
+ echo "FORCE_BLOCKER|gluster command not found on $3 for user $2." > $log_file
+ exit 1;
+ fi;
+--
+1.8.3.1
+
diff --git a/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch b/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch
new file mode 100644
index 0000000..0be69e8
--- /dev/null
+++ b/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch
@@ -0,0 +1,143 @@
+From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Mon, 29 Jul 2019 16:22:10 +0530
+Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
+
+Problem:
+Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
+if it's not present. In a multi-client scenario, there is a race
+which results in updating the ctime xattr to an older value.
+
+e.g. Let c1 and c2 be two clients and file1 be a file which
+doesn't have the ctime xattr. Let the ctime of file1 be t1
+(ctime heals time attributes from the backend when not present).
+
+Now following operations are done on mount
+c1 -> ls -l /mnt/file1 | c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
+
+The race is that both c1 and c2 didn't fetch the ctime xattr in lookup,
+so both of them try to heal ctime to time 't1'. If c2 wins the race and
+appends the file before c1 heals it, it sets the time to 't1' and updates
+it to 't2' (because of the append). Now c1 proceeds to heal and sets it
+back to 't1', which is incorrect.
+
+Solution:
+Compare the times during heal and only update to the larger time. This is
+the general approach used in the ctime feature but got missed when healing
+legacy files.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
+
+>fixes: bz#1734299
+>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
+>Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+BUG: 1734305
+Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
+ 1 file changed, 58 insertions(+), 18 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 647c0bb..57791fa 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+ struct mdata_iatt *mdata_iatt, int *op_errno)
+ {
+ posix_mdata_t *mdata = NULL;
++ posix_mdata_t imdata = {
++ 0,
++ };
+ int ret = 0;
++ gf_boolean_t mdata_already_set = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("posix", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+- mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
+- if (!mdata) {
+- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
+- "Could not allocate mdata. gfid: %s",
+- uuid_utoa(inode->gfid));
+- ret = -1;
+- *op_errno = ENOMEM;
+- goto unlock;
+- }
++ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
++ if (ret == 0 && mdata) {
++ mdata_already_set = _gf_true;
++ } else if (ret == -1 || !mdata) {
++ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
++ if (!mdata) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
++ "Could not allocate mdata. gfid: %s",
++ uuid_utoa(inode->gfid));
++ ret = -1;
++ *op_errno = ENOMEM;
++ goto unlock;
++ }
++
++ ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
++ op_errno);
++ if (ret == 0) {
++ /* Got mdata from disk. This is a race, another client
++ * has healed the xattr during lookup. So set it in inode
++ * ctx */
++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++ mdata_already_set = _gf_true;
++ } else {
++ *op_errno = 0;
++ mdata->version = 1;
++ mdata->flags = 0;
++ mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
++ mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
++ mdata->atime.tv_sec = mdata_iatt->ia_atime;
++ mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
++ mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
++ mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
+
+- mdata->version = 1;
+- mdata->flags = 0;
+- mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
+- mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
+- mdata->atime.tv_sec = mdata_iatt->ia_atime;
+- mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
+- mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
+- mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++ }
++ }
+
+- __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
++ if (mdata_already_set) {
++ /* Compare and update the larger time */
++ imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
++ imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
++ imdata.atime.tv_sec = mdata_iatt->ia_atime;
++ imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
++ imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
++ imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
++
++ if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
++ mdata->ctime = imdata.ctime;
++ }
++ if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
++ mdata->mtime = imdata.mtime;
++ }
++ if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
++ mdata->atime = imdata.atime;
++ }
++ }
+
+ ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
+ if (ret) {
+--
+1.8.3.1
+
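The heal therefore becomes a last-writer-wins merge: each time attribute only ever moves forward. posix_compare_timespec (used above) returns a positive value when its first argument is later; under that assumption, the merge step reduces to the self-contained sketch below:

    #include <time.h>

    /* Sketch: keep the newer of two timestamps, the rule applied above
     * when another client has already healed the mdata xattr. */
    static void
    merge_newer(struct timespec *stored, const struct timespec *incoming)
    {
        if (incoming->tv_sec > stored->tv_sec ||
            (incoming->tv_sec == stored->tv_sec &&
             incoming->tv_nsec > stored->tv_nsec))
            *stored = *incoming;
    }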
diff --git a/0268-rpc-transport-have-default-listen-port.patch b/0268-rpc-transport-have-default-listen-port.patch
new file mode 100644
index 0000000..176a907
--- /dev/null
+++ b/0268-rpc-transport-have-default-listen-port.patch
@@ -0,0 +1,46 @@
+From 872e344c0ab40c37b1872c32f5d5fddc097a1460 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Mon, 5 Aug 2019 21:16:35 +0530
+Subject: [PATCH 268/276] rpc/transport: have default listen-port
+
+With release-6, the transport.socket.listen-port parameter is now
+configurable in glusterd.vol. However, the default value wasn't defined
+in the code, and this breaks backward compatibility: if one has
+a modified glusterd.vol file, then post upgrade the same file will be
+retained, and the new changes introduced as part of the release wouldn't
+be available in glusterd.vol. So it's important that for each new
+option introduced in the glusterd.vol file, backward compatibility is
+guaranteed.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23160/
+
+>Fixes: bz#1737676
+>Change-Id: I776b28bff786320cda299fe673d824024dc9803e
+>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1734534
+Change-Id: I776b28bff786320cda299fe673d824024dc9803e
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177862
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ rpc/rpc-transport/socket/src/name.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
+index ca14402..7f18cc4 100644
+--- a/rpc/rpc-transport/socket/src/name.c
++++ b/rpc/rpc-transport/socket/src/name.c
+@@ -367,6 +367,8 @@ af_inet_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ listen_port_data = dict_get(options, "transport.socket.listen-port");
+ if (listen_port_data) {
+ listen_port = data_to_uint16(listen_port_data);
++ } else {
++ listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
+ }
+
+ listen_host_data = dict_get(options, "transport.socket.bind-address");
+--
+1.8.3.1
+
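The stated rule reduces to: every option readable from glusterd.vol must have a compiled-in fallback, so an old user-edited file keeps working after upgrade. A sketch of the pattern the patch applies, mirroring the listen-port handling above; GF_DEFAULT_SOCKET_LISTEN_PORT is the stock default from the socket transport headers:

    #include <stdint.h>
    #include <glusterfs/dict.h>

    static uint16_t
    get_listen_port(dict_t *options)
    {
        data_t *data = dict_get(options, "transport.socket.listen-port");

        if (data)
            return data_to_uint16(data);      /* configured value */
        return GF_DEFAULT_SOCKET_LISTEN_PORT; /* compiled-in default */
    }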
diff --git a/0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch b/0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch
new file mode 100644
index 0000000..f2cb5f2
--- /dev/null
+++ b/0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch
@@ -0,0 +1,58 @@
+From 7c2d6e82d7d3430ad8a557b6ae726765f7e874e9 Mon Sep 17 00:00:00 2001
+From: Kinglong Mee <kinglongmee@gmail.com>
+Date: Fri, 12 Apr 2019 11:35:55 +0800
+Subject: [PATCH 269/276] ec: fix truncate lock to cover the write in truncate
+ clean
+
+ec_truncate_clean writes under the lock granted for truncate,
+but the lock range is calculated by ec_adjust_offset_up, so
+the write in ec_truncate_clean falls outside the lock.
+
+fixes: bz#1732770
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/22552
+Change-Id: Idbe1fd48d26afe49c36b77db9f12e0907f5a4134
+Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177973
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-inode-write.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
+index a903664..ea55140 100644
+--- a/xlators/cluster/ec/src/ec-inode-write.c
++++ b/xlators/cluster/ec/src/ec-inode-write.c
+@@ -1405,6 +1405,7 @@ int32_t
+ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
+ {
+ ec_cbk_data_t *cbk;
++ off_t offset_down;
+
+ switch (state) {
+ case EC_STATE_INIT:
+@@ -1416,16 +1417,19 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
+ /* Fall through */
+
+ case EC_STATE_LOCK:
++ offset_down = fop->user_size;
++ ec_adjust_offset_down(fop->xl->private, &offset_down, _gf_true);
++
+ if (fop->id == GF_FOP_TRUNCATE) {
+ ec_lock_prepare_inode(
+ fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+- fop->offset, EC_RANGE_FULL);
++ offset_down, EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(
+ fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+- fop->offset, EC_RANGE_FULL);
++ offset_down, EC_RANGE_FULL);
+ }
+ ec_lock(fop);
+
+--
+1.8.3.1
+
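The essence of the fix above is that the lock range must start from the stripe-aligned (rounded-down) offset, not the user-supplied one, so the read-modify-write of the partial stripe stays covered. A standalone sketch of the down-alignment, assuming a power-of-two stripe size (EC's ec_adjust_offset_down handles the general case):

    #include <stdint.h>
    #include <stdio.h>

    /* Round offset down to the start of its stripe so that a lock taken
     * from the adjusted offset also covers the partial stripe that the
     * truncate path must read, modify and rewrite. */
    static uint64_t
    adjust_offset_down(uint64_t offset, uint64_t stripe_size)
    {
        return offset & ~(stripe_size - 1);
    }

    int
    main(void)
    {
        uint64_t stripe_size = 4096;
        uint64_t user_size = 10000;   /* truncate target */
        uint64_t lock_from = adjust_offset_down(user_size, stripe_size);

        /* Locking from 8192 rather than 10000 covers the straddling stripe. */
        printf("lock range starts at %llu\n", (unsigned long long)lock_from);
        return 0;
    }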
diff --git a/0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch b/0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch
new file mode 100644
index 0000000..5015e2c
--- /dev/null
+++ b/0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch
@@ -0,0 +1,42 @@
+From 84d8a0ca5b521b9d87679ffebe420fe69869961d Mon Sep 17 00:00:00 2001
+From: Kinglong Mee <kinglongmee@gmail.com>
+Date: Mon, 8 Jul 2019 21:13:28 +0800
+Subject: [PATCH 270/276] cluster/ec: inherit healing from lock when it has
+ info
+
+If the lock has info, the fop should inherit the healing mask from it.
+Otherwise, the fop cannot inherit the right healing mask when changed_flags is zero.
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23010
+Change-Id: Ife80c9169d2c555024347a20300b0583f7e8a87f
+fixes: bz#1732792
+Signed-off-by: Kinglong Mee <mijinlong@horiscale.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177974
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index e2e582f..db1ff5b 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -1412,11 +1412,12 @@ ec_get_size_version(ec_lock_link_t *link)
+ set_dirty = ec_set_dirty_flag(link, ctx, dirty);
+
+ /* If ec metadata has already been retrieved, do not try again. */
+- if (ctx->have_info && (!set_dirty)) {
++ if (ctx->have_info) {
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= lock->healing;
+ }
+- goto unlock;
++ if (!set_dirty)
++ goto unlock;
+ }
+
+ /* Determine if there's something we need to retrieve for the current
+--
+1.8.3.1
+
diff --git a/0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch b/0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch
new file mode 100644
index 0000000..26ec3e7
--- /dev/null
+++ b/0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch
@@ -0,0 +1,116 @@
+From 52d71ad0e5c27808e7d8eea8a0920298837e408c Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Wed, 17 Jul 2019 14:50:22 +0200
+Subject: [PATCH 271/276] cluster/ec: fix EIO error for concurrent writes on
+ sparse files
+
+EC doesn't allow concurrent writes on overlapping areas; they are
+serialized. However, non-overlapping writes are serviced in parallel.
+When a write is not aligned, EC first needs to read the entire chunk
+from disk, apply the modified fragment and write it again.
+
+The problem appears on sparse files because a write to an offset
+implicitly creates data on offsets below it (so, in some way, they
+are overlapping). For example, if a file is empty and we read 10 bytes
+from offset 10, read() will return 0 bytes. Now, if we write one byte
+at offset 1M and retry the same read, the system call will return 10
+bytes (all containing 0's).
+
+So if we have two writes, the first one at offset 10 and the second one
+at offset 1M, EC will send both in parallel because they do not overlap.
+However, the first one will try to read missing data from the first chunk
+(i.e. offsets 0 to 9) to recombine the entire chunk and do the final write.
+This read will happen in parallel with the write to 1M. What could happen
+is that half of the bricks process the write before the read, and the
+other half do the read before the write. Some bricks will return 10 bytes
+of data while the others will return 0 bytes (because the file on the brick
+has not been expanded yet).
+
+When EC tries to recombine the answers from the bricks, it can't, because
+it needs more than half consistent answers to recover the data. So this
+read fails with EIO error. This error is propagated to the parent write,
+which is aborted and EIO is returned to the application.
+
+The issue happened because EC assumed that a write to a given offset
+implies that offsets below it exist.
+
+This fix prevents the read of the chunk from bricks if the current size
+of the file is smaller than the read chunk offset. This size is
+correctly tracked, so this fixes the issue.
+
+This patch also modifies the ec-stripe.t file for Test #13.
+With this patch, if the file size is less than the offset we are writing
+to, we fill zeros in the head and tail and do not consider it a stripe
+cache miss. That makes sense, as we already know what data that part
+holds, so there is no need to read it from the bricks.
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23066
+Change-Id: Ic342e8c35c555b8534109e9314c9a0710b6225d6
+fixes: bz#1731448
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177975
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/ec/ec-stripe.t | 2 +-
+ xlators/cluster/ec/src/ec-inode-write.c | 26 +++++++++++++++++---------
+ 2 files changed, 18 insertions(+), 10 deletions(-)
+
+diff --git a/tests/basic/ec/ec-stripe.t b/tests/basic/ec/ec-stripe.t
+index 1e940eb..98b9229 100644
+--- a/tests/basic/ec/ec-stripe.t
++++ b/tests/basic/ec/ec-stripe.t
+@@ -202,7 +202,7 @@ TEST truncate -s 0 $B0/test_file
+ TEST truncate -s 0 $M0/test_file
+ TEST dd if=$B0/misc_file of=$B0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc
+ TEST dd if=$B0/misc_file of=$M0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc
+-check_statedump_md5sum 4 5
++check_statedump_md5sum 4 4
+ clean_file_unmount
+
+ ### 14 - Truncate to invalidate all but one the stripe in cache ####
+diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
+index ea55140..a45e6d6 100644
+--- a/xlators/cluster/ec/src/ec-inode-write.c
++++ b/xlators/cluster/ec/src/ec-inode-write.c
+@@ -2013,20 +2013,28 @@ ec_writev_start(ec_fop_data_t *fop)
+ if (err != 0) {
+ goto failed_fd;
+ }
++ tail = fop->size - fop->user_size - fop->head;
+ if (fop->head > 0) {
+- found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
+- if (!found_stripe) {
+- if (ec_make_internal_fop_xdata(&xdata)) {
+- err = -ENOMEM;
+- goto failed_xdata;
++ if (current > fop->offset) {
++ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
++ if (!found_stripe) {
++ if (ec_make_internal_fop_xdata(&xdata)) {
++ err = -ENOMEM;
++ goto failed_xdata;
++ }
++ ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
++ ec_writev_merge_head, NULL, fd, ec->stripe_size,
++ fop->offset, 0, xdata);
++ }
++ } else {
++ memset(fop->vector[0].iov_base, 0, fop->head);
++ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
++ if (ec->stripe_cache && (fop->size <= ec->stripe_size)) {
++ ec_add_stripe_in_cache(ec, fop);
+ }
+- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
+- ec_writev_merge_head, NULL, fd, ec->stripe_size,
+- fop->offset, 0, xdata);
+ }
+ }
+
+- tail = fop->size - fop->user_size - fop->head;
+ if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
+ /* Current locking scheme will make sure the 'current' below will
+ * never decrease while the fop is in progress, so the checks will
+--
+1.8.3.1
+
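The decision the patch above encodes: only read the head stripe back from the bricks when the file's current size actually reaches the write offset; otherwise the missing bytes are known zeros and can be filled locally. A self-contained sketch of that branch, with hypothetical names standing in for the EC structures:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Decide how to complete a partial head stripe for an unaligned write.
     * 'current' is the file size tracked under the lock; 'offset' is where
     * the user write begins. If the file does not yet extend to 'offset',
     * the head bytes are guaranteed zeros, so no brick read is needed. */
    static void
    prepare_head(char *buf, size_t head, uint64_t current, uint64_t offset)
    {
        if (current > offset) {
            /* Data may exist below the write offset: it must be read back
             * from the bricks before recombining the stripe (omitted). */
            printf("read %zu head bytes from bricks\n", head);
        } else {
            /* Sparse region: fill the head with zeros locally. */
            memset(buf, 0, head);
            printf("zero-filled %zu head bytes locally\n", head);
        }
    }

    int
    main(void)
    {
        char stripe[4096] = {0};

        /* Empty file, write at 1M: no brick read, no false EIO. */
        prepare_head(stripe, 10, /*current=*/0, /*offset=*/1 << 20);
        return 0;
    }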
diff --git a/0272-cluster-ec-Always-read-from-good-mask.patch b/0272-cluster-ec-Always-read-from-good-mask.patch
new file mode 100644
index 0000000..a6193e4
--- /dev/null
+++ b/0272-cluster-ec-Always-read-from-good-mask.patch
@@ -0,0 +1,90 @@
+From 220b95085847b5f6a9e5bee7a9519efe72600e6a Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Thu, 18 Jul 2019 11:25:31 +0530
+Subject: [PATCH 272/276] cluster/ec: Always read from good-mask
+
+There are cases where fop->mask may include bricks from fop->healing,
+and readv shouldn't be wound on healing bricks. To avoid this, always
+wind readv to lock->good_mask.
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23069
+fixes: bz#1730914
+Change-Id: I2226ef0229daf5ff315d51e868b980ee48060b87
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177976
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 3 +++
+ xlators/cluster/ec/src/ec-inode-write.c | 27 ++++++++++++++++++++++-----
+ 2 files changed, 25 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index db1ff5b..28b31c9 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -654,6 +654,9 @@ ec_child_select(ec_fop_data_t *fop)
+ * unlock should go on all subvols where lock is performed*/
+ if (fop->parent && !ec_internal_op(fop)) {
+ fop->mask &= (fop->parent->mask & ~fop->parent->healing);
++ if (ec_is_data_fop(fop->id)) {
++ fop->healing |= fop->parent->healing;
++ }
+ }
+
+ if ((fop->mask & ~ec->xl_up) != 0) {
+diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
+index a45e6d6..4f35b6d 100644
+--- a/xlators/cluster/ec/src/ec-inode-write.c
++++ b/xlators/cluster/ec/src/ec-inode-write.c
+@@ -1977,6 +1977,20 @@ ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which)
+ return found;
+ }
+
++static uintptr_t
++ec_get_lock_good_mask(inode_t *inode, xlator_t *xl)
++{
++ ec_lock_t *lock = NULL;
++ ec_inode_t *ictx = NULL;
++ LOCK(&inode->lock);
++ {
++ ictx = __ec_inode_get(inode, xl);
++ lock = ictx->inode_lock;
++ }
++ UNLOCK(&inode->lock);
++ return lock->good_mask;
++}
++
+ void
+ ec_writev_start(ec_fop_data_t *fop)
+ {
+@@ -2022,9 +2036,10 @@ ec_writev_start(ec_fop_data_t *fop)
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
+- ec_writev_merge_head, NULL, fd, ec->stripe_size,
+- fop->offset, 0, xdata);
++ ec_readv(fop->frame, fop->xl,
++ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
++ EC_MINIMUM_MIN, ec_writev_merge_head, NULL, fd,
++ ec->stripe_size, fop->offset, 0, xdata);
+ }
+ } else {
+ memset(fop->vector[0].iov_base, 0, fop->head);
+@@ -2047,8 +2062,10 @@ ec_writev_start(ec_fop_data_t *fop)
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
+- ec_writev_merge_tail, NULL, fd, ec->stripe_size,
++ ec_readv(fop->frame, fop->xl,
++ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
++ EC_MINIMUM_MIN, ec_writev_merge_tail, NULL, fd,
++ ec->stripe_size,
+ fop->offset + fop->size - ec->stripe_size, 0, xdata);
+ }
+ } else {
+--
+1.8.3.1
+
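The subvolume sets involved are bitmasks, and the fix above amounts to intersecting the read targets with the set known to hold good data. A toy illustration of the mask arithmetic, using plain bitmasks rather than the EC structures:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* Bit i set means brick i participates. */
        uintptr_t all_bricks = 0x3F;    /* six bricks */
        uintptr_t healing    = 0x01;    /* brick 0 is being healed */
        uintptr_t good_mask  = all_bricks & ~healing;

        /* A fop mask that still includes the healing brick must not be
         * used for readv: a healing brick can return stale fragments.
         * Restrict the read to the good mask instead. */
        uintptr_t fop_mask  = all_bricks;
        uintptr_t read_mask = fop_mask & good_mask;

        printf("wind readv on mask 0x%lx, not 0x%lx\n",
               (unsigned long)read_mask, (unsigned long)fop_mask);
        return 0;
    }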
diff --git a/0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch b/0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch
new file mode 100644
index 0000000..5c01cb5
--- /dev/null
+++ b/0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch
@@ -0,0 +1,86 @@
+From d5f931b334ac7abccaf30d277ce3ca9cfae0da5b Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 29 Jul 2019 14:08:37 +0530
+Subject: [PATCH 273/276] cluster/ec: Fix reopen flags to avoid misbehavior
+
+Problem:
+when a file needs to be re-opened, the O_APPEND, O_EXCL and O_CREAT
+flags are not filtered in EC.
+
+- O_APPEND should be filtered because EC doesn't send O_APPEND below EC on
+open, to make sure writes happen on the individual fragments instead of at
+the end of the file.
+
+- O_EXCL should be filtered because shd could have created the file, so the
+open should succeed even when the file already exists.
+
+- O_CREAT should be filtered because the open happens with the gfid as the
+parameter, so the open fop would create just the gfid, which would lead to
+problems.
+
+Fix:
+Filter out these flags in reopen.
+
+Upstream-patch:https://review.gluster.org/#/c/glusterfs/+/23121/
+Change-Id: Ia280470fcb5188a09caa07bf665a2a94bce23bc4
+fixes: bz#1735514
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177977
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 4 +++-
+ xlators/cluster/ec/src/ec-inode-write.c | 7 +++++--
+ 2 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 28b31c9..5fb4610 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -101,6 +101,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
+ {
+ uintptr_t need_open = 0;
+ int ret = 0;
++ int32_t flags = 0;
+ loc_t loc = {
+ 0,
+ };
+@@ -121,6 +122,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
+ goto out;
+ }
+
++ flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL));
+ if (IA_IFDIR == fop->fd->inode->ia_type) {
+ ec_opendir(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL,
+@@ -128,7 +130,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
+ } else {
+ ec_open(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
+- fop->fd->flags & (~O_TRUNC), fop->fd, NULL);
++ flags, fop->fd, NULL);
+ }
+
+ out:
+diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
+index 4f35b6d..2f28e11 100644
+--- a/xlators/cluster/ec/src/ec-inode-write.c
++++ b/xlators/cluster/ec/src/ec-inode-write.c
+@@ -1985,10 +1985,13 @@ ec_get_lock_good_mask(inode_t *inode, xlator_t *xl)
+ LOCK(&inode->lock);
+ {
+ ictx = __ec_inode_get(inode, xl);
+- lock = ictx->inode_lock;
++ if (ictx)
++ lock = ictx->inode_lock;
+ }
+ UNLOCK(&inode->lock);
+- return lock->good_mask;
++ if (lock)
++ return lock->good_mask;
++ return 0;
+ }
+
+ void
+--
+1.8.3.1
+
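The flag filtering itself is a single mask, mirroring the expression added in ec_fix_open above. A compilable sketch:

    #include <fcntl.h>
    #include <stdio.h>

    /* Flags that must not be replayed when transparently re-opening an fd
     * on a brick that came back: O_TRUNC would wipe data, O_APPEND breaks
     * fragment offsets, and O_CREAT/O_EXCL can create a bare gfid or
     * reject an already-healed file. */
    static int
    reopen_flags(int orig_flags)
    {
        return orig_flags & ~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL);
    }

    int
    main(void)
    {
        int f = O_WRONLY | O_CREAT | O_EXCL | O_APPEND;

        printf("reopen with flags 0x%x instead of 0x%x\n",
               reopen_flags(f), f);
        return 0;
    }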
diff --git a/0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch b/0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch
new file mode 100644
index 0000000..0307e25
--- /dev/null
+++ b/0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch
@@ -0,0 +1,49 @@
+From 4c2aa7adef3df500043dd45614d20c9987e6c0d9 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Fri, 2 Aug 2019 12:05:09 +0530
+Subject: [PATCH 274/276] cluster/ec: Update lock->good_mask on parent fop
+ failure
+
+When discard/truncate performs write fop, it should do so
+after updating lock->good_mask to make sure readv happens
+on the correct mask
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23147
+fixes: bz#1730914
+Change-Id: Idfef0bbcca8860d53707094722e6ba3f81c583b7
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/177978
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.h | 2 ++
+ xlators/cluster/ec/src/ec-inode-write.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
+index e948342..3c69471 100644
+--- a/xlators/cluster/ec/src/ec-common.h
++++ b/xlators/cluster/ec/src/ec-common.h
+@@ -204,4 +204,6 @@ void
+ ec_reset_entry_healing(ec_fop_data_t *fop);
+ char *
+ ec_msg_str(ec_fop_data_t *fop);
++void
++ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop);
+ #endif /* __EC_COMMON_H__ */
+diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
+index 2f28e11..8bfa3b4 100644
+--- a/xlators/cluster/ec/src/ec-inode-write.c
++++ b/xlators/cluster/ec/src/ec-inode-write.c
+@@ -89,6 +89,8 @@ ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, uint64_t size)
+ goto out;
+ }
+
++ if (fop->locks[0].lock)
++ ec_lock_update_good(fop->locks[0].lock, fop);
+ vector.iov_base = iobuf->ptr;
+ vector.iov_len = size;
+ memset(vector.iov_base, 0, vector.iov_len);
+--
+1.8.3.1
+
diff --git a/0275-cluster-ec-Create-heal-task-with-heal-process-id.patch b/0275-cluster-ec-Create-heal-task-with-heal-process-id.patch
new file mode 100644
index 0000000..ba3d85a
--- /dev/null
+++ b/0275-cluster-ec-Create-heal-task-with-heal-process-id.patch
@@ -0,0 +1,74 @@
+From 0864f1ad12394a5748d92aa0ed5b455135426bc3 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Tue, 30 Jul 2019 10:32:39 +0530
+Subject: [PATCH 275/276] cluster/ec: Create heal task with heal process id
+
+Problem:
+ec_data_undo_pending calls syncop_fxattrop->SYNCOP without
+a frame. In this case SYNCOP gets the frame of the task.
+However, when we create a synctask for heal we provide
+frame as NULL.
+Now, if the read-only feature is ON, the brick will see the shd's
+process ID as 0 and will not consider it an internal process. This
+prevents healing of the file, logging a "Read-only file system"
+error message.
+
+Solution:
+While launching heal, create the synctask with a frame and set the
+process id of the SHD on it, which is -6.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23129/
+
+>Change-Id: I37195399c85de322cbcac75633888922c4e3db4a
+>Fixes: bz#1734252
+
+BUG: 1733531
+Change-Id: I37195399c85de322cbcac75633888922c4e3db4a
+Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/178038
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/cluster/ec/src/ec-heal.c | 20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 2fa1f11..0f0f398 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2647,13 +2647,31 @@ void
+ ec_launch_heal(ec_t *ec, ec_fop_data_t *fop)
+ {
+ int ret = 0;
++ call_frame_t *frame = NULL;
++
++ frame = create_frame(ec->xl, ec->xl->ctx->pool);
++ if (!frame) {
++ ret = -1;
++ goto out;
++ }
++
++ ec_owner_set(frame, frame->root);
++ /*Do heal as root*/
++ frame->root->uid = 0;
++ frame->root->gid = 0;
++ /*Mark the fops as internal*/
++ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+
+ ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done,
+- NULL, fop);
++ frame, fop);
++out:
+ if (ret < 0) {
+ ec_fop_set_error(fop, ENOMEM);
+ ec_heal_fail(ec, fop);
+ }
++
++ if (frame)
++ STACK_DESTROY(frame->root);
+ }
+
+ void
+--
+1.8.3.1
+
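The identity the heal frame carries is what makes the fix work: root credentials plus the SHD's negative pid (-6, per the message above) mark the I/O as internal. A standalone sketch of just that setup, with a plain struct standing in for the call frame:

    #include <stdio.h>
    #include <stdlib.h>

    #define PID_SELF_HEALD (-6)  /* stand-in for GF_CLIENT_PID_SELF_HEALD */

    /* Minimal stand-in for a call frame's identity fields. */
    struct frame_root {
        int uid;
        int gid;
        int pid;
    };

    /* Build the identity a heal task should run with: root credentials
     * and a negative pid so read-only and similar checks treat the fops
     * as internal rather than client I/O. */
    static struct frame_root *
    make_heal_frame(void)
    {
        struct frame_root *root = calloc(1, sizeof(*root));

        if (!root)
            return NULL;
        root->uid = 0;              /* heal as root */
        root->gid = 0;
        root->pid = PID_SELF_HEALD; /* mark the fops as internal */
        return root;
    }

    int
    main(void)
    {
        struct frame_root *root = make_heal_frame();

        if (!root)
            return 1;
        printf("heal runs as uid=%d pid=%d\n", root->uid, root->pid);
        free(root);
        return 0;
    }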
diff --git a/0276-features-utime-always-update-ctime-at-setattr.patch b/0276-features-utime-always-update-ctime-at-setattr.patch
new file mode 100644
index 0000000..f19663b
--- /dev/null
+++ b/0276-features-utime-always-update-ctime-at-setattr.patch
@@ -0,0 +1,74 @@
+From 7f5658a299081cec4c77d3cca4e70099cd59b1fc Mon Sep 17 00:00:00 2001
+From: Kinglong Mee <kinglongmee@gmail.com>
+Date: Mon, 5 Aug 2019 11:08:02 +0800
+Subject: [PATCH 276/276] features/utime: always update ctime at setattr
+
+For an nfs EXCLUSIVE mode create, a later time may be set as mtime
+(the verifier). It should not be propagated to ctime, because
+storage.ctime does not allow setting ctime to an earlier time.
+
+ /* Earlier, mdata was updated only if the existing time is less
+ * than the time to be updated. This would fail the scenarios
+ * where mtime can be set to any time using the syscall. Hence
+ * just updating without comparison. But the ctime is not
+ * allowed to changed to older date.
+ */
+
+Following the kernel's setattr behaviour, always set ctime at setattr,
+and do not set ctime from mtime in storage.ctime.
+
+>Change-Id: I5cfde6cb7f8939da9617506e3dc80bd840e0d749
+>fixes: bz#1737288
+>Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
+Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/23154/
+
+BUG: 1737705
+Change-Id: I5cfde6cb7f8939da9617506e3dc80bd840e0d749
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/178225
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/features/utime/src/utime-gen-fops-c.py | 13 +------------
+ xlators/storage/posix/src/posix-metadata.c | 2 +-
+ 2 files changed, 2 insertions(+), 13 deletions(-)
+
+diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py
+index 8730a51..a8637ff 100755
+--- a/xlators/features/utime/src/utime-gen-fops-c.py
++++ b/xlators/features/utime/src/utime-gen-fops-c.py
+@@ -82,18 +82,7 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+ {
+ gl_timespec_get(&frame->root->ctime);
+-
+- if (!valid) {
+- frame->root->flags |= MDATA_CTIME;
+- }
+-
+- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+- frame->root->flags |= MDATA_CTIME;
+- }
+-
+- if (valid & GF_SET_ATTR_MODE) {
+- frame->root->flags |= MDATA_CTIME;
+- }
++ frame->root->flags |= MDATA_CTIME;
+
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 57791fa..5cbdc98 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -631,7 +631,7 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+ tv.tv_sec = stbuf->ia_mtime;
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec);
+
+- flag.ctime = 1;
++ flag.ctime = 0;
+ flag.mtime = 1;
+ flag.atime = 0;
+
+--
+1.8.3.1
+
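The resulting policy can be stated in a few lines of C: mtime may be set to any value (e.g. an NFS verifier in the future), while ctime only ever moves forward. A sketch of that guard, under the assumption of a simple last-writer store:

    #include <stdio.h>
    #include <time.h>

    /* Returns >0 if a is later than b, <0 if earlier, 0 if equal. */
    static int
    timespec_cmp(const struct timespec *a, const struct timespec *b)
    {
        if (a->tv_sec != b->tv_sec)
            return (a->tv_sec > b->tv_sec) ? 1 : -1;
        if (a->tv_nsec != b->tv_nsec)
            return (a->tv_nsec > b->tv_nsec) ? 1 : -1;
        return 0;
    }

    /* ctime only moves forward; mtime may be set freely. */
    static void
    apply_times(struct timespec *ctime_stored, const struct timespec *now,
                struct timespec *mtime_stored, const struct timespec *mtime_new)
    {
        if (timespec_cmp(now, ctime_stored) > 0)
            *ctime_stored = *now;
        *mtime_stored = *mtime_new;
    }

    int
    main(void)
    {
        struct timespec c = {100, 0}, m = {100, 0};
        struct timespec now = {200, 0};
        struct timespec verifier = {1893456000, 0}; /* future mtime */

        apply_times(&c, &now, &m, &verifier);
        printf("ctime=%lld mtime=%lld\n",
               (long long)c.tv_sec, (long long)m.tv_sec);
        return 0;
    }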
diff --git a/0277-geo-rep-Fix-Config-Get-Race.patch b/0277-geo-rep-Fix-Config-Get-Race.patch
new file mode 100644
index 0000000..45dada1
--- /dev/null
+++ b/0277-geo-rep-Fix-Config-Get-Race.patch
@@ -0,0 +1,109 @@
+From f40570f2f784dc61edb061a4931dcfc16bf51e7e Mon Sep 17 00:00:00 2001
+From: Aravinda VK <avishwan@redhat.com>
+Date: Mon, 5 Aug 2019 19:00:21 +0530
+Subject: [PATCH 277/284] geo-rep: Fix Config Get Race
+
+When two threads (sync jobs) in a Geo-rep worker call `gconf.get` and
+`gconf.getr` (realtime) at the same time, `getr` resets the conf object
+and the other thread gets None. A thread lock is introduced to fix the issue.
+
+```
+ File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py",
+ line 368, in twrap
+ tf(*aargs)
+ File "/usr/libexec/glusterfs/python/syncdaemon/master.py", line 1987,
+ in syncjob
+ po = self.sync_engine(pb, self.log_err)
+ File "/usr/libexec/glusterfs/python/syncdaemon/resource.py",
+ line 1444, in rsync
+ rconf.ssh_ctl_args + \
+AttributeError: 'NoneType' object has no attribute 'split'
+```
+
+Backport of:
+ > Patch: https://review.gluster.org/#/c/glusterfs/+/23158/
+ > Change-Id: I9c245e5c36338265354e158f5baa32b119eb2da5
+ > Updates: bz#1737484
+ > Signed-off-by: Aravinda VK <avishwan@redhat.com>
+
+Change-Id: I9c245e5c36338265354e158f5baa32b119eb2da5
+BUG: 1729915
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/178960
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/gsyncdconfig.py | 27 +++++++++++++++++++++------
+ 1 file changed, 21 insertions(+), 6 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
+index 1fc451f..38f3594 100644
+--- a/geo-replication/syncdaemon/gsyncdconfig.py
++++ b/geo-replication/syncdaemon/gsyncdconfig.py
+@@ -17,6 +17,7 @@ import os
+ import shutil
+ from string import Template
+ from datetime import datetime
++from threading import Lock
+
+
+ # Global object which can be used in other modules
+@@ -35,6 +36,7 @@ class GconfInvalidValue(Exception):
+ class Gconf(object):
+ def __init__(self, default_conf_file, custom_conf_file=None,
+ args={}, extra_tmpl_args={}, override_from_args=False):
++ self.lock = Lock()
+ self.default_conf_file = default_conf_file
+ self.custom_conf_file = custom_conf_file
+ self.tmp_conf_file = None
+@@ -163,6 +165,11 @@ class Gconf(object):
+ if value is not None and not self._is_valid_value(name, value):
+ raise GconfInvalidValue()
+
++
++ def _load_with_lock(self):
++ with self.lock:
++ self._load()
++
+ def _load(self):
+ self.gconf = {}
+ self.template_conf = []
+@@ -230,12 +237,19 @@ class Gconf(object):
+ self._tmpl_substitute()
+ self._do_typecast()
+
+- def reload(self):
++ def reload(self, with_lock=True):
+ if self._is_config_changed():
+- self._load()
++ if with_lock:
++ self._load_with_lock()
++ else:
++ self._load()
+
+- def get(self, name, default_value=None):
+- return self.gconf.get(name, default_value)
++ def get(self, name, default_value=None, with_lock=True):
++ if with_lock:
++ with self.lock:
++ return self.gconf.get(name, default_value)
++ else:
++ return self.gconf.get(name, default_value)
+
+ def getall(self, show_defaults=False, show_non_configurable=False):
+ cnf = {}
+@@ -276,8 +290,9 @@ class Gconf(object):
+ return cnf
+
+ def getr(self, name, default_value=None):
+- self.reload()
+- return self.get(name, default_value)
++ with self.lock:
++ self.reload(with_lock=False)
++ return self.get(name, default_value, with_lock=False)
+
+ def get_help(self, name=None):
+ pass
+--
+1.8.3.1
+
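The patch is Python, but the pattern is language-neutral: one lock guards both the read and the reload that replaces the table, with unlocked variants for the path that already holds the lock. Expressed as a C pthread analogue with simplified stand-ins (not the gsyncd code):

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER;
    static char conf_value[64] = "rsync";

    /* Simulates re-reading the config file: the table is briefly empty,
     * which is exactly the window the lock must cover. */
    static void
    reload_unlocked(void)
    {
        conf_value[0] = '\0';
        strcpy(conf_value, "rsync --sparse");
    }

    static void
    conf_get(char *out, size_t n)
    {
        pthread_mutex_lock(&conf_lock);
        snprintf(out, n, "%s", conf_value);
        pthread_mutex_unlock(&conf_lock);
    }

    /* Realtime getter: reload and read in one critical section, calling
     * the unlocked helper to avoid self-deadlock. */
    static void
    conf_getr(char *out, size_t n)
    {
        pthread_mutex_lock(&conf_lock);
        reload_unlocked();
        snprintf(out, n, "%s", conf_value);
        pthread_mutex_unlock(&conf_lock);
    }

    int
    main(void)
    {
        char buf[64];

        conf_getr(buf, sizeof(buf));
        conf_get(buf, sizeof(buf));
        printf("sync engine: %s\n", buf);
        return 0;
    }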
diff --git a/0278-geo-rep-Fix-worker-connection-issue.patch b/0278-geo-rep-Fix-worker-connection-issue.patch
new file mode 100644
index 0000000..00cb48f
--- /dev/null
+++ b/0278-geo-rep-Fix-worker-connection-issue.patch
@@ -0,0 +1,45 @@
+From 924a25990948c9d76001cf4134fc5a2fcbf5c02c Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Fri, 16 Aug 2019 15:38:49 +0530
+Subject: [PATCH 278/284] geo-rep: Fix worker connection issue
+
+All the workers connect to the primary slave node. They should
+connect to the available slave nodes in round-robin fashion
+and choose a different slave node if the corresponding slave
+node is down. This patch fixes that.
+
+Thanks Aravinda for the help in root causing this.
+
+Backport of:
+ > Patch: https://review.gluster.org/23247/
+ > Change-Id: I9f8e7744f4adb8a24833cf173681d109710f98cb
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+ > Updates: bz#1737484
+
+Change-Id: I9f8e7744f4adb8a24833cf173681d109710f98cb
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+BUG: 1729915
+Reviewed-on: https://code.engineering.redhat.com/gerrit/178961
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/subcmds.py | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
+index 4ece7e0..8de7db2 100644
+--- a/geo-replication/syncdaemon/subcmds.py
++++ b/geo-replication/syncdaemon/subcmds.py
+@@ -73,7 +73,8 @@ def subcmd_worker(args):
+ Popen.init_errhandler()
+ fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ local = GLUSTER("localhost", args.master)
+- slavehost, slavevol = args.slave.split("::")
++ slavevol = args.slave.split("::")[-1]
++ slavehost = args.resource_remote
+ remote = SSH(slavehost, slavevol)
+ remote.connect_remote()
+ local.connect()
+--
+1.8.3.1
+
diff --git a/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch b/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch
new file mode 100644
index 0000000..3bbd56c
--- /dev/null
+++ b/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch
@@ -0,0 +1,253 @@
+From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Tue, 16 Jul 2019 20:36:57 +0530
+Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop
+ volume in loop
+
+Problem: In a brick_mux environment the brick sometimes crashes while
+ a volume is stopped/started in a loop. The brick crashes in the janitor
+ task while accessing priv: if the posix priv is cleaned up before the
+ janitor task runs, the janitor task crashes.
+
+Solution: To avoid the crash in a brick_mux environment, introduce a new
+ flag janitor_task_stop in posix_private, and before sending the CHILD_DOWN
+ event wait for janitor_task_done to update the flag.
+
+> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
+> fixes: bz#1730409
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/)
+
+Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
+fixes: bz#1729971
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/178934
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/xlator.h | 3 +++
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 5 ++--
+ xlators/protocol/server/src/server.c | 6 ++++-
+ xlators/storage/posix/src/posix-common.c | 40 +++++++++++++++++++++++++++++-
+ xlators/storage/posix/src/posix-helpers.c | 16 ++++++++++++
+ xlators/storage/posix/src/posix.h | 3 +++
+ 6 files changed, 69 insertions(+), 4 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index b78daad..da551e9 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -861,6 +861,9 @@ struct _xlator {
+
+ /* Flag to notify got CHILD_DOWN event for detach brick */
+ uint32_t notify_down;
++
++ /* Flag to avoid throw duplicate PARENT_DOWN event */
++ uint32_t parent_down;
+ };
+
+ /* This would be the only structure which needs to be exported by
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 2aa975b..812c698 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -4082,8 +4082,9 @@ out:
+ if (msg[0]) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
+ msg);
+- gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
+- new_brickinfo->hostname, new_brickinfo->path);
++ if (new_brickinfo)
++ gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
++ new_brickinfo->hostname, new_brickinfo->path);
+ }
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
+diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
+index 6ae63ba..a5f09fe 100644
+--- a/xlators/protocol/server/src/server.c
++++ b/xlators/protocol/server/src/server.c
+@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data)
+ gf_boolean_t victim_found = _gf_false;
+ xlator_list_t **trav_p = NULL;
+ xlator_t *top = NULL;
++ uint32_t parent_down = 0;
+
+ GF_ASSERT(data);
+
+@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data)
+ victim = (*trav_p)->xlator;
+ if (victim->cleanup_starting &&
+ strcmp(victim->name, victim_name) == 0) {
+- victim_found = _gf_true;
++ parent_down = victim->parent_down;
++ victim->parent_down = 1;
++ if (!parent_down)
++ victim_found = _gf_true;
+ break;
+ }
+ }
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index d738692..69857d9 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -146,10 +146,15 @@ int32_t
+ posix_notify(xlator_t *this, int32_t event, void *data, ...)
+ {
+ xlator_t *victim = data;
++ struct posix_private *priv = this->private;
++ int ret = 0;
++ struct timespec sleep_till = {
++ 0,
++ };
+
+ switch (event) {
+ case GF_EVENT_PARENT_UP: {
+- /* Tell the parent that posix xlator is up */
++ /* Tell the parent that the posix xlator is up */
+ default_notify(this, GF_EVENT_CHILD_UP, data);
+ } break;
+
+@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
+ break;
+ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+ victim->name);
++
++ if (priv->janitor) {
++ pthread_mutex_lock(&priv->janitor_mutex);
++ {
++ priv->janitor_task_stop = _gf_true;
++ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
++ priv->janitor);
++ if (!ret) {
++ clock_gettime(CLOCK_REALTIME, &sleep_till);
++ sleep_till.tv_sec += 1;
++ /* Wait to set janitor_task flag to _gf_false by
++ * janitor_task_done */
++ while (priv->janitor_task_stop) {
++ (void)pthread_cond_timedwait(&priv->janitor_cond,
++ &priv->janitor_mutex,
++ &sleep_till);
++ clock_gettime(CLOCK_REALTIME, &sleep_till);
++ sleep_till.tv_sec += 1;
++ }
++ }
++ }
++ pthread_mutex_unlock(&priv->janitor_mutex);
++ GF_FREE(priv->janitor);
++ }
++ priv->janitor = NULL;
+ default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
+ } break;
+ default:
+@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this)
+
+ pthread_mutex_init(&_private->fsync_mutex, NULL);
+ pthread_cond_init(&_private->fsync_cond, NULL);
++ pthread_mutex_init(&_private->janitor_mutex, NULL);
++ pthread_cond_init(&_private->janitor_cond, NULL);
+ INIT_LIST_HEAD(&_private->fsyncs);
+ ret = posix_spawn_ctx_janitor_thread(this);
+ if (ret)
+@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this)
+ (void)gf_thread_cleanup_xint(priv->disk_space_check);
+ priv->disk_space_check = 0;
+ }
++
+ if (priv->janitor) {
+ /*TODO: Make sure the synctask is also complete */
+ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
+@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this)
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
+ "Failed to delete janitor timer");
+ }
++ GF_FREE(priv->janitor);
+ priv->janitor = NULL;
+ }
++
+ if (priv->fsyncer) {
+ (void)gf_thread_cleanup_xint(priv->fsyncer);
+ priv->fsyncer = 0;
+@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this)
+ GF_FREE(priv->base_path);
+ LOCK_DESTROY(&priv->lock);
+ pthread_mutex_destroy(&priv->fsync_mutex);
++ pthread_cond_destroy(&priv->fsync_cond);
++ pthread_mutex_destroy(&priv->janitor_mutex);
++ pthread_cond_destroy(&priv->janitor_cond);
+ GF_FREE(priv->hostname);
+ GF_FREE(priv->trash_path);
+ GF_FREE(priv);
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 07169b5..ef5bfd5 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
+ this = data;
+ priv = this->private;
+
++ pthread_mutex_lock(&priv->janitor_mutex);
++ {
++ if (priv->janitor_task_stop) {
++ priv->janitor_task_stop = _gf_false;
++ pthread_cond_signal(&priv->janitor_cond);
++ pthread_mutex_unlock(&priv->janitor_mutex);
++ goto out;
++ }
++ }
++ pthread_mutex_unlock(&priv->janitor_mutex);
++
+ LOCK(&priv->lock);
+ {
+ __posix_janitor_timer_start(this);
+ }
+ UNLOCK(&priv->lock);
+
++out:
+ return 0;
+ }
+
+@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data)
+ old_this = THIS;
+ THIS = this;
+
++ if (!priv)
++ goto out;
++
+ time(&now);
+ if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
+ if (priv->disable_landfill_purge) {
+@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data)
+
+ THIS = old_this;
+
++out:
+ return 0;
+ }
+
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index b0935a7..64288a7 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -203,6 +203,8 @@ struct posix_private {
+ struct list_head fsyncs;
+ pthread_mutex_t fsync_mutex;
+ pthread_cond_t fsync_cond;
++ pthread_mutex_t janitor_mutex;
++ pthread_cond_t janitor_cond;
+ int fsync_queue_count;
+
+ enum {
+@@ -257,6 +259,7 @@ struct posix_private {
+
+ gf_boolean_t fips_mode_rchecksum;
+ gf_boolean_t ctime;
++ gf_boolean_t janitor_task_stop;
+ };
+
+ typedef struct {
+--
+1.8.3.1
+
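The heart of the fix above is a stop-flag/condvar handshake: the notify path sets janitor_task_stop and waits, and the task's completion callback clears the flag and signals. A compact standalone sketch of that handshake; the real code bounds the wait with pthread_cond_timedwait and re-arms a timer-wheel timer, both omitted here:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  c = PTHREAD_COND_INITIALIZER;
    static bool task_stop = false;

    /* Runs in the janitor thread: on completion, acknowledge a pending
     * stop request instead of re-arming the timer. */
    static void *
    janitor_done(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&m);
        if (task_stop) {
            task_stop = false;       /* acknowledge */
            pthread_cond_signal(&c);
        }
        pthread_mutex_unlock(&m);
        return NULL;
    }

    int
    main(void)
    {
        pthread_t t;

        /* Teardown path (the CHILD_DOWN handler in the patch): request a
         * stop and wait for the acknowledgement before freeing state. */
        pthread_mutex_lock(&m);
        task_stop = true;
        pthread_create(&t, NULL, janitor_done, NULL);
        while (task_stop)
            pthread_cond_wait(&c, &m);  /* releases m while waiting */
        pthread_mutex_unlock(&m);

        pthread_join(t, NULL);
        puts("janitor stopped; safe to free private state");
        return 0;
    }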
diff --git a/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch b/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch
new file mode 100644
index 0000000..38b4d48
--- /dev/null
+++ b/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch
@@ -0,0 +1,153 @@
+From 2d7d9165c6a8619eef553859b4b7136b8e9ccb55 Mon Sep 17 00:00:00 2001
+From: Anoop C S <anoopcs@redhat.com>
+Date: Sat, 10 Aug 2019 10:30:26 +0530
+Subject: [PATCH 280/284] performance/md-cache: Do not skip caching of null
+ character xattr values
+
+A null-character string is a valid xattr value in a file system, but
+for xattrs processed by md-cache, the cache does not update its entries
+if the value is null ('\0'). This results in ENODATA when those xattrs
+are queried afterwards via getxattr(), causing failures in basic
+operations like create and copy in a specially configured Samba setup
+for Mac OS clients.
+
+On the other side, snapview-server internally sets an empty string ("")
+as the value for xattrs received as part of listxattr(), and these are
+not intended to be cached. Therefore we maintain that behaviour using an
+additional dictionary key that prevents updating the entries in the
+getxattr() and fgetxattr() callbacks of md-cache.
+
+Credits: Poornima G <pgurusid@redhat.com>
+
+Backport of https://review.gluster.org/c/glusterfs/+/23206
+
+Change-Id: I7859cbad0a06ca6d788420c2a495e658699c6ff7
+Fixes: bz#1732376
+Signed-off-by: Anoop C S <anoopcs@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/179048
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/md-cache/bug-1726205.t | 22 +++++++++++++++
+ .../features/snapview-server/src/snapview-server.c | 12 ++++++++-
+ xlators/performance/md-cache/src/md-cache.c | 31 +++++++++-------------
+ 3 files changed, 45 insertions(+), 20 deletions(-)
+ create mode 100644 tests/bugs/md-cache/bug-1726205.t
+
+diff --git a/tests/bugs/md-cache/bug-1726205.t b/tests/bugs/md-cache/bug-1726205.t
+new file mode 100644
+index 0000000..795130e
+--- /dev/null
++++ b/tests/bugs/md-cache/bug-1726205.t
+@@ -0,0 +1,22 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup;
++
++TEST glusterd;
++
++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
++
++TEST $CLI volume start $V0
++
++TEST $CLI volume set $V0 group samba
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST touch $M0/file
++TEST "setfattr -n "user.DosStream.Zone.Identifier:\$DATA" -v '\0' $M0/file"
++TEST "getfattr -n "user.DosStream.Zone.Identifier:\$DATA" -e hex $M0/file | grep -q 0x00"
++
++cleanup;
+diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c
+index b4998b8..1d6a5e5 100644
+--- a/xlators/features/snapview-server/src/snapview-server.c
++++ b/xlators/features/snapview-server/src/snapview-server.c
+@@ -828,7 +828,8 @@ out:
+ * back into the dict. But to get the values for those xattrs it has to do the
+ * getxattr operation on each xattr which might turn out to be a costly
+ * operation. So for each of the xattrs present in the list, a 0 byte value
+- * ("") is set into the dict before unwinding. This can be treated as an
++ * ("") is set into the dict before unwinding. Since ("") is also a valid xattr
++ * value(in a file system) we use an extra key in the same dictionary as an
+ * indicator to other xlators which want to cache the xattrs (as of now,
+ * md-cache which caches acl and selinux related xattrs) to not to cache the
+ * values of the xattrs present in the dict.
+@@ -871,6 +872,15 @@ svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size)
+ list_offset += strlen(keybuffer) + 1;
+ } /* while (remaining_size > 0) */
+
++ /* Add an additional key to indicate that we don't need to cache these
++ * xattrs(with value "") */
++ ret = dict_set_str(dict, "glusterfs.skip-cache", "");
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED,
++ "dict set operation for the key glusterfs.skip-cache failed.");
++ goto out;
++ }
++
+ ret = 0;
+
+ out:
+diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
+index 6e0468f..a6b363f 100644
+--- a/xlators/performance/md-cache/src/md-cache.c
++++ b/xlators/performance/md-cache/src/md-cache.c
+@@ -698,25 +698,6 @@ updatefn(dict_t *dict, char *key, data_t *value, void *data)
+ }
+ }
+
+- /* posix xlator as part of listxattr will send both names
+- * and values of the xattrs in the dict. But as per man page
+- * listxattr is mainly supposed to send names of the all the
+- * xattrs. gfapi, as of now will put all the keys it obtained
+- * in the dict (sent by posix) into a buffer provided by the
+- * caller (thus the values of those xattrs are lost). If some
+- * xlator makes gfapi based calls (ex: snapview-server), then
+- * it has to unwind the calls by putting those names it got
+- * in the buffer again into the dict. But now it would not be
+- * having the values for those xattrs. So it might just put
+- * a 0 byte value ("") into the dict for each xattr and unwind
+- * the call. So the xlators which cache the xattrs (as of now
+- * md-cache caches the acl and selinux related xattrs), should
+- * not update their cache if the value of a xattr is a 0 byte
+- * data (i.e. "").
+- */
+- if (value->len == 1 && value->data[0] == '\0')
+- return 0;
+-
+ if (dict_set(u->dict, key, value) < 0) {
+ u->ret = -1;
+ return -1;
+@@ -2406,6 +2387,12 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ goto out;
+ }
+
++ if (dict_get(xattr, "glusterfs.skip-cache")) {
++ gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
++ "Skipping xattr update due to empty value");
++ goto out;
++ }
++
+ mdc_inode_xatt_set(this, local->loc.inode, xdata);
+
+ out:
+@@ -2488,6 +2475,12 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ goto out;
+ }
+
++ if (dict_get(xattr, "glusterfs.skip-cache")) {
++ gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
++ "Skipping xattr update due to empty value");
++ goto out;
++ }
++
+ mdc_inode_xatt_set(this, local->fd->inode, xdata);
+
+ out:
+--
+1.8.3.1
+
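The mechanism above replaces a value-based heuristic (skip "" values) with an explicit sentinel key in the response dictionary, since "" and '\0' are legal xattr data. A toy sketch of the gate, with a flat key list standing in for the dict:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Tiny stand-in for a response dictionary: a flat list of keys. */
    struct xattr_resp {
        const char **keys;
        int nkeys;
    };

    static bool
    has_key(const struct xattr_resp *r, const char *key)
    {
        for (int i = 0; i < r->nkeys; i++)
            if (strcmp(r->keys[i], key) == 0)
                return true;
        return false;
    }

    /* A producer that fakes empty values tags the response with a
     * sentinel key; the cache honours the tag instead of guessing from
     * a ""/NUL value, which is legal xattr data. */
    static void
    maybe_cache(const struct xattr_resp *r)
    {
        if (has_key(r, "glusterfs.skip-cache")) {
            puts("skipping xattr cache update (sentinel present)");
            return;
        }
        puts("caching xattrs, including zero-length values");
    }

    int
    main(void)
    {
        const char *tagged[] = {"security.selinux", "glusterfs.skip-cache"};
        struct xattr_resp r = {tagged, 2};

        maybe_cache(&r);
        return 0;
    }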
diff --git a/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch b/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch
new file mode 100644
index 0000000..5af12d1
--- /dev/null
+++ b/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch
@@ -0,0 +1,105 @@
+From fa3cc9971bf1bf4ea52edfedc0cea67a0d6990d1 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Tue, 20 Aug 2019 15:49:40 +0530
+Subject: [PATCH 281/284] ctime: Fix incorrect realtime passed to
+ frame->root->ctime
+
+On systems that don't support "timespec_get" (e.g., centos6), the code
+was using "clock_gettime" with "CLOCK_MONOTONIC" to get unix epoch
+time, which is incorrect. This patch introduces "timespec_now_realtime",
+which uses "clock_gettime" with "CLOCK_REALTIME" and fixes the issue.
+
+Backport of:
+ > Patch: https://review.gluster.org/23274/
+ > Change-Id: I57be35ce442d7e05319e82112b687eb4f28d7612
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+ > fixes: bz#1743652
+
+Change-Id: I57be35ce442d7e05319e82112b687eb4f28d7612
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+BUG: 1743611
+Reviewed-on: https://code.engineering.redhat.com/gerrit/179185
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/timespec.h | 2 ++
+ libglusterfs/src/libglusterfs.sym | 1 +
+ libglusterfs/src/timespec.c | 22 ++++++++++++++++++++++
+ xlators/features/utime/src/utime-helpers.c | 2 +-
+ 4 files changed, 26 insertions(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/glusterfs/timespec.h b/libglusterfs/src/glusterfs/timespec.h
+index 871871d..bb9ab44 100644
+--- a/libglusterfs/src/glusterfs/timespec.h
++++ b/libglusterfs/src/glusterfs/timespec.h
+@@ -21,6 +21,8 @@
+ void
+ timespec_now(struct timespec *ts);
+ void
++timespec_now_realtime(struct timespec *ts);
++void
+ timespec_adjust_delta(struct timespec *ts, struct timespec delta);
+ void
+ timespec_sub(const struct timespec *begin, const struct timespec *end,
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index b161380..467a1b7 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1073,6 +1073,7 @@ sys_accept
+ tbf_init
+ tbf_throttle
+ timespec_now
++timespec_now_realtime
+ timespec_sub
+ timespec_adjust_delta
+ timespec_cmp
+diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c
+index c01527f..d0d5005 100644
+--- a/libglusterfs/src/timespec.c
++++ b/libglusterfs/src/timespec.c
+@@ -71,6 +71,28 @@ timespec_now(struct timespec *ts)
+ }
+
+ void
++timespec_now_realtime(struct timespec *ts)
++{
++#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \
++ defined GF_BSD_HOST_OS
++ if (0 == clock_gettime(CLOCK_REALTIME, ts)) {
++ return;
++ }
++#endif
++
++ /* Fall back to gettimeofday()*/
++ struct timeval tv = {
++ 0,
++ };
++ if (0 == gettimeofday(&tv, NULL)) {
++ TIMEVAL_TO_TIMESPEC(&tv, ts);
++ return;
++ }
++
++ return;
++}
++
++void
+ timespec_adjust_delta(struct timespec *ts, struct timespec delta)
+ {
+ ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000);
+diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c
+index 79cc014..29d9ad9 100644
+--- a/xlators/features/utime/src/utime-helpers.c
++++ b/xlators/features/utime/src/utime-helpers.c
+@@ -17,7 +17,7 @@ gl_timespec_get(struct timespec *ts)
+ #ifdef TIME_UTC
+ timespec_get(ts, TIME_UTC);
+ #else
+- timespec_now(ts);
++ timespec_now_realtime(ts);
+ #endif
+ }
+
+--
+1.8.3.1
+
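The bug is easy to demonstrate in isolation: CLOCK_MONOTONIC counts from an arbitrary origin (often boot), so it is meaningless as a file timestamp, while CLOCK_REALTIME is the Unix epoch. A short demo:

    #include <stdio.h>
    #include <time.h>

    int
    main(void)
    {
        struct timespec mono, real;

        clock_gettime(CLOCK_MONOTONIC, &mono);  /* seconds since boot-ish */
        clock_gettime(CLOCK_REALTIME, &real);   /* seconds since 1970 */

        printf("monotonic: %lld (arbitrary origin, wrong for ctime)\n",
               (long long)mono.tv_sec);
        printf("realtime:  %lld (unix epoch, what mdata needs)\n",
               (long long)real.tv_sec);
        return 0;
    }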
diff --git a/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch b/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch
new file mode 100644
index 0000000..37a0f12
--- /dev/null
+++ b/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch
@@ -0,0 +1,116 @@
+From 98c9fc8d774ae153ca6b44d3337cf5d9f7a030e2 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Fri, 16 Aug 2019 16:07:03 +0530
+Subject: [PATCH 282/284] geo-rep: Fix the name of changelog archive file
+
+Background:
+The processed changelogs are archived each month in a single tar file.
+The default format is "archive_YYYYMM.tar" which is specified as "%%Y%%m"
+in configuration file.
+
+Problem:
+The created changelog archive file didn't have the corresponding year
+and month; it was created as "archive_%Y%m.tar" on python2-only systems.
+
+Cause and Fix:
+Geo-rep expects "%Y%m" after the ConfigParser reads it from the config
+file. Since the config file had "%%Y%%m", geo-rep got the correct value
+"%Y%m" on python3 but the incorrect value "%%Y%%m" on python2.
+One fix would be to use "%Y%m" in the config file, but that fails on
+python3. So the fix is to use "RawConfigParser" in geo-rep along with
+"%Y%m", which works on both python2 and python3.
+
+Backport of:
+ > Patch: https://review.gluster.org/23248
+ > Change-Id: Ie5b7d2bc04d0d53cd1769e064c2d67aaf95d557c
+ > fixes: bz#1741890
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: Ie5b7d2bc04d0d53cd1769e064c2d67aaf95d557c
+BUG: 1743634
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/179188
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/gsyncd.conf.in | 2 +-
+ geo-replication/syncdaemon/gsyncdconfig.py | 14 +++++++-------
+ 2 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
+index c2e4f0d..5ebd57a 100644
+--- a/geo-replication/gsyncd.conf.in
++++ b/geo-replication/gsyncd.conf.in
+@@ -109,7 +109,7 @@ type=int
+ help=Minimum time interval in seconds for passive worker to become Active
+
+ [changelog-archive-format]
+-value=%%Y%%m
++value=%Y%m
+ help=Processed changelogs will be archived in working directory. Pattern for archive file
+
+ [use-meta-volume]
+diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
+index 38f3594..f823311 100644
+--- a/geo-replication/syncdaemon/gsyncdconfig.py
++++ b/geo-replication/syncdaemon/gsyncdconfig.py
+@@ -10,9 +10,9 @@
+ #
+
+ try:
+- from ConfigParser import ConfigParser, NoSectionError
++ from ConfigParser import RawConfigParser, NoSectionError
+ except ImportError:
+- from configparser import ConfigParser, NoSectionError
++ from configparser import RawConfigParser, NoSectionError
+ import os
+ import shutil
+ from string import Template
+@@ -94,7 +94,7 @@ class Gconf(object):
+ if name != "all" and not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+- cnf = ConfigParser()
++ cnf = RawConfigParser()
+ with open(self.custom_conf_file) as f:
+ cnf.readfp(f)
+
+@@ -138,7 +138,7 @@ class Gconf(object):
+ if curr_val == value:
+ return True
+
+- cnf = ConfigParser()
++ cnf = RawConfigParser()
+ with open(self.custom_conf_file) as f:
+ cnf.readfp(f)
+
+@@ -178,7 +178,7 @@ class Gconf(object):
+ self.session_conf_items = []
+ self.default_values = {}
+
+- conf = ConfigParser()
++ conf = RawConfigParser()
+ # Default Template config file
+ with open(self.default_conf_file) as f:
+ conf.readfp(f)
+@@ -342,7 +342,7 @@ class Gconf(object):
+ return False
+
+ def is_config_file_old(config_file, mastervol, slavevol):
+- cnf = ConfigParser()
++ cnf = RawConfigParser()
+ cnf.read(config_file)
+ session_section = "peers %s %s" % (mastervol, slavevol)
+ try:
+@@ -357,7 +357,7 @@ def config_upgrade(config_file, ret):
+ shutil.copyfile(config_file, config_file_backup)
+
+ #write a new config file
+- config = ConfigParser()
++ config = RawConfigParser()
+ config.add_section('vars')
+
+ for key, value in ret.items():
+--
+1.8.3.1
+
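The "%Y%m" pattern ultimately feeds a strftime-style expansion, so the single-escaped form is what the time formatter must see. A small C demo of the same expansion (analogous to what the Python side does with the configured pattern):

    #include <stdio.h>
    #include <time.h>

    int
    main(void)
    {
        char name[64];
        time_t now = time(NULL);
        struct tm tm;

        localtime_r(&now, &tm);
        /* The single-escaped pattern the fix stores in the config file;
         * a doubled "%%Y%%m" surviving to this point would come out
         * literally as "archive_%Y%m.tar". */
        strftime(name, sizeof(name), "archive_%Y%m.tar", &tm);
        printf("%s\n", name);
        return 0;
    }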
diff --git a/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch b/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch
new file mode 100644
index 0000000..eb9d8f8
--- /dev/null
+++ b/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch
@@ -0,0 +1,285 @@
+From 55eb2e7642e3428eaa1b2d833c0daa1d34b98324 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Thu, 8 Aug 2019 10:05:12 +0530
+Subject: [PATCH 283/284] ctime: Fix ctime issue with utime family of syscalls
+
+When atime|mtime is updated via the utime family of syscalls,
+ctime is not updated. This patch fixes that.
+
+Backport of:
+ > Patch: https://review.gluster.org/23177
+ > Change-Id: I7f86d8f8a1e06a332c3449b5bbdbf128c9690f25
+ > fixes: bz#1738786
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: I7f86d8f8a1e06a332c3449b5bbdbf128c9690f25
+BUG: 1743627
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/179184
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/utime/src/utime-gen-fops-c.py | 13 +++-
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 8 +--
+ xlators/storage/posix/src/posix-metadata.c | 96 ++++++++++++++------------
+ xlators/storage/posix/src/posix-metadata.h | 3 +-
+ 4 files changed, 68 insertions(+), 52 deletions(-)
+
+diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py
+index a8637ff..8730a51 100755
+--- a/xlators/features/utime/src/utime-gen-fops-c.py
++++ b/xlators/features/utime/src/utime-gen-fops-c.py
+@@ -82,7 +82,18 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+ {
+ gl_timespec_get(&frame->root->ctime);
+- frame->root->flags |= MDATA_CTIME;
++
++ if (!valid) {
++ frame->root->flags |= MDATA_CTIME;
++ }
++
++ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
++ frame->root->flags |= MDATA_CTIME;
++ }
++
++ if (valid & GF_SET_ATTR_MODE) {
++ frame->root->flags |= MDATA_CTIME;
++ }
+
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index d22bbc2..e0ea85b 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -425,8 +425,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ real_path);
+ goto out;
+ }
+- posix_update_utime_in_mdata(this, real_path, -1, loc->inode, stbuf,
+- valid);
++ posix_update_utime_in_mdata(this, real_path, -1, loc->inode,
++ &frame->root->ctime, stbuf, valid);
+ }
+
+ if (valid & GF_SET_ATTR_CTIME && !priv->ctime) {
+@@ -652,8 +652,8 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ fd);
+ goto out;
+ }
+- posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, stbuf,
+- valid);
++ posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode,
++ &frame->root->ctime, stbuf, valid);
+ }
+
+ if (!valid) {
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 5cbdc98..532daa2 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -432,8 +432,10 @@ out:
+ */
+ static int
+ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+- inode_t *inode, struct timespec *time, struct iatt *stbuf,
+- posix_mdata_flag_t *flag, gf_boolean_t update_utime)
++ inode_t *inode, struct timespec *time,
++ struct timespec *u_atime, struct timespec *u_mtime,
++ struct iatt *stbuf, posix_mdata_flag_t *flag,
++ gf_boolean_t update_utime)
+ {
+ posix_mdata_t *mdata = NULL;
+ int ret = -1;
+@@ -443,6 +445,10 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, time, out);
+
++ if (update_utime && (!u_atime || !u_mtime)) {
++ goto out;
++ }
++
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
+@@ -506,32 +512,30 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ }
+ }
+
+- /* Earlier, mdata was updated only if the existing time is less
+- * than the time to be updated. This would fail the scenarios
+- * where mtime can be set to any time using the syscall. Hence
+- * just updating without comparison. But the ctime is not
+- * allowed to changed to older date.
+- */
+-
+- if (flag->ctime && posix_compare_timespec(time, &mdata->ctime) > 0) {
+- mdata->ctime = *time;
+- }
+-
+ /* In distributed systems, there could be races with fops
+ * updating mtime/atime which could result in different
+ * mtime/atime for same file. So this makes sure, only the
+ * highest time is retained. If the mtime/atime update comes
+ * from the explicit utime syscall, it is allowed to set to
+- * previous time
++ * previous or future time but the ctime is always set to
++ * current time.
+ */
+ if (update_utime) {
++ if (flag->ctime &&
++ posix_compare_timespec(time, &mdata->ctime) > 0) {
++ mdata->ctime = *time;
++ }
+ if (flag->mtime) {
+- mdata->mtime = *time;
++ mdata->mtime = *u_mtime;
+ }
+ if (flag->atime) {
+- mdata->atime = *time;
++ mdata->atime = *u_atime;
+ }
+ } else {
++ if (flag->ctime &&
++ posix_compare_timespec(time, &mdata->ctime) > 0) {
++ mdata->ctime = *time;
++ }
+ if (flag->mtime &&
+ posix_compare_timespec(time, &mdata->mtime) > 0) {
+ mdata->mtime = *time;
+@@ -584,15 +588,22 @@ out:
+ */
+ void
+ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+- inode_t *inode, struct iatt *stbuf, int valid)
++ inode_t *inode, struct timespec *ctime,
++ struct iatt *stbuf, int valid)
+ {
+ int32_t ret = 0;
+ #if defined(HAVE_UTIMENSAT)
+- struct timespec tv = {
++ struct timespec tv_atime = {
++ 0,
++ };
++ struct timespec tv_mtime = {
+ 0,
+ };
+ #else
+- struct timeval tv = {
++ struct timeval tv_atime = {
++ 0,
++ };
++ struct timeval tv_mtime = {
+ 0,
+ };
+ #endif
+@@ -611,35 +622,28 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+ */
+ if (inode && priv->ctime) {
+ if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+- tv.tv_sec = stbuf->ia_atime;
+- SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec);
++ tv_atime.tv_sec = stbuf->ia_atime;
++ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_atime, stbuf->ia_atime_nsec);
+
+- flag.ctime = 0;
+- flag.mtime = 0;
++ flag.ctime = 1;
+ flag.atime = 1;
+- ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL,
+- &flag, _gf_true);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+- "posix set mdata atime failed on file:"
+- " %s gfid:%s",
+- real_path, uuid_utoa(inode->gfid));
+- }
+ }
+
+ if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+- tv.tv_sec = stbuf->ia_mtime;
+- SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec);
++ tv_mtime.tv_sec = stbuf->ia_mtime;
++ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_mtime, stbuf->ia_mtime_nsec);
+
+- flag.ctime = 0;
++ flag.ctime = 1;
+ flag.mtime = 1;
+- flag.atime = 0;
++ }
+
+- ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL,
+- &flag, _gf_true);
++ if (flag.mtime || flag.atime) {
++ ret = posix_set_mdata_xattr(this, real_path, -1, inode, ctime,
++ &tv_atime, &tv_mtime, NULL, &flag,
++ _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+- "posix set mdata mtime failed on file:"
++ "posix set mdata atime failed on file:"
+ " %s gfid:%s",
+ real_path, uuid_utoa(inode->gfid));
+ }
+@@ -702,8 +706,8 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+ goto out;
+ }
+ ret = posix_set_mdata_xattr(this, real_path, fd, inode,
+- &frame->root->ctime, stbuf, &flag,
+- _gf_false);
++ &frame->root->ctime, NULL, NULL, stbuf,
++ &flag, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+ "posix set mdata failed on file: %s gfid:%s", real_path,
+@@ -733,8 +737,8 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
+ goto out;
+ }
+ ret = posix_set_mdata_xattr(this, real_path, fd, inode,
+- &frame->root->ctime, stbuf, &flag,
+- _gf_false);
++ &frame->root->ctime, NULL, NULL, stbuf,
++ &flag, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+ "posix set mdata failed on file: %s gfid:%s", real_path,
+@@ -792,8 +796,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+ flag_dup.atime = 0;
+
+ ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out,
+- &frame->root->ctime, stbuf_out, &flag_dup,
+- _gf_false);
++ &frame->root->ctime, NULL, NULL, stbuf_out,
++ &flag_dup, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+ "posix set mdata failed on file: %s gfid:%s", real_path_out,
+@@ -811,8 +815,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+ flag_dup.ctime = 0;
+
+ ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out,
+- &frame->root->ctime, stbuf_out, &flag_dup,
+- _gf_false);
++ &frame->root->ctime, NULL, NULL, stbuf_out,
++ &flag_dup, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+ "posix set mdata failed on file: %s gfid:%s", real_path_in,
+diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
+index dc25e59..c176699 100644
+--- a/xlators/storage/posix/src/posix-metadata.h
++++ b/xlators/storage/posix/src/posix-metadata.h
+@@ -40,7 +40,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf);
+ void
+ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+- inode_t *inode, struct iatt *stbuf, int valid);
++ inode_t *inode, struct timespec *ctime,
++ struct iatt *stbuf, int valid);
+ void
+ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+ int fd, inode_t *inode, struct iatt *stbuf);
+--
+1.8.3.1
+
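Note on the hunks above, following the rewritten comment in posix_set_mdata_xattr(): an explicit utime/utimensat call may set atime and mtime to any value, past or future, while ctime is only ever moved forward to the current time. A minimal standalone sketch of that guard, assuming plain struct timespec fields rather than the gluster posix_mdata_t types (names are illustrative, not the gluster API):

    #include <time.h>

    /* Illustrative only; not the gluster sources. */
    static int
    timespec_cmp(const struct timespec *a, const struct timespec *b)
    {
        if (a->tv_sec != b->tv_sec)
            return (a->tv_sec < b->tv_sec) ? -1 : 1;
        return (a->tv_nsec < b->tv_nsec) ? -1 : (a->tv_nsec > b->tv_nsec);
    }

    static void
    apply_utime(struct timespec *atime, struct timespec *mtime,
                struct timespec *ctime_, const struct timespec *u_atime,
                const struct timespec *u_mtime, const struct timespec *now)
    {
        *atime = *u_atime;              /* any value is allowed ...     */
        *mtime = *u_mtime;              /* ... including past or future */
        if (timespec_cmp(now, ctime_) > 0)
            *ctime_ = *now;             /* ctime never moves backwards  */
    }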
diff --git a/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch b/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch
new file mode 100644
index 0000000..4078bfc
--- /dev/null
+++ b/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch
@@ -0,0 +1,61 @@
+From 243075b593c6fccbffb3e82ffcfdb58acfd68269 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Thu, 22 Aug 2019 15:51:43 +0530
+Subject: [PATCH 284/284] posix: log aio_error return codes in
+ posix_fs_health_check
+
+Problem: Sometimes the brick goes down because the health check thread
+         fails without logging the error codes returned by the aio
+         system calls. As per the aio_error man page, it returns a
+         positive error number if the asynchronous I/O operation failed.
+
+Solution: Log the aio_error return codes in the error message.
+
+> Change-Id: I2496b1bc16e602b0fd3ad53e211de11ec8c641ef
+> Fixes: bz#1744519
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23284/
+
+Change-Id: I2496b1bc16e602b0fd3ad53e211de11ec8c641ef
+BUG: 1744518
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/179211
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-helpers.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index ef5bfd5..d143d4c 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2025,7 +2025,6 @@ posix_fs_health_check(xlator_t *this)
+ if (ret != 0) {
+ op_errno = errno;
+ op = "aio_write_error";
+- ret = -1;
+ goto out;
+ }
+
+@@ -2064,7 +2063,6 @@ posix_fs_health_check(xlator_t *this)
+ if (ret != 0) {
+ op_errno = errno;
+ op = "aio_read_error";
+- ret = -1;
+ goto out;
+ }
+
+@@ -2089,7 +2087,8 @@ out:
+ }
+ if (ret && file_path[0]) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED,
+- "%s() on %s returned", op, file_path);
++ "%s() on %s returned ret is %d error is %s", op, file_path, ret,
++ ret != -1 ? strerror(ret) : strerror(op_errno));
+ gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
+ "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
+ file_path, strerror(op_errno), priv->hostname, priv->base_path,
+--
+1.8.3.1
+
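For context on the message format above: aio_error(3) returns 0 on success, EINPROGRESS while the request is still queued, or a positive errno value describing the failed asynchronous operation; -1 with errno set only means aio_error() itself failed. That is why the patch prints strerror(ret) for a ret other than -1 and falls back to strerror(op_errno) otherwise. A hedged sketch of the same pattern outside gluster (helper name and messages are illustrative):

    #include <aio.h>
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative helper, not part of the patch. */
    static void
    report_aio(const struct aiocb *cb, const char *op, const char *path)
    {
        int ret = aio_error(cb);

        if (ret > 0 && ret != EINPROGRESS) {
            /* ret itself is the error number of the async operation */
            fprintf(stderr, "%s() on %s failed: ret=%d (%s)\n",
                    op, path, ret, strerror(ret));
        } else if (ret == -1) {
            /* aio_error() itself failed; errno says why */
            fprintf(stderr, "%s() on %s: aio_error: %s\n",
                    op, path, strerror(errno));
        }
    }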
diff --git a/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch b/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch
new file mode 100644
index 0000000..12549e7
--- /dev/null
+++ b/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch
@@ -0,0 +1,43 @@
+From 49cd9ef7487ba88796315b897823837a9cbd535e Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Wed, 28 Aug 2019 09:05:20 +0530
+Subject: [PATCH 285/297] glusterd: glusterd service is getting timed out on
+ scaled setup
+
+Problem: On a three-node cluster with 2000 replica volumes, systemctl
+         times out for the glusterd service.
+
+Solution: Configure TimeoutSec=300 so systemd waits longer for glusterd startup.
+
+> Change-Id: Idb3f3f3e56e6216a0ebd754cbb9e8e37ce9e636d
+> Fixes: bz#1746228
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit c90dc63ec9eee0f43ba8e489876fdf8b8810bbdc)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23316/)
+
+Change-Id: Idb3f3f3e56e6216a0ebd754cbb9e8e37ce9e636d
+BUG: 1746027
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/179806
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/systemd/glusterd.service.in | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in
+index c33351c..f604160 100644
+--- a/extras/systemd/glusterd.service.in
++++ b/extras/systemd/glusterd.service.in
+@@ -13,6 +13,7 @@ Environment="LOG_LEVEL=INFO"
+ EnvironmentFile=-@sysconfdir@/sysconfig/glusterd
+ ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-level $LOG_LEVEL $GLUSTERD_OPTIONS
+ KillMode=process
++TimeoutSec=300
+ SuccessExitStatus=15
+
+ [Install]
+--
+1.8.3.1
+
diff --git a/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch b/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch
new file mode 100644
index 0000000..415a07b
--- /dev/null
+++ b/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch
@@ -0,0 +1,162 @@
+From 2a905a8ae6b4737e84543ad76b55f3346fa0f32c Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Tue, 27 Aug 2019 14:12:31 +0530
+Subject: [PATCH 286/297] glusterfs.spec.in: added script files for machine /
+ component stats
+
+Added the file extras/identify-hangs.sh to the code base
+and included the following to be packaged:
+
+Quota Accounting issue:
+extras/quota/xattr_analysis.py (made available only on server)
+extras/quota/quota_fsck.py (made available only on server)
+extras/quota/log_accounting.sh
+
+Debugging Statedumps:
+extras/identify-hangs.sh
+
+Performance:
+extras/collect-system-stats.sh
+
+Note: the rest of the files were already included.
+
+Label: DOWNSTREAM ONLY.
+
+Change-Id: I2efb959865c3f381166c6a25c6eef613d13dd5ee
+fixes: bz#1719171
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/179515
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ extras/Makefile.am | 9 +++++++-
+ extras/identify-hangs.sh | 53 ++++++++++++++++++++++++++++++++++++++++++++++++
+ glusterfs.spec.in | 8 ++++++++
+ 3 files changed, 69 insertions(+), 1 deletion(-)
+ create mode 100644 extras/identify-hangs.sh
+
+diff --git a/extras/Makefile.am b/extras/Makefile.am
+index 983f014..8cbfda1 100644
+--- a/extras/Makefile.am
++++ b/extras/Makefile.am
+@@ -30,9 +30,14 @@ endif
+
+ scriptsdir = $(datadir)/glusterfs/scripts
+ scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh
++scripts_SCRIPTS += quota/log_accounting.sh
++scripts_SCRIPTS += collect-system-stats.sh
++scripts_SCRIPTS += identify-hangs.sh
+ if WITH_SERVER
+ scripts_SCRIPTS += post-upgrade-script-for-quota.sh \
+ pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh
++scripts_SCRIPTS += quota/quota_fsck.py
++scripts_SCRIPTS += quota/xattr_analysis.py
+ if USE_SYSTEMD
+ scripts_SCRIPTS += control-cpu-load.sh
+ scripts_SCRIPTS += control-mem.sh
+@@ -50,7 +55,9 @@ EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.co
+ command-completion/Makefile command-completion/README \
+ stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \
+ control-cpu-load.sh control-mem.sh group-distributed-virt \
+- thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh
++ thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh \
++ quota/xattr_analysis.py quota/quota_fsck.py quota/log_accounting.sh \
++ collect-system-stats.sh identify-hangs.sh
+
+ if WITH_SERVER
+ install-data-local:
+diff --git a/extras/identify-hangs.sh b/extras/identify-hangs.sh
+new file mode 100644
+index 0000000..ebc6bf1
+--- /dev/null
++++ b/extras/identify-hangs.sh
+@@ -0,0 +1,53 @@
++#!/bin/bash
++function get_statedump_fnames_without_timestamps
++{
++ ls | grep -E "[.]dump[.][0-9][0-9]*" | cut -f1-3 -d'.' | sort -u
++}
++
++function get_non_uniq_fields
++{
++ local statedump_fname_prefix=$1
++ print_stack_lkowner_unique_in_one_line "$statedump_fname_prefix" | sort | uniq -c | grep -vE "^\s*1 " | awk '{$1="repeats="$1; print $0}'
++}
++
++function print_stack_lkowner_unique_in_one_line
++{
++ local statedump_fname_prefix=$1
++ sed -e '/./{H;$!d;}' -e 'x;/unique=/!d;/stack=/!d;/lk-owner=/!d;/pid=/!d;' "${statedump_fname_prefix}"* | grep -E "(stack|lk-owner|unique|pid)=" | paste -d " " - - - -
++}
++
++function get_stacks_that_appear_in_multiple_statedumps
++{
++ #If a stack with same 'unique/lk-owner/stack' appears in multiple statedumps
++ #print the stack
++ local statedump_fname_prefix=$1
++ while read -r non_uniq_stack;
++ do
++ if [ -z "$printed" ];
++ then
++ printed="1"
++ fi
++ echo "$statedump_fname_prefix" "$non_uniq_stack"
++ done < <(get_non_uniq_fields "$statedump_fname_prefix")
++}
++
++statedumpdir=${1}
++if [ -z "$statedumpdir" ];
++then
++ echo "Usage: $0 <statedump-dir>"
++ exit 1
++fi
++
++if [ ! -d "$statedumpdir" ];
++then
++ echo "$statedumpdir: Is not a directory"
++ echo "Usage: $0 <statedump-dir>"
++ exit 1
++fi
++
++cd "$statedumpdir" || exit 1
++for statedump_fname_prefix in $(get_statedump_fnames_without_timestamps);
++do
++ get_stacks_that_appear_in_multiple_statedumps "$statedump_fname_prefix"
++done | column -t
++echo "NOTE: stacks with lk-owner=\"\"/lk-owner=0000000000000000/unique=0 may not be hung frames and need further inspection" >&2
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 00603ec..3c2e2dc 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1107,6 +1107,9 @@ exit 0
+ %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh
+ %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh
+ %endif
++%{_datadir}/glusterfs/scripts/identify-hangs.sh
++%{_datadir}/glusterfs/scripts/collect-system-stats.sh
++%{_datadir}/glusterfs/scripts/log_accounting.sh
+ # xlators that are needed on the client- and on the server-side
+ %dir %{_libdir}/glusterfs
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}
+@@ -1352,6 +1355,8 @@ exit 0
+ %if ( 0%{!?_without_server:1} )
+ %files server
+ %doc extras/clear_xattrs.sh
++%{_datadir}/glusterfs/scripts/xattr_analysis.py*
++%{_datadir}/glusterfs/scripts/quota_fsck.py*
+ # sysconf
+ %config(noreplace) %{_sysconfdir}/glusterfs
+ %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
+@@ -1942,6 +1947,9 @@ fi
+ %endif
+
+ %changelog
++* Tue Aug 27 2019 Hari Gowtham <hgowtham@redhat.com>
++- Added scripts to collect machine stats and component stats (#1719171)
++
+ * Tue Jun 18 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+ - build glusterfs-ganesha for rhel 7 and above (#1720551)
+
+--
+1.8.3.1
+
diff --git a/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch b/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch
new file mode 100644
index 0000000..93bd3c9
--- /dev/null
+++ b/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch
@@ -0,0 +1,372 @@
+From 546f412c155dd5aca2b3cd4202f80c9977b215dc Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Wed, 4 Sep 2019 12:06:34 +0530
+Subject: [PATCH 287/297] cluster/ec: Fail fsync/flush for files on update
+ size/version failure
+
+Problem:
+If the update of size/version is not successful on the file, updates on
+the same stripe could lead to data corruption if the earlier unaligned
+write did not succeed on all the bricks. The application has no
+knowledge of this because the size/version update happens in the
+background.
+
+Fix:
+Fail fsync/flush on fds that are opened before update-size-version
+went bad.
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23355
+fixes: bz#1745107
+Change-Id: I9d323eddcda703bd27d55f340c4079d76e06e492
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/180672
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/ec/ec-badfd.c | 124 +++++++++++++++++++++++++++++++++++
+ tests/basic/ec/ec-badfd.t | 26 ++++++++
+ xlators/cluster/ec/src/ec-common.c | 23 +++++++
+ xlators/cluster/ec/src/ec-generic.c | 47 +++++++++++++
+ xlators/cluster/ec/src/ec-helpers.c | 7 ++
+ xlators/cluster/ec/src/ec-messages.h | 2 +-
+ xlators/cluster/ec/src/ec-types.h | 2 +
+ 7 files changed, 230 insertions(+), 1 deletion(-)
+ create mode 100644 tests/basic/ec/ec-badfd.c
+ create mode 100755 tests/basic/ec/ec-badfd.t
+
+diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c
+new file mode 100644
+index 0000000..8be23c1
+--- /dev/null
++++ b/tests/basic/ec/ec-badfd.c
+@@ -0,0 +1,124 @@
++#include <stdio.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <time.h>
++#include <limits.h>
++#include <string.h>
++#include <stdlib.h>
++#include <errno.h>
++#include <glusterfs/api/glfs.h>
++#include <glusterfs/api/glfs-handles.h>
++
++int
++fill_iov(struct iovec *iov, char fillchar, int count)
++{
++ int ret = -1;
++
++ iov->iov_base = malloc(count + 1);
++ if (iov->iov_base == NULL) {
++ return ret;
++ } else {
++ iov->iov_len = count;
++ ret = 0;
++ }
++ memset(iov->iov_base, fillchar, count);
++ memset(iov->iov_base + count, '\0', 1);
++
++ return ret;
++}
++
++int
++write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count)
++{
++ ssize_t ret = -1;
++ int flags = O_RDWR;
++ struct iovec iov = {0};
++
++ ret = fill_iov(&iov, 'a', char_count);
++ if (ret) {
++ fprintf(stderr, "failed to create iov");
++ goto out;
++ }
++
++ ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
++out:
++ if (ret < 0) {
++ fprintf(stderr, "glfs_pwritev failed, %d", errno);
++ }
++ return ret;
++}
++
++int
++main(int argc, char *argv[])
++{
++ glfs_t *fs = NULL;
++ glfs_fd_t *fd = NULL;
++ int ret = 1;
++ char volume_cmd[4096] = {0};
++
++ if (argc != 4) {
++ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
++ return 1;
++ }
++
++ fs = glfs_new(argv[2]);
++ if (!fs) {
++ fprintf(stderr, "glfs_new: returned NULL\n");
++ return 1;
++ }
++
++ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
++ if (ret != 0) {
++ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
++ goto out;
++ }
++ ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7);
++ if (ret != 0) {
++ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
++ goto out;
++ }
++ ret = glfs_init(fs);
++ if (ret != 0) {
++ fprintf(stderr, "glfs_init: returned %d\n", ret);
++ goto out;
++ }
++
++ fd = glfs_open(fs, argv[3], O_RDWR);
++ if (fd == NULL) {
++ fprintf(stderr, "glfs_open: returned NULL\n");
++ goto out;
++ }
++
++ ret = write_sync(fs, fd, 16);
++ if (ret < 0) {
++ fprintf(stderr, "write_sync failed\n");
++ }
++
++ snprintf(volume_cmd, sizeof(volume_cmd),
++ "gluster --mode=script volume stop %s", argv[2]);
++ /*Stop the volume so that update-size-version fails*/
++ system(volume_cmd);
++ sleep(8); /* 3 seconds more than eager-lock-timeout*/
++ snprintf(volume_cmd, sizeof(volume_cmd),
++ "gluster --mode=script volume start %s", argv[2]);
++ system(volume_cmd);
++ sleep(8); /*wait for bricks to come up*/
++ ret = glfs_fsync(fd, NULL, NULL);
++ if (ret == 0) {
++ fprintf(stderr, "fsync succeeded on a BADFD\n");
++ exit(1);
++ }
++
++ ret = glfs_close(fd);
++ if (ret == 0) {
++ fprintf(stderr, "flush succeeded on a BADFD\n");
++ exit(1);
++ }
++ ret = 0;
++
++out:
++ unlink("/tmp/ec-badfd.log");
++ glfs_fini(fs);
++
++ return ret;
++}
+diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t
+new file mode 100755
+index 0000000..56feb47
+--- /dev/null
++++ b/tests/basic/ec/ec-badfd.t
+@@ -0,0 +1,26 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
++
++TEST $CLI volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status'
++
++TEST $GFS -s $H0 --volfile-id $V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++TEST touch $M0/file
++
++TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2
++TEST $(dirname $0)/ec-badfd $H0 $V0 /file
++cleanup_tester $(dirname ${0})/ec-badfd
++
++cleanup;
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 5fb4610..92d4e5d 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -2255,6 +2255,23 @@ ec_unlock_lock(ec_lock_link_t *link)
+ }
+ }
+
++void
++ec_inode_bad_inc(inode_t *inode, xlator_t *xl)
++{
++ ec_inode_t *ctx = NULL;
++
++ LOCK(&inode->lock);
++ {
++ ctx = __ec_inode_get(inode, xl);
++ if (ctx == NULL) {
++ goto unlock;
++ }
++ ctx->bad_version++;
++ }
++unlock:
++ UNLOCK(&inode->lock);
++}
++
+ int32_t
+ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+@@ -2270,6 +2287,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ ctx = lock->ctx;
+
+ if (op_ret < 0) {
++ if (link->lock->fd == NULL) {
++ ec_inode_bad_inc(link->lock->loc.inode, this);
++ } else {
++ ec_inode_bad_inc(link->lock->fd->inode, this);
++ }
++
+ gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
+ EC_MSG_SIZE_VERS_UPDATE_FAIL,
+ "Failed to update version and size. %s", ec_msg_str(fop));
+diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
+index acc16b5..b019050 100644
+--- a/xlators/cluster/ec/src/ec-generic.c
++++ b/xlators/cluster/ec/src/ec-generic.c
+@@ -150,6 +150,37 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state)
+ }
+ }
+
++static int32_t
++ec_validate_fd(fd_t *fd, xlator_t *xl)
++{
++ uint64_t iversion = 0;
++ uint64_t fversion = 0;
++ ec_inode_t *inode_ctx = NULL;
++ ec_fd_t *fd_ctx = NULL;
++
++ LOCK(&fd->lock);
++ {
++ fd_ctx = __ec_fd_get(fd, xl);
++ if (fd_ctx) {
++ fversion = fd_ctx->bad_version;
++ }
++ }
++ UNLOCK(&fd->lock);
++
++ LOCK(&fd->inode->lock);
++ {
++ inode_ctx = __ec_inode_get(fd->inode, xl);
++ if (inode_ctx) {
++ iversion = inode_ctx->bad_version;
++ }
++ }
++ UNLOCK(&fd->inode->lock);
++ if (fversion < iversion) {
++ return EBADF;
++ }
++ return 0;
++}
++
+ void
+ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+@@ -165,6 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
++ error = ec_validate_fd(fd, this);
++ if (error) {
++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
++ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
++ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
++ goto out;
++ }
++
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
+ ec_wind_flush, ec_manager_flush, callback, data);
+ if (fop == NULL) {
+@@ -381,6 +420,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
++ error = ec_validate_fd(fd, this);
++ if (error) {
++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
++ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
++ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
++ goto out;
++ }
++
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
+ ec_wind_fsync, ec_manager_fsync, callback, data);
+ if (fop == NULL) {
+diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
+index 43f6e3b..baac001 100644
+--- a/xlators/cluster/ec/src/ec-helpers.c
++++ b/xlators/cluster/ec/src/ec-helpers.c
+@@ -753,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
+ {
+ int i = 0;
+ ec_fd_t *ctx = NULL;
++ ec_inode_t *ictx = NULL;
+ uint64_t value = 0;
+ ec_t *ec = xl->private;
+
+@@ -775,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
+ GF_FREE(ctx);
+ return NULL;
+ }
++    /* Only referring to bad-version so no need for lock
++ * */
++ ictx = __ec_inode_get(fd->inode, xl);
++ if (ictx) {
++ ctx->bad_version = ictx->bad_version;
++ }
+ }
+ } else {
+ ctx = (ec_fd_t *)(uintptr_t)value;
+diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
+index 7c28808..be86b37 100644
+--- a/xlators/cluster/ec/src/ec-messages.h
++++ b/xlators/cluster/ec/src/ec-messages.h
+@@ -55,6 +55,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
+ EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
+ EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
+ EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
+- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED, EC_MSG_FD_BAD);
+
+ #endif /* !_EC_MESSAGES_H_ */
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index 1c295c0..f27f2ec 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -150,6 +150,7 @@ struct _ec_fd {
+ loc_t loc;
+ uintptr_t open;
+ int32_t flags;
++ uint64_t bad_version;
+ ec_fd_status_t fd_status[0];
+ };
+
+@@ -180,6 +181,7 @@ struct _ec_inode {
+ uint64_t dirty[2];
+ struct list_head heal;
+ ec_stripe_list_t stripe_cache;
++ uint64_t bad_version;
+ };
+
+ typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
+--
+1.8.3.1
+
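The fix above amounts to a version fence: every failed update-size-version bumps a counter on the inode (ec_inode_bad_inc), each fd snapshots that counter when its context is created (__ec_fd_get), and flush/fsync fail with EBADF once the inode counter has moved past the fd's snapshot (ec_validate_fd). A minimal sketch of the idea with plain integers, locking and the gluster types omitted; the struct and function names here are placeholders:

    #include <errno.h>
    #include <stdint.h>

    struct inode_state { uint64_t bad_version; };
    struct fd_state    { uint64_t bad_version; };

    /* At fd-context creation: remember how many failures happened so far. */
    static void
    fd_init(struct fd_state *fd, const struct inode_state *ino)
    {
        fd->bad_version = ino->bad_version;
    }

    /* On update-size-version failure: invalidate all earlier fds. */
    static void
    mark_bad(struct inode_state *ino)
    {
        ino->bad_version++;
    }

    /* On flush/fsync: an fd opened before the failure sees a stale snapshot. */
    static int
    validate_fd(const struct fd_state *fd, const struct inode_state *ino)
    {
        return (fd->bad_version < ino->bad_version) ? EBADF : 0;
    }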
diff --git a/0288-cluster-ec-Fix-coverity-issues.patch b/0288-cluster-ec-Fix-coverity-issues.patch
new file mode 100644
index 0000000..8dd3fca
--- /dev/null
+++ b/0288-cluster-ec-Fix-coverity-issues.patch
@@ -0,0 +1,77 @@
+From ccf7775760dd923e21341438725946737eb8d8af Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Sat, 7 Sep 2019 20:18:01 +0530
+Subject: [PATCH 288/297] cluster/ec: Fix coverity issues
+
+Fixed the following Coverity issue in both flush and fsync:
+>>> CID 1404964: Null pointer dereferences (REVERSE_INULL)
+>>> Null-checking "fd" suggests that it may be null, but it has already
+been dereferenced on all paths leading to the check.
+>>> if (fd != NULL) {
+>>> fop->fd = fd_ref(fd);
+>>> if (fop->fd == NULL) {
+>>> gf_msg(this->name, GF_LOG_ERROR, 0,
+>>> "Failed to reference a "
+>>> "file descriptor.");
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23382
+fixes: bz#1745107
+Change-Id: I19c05d585e23f8fbfbc195d1f3775ec528eed671
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/180673
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-generic.c | 28 ++++++++++++++++------------
+ 1 file changed, 16 insertions(+), 12 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
+index b019050..192bb02 100644
+--- a/xlators/cluster/ec/src/ec-generic.c
++++ b/xlators/cluster/ec/src/ec-generic.c
+@@ -196,12 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- error = ec_validate_fd(fd, this);
+- if (error) {
+- gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+- "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
+- fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+- goto out;
++ if (fd) {
++ error = ec_validate_fd(fd, this);
++ if (error) {
++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
++ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
++ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
++ goto out;
++ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
+@@ -420,12 +422,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+- error = ec_validate_fd(fd, this);
+- if (error) {
+- gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+- "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
+- fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+- goto out;
++ if (fd) {
++ error = ec_validate_fd(fd, this);
++ if (error) {
++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
++ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
++ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
++ goto out;
++ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
+--
+1.8.3.1
+
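REVERSE_INULL is Coverity's "dereferenced before the NULL check" pattern: the previous patch made ec_flush/ec_fsync call ec_validate_fd() unconditionally, which dereferences fd, while the pre-existing "if (fd != NULL)" further down implied fd could be NULL. A reduced, compilable shape of the defect and the fix; fd_t and validate() here are stand-ins, not the gluster definitions:

    typedef struct { int ok; } fd_t;

    static int validate(fd_t *fd) { return fd->ok ? 0 : -1; }

    /* Before: validate() touches fd on every path, so the later NULL
     * check can never have protected anything; Coverity flags it. */
    static int
    before_fix(fd_t *fd)
    {
        int err = validate(fd);      /* crashes here if fd == NULL */
        if (fd != NULL && err)       /* check arrives too late */
            return err;
        return 0;
    }

    /* After: mirror the patch and only validate a non-NULL fd. */
    static int
    after_fix(fd_t *fd)
    {
        if (fd != NULL) {
            int err = validate(fd);
            if (err)
                return err;
        }
        return 0;
    }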
diff --git a/0289-cluster-ec-quorum-count-implementation.patch b/0289-cluster-ec-quorum-count-implementation.patch
new file mode 100644
index 0000000..6d24813
--- /dev/null
+++ b/0289-cluster-ec-quorum-count-implementation.patch
@@ -0,0 +1,721 @@
+From 0d54bb417e982a100ceefb5eab2a61a17e840f39 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Thu, 5 Sep 2019 16:12:39 +0530
+Subject: [PATCH 289/297] cluster/ec: quorum-count implementation
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23366
+upstream-issue: #721
+fixes: bz#1748688
+Change-Id: I5333540e3c635ccf441cf1f4696e4c8986e38ea8
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/180674
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h | 4 +-
+ tests/basic/ec/ec-quorum-count-partial-failure.t | 50 +++++++
+ tests/basic/ec/ec-quorum-count.t | 165 +++++++++++++++++++++++
+ tests/ec.rc | 9 ++
+ xlators/cluster/ec/src/ec-common.c | 13 ++
+ xlators/cluster/ec/src/ec-common.h | 24 ++++
+ xlators/cluster/ec/src/ec-dir-write.c | 57 ++++----
+ xlators/cluster/ec/src/ec-inode-write.c | 61 ++++-----
+ xlators/cluster/ec/src/ec-types.h | 1 +
+ xlators/cluster/ec/src/ec.c | 13 ++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 46 +++++++
+ 11 files changed, 383 insertions(+), 60 deletions(-)
+ create mode 100755 tests/basic/ec/ec-quorum-count-partial-failure.t
+ create mode 100644 tests/basic/ec/ec-quorum-count.t
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 55476f6..bdc8b3d 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -50,7 +50,7 @@
+ 1 /* MIN is the fresh start op-version, mostly \
+ should not change */
+ #define GD_OP_VERSION_MAX \
+- GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \
++ GD_OP_VERSION_8_0 /* MAX VERSION is the maximum \
+ count in VME table, should \
+ keep changing with \
+ introduction of newer \
+@@ -136,6 +136,8 @@
+
+ #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
+
++#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */
++
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+
+diff --git a/tests/basic/ec/ec-quorum-count-partial-failure.t b/tests/basic/ec/ec-quorum-count-partial-failure.t
+new file mode 100755
+index 0000000..79f5825
+--- /dev/null
++++ b/tests/basic/ec/ec-quorum-count-partial-failure.t
+@@ -0,0 +1,50 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++#This test checks that partial failure of fop results in main fop failure only
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
++TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
++TEST $CLI volume set $V0 performance.flush-behind off
++TEST $CLI volume start $V0
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=/$V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++
++TEST dd if=/dev/urandom of=$M0/a bs=12347 count=1
++TEST dd if=/dev/urandom of=$M0/b bs=12347 count=1
++TEST cp $M0/b $M0/c
++TEST fallocate -p -l 101 $M0/c
++TEST $CLI volume stop $V0
++TEST $CLI volume set $V0 debug.delay-gen posix;
++TEST $CLI volume set $V0 delay-gen.delay-duration 10000000;
++TEST $CLI volume set $V0 delay-gen.enable WRITE;
++TEST $CLI volume set $V0 delay-gen.delay-percentage 100
++TEST $CLI volume set $V0 disperse.quorum-count 6
++TEST $CLI volume start $V0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++cksum=$(dd if=$M0/a bs=12345 count=1 | md5sum | awk '{print $1}')
++truncate -s 12345 $M0/a & #While write is waiting for 5 seconds, introduce failure
++fallocate -p -l 101 $M0/b &
++sleep 1
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST wait
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
++EXPECT "12345" stat --format=%s $M0/a
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST kill_brick $V0 $H0 $B0/${V0}2
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
++cksum_after_heal=$(dd if=$M0/a | md5sum | awk '{print $1}')
++TEST [[ $cksum == $cksum_after_heal ]]
++cksum=$(dd if=$M0/c | md5sum | awk '{print $1}')
++cksum_after_heal=$(dd if=$M0/b | md5sum | awk '{print $1}')
++TEST [[ $cksum == $cksum_after_heal ]]
++
++cleanup;
+diff --git a/tests/basic/ec/ec-quorum-count.t b/tests/basic/ec/ec-quorum-count.t
+new file mode 100644
+index 0000000..56b5329
+--- /dev/null
++++ b/tests/basic/ec/ec-quorum-count.t
+@@ -0,0 +1,165 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../ec.rc
++
++cleanup
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
++TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
++TEST $CLI volume set $V0 performance.flush-behind off
++
++#Should fail on non-disperse volume
++TEST ! $CLI volume set $V1 disperse.quorum-count 5
++
++#Should fail outside the valid range and succeed within it
++TEST ! $CLI volume set $V0 disperse.quorum-count 0
++TEST ! $CLI volume set $V0 disperse.quorum-count -0
++TEST ! $CLI volume set $V0 disperse.quorum-count abc
++TEST ! $CLI volume set $V0 disperse.quorum-count 10abc
++TEST ! $CLI volume set $V0 disperse.quorum-count 1
++TEST ! $CLI volume set $V0 disperse.quorum-count 2
++TEST ! $CLI volume set $V0 disperse.quorum-count 3
++TEST $CLI volume set $V0 disperse.quorum-count 4
++TEST $CLI volume start $V0
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++
++#Test that the option is reflected in the mount
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^4$" ec_option_value $V0 $M0 0 quorum-count
++TEST $CLI volume reset $V0 disperse.quorum-count
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
++TEST $CLI volume set $V0 disperse.quorum-count 6
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^6$" ec_option_value $V0 $M0 0 quorum-count
++
++TEST touch $M0/a
++TEST touch $M0/data
++TEST setfattr -n trusted.def -v def $M0/a
++TEST touch $M0/src
++TEST touch $M0/del-me
++TEST mkdir $M0/dir1
++TEST dd if=/dev/zero of=$M0/read-file bs=1M count=1 oflag=direct
++TEST dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
++TEST gf_rm_file_and_gfid_link $B0/${V0}0 del-file
++#modify operations should fail as the file is not in quorum
++TEST ! dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
++TEST kill_brick $V0 $H0 $B0/${V0}0
++#Read should succeed even when quorum-count is not met
++TEST dd if=$M0/read-file of=/dev/null iflag=direct
++TEST ! touch $M0/a2
++TEST ! mkdir $M0/dir2
++TEST ! mknod $M0/b2 b 4 5
++TEST ! ln -s $M0/a $M0/symlink
++TEST ! ln $M0/a $M0/link
++TEST ! mv $M0/src $M0/dst
++TEST ! rm -f $M0/del-me
++TEST ! rmdir $M0/dir1
++TEST ! dd if=/dev/zero of=$M0/a bs=1M count=1 conv=notrunc
++TEST ! dd if=/dev/zero of=$M0/data bs=1M count=1 conv=notrunc
++TEST ! truncate -s 0 $M0/a
++TEST ! setfattr -n trusted.abc -v abc $M0/a
++TEST ! setfattr -x trusted.def $M0/a
++TEST ! chmod +x $M0/a
++TEST ! fallocate -l 2m -n $M0/a
++TEST ! fallocate -p -l 512k $M0/a
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
++
++# reset the option and check whether the default redundancy count is
++# accepted or not.
++TEST $CLI volume reset $V0 disperse.quorum-count
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
++TEST touch $M0/a1
++TEST touch $M0/data1
++TEST setfattr -n trusted.def -v def $M0/a1
++TEST touch $M0/src1
++TEST touch $M0/del-me1
++TEST mkdir $M0/dir11
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST touch $M0/a21
++TEST mkdir $M0/dir21
++TEST mknod $M0/b21 b 4 5
++TEST ln -s $M0/a1 $M0/symlink1
++TEST ln $M0/a1 $M0/link1
++TEST mv $M0/src1 $M0/dst1
++TEST rm -f $M0/del-me1
++TEST rmdir $M0/dir11
++TEST dd if=/dev/zero of=$M0/a1 bs=1M count=1 conv=notrunc
++TEST dd if=/dev/zero of=$M0/data1 bs=1M count=1 conv=notrunc
++TEST truncate -s 0 $M0/a1
++TEST setfattr -n trusted.abc -v abc $M0/a1
++TEST setfattr -x trusted.def $M0/a1
++TEST chmod +x $M0/a1
++TEST fallocate -l 2m -n $M0/a1
++TEST fallocate -p -l 512k $M0/a1
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++
++TEST touch $M0/a2
++TEST touch $M0/data2
++TEST setfattr -n trusted.def -v def $M0/a1
++TEST touch $M0/src2
++TEST touch $M0/del-me2
++TEST mkdir $M0/dir12
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST kill_brick $V0 $H0 $B0/${V0}2
++TEST ! touch $M0/a22
++TEST ! mkdir $M0/dir22
++TEST ! mknod $M0/b22 b 4 5
++TEST ! ln -s $M0/a2 $M0/symlink2
++TEST ! ln $M0/a2 $M0/link2
++TEST ! mv $M0/src2 $M0/dst2
++TEST ! rm -f $M0/del-me2
++TEST ! rmdir $M0/dir12
++TEST ! dd if=/dev/zero of=$M0/a2 bs=1M count=1 conv=notrunc
++TEST ! dd if=/dev/zero of=$M0/data2 bs=1M count=1 conv=notrunc
++TEST ! truncate -s 0 $M0/a2
++TEST ! setfattr -n trusted.abc -v abc $M0/a2
++TEST ! setfattr -x trusted.def $M0/a2
++TEST ! chmod +x $M0/a2
++TEST ! fallocate -l 2m -n $M0/a2
++TEST ! fallocate -p -l 512k $M0/a2
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
++
++# Set quorum-count to 5 and kill 1 brick and the fops should pass
++TEST $CLI volume set $V0 disperse.quorum-count 5
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^5$" ec_option_value $V0 $M0 0 quorum-count
++TEST touch $M0/a3
++TEST touch $M0/data3
++TEST setfattr -n trusted.def -v def $M0/a3
++TEST touch $M0/src3
++TEST touch $M0/del-me3
++TEST mkdir $M0/dir13
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST touch $M0/a31
++TEST mkdir $M0/dir31
++TEST mknod $M0/b31 b 4 5
++TEST ln -s $M0/a3 $M0/symlink3
++TEST ln $M0/a3 $M0/link3
++TEST mv $M0/src3 $M0/dst3
++TEST rm -f $M0/del-me3
++TEST rmdir $M0/dir13
++TEST dd if=/dev/zero of=$M0/a3 bs=1M count=1 conv=notrunc
++TEST dd if=/dev/zero of=$M0/data3 bs=1M count=1 conv=notrunc
++TEST truncate -s 0 $M0/a3
++TEST setfattr -n trusted.abc -v abc $M0/a3
++TEST setfattr -x trusted.def $M0/a3
++TEST chmod +x $M0/a3
++TEST fallocate -l 2m -n $M0/a3
++TEST fallocate -p -l 512k $M0/a3
++TEST dd if=/dev/urandom of=$M0/heal-file bs=1M count=1 oflag=direct
++cksum_before_heal="$(md5sum $M0/heal-file | awk '{print $1}')"
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
++TEST kill_brick $V0 $H0 $B0/${V0}4
++TEST kill_brick $V0 $H0 $B0/${V0}5
++cksum_after_heal=$(dd if=$M0/heal-file iflag=direct | md5sum | awk '{print $1}')
++TEST [[ $cksum_before_heal == $cksum_after_heal ]]
++cleanup;
+diff --git a/tests/ec.rc b/tests/ec.rc
+index 04405ec..f18752f 100644
+--- a/tests/ec.rc
++++ b/tests/ec.rc
+@@ -7,3 +7,12 @@ function ec_up_status()
+ local ec_id=$3
+ grep -E "^up =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='
+ }
++
++function ec_option_value()
++{
++ local v=$1
++ local m=$2
++ local ec_id=$3
++ local opt=$4
++ grep -E "^$opt =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='| awk '{print $1}'
++}
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 92d4e5d..2e59180 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -707,6 +707,19 @@ ec_child_select(ec_fop_data_t *fop)
+ return 0;
+ }
+
++ if (!fop->parent && fop->lock_count &&
++ (fop->locks[0].update[EC_DATA_TXN] ||
++ fop->locks[0].update[EC_METADATA_TXN])) {
++ if (ec->quorum_count && (num < ec->quorum_count)) {
++ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
++ "Insufficient available children "
++ "for this request (have %d, need "
++ "%d). %s",
++ num, ec->quorum_count, ec_msg_str(fop));
++ return 0;
++ }
++ }
++
+ return 1;
+ }
+
+diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
+index 3c69471..eab86ee 100644
+--- a/xlators/cluster/ec/src/ec-common.h
++++ b/xlators/cluster/ec/src/ec-common.h
+@@ -26,6 +26,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
+
+ #define EC_FLAG_LOCK_SHARED 0x0001
+
++#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) \
++ do { \
++ ec_t *__ec = fop->xl->private; \
++ int32_t __op_ret = 0; \
++ int32_t __op_errno = 0; \
++ int32_t __success_count = gf_bits_count(fop->good); \
++ \
++ __op_ret = op_ret; \
++ __op_errno = op_errno; \
++ if (!fop->parent && frame && \
++ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \
++ __ec->quorum_count && (__success_count < __ec->quorum_count) && \
++ op_ret >= 0) { \
++ __op_ret = -1; \
++ __op_errno = EIO; \
++ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \
++ EC_MSG_CHILDS_INSUFFICIENT, \
++ "Insufficient available children for this request " \
++ "(have %d, need %d). %s", \
++ __success_count, __ec->quorum_count, ec_msg_str(fop)); \
++ } \
++ fn(frame, cookie, this, __op_ret, __op_errno, params); \
++ } while (0)
++
+ enum _ec_xattrop_flags {
+ EC_FLAG_XATTROP,
+ EC_FLAG_DATA_DIRTY,
+diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
+index 0b8ee21..8192462 100644
+--- a/xlators/cluster/ec/src/ec-dir-write.c
++++ b/xlators/cluster/ec/src/ec-dir-write.c
+@@ -218,10 +218,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.create != NULL) {
+- fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, fop->fd, fop->loc[0].inode,
+- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, fop->fd,
++ fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1],
++ &cbk->iatt[2], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -390,9 +390,10 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.link != NULL) {
+- fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
++ QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
++ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -569,9 +570,10 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.mkdir != NULL) {
+- fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
++ QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
++ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -773,9 +775,10 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.mknod != NULL) {
+- fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
++ QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
++ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -931,10 +934,10 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.rename != NULL) {
+- fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+- &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++ &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3],
++ &cbk->iatt[4], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -1083,9 +1086,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.rmdir != NULL) {
+- fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -1237,10 +1240,10 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.symlink != NULL) {
+- fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, fop->loc[0].inode,
+- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
++ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -1392,9 +1395,9 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.unlink != NULL) {
+- fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
+index 8bfa3b4..2dbb4db 100644
+--- a/xlators/cluster/ec/src/ec-inode-write.c
++++ b/xlators/cluster/ec/src/ec-inode-write.c
+@@ -185,26 +185,26 @@ ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ switch (fop->id) {
+ case GF_FOP_SETXATTR:
+ if (fop->cbks.setxattr) {
+- fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno,
+- xdata);
++ QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
++ op_errno, xdata);
+ }
+ break;
+ case GF_FOP_REMOVEXATTR:
+ if (fop->cbks.removexattr) {
+- fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno,
+- xdata);
++ QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
++ op_ret, op_errno, xdata);
+ }
+ break;
+ case GF_FOP_FSETXATTR:
+ if (fop->cbks.fsetxattr) {
+- fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno,
+- xdata);
++ QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
++ op_ret, op_errno, xdata);
+ }
+ break;
+ case GF_FOP_FREMOVEXATTR:
+ if (fop->cbks.fremovexattr) {
+- fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno,
+- xdata);
++ QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
++ op_ret, op_errno, xdata);
+ }
+ break;
+ }
+@@ -494,16 +494,15 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
+
+ if (fop->id == GF_FOP_SETATTR) {
+ if (fop->cbks.setattr != NULL) {
+- fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, &cbk->iatt[0],
+- &cbk->iatt[1], cbk->xdata);
++ QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
++ fop->xl, cbk->op_ret, cbk->op_errno,
++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.fsetattr != NULL) {
+- fop->cbks.fsetattr(fop->req_frame, fop, fop->xl,
+- cbk->op_ret, cbk->op_errno,
+- &cbk->iatt[0], &cbk->iatt[1],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
++ fop->xl, cbk->op_ret, cbk->op_errno,
++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+ }
+ }
+
+@@ -994,9 +993,9 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.fallocate != NULL) {
+- fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
++ fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -1247,9 +1246,9 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.discard != NULL) {
+- fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+@@ -1477,17 +1476,15 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
+
+ if (fop->id == GF_FOP_TRUNCATE) {
+ if (fop->cbks.truncate != NULL) {
+- fop->cbks.truncate(fop->req_frame, fop, fop->xl,
+- cbk->op_ret, cbk->op_errno,
+- &cbk->iatt[0], &cbk->iatt[1],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
++ fop->xl, cbk->op_ret, cbk->op_errno,
++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.ftruncate != NULL) {
+- fop->cbks.ftruncate(fop->req_frame, fop, fop->xl,
+- cbk->op_ret, cbk->op_errno,
+- &cbk->iatt[0], &cbk->iatt[1],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
++ fop->xl, cbk->op_ret, cbk->op_errno,
++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+ }
+ }
+
+@@ -2245,9 +2242,9 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.writev != NULL) {
+- fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret,
+- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+- cbk->xdata);
++ QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index f27f2ec..ea4f6ad 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -654,6 +654,7 @@ struct _ec {
+ gf_boolean_t optimistic_changelog;
+ gf_boolean_t parallel_writes;
+ uint32_t stripe_cache;
++ uint32_t quorum_count;
+ uint32_t background_heals;
+ uint32_t heal_wait_qlen;
+ uint32_t self_heal_window_size; /* max size of read/writes */
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 3c8013e..19094c4 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -285,6 +285,7 @@ reconfigure(xlator_t *this, dict_t *options)
+ GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool,
+ failed);
+ GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed);
++ GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed);
+ ret = 0;
+ if (ec_assign_read_policy(ec, read_policy)) {
+ ret = -1;
+@@ -720,6 +721,7 @@ init(xlator_t *this)
+ failed);
+ GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);
+ GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
++ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
+
+ this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+@@ -1402,6 +1404,7 @@ ec_dump_private(xlator_t *this)
+ gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
+ gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
+ gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
++ gf_proc_dump_write("quorum-count", "%u", ec->quorum_count);
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
+ this->type, this->name);
+@@ -1672,6 +1675,16 @@ struct volume_options options[] = {
+ "lead to extra memory consumption, maximum "
+ "(cache size * stripe size) Bytes per open file."},
+ {
++ .key = {"quorum-count"},
++ .type = GF_OPTION_TYPE_INT,
++ .default_value = "0",
++ .description =
++ "This option can be used to define how many successes on"
++ "the bricks constitute a success to the application. This"
++ " count should be in the range"
++ "[disperse-data-count, disperse-count] (inclusive)",
++ },
++ {
+ .key = {NULL},
+ },
+ };
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 8ce338e..7ca47a6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -1128,6 +1128,42 @@ out:
+ }
+
+ static int
++validate_disperse_quorum_count(glusterd_volinfo_t *volinfo, dict_t *dict,
++ char *key, char *value, char **op_errstr)
++{
++ int ret = -1;
++ int quorum_count = 0;
++ int data_count = 0;
++
++ ret = gf_string2int(value, &quorum_count);
++ if (ret) {
++ gf_asprintf(op_errstr,
++ "%s is not an integer. %s expects a "
++ "valid integer value.",
++ value, key);
++ goto out;
++ }
++
++ if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE) {
++ gf_asprintf(op_errstr, "Cannot set %s for a non-disperse volume.", key);
++ ret = -1;
++ goto out;
++ }
++
++ data_count = volinfo->disperse_count - volinfo->redundancy_count;
++ if (quorum_count < data_count || quorum_count > volinfo->disperse_count) {
++ gf_asprintf(op_errstr, "%d for %s is out of range [%d - %d]",
++ quorum_count, key, data_count, volinfo->disperse_count);
++ ret = -1;
++ goto out;
++ }
++
++ ret = 0;
++out:
++ return ret;
++}
++
++static int
+ validate_parallel_readdir(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+ char *value, char **op_errstr)
+ {
+@@ -3663,6 +3699,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_13_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
++ {.key = "disperse.quorum-count",
++ .voltype = "cluster/disperse",
++ .type = NO_DOC,
++ .op_version = GD_OP_VERSION_8_0,
++ .validate_fn = validate_disperse_quorum_count,
++ .description = "This option can be used to define how many successes on "
++ "the bricks constitute a success to the application. This"
++ " count should be in the range "
++ "[disperse-data-count, disperse-count] (inclusive)",
++ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {
+ .key = "features.sdfs",
+ .voltype = "features/sdfs",
+--
+1.8.3.1
+
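The range enforced by validate_disperse_quorum_count() is [data-count, disperse-count], where data-count = disperse-count minus redundancy-count. For the "disperse 6 redundancy 2" volumes used in the tests this gives data-count = 6 - 2 = 4, so only 4, 5 and 6 are accepted, which is exactly what the negative tests for the values 1 through 3 verify. A standalone sketch of the same check:

    #include <stdio.h>

    /* Mirrors the range check in validate_disperse_quorum_count(). */
    static int
    quorum_count_valid(int disperse, int redundancy, int quorum)
    {
        int data = disperse - redundancy;
        return quorum >= data && quorum <= disperse;
    }

    int
    main(void)
    {
        /* disperse 6, redundancy 2: 3 is rejected, 4..6 are accepted */
        for (int q = 3; q <= 6; q++)
            printf("quorum-count %d: %s\n", q,
                   quorum_count_valid(6, 2, q) ? "valid" : "out of range");
        return 0;
    }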
diff --git a/0290-glusterd-tag-disperse.quorum-count-for-31306.patch b/0290-glusterd-tag-disperse.quorum-count-for-31306.patch
new file mode 100644
index 0000000..01ea8c2
--- /dev/null
+++ b/0290-glusterd-tag-disperse.quorum-count-for-31306.patch
@@ -0,0 +1,84 @@
+From 312da653ac80b537af06139f8d83a63180c72461 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Tue, 10 Sep 2019 14:04:17 +0530
+Subject: [PATCH 290/297] glusterd: tag disperse.quorum-count for 31306
+
+In upstream, disperse.quorum-count is marked for release-8; the
+latest new op-version is 31306.
+
+Label: DOWNSTREAM ONLY
+
+fixes: bz#1748688
+Change-Id: I88fdbd56ce3b8475b5ec670659adaa9d11c01d97
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/180675
+Reviewed-by: Ashish Pandey <aspandey@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h | 12 ++++++------
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index bdc8b3d..e218285 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -50,19 +50,19 @@
+ 1 /* MIN is the fresh start op-version, mostly \
+ should not change */
+ #define GD_OP_VERSION_MAX \
+- GD_OP_VERSION_8_0 /* MAX VERSION is the maximum \
++ GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \
+ count in VME table, should \
+ keep changing with \
+ introduction of newer \
+ versions */
+
+-#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */
++#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */
+
+ #define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_RHS_3_0
+
+-#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */
++#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */
+
+-#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */
++#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */
+
+ #define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */
+
+@@ -124,6 +124,8 @@
+
+ #define GD_OP_VERSION_3_13_5 31305 /* Op-version for GlusterFS 3.13.5 */
+
++#define GD_OP_VERSION_3_13_6 31306 /* Op-version for GlusterFS 3.13.6 */
++
+ #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
+
+ #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */
+@@ -136,8 +138,6 @@
+
+ #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
+
+-#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */
+-
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 7ca47a6..16601a2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3702,7 +3702,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "disperse.quorum-count",
+ .voltype = "cluster/disperse",
+ .type = NO_DOC,
+- .op_version = GD_OP_VERSION_8_0,
++ .op_version = GD_OP_VERSION_3_13_6,
+ .validate_fn = validate_disperse_quorum_count,
+ .description = "This option can be used to define how many successes on"
+ "the bricks constitute a success to the application. This"
+--
+1.8.3.1
+
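The retag in patch 0290 works because glusterd gates every volume option on the cluster op-version: an option whose op_version tag exceeds the running cluster's op-version cannot be set. A simplified, hypothetical sketch of that gate follows; the real check lives in glusterd's volume-set path and involves the full VME table, so the names here are illustrative only.

    #include <stdio.h>

    /* Hypothetical, simplified view of a volume-option map entry. */
    struct vol_opt {
        const char *key;
        int op_version; /* minimum cluster op-version required */
    };

    /* An option may only be set once every peer understands it,
     * i.e. the cluster op-version has reached the option's tag. */
    static int can_set_option(const struct vol_opt *opt,
                              int cluster_op_version)
    {
        return (cluster_op_version >= opt->op_version) ? 0 : -1;
    }

    int main(void)
    {
        struct vol_opt opt = {"disperse.quorum-count", 31306};
        printf("%d\n", can_set_option(&opt, 31305)); /* -1: peers too old */
        printf("%d\n", can_set_option(&opt, 31306)); /*  0: allowed */
        return 0;
    }

With the patch, clusters that reach op-version 31306 can enable the option, whereas the upstream GD_OP_VERSION_8_0 tag (80000) would have kept it out of reach on this 3.13-based line.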
diff --git a/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch b/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch
new file mode 100644
index 0000000..efdbc23
--- /dev/null
+++ b/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch
@@ -0,0 +1,106 @@
+From 87d8070f80487322a1736846a78725fd88f8de34 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Tue, 20 Aug 2019 13:27:24 +0530
+Subject: [PATCH 291/297] cluster/ec: Mark release only when it is acquired
+
+Problem (timeline across two mounts):
+1) Mount-1 tries to acquire a lock on 'dir1'; Mount-2 tries the same.
+2) Mount-1 is granted the lock on brick-0; Mount-2 gets EAGAIN on
+   brick-0, which leads to a blocking lock on brick-0.
+3) Mount-1 gets a lock-contention notification and marks lock->release
+   to true. (What happens on Mount-2 from here on doesn't matter.)
+4) A new fop comes on 'dir1' which will be put in the frozen list, as
+   lock->release is set to true.
+5) The lock acquisition from step-2 fails because 3 bricks went down
+   in the 4+2 setup.
+
+Fop on mount-1 which is put in frozen list will hang because no codepath will
+move it from frozen list to any other list and the lock will not be retried.
+
+Fix:
+Don't set lock->release to true if lock is not acquired at the time of
+lock-contention-notification
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23272
+fixes: bz#1731896
+Change-Id: Ie6630db8735ccf372cc54b873a3a3aed7a6082b7
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/180870
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 20 ++++++++++++++++++--
+ xlators/cluster/ec/src/ec-types.h | 1 +
+ 2 files changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 2e59180..5cae37b 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -1867,6 +1867,10 @@ ec_lock_acquired(ec_lock_link_t *link)
+ LOCK(&lock->loc.inode->lock);
+
+ lock->acquired = _gf_true;
++ if (lock->contention) {
++ lock->release = _gf_true;
++ lock->contention = _gf_false;
++ }
+
+ ec_lock_update_fd(lock, fop);
+ ec_lock_wake_shared(lock, &list);
+@@ -1892,15 +1896,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ ec_lock_link_t *link = NULL;
+ ec_lock_t *lock = NULL;
+
++ link = fop->data;
++ lock = link->lock;
+ if (op_ret >= 0) {
+- link = fop->data;
+- lock = link->lock;
+ lock->mask = lock->good_mask = fop->good;
+ lock->healing = 0;
+
+ ec_lock_acquired(link);
+ ec_lock(fop->parent);
+ } else {
++ LOCK(&lock->loc.inode->lock);
++ {
++ lock->contention = _gf_false;
++ }
++ UNLOCK(&lock->loc.inode->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
+ "Failed to complete preop lock");
+ }
+@@ -2547,6 +2556,13 @@ ec_lock_release(ec_t *ec, inode_t *inode)
+ gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
+ inode);
+
++ if (!lock->acquired) {
++ /* This happens if some bricks already got the lock while inodelk is in
++ * progress. Set release to true after the lock is acquired. */
++ lock->contention = _gf_true;
++ goto done;
++ }
++
+ /* The lock is not marked to be released, so the frozen list should be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->frozen));
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index ea4f6ad..34a9768 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -267,6 +267,7 @@ struct _ec_lock {
+ uint32_t refs_pending; /* Refs assigned to fops being prepared */
+ uint32_t waiting_flags; /*Track xattrop/dirty marking*/
+ gf_boolean_t acquired;
++ gf_boolean_t contention;
+ gf_boolean_t unlock_now;
+ gf_boolean_t release;
+ gf_boolean_t query;
+--
+1.8.3.1
+
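The essence of the fix in patch 0291 is deferring the release decision: a contention notification that arrives before the inodelk completes only records a contention flag, which is promoted to release once the lock is actually acquired, or discarded if acquisition fails. A reduced model of that state machine, using illustrative names rather than the real ec_lock_t:

    #include <stdbool.h>
    #include <stdio.h>

    /* Reduced model of the ec_lock_t flags involved in the fix. */
    struct ec_lock_model {
        bool acquired;   /* inodelk granted */
        bool contention; /* contention seen before acquisition */
        bool release;    /* lock must be released after current fops */
    };

    /* Contention notification: only mark release if already acquired. */
    static void on_contention(struct ec_lock_model *l)
    {
        if (!l->acquired) {
            l->contention = true; /* remember, decide at acquisition */
            return;
        }
        l->release = true;
    }

    /* Acquisition succeeded: honor contention seen while acquiring. */
    static void on_acquired(struct ec_lock_model *l)
    {
        l->acquired = true;
        if (l->contention) {
            l->release = true;
            l->contention = false;
        }
    }

    /* Acquisition failed: drop the deferred contention mark. */
    static void on_failed(struct ec_lock_model *l)
    {
        l->contention = false;
    }

    int main(void)
    {
        struct ec_lock_model l = {false, false, false};
        on_contention(&l);            /* contention before acquisition */
        printf("%d\n", l.release);    /* 0: release NOT set, no freeze */
        on_failed(&l);                /* acquisition then fails */
        printf("%d\n", l.contention); /* 0: flag cleared cleanly */
        return 0;
    }

In the buggy sequence from the commit message, the contention arrives before acquisition; since release is never set on an unacquired lock, new fops are no longer frozen against a lock that can still fail.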
diff --git a/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch b/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch
new file mode 100644
index 0000000..07fc8f4
--- /dev/null
+++ b/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch
@@ -0,0 +1,72 @@
+From 769263ad422e3c1069de0994ff2274044982b242 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Sun, 1 Sep 2019 12:01:09 +0530
+Subject: [PATCH 292/297] rpc: Update address family if it is not provide in
+ cmd-line arguments
+
+Problem: After enabling transport-type inet6 and passing an ipv6
+ transport.socket.bind-address in glusterd.vol, clients are
+ not started.
+
+Solution: Update the address-family based on the remote-address for
+ all gluster client processes
+
+> Change-Id: Iaa3588cd87cebc45231bfd675745c1a457dc9b31
+> Fixes: bz#1747746
+> Credits: Amgad Saleh <amgad.saleh@nokia.com>
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit 80b8cfe3f1386606bada97a76a0cad7acdf6b877)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23340/)
+
+Change-Id: Iaa3588cd87cebc45231bfd675745c1a457dc9b31
+BUG: 1750241
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181184
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ rpc/rpc-transport/socket/src/name.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
+index 7f18cc4..b473f3b 100644
+--- a/rpc/rpc-transport/socket/src/name.c
++++ b/rpc/rpc-transport/socket/src/name.c
+@@ -214,6 +214,7 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
+ uint16_t remote_port = 0;
+ struct addrinfo *addr_info = NULL;
+ int32_t ret = 0;
++ struct in6_addr serveraddr;
+
+ remote_host_data = dict_get(options, "remote-host");
+ if (remote_host_data == NULL) {
+@@ -249,6 +250,13 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
+ goto err;
+ }
+
++ /* Need to update the transport address family if the address family is
++ not provided in the command-line arguments
++ */
++ if (inet_pton(AF_INET6, remote_host, &serveraddr)) {
++ sockaddr->sa_family = AF_INET6;
++ }
++
+ /* TODO: gf_resolve is a blocking call. kick in some
+ non blocking dns techniques */
+ ret = gf_resolve_ip6(remote_host, remote_port, sockaddr->sa_family,
+@@ -522,7 +530,10 @@ socket_client_get_remote_sockaddr(rpc_transport_t *this,
+ ret = -1;
+ }
+
+- if (*sa_family == AF_UNSPEC) {
++ /* Address-family is updated based on remote_host in
++ af_inet_client_get_remote_sockaddr
++ */
++ if (*sa_family != sockaddr->sa_family) {
+ *sa_family = sockaddr->sa_family;
+ }
+
+--
+1.8.3.1
+
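The detection trick in patch 0292 is that inet_pton() returns 1 only when the string is a valid literal for the requested family, so probing with AF_INET6 cheaply distinguishes `::1` from an IPv4 literal or a hostname. A self-contained sketch:

    #include <stdio.h>
    #include <arpa/inet.h>

    /* Pick an address family from a remote-host string the way the
     * patch does: default to AF_INET, switch to AF_INET6 when the
     * string parses as an IPv6 literal. */
    static int family_for_host(const char *host)
    {
        struct in6_addr addr6;

        if (inet_pton(AF_INET6, host, &addr6) == 1)
            return AF_INET6;
        return AF_INET;
    }

    int main(void)
    {
        printf("%d\n", family_for_host("::1") == AF_INET6);      /* 1 */
        printf("%d\n", family_for_host("127.0.0.1") == AF_INET); /* 1 */
        return 0;
    }

A plain hostname fails the AF_INET6 probe and keeps the default family, after which normal resolution (gf_resolve_ip6 in the patch) still applies.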
diff --git a/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch b/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch
new file mode 100644
index 0000000..23120cb
--- /dev/null
+++ b/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch
@@ -0,0 +1,69 @@
+From 8f89aef9691b0806d7487525c6a54a1a615c8bc1 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Mon, 2 Sep 2019 10:46:10 +0530
+Subject: [PATCH 293/297] glusterd: IPV6 hostname address is not parsed
+ correctly
+
+Problem: IPV6 hostname address is not parsed correctly in function
+ glusterd_check_brick_order
+
+Solution: Update the code to parse the hostname/address correctly
+
+> Change-Id: Ifb2f83f9c6e987b2292070e048e97eeb51b728ab
+> Fixes: bz#1747746
+> Credits: Amgad Saleh <amgad.saleh@nokia.com>
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit 6563ffb04d7ba51a89726e7c5bbb85c7dbc685b5)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23341/)
+
+Change-Id: Ifb2f83f9c6e987b2292070e048e97eeb51b728ab
+BUG: 1750241
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181185
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 1ea8ba6..076bc80 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -95,6 +95,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str)
+ int32_t type = GF_CLUSTER_TYPE_NONE;
+ int32_t sub_count = 0;
+ struct addrinfo *ai_info = NULL;
++ char brick_addr[128] = {
++ 0,
++ };
++ int addrlen = 0;
+
+ const char failed_string[2048] =
+ "Failed to perform brick order "
+@@ -182,15 +186,17 @@ glusterd_check_brick_order(dict_t *dict, char *err_str)
+ brick_list_dup = tmpptr;
+ if (brick == NULL)
+ goto check_failed;
+- brick = strtok_r(brick, ":", &tmpptr);
+- if (brick == NULL)
++ tmpptr = strrchr(brick, ':');
++ if (tmpptr == NULL)
+ goto check_failed;
+- ret = getaddrinfo(brick, NULL, NULL, &ai_info);
++ addrlen = strlen(brick) - strlen(tmpptr);
++ strncpy(brick_addr, brick, addrlen);
++ brick_addr[addrlen] = '\0';
++ ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info);
+ if (ret != 0) {
+ ret = 0;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL,
+- "unable to resolve "
+- "host name");
++ "unable to resolve host name for addr %s", brick_addr);
+ goto out;
+ }
+ ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t));
+--
+1.8.3.1
+
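The parsing bug fixed in patch 0293 comes from splitting a brick string of the form host:/path with strtok_r on ':', which truncates an IPv6 literal such as `::1` at its first colon. Splitting on the last colon instead, as the patch does with strrchr, keeps the address intact. A minimal sketch with illustrative names:

    #include <stdio.h>
    #include <string.h>

    /* Split "host:/brick/path" on the LAST colon so IPv6 literals
     * such as "::1" keep their internal colons intact. */
    static int split_brick_host(const char *brick, char *host, size_t hostsz)
    {
        const char *sep = strrchr(brick, ':');
        size_t len;

        if (sep == NULL)
            return -1;
        len = (size_t)(sep - brick);
        if (len == 0 || len >= hostsz)
            return -1;
        memcpy(host, brick, len);
        host[len] = '\0';
        return 0;
    }

    int main(void)
    {
        char host[128];

        if (split_brick_host("::1:/bricks/b1", host, sizeof(host)) == 0)
            printf("%s\n", host); /* prints "::1" intact */
        return 0;
    }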
diff --git a/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch b/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch
new file mode 100644
index 0000000..1665185
--- /dev/null
+++ b/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch
@@ -0,0 +1,59 @@
+From 2fa5476b95d4547bdde50f2281bf58b7db24e37a Mon Sep 17 00:00:00 2001
+From: Aravinda VK <avishwan@redhat.com>
+Date: Mon, 16 Sep 2019 10:04:26 +0530
+Subject: [PATCH 294/297] eventsapi: Set IPv4/IPv6 family based on input IP
+
+server.sin_family was set to AF_INET while creating the socket
+connection; this was failing if the input address is IPv6 (`::1`).
+
+With this patch, sin_family is set by reading the ai_family of
+`getaddrinfo` result.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23423/
+
+>Fixes: bz#1752330
+>Change-Id: I499f957b432842fa989c698f6e5b25b7016084eb
+>Signed-off-by: Aravinda VK <avishwan@redhat.com>
+
+BUG: 1732443
+Change-Id: I499f957b432842fa989c698f6e5b25b7016084eb
+Signed-off-by: Aravinda VK <avishwan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181197
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/events.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
+index 2509767..9d33783 100644
+--- a/libglusterfs/src/events.c
++++ b/libglusterfs/src/events.c
+@@ -42,6 +42,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ struct addrinfo hints;
+ struct addrinfo *result = NULL;
+ xlator_t *this = THIS;
++ int sin_family = AF_INET;
+
+ /* Global context */
+ ctx = THIS->ctx;
+@@ -75,13 +76,15 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ ret = EVENT_ERROR_RESOLVE;
+ goto out;
+ }
++
++ sin_family = result->ai_family;
+ } else {
+ /* Localhost, Use the defined IP for localhost */
+ host = gf_strdup(EVENT_HOST);
+ }
+
+ /* Socket Configurations */
+- server.sin_family = AF_INET;
++ server.sin_family = sin_family;
+ server.sin_port = htons(EVENT_PORT);
+ ret = inet_pton(server.sin_family, host, &server.sin_addr);
+ if (ret <= 0) {
+--
+1.8.3.1
+
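Rather than hard-coding AF_INET, the fix in patch 0294 reads the family back from the getaddrinfo() result, so a `::1` target yields AF_INET6 and the subsequent inet_pton() and socket setup agree on the family. A hypothetical standalone version of that lookup:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netdb.h>

    /* Resolve a host and report the family of the first result, the
     * way the eventsapi fix derives sin_family from getaddrinfo(). */
    static int resolved_family(const char *host)
    {
        struct addrinfo hints = {0};
        struct addrinfo *res = NULL;
        int family;

        hints.ai_family = AF_UNSPEC; /* accept IPv4 or IPv6 */
        if (getaddrinfo(host, NULL, &hints, &res) != 0)
            return AF_INET; /* fall back to the old default */
        family = res->ai_family;
        freeaddrinfo(res);
        return family;
    }

    int main(void)
    {
        printf("%s\n",
               resolved_family("::1") == AF_INET6 ? "inet6" : "inet");
        return 0;
    }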
diff --git a/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch b/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch
new file mode 100644
index 0000000..9d3820d
--- /dev/null
+++ b/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch
@@ -0,0 +1,1164 @@
+From d5ce2300f77c25b38a076d4dd6a5521e82c56172 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Mon, 29 Jul 2019 18:30:42 +0530
+Subject: [PATCH 295/297] ctime/rebalance: Heal ctime xattr on directory during
+ rebalance
+
+After add-brick and rebalance, the ctime xattr is not present
+on rebalanced directories on the new brick. This patch fixes
+that.
+
+Note that ctime still doesn't support consistent time across
+distribute sub-volumes.
+
+This patch also fixes the in-memory inconsistency of time attributes
+when metadata is self healed.
+
+Backport of:
+ > Patch: https://review.gluster.org/23127/
+ > Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df
+ > fixes: bz#1734026
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df
+BUG: 1728673
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181105
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ tests/basic/afr/split-brain-healing-ctime.t | 253 +++++++++++++++++++++
+ tests/basic/afr/split-brain-healing.t | 1 +
+ tests/basic/ctime/ctime-ec-heal.t | 71 ++++++
+ tests/basic/ctime/ctime-ec-rebalance.t | 44 ++++
+ tests/basic/ctime/ctime-rep-heal.t | 71 ++++++
+ tests/basic/ctime/ctime-rep-rebalance.t | 42 ++++
+ .../bug-1734370-entry-heal-restore-time.t | 84 +++++++
+ tests/volume.rc | 15 +-
+ xlators/cluster/afr/src/afr-self-heal-common.c | 3 +-
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 2 +
+ xlators/cluster/dht/src/dht-common.c | 1 +
+ xlators/cluster/ec/src/ec-heal.c | 7 +-
+ xlators/storage/posix/src/posix-entry-ops.c | 8 +-
+ xlators/storage/posix/src/posix-helpers.c | 31 ++-
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 57 ++---
+ xlators/storage/posix/src/posix-metadata.c | 65 +++++-
+ xlators/storage/posix/src/posix-metadata.h | 7 +
+ xlators/storage/posix/src/posix.h | 5 +-
+ 18 files changed, 714 insertions(+), 53 deletions(-)
+ create mode 100644 tests/basic/afr/split-brain-healing-ctime.t
+ create mode 100644 tests/basic/ctime/ctime-ec-heal.t
+ create mode 100644 tests/basic/ctime/ctime-ec-rebalance.t
+ create mode 100644 tests/basic/ctime/ctime-rep-heal.t
+ create mode 100644 tests/basic/ctime/ctime-rep-rebalance.t
+ create mode 100644 tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
+
+diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t
+new file mode 100644
+index 0000000..1ca18e3
+--- /dev/null
++++ b/tests/basic/afr/split-brain-healing-ctime.t
+@@ -0,0 +1,253 @@
++#!/bin/bash
++
++#Test the split-brain resolution CLI commands.
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++function get_replicate_subvol_number {
++ local filename=$1
++ #get_backend_paths
++ if [ -f $B0/${V0}1/$filename ]
++ then
++ echo 0
++ elif [ -f $B0/${V0}3/$filename ]
++ then echo 1
++ else
++ echo -1
++ fi
++}
++
++cleanup;
++
++AREQUAL_PATH=$(dirname $0)/../../utils
++GET_MDATA_PATH=$(dirname $0)/../../utils
++CFLAGS=""
++test "`uname -s`" != "Linux" && {
++ CFLAGS="$CFLAGS -lintl";
++}
++build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
++build_tester $GET_MDATA_PATH/get-mdata-xattr.c
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
++TEST $CLI volume set $V0 cluster.self-heal-daemon off
++TEST $CLI volume set $V0 cluster.data-self-heal off
++TEST $CLI volume set $V0 cluster.metadata-self-heal off
++TEST $CLI volume set $V0 cluster.entry-self-heal off
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++cd $M0
++for i in {1..10}
++do
++ echo "Initial content">>file$i
++done
++
++replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`)
++replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`)
++
++############ Create data split-brain in the files. ###########################
++TEST kill_brick $V0 $H0 $B0/${V0}1
++for file in ${!replica_0_files_list[*]}
++do
++ echo "B1 is down">>${replica_0_files_list[$file]}
++done
++TEST kill_brick $V0 $H0 $B0/${V0}3
++for file in ${!replica_1_files_list[*]}
++do
++ echo "B3 is down">>${replica_1_files_list[$file]}
++done
++
++SMALLER_FILE_SIZE=$(stat -c %s file1)
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++
++TEST kill_brick $V0 $H0 $B0/${V0}2
++for file in ${!replica_0_files_list[*]}
++do
++ echo "B2 is down">>${replica_0_files_list[$file]}
++ echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]}
++done
++TEST kill_brick $V0 $H0 $B0/${V0}4
++for file in ${!replica_1_files_list[*]}
++do
++ echo "B4 is down">>${replica_1_files_list[$file]}
++ echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]}
++done
++
++BIGGER_FILE_SIZE=$(stat -c %s file1)
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
++
++
++############### Accessing the files should now give EIO. ###############################
++TEST ! cat file1
++TEST ! cat file2
++TEST ! cat file3
++TEST ! cat file4
++TEST ! cat file5
++TEST ! cat file6
++TEST ! cat file7
++TEST ! cat file8
++TEST ! cat file9
++TEST ! cat file10
++###################
++TEST $CLI volume set $V0 cluster.self-heal-daemon on
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
++
++################ Heal file1 using the bigger-file option ##############
++$CLI volume heal $V0 split-brain bigger-file /file1
++EXPECT "0" echo $?
++EXPECT $BIGGER_FILE_SIZE stat -c %s file1
++
++################ Heal file2 using the bigger-file option and its gfid ##############
++subvolume=$(get_replicate_subvol_number file2)
++if [ $subvolume == 0 ]
++then
++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2)
++elif [ $subvolume == 1 ]
++then
++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2)
++fi
++GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++$CLI volume heal $V0 split-brain bigger-file $GFIDSTR
++EXPECT "0" echo $?
++
++################ Heal file3 using the source-brick option ##############
++################ Use the brick having smaller file size as source #######
++subvolume=$(get_replicate_subvol_number file3)
++if [ $subvolume == 0 ]
++then
++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3
++elif [ $subvolume == 1 ]
++then
++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
++fi
++EXPECT "0" echo $?
++EXPECT $SMALLER_FILE_SIZE stat -c %s file3
++
++################ Heal file4 using the source-brick option and its gfid ##############
++################ Use the brick having smaller file size as source #######
++subvolume=$(get_replicate_subvol_number file4)
++if [ $subvolume == 0 ]
++then
++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4)
++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR
++elif [ $subvolume == 1 ]
++then
++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4)
++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR
++fi
++EXPECT "0" echo $?
++EXPECT $SMALLER_FILE_SIZE stat -c %s file4
++
++# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed
++# as part of metadata heal. So mtime would be same, hence it can't be healed
++# using 'latest-mtime' policy, use 'source-brick' option instead.
++################ Heal file5 using the source-brick option ##############
++subvolume=$(get_replicate_subvol_number file5)
++if [ $subvolume == 0 ]
++then
++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5
++elif [ $subvolume == 1 ]
++then
++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5
++fi
++EXPECT "0" echo $?
++
++if [ $subvolume == 0 ]
++then
++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
++elif [ $subvolume == 1 ]
++then
++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
++fi
++
++#TODO: Do the below comparisons at full sub-second resolution
++
++TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
++
++mtime_mount_after_heal=$(stat -c %Y file5)
++
++TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
++
++################ Heal file6 using the source-brick option and its gfid ##############
++subvolume=$(get_replicate_subvol_number file6)
++if [ $subvolume == 0 ]
++then
++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
++elif [ $subvolume == 1 ]
++then
++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR
++fi
++EXPECT "0" echo $?
++
++if [ $subvolume == 0 ]
++then
++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
++elif [ $subvolume == 1 ]
++then
++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
++fi
++
++#TODO: Do the below comparisons at full sub-second resolution
++
++TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
++
++mtime_mount_after_heal=$(stat -c %Y file6)
++
++TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
++
++################ Heal remaining SB'ed files of replica_0 using B1 as source ##############
++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1
++EXPECT "0" echo $?
++
++################ Heal remaining SB'ed files of replica_1 using B3 as source ##############
++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3
++EXPECT "0" echo $?
++
++############### Reading the files should now succeed. ###############################
++TEST cat file1
++TEST cat file2
++TEST cat file3
++TEST cat file4
++TEST cat file5
++TEST cat file6
++TEST cat file7
++TEST cat file8
++TEST cat file9
++TEST cat file10
++
++################ File contents on the bricks must be the same. ################################
++TEST diff <(arequal-checksum -p $B0/${V0}1 -i .glusterfs) <(arequal-checksum -p $B0/${V0}2 -i .glusterfs)
++TEST diff <(arequal-checksum -p $B0/${V0}3 -i .glusterfs) <(arequal-checksum -p $B0/${V0}4 -i .glusterfs)
++
++############### Trying to heal files not in SB should fail. ###############################
++$CLI volume heal $V0 split-brain bigger-file /file1
++EXPECT "1" echo $?
++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
++EXPECT "1" echo $?
++
++cd -
++TEST rm $AREQUAL_PATH/arequal-checksum
++TEST rm $GET_MDATA_PATH/get-mdata-xattr
++cleanup
+diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
+index 78553e6..315e815 100644
+--- a/tests/basic/afr/split-brain-healing.t
++++ b/tests/basic/afr/split-brain-healing.t
+@@ -35,6 +35,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
+ TEST $CLI volume set $V0 cluster.data-self-heal off
+ TEST $CLI volume set $V0 cluster.metadata-self-heal off
+ TEST $CLI volume set $V0 cluster.entry-self-heal off
++TEST $CLI volume set $V0 ctime off
+ TEST $CLI volume start $V0
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+diff --git a/tests/basic/ctime/ctime-ec-heal.t b/tests/basic/ctime/ctime-ec-heal.t
+new file mode 100644
+index 0000000..1cb4516
+--- /dev/null
++++ b/tests/basic/ctime/ctime-ec-heal.t
+@@ -0,0 +1,71 @@
++#!/bin/bash
++#
++# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
++#
++###
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup
++
++#create and start volume
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{1..3}
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++
++#Mount the volume
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Create files
++mkdir $M0/dir1
++echo "Initial content" > $M0/file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++
++# Kill brick
++TEST kill_brick $V0 $H0 $B0/${V0}3
++
++echo "B3 is down" >> $M0/file1
++echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
++echo "Entry heal file" > $M0/entry_heal_file1
++mkdir $M0/entry_heal_dir1
++
++# Check xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
++
++TEST $CLI volume start $V0 force
++$CLI volume heal $V0
++
++# Check xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
++
++cleanup;
+diff --git a/tests/basic/ctime/ctime-ec-rebalance.t b/tests/basic/ctime/ctime-ec-rebalance.t
+new file mode 100644
+index 0000000..caccdc1
+--- /dev/null
++++ b/tests/basic/ctime/ctime-ec-rebalance.t
+@@ -0,0 +1,44 @@
++#!/bin/bash
++#
++# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
++#
++###
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fallocate.rc
++
++cleanup
++
++#create and start volume
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++
++#Mount the volume
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
++
++# Create files
++mkdir $M0/dir1
++echo "test data" > $M0/dir1/file1
++
++# Add brick
++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
++
++#Trigger rebalance
++TEST $CLI volume rebalance $V0 start force
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
++
++#Verify ctime xattr heal on directory
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
++
++b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
++
++cleanup;
+diff --git a/tests/basic/ctime/ctime-rep-heal.t b/tests/basic/ctime/ctime-rep-heal.t
+new file mode 100644
+index 0000000..ba8b08a
+--- /dev/null
++++ b/tests/basic/ctime/ctime-rep-heal.t
+@@ -0,0 +1,71 @@
++#!/bin/bash
++#
++# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
++#
++###
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup
++
++#create and start volume
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++
++#Mount the volume
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Create files
++mkdir $M0/dir1
++echo "Initial content" > $M0/file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++
++# Kill brick
++TEST kill_brick $V0 $H0 $B0/${V0}3
++
++echo "B3 is down" >> $M0/file1
++echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
++echo "Entry heal file" > $M0/entry_heal_file1
++mkdir $M0/entry_heal_dir1
++
++# Check xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
++
++TEST $CLI volume start $V0 force
++$CLI volume heal $V0
++
++# Check xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
++
++cleanup;
+diff --git a/tests/basic/ctime/ctime-rep-rebalance.t b/tests/basic/ctime/ctime-rep-rebalance.t
+new file mode 100644
+index 0000000..dd9743e
+--- /dev/null
++++ b/tests/basic/ctime/ctime-rep-rebalance.t
+@@ -0,0 +1,42 @@
++#!/bin/bash
++#
++# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
++#
++###
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup
++
++#create and start volume
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5}
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++
++#Mount the volume
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Create files
++mkdir $M0/dir1
++
++# Add brick
++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
++
++#Trigger rebalance
++TEST $CLI volume rebalance $V0 start force
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
++
++#Verify ctime xattr heal on directory
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
++
++b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
++
++cleanup;
+diff --git a/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
+new file mode 100644
+index 0000000..298d6ed
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
+@@ -0,0 +1,84 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++function time_stamps_match {
++ path=$1
++ mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
++ atime_source_b0=$(get_atime $B0/${V0}0/$path)
++ mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
++ atime_source_b2=$(get_atime $B0/${V0}2/$path)
++ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
++ atime_sink_b1=$(get_atime $B0/${V0}1/$path)
++
++ #The same brick must be the source of heal for both atime and mtime.
++ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1 && $atime_source_b0 -eq $atime_sink_b1 ) || \
++ ( $mtime_source_b2 -eq $mtime_sink_b1 && $atime_source_b2 -eq $atime_sink_b1 ) ]]
++ then
++ echo "Y"
++ else
++ echo "N"
++ fi
++
++}
++
++# Test that the parent dir's timestamps are restored during entry-heal.
++GET_MDATA_PATH=$(dirname $0)/../../utils
++build_tester $GET_MDATA_PATH/get-mdata-xattr.c
++
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0;
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++
++###############################################################################
++TEST mkdir $M0/DIR
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST touch $M0/DIR/FILE
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
++
++EXPECT "Y" time_stamps_match DIR
++ctime_source1=$(get_ctime $B0/${V0}0/$path)
++ctime_source2=$(get_ctime $B0/${V0}2/$path)
++ctime_sink=$(get_ctime $B0/${V0}1/$path)
++TEST [ $ctime_source1 -eq $ctime_sink ]
++TEST [ $ctime_source2 -eq $ctime_sink ]
++
++###############################################################################
++# Repeat the test with ctime feature disabled.
++TEST $CLI volume set $V0 features.ctime off
++TEST mkdir $M0/DIR2
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST touch $M0/DIR2/FILE
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
++
++EXPECT "Y" time_stamps_match DIR2
++
++TEST rm $GET_MDATA_PATH/get-mdata-xattr
++cleanup;
+diff --git a/tests/volume.rc b/tests/volume.rc
+index 76a8fd4..9a002d9 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -371,6 +371,19 @@ function get_gfid2path {
+ getfattr -h --only-values -n glusterfs.gfidtopath $path 2>/dev/null
+ }
+
++function get_mdata {
++ local path=$1
++ getfattr -h -e hex -n trusted.glusterfs.mdata $path 2>/dev/null | grep "trusted.glusterfs.mdata" | cut -f2 -d'='
++}
++
++function get_mdata_count {
++ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | wc -l
++}
++
++function get_mdata_uniq_count {
++ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | uniq | wc -l
++}
++
+ function get_xattr_key {
+ local key=$1
+ local path=$2
+@@ -925,7 +938,7 @@ function get_ctime {
+ local time=$(get-mdata-xattr -c $1)
+ if [ $time == "-1" ];
+ then
+- echo $(stat -c %Z $2)
++ echo $(stat -c %Z $1)
+ else
+ echo $time
+ fi
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index b38085a..81ef38a 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -513,7 +513,8 @@ afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
+ &replies[source].poststat,
+- (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME), NULL);
++ (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME),
++ NULL);
+
+ loc_wipe(&loc);
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index e07b521..35b600f 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -1032,6 +1032,8 @@ unlock:
+ goto postop_unlock;
+ }
+
++ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
++ locked_replies);
+ ret = afr_selfheal_undo_pending(
+ frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 219b072..99cccd6 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -115,6 +115,7 @@ char *xattrs_to_heal[] = {"user.",
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ GF_SELINUX_XATTR_KEY,
++ GF_XATTR_MDATA_KEY,
+ NULL};
+
+ char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 0f0f398..06a7016 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2301,9 +2301,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
+
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
+- ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
+- output, frame, ec->xl, &loc, &source_buf,
+- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
++ ret = cluster_setattr(
++ ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
++ ec->xl, &loc, &source_buf,
++ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
+ EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index 34ee2b8..283b305 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -500,7 +500,7 @@ post_op:
+ posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+ }
+
+- op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+ if (op_ret) {
+ if (errno != EEXIST)
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+@@ -828,7 +828,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ "setting ACLs on %s failed ", real_path);
+ }
+
+- op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed", real_path);
+@@ -1529,7 +1529,7 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ }
+
+ ignore:
+- op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed ", real_path);
+@@ -2167,7 +2167,7 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+ }
+ ignore:
+- op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed ", real_path);
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index d143d4c..6a1a35c 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1188,11 +1188,15 @@ posix_dump_buffer(xlator_t *this, const char *real_path, const char *key,
+ #endif
+
+ int
+-posix_handle_pair(xlator_t *this, const char *real_path, char *key,
++posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
+ data_t *value, int flags, struct iatt *stbuf)
+ {
+ int sys_ret = -1;
+ int ret = 0;
++ int op_errno = 0;
++ struct mdata_iatt mdata_iatt = {
++ 0,
++ };
+ #ifdef GF_DARWIN_HOST_OS
+ const int error_code = EINVAL;
+ #else
+@@ -1216,6 +1220,23 @@ posix_handle_pair(xlator_t *this, const char *real_path, char *key,
+ /* ignore this key value pair */
+ ret = 0;
+ goto out;
++ } else if (!strncmp(key, GF_XATTR_MDATA_KEY, strlen(key))) {
++ /* This is either by rebalance or self heal. Create the xattr if it's
++ * not present. Compare and update the larger value if the xattr is
++ * already present.
++ */
++ if (loc == NULL) {
++ ret = -EINVAL;
++ goto out;
++ }
++ posix_mdata_iatt_from_disk(&mdata_iatt,
++ (posix_mdata_disk_t *)value->data);
++ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
++ &mdata_iatt, &op_errno);
++ if (ret != 0) {
++ ret = -op_errno;
++ }
++ goto out;
+ } else {
+ sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags);
+ #ifdef GF_DARWIN_HOST_OS
+@@ -1810,8 +1831,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+ return 0;
+ }
+
+- ret = posix_handle_pair(filler->this, filler->real_path, k, v, XATTR_CREATE,
+- filler->stbuf);
++ ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
++ XATTR_CREATE, filler->stbuf);
+ if (ret < 0) {
+ errno = -ret;
+ return -1;
+@@ -1820,7 +1841,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+ }
+
+ int
+-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict)
++posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
++ dict_t *dict)
+ {
+ int ret = -1;
+
+@@ -1834,6 +1856,7 @@ posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict)
+ filler.this = this;
+ filler.real_path = path;
+ filler.stbuf = NULL;
++ filler.loc = loc;
+
+ ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler);
+
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index e0ea85b..a2a518f 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -429,22 +429,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ &frame->root->ctime, stbuf, valid);
+ }
+
+- if (valid & GF_SET_ATTR_CTIME && !priv->ctime) {
+- /*
+- * If ctime is not enabled, we have no means to associate an
+- * arbitrary ctime with the file, so as a fallback, we ignore
+- * the ctime payload and update the file ctime to current time
+- * (which is possible directly with the POSIX API).
+- */
+- op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL(real_path, NULL);
+- if (op_ret == -1) {
+- op_errno = errno;
+- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED,
+- "setattr (utimes) on %s "
+- "failed",
+- real_path);
+- goto out;
+- }
++ if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
++ posix_update_ctime_in_mdata(this, real_path, -1, loc->inode,
++ &frame->root->ctime, stbuf, valid);
+ }
+
+ if (!valid) {
+@@ -469,14 +456,6 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ goto out;
+ }
+
+- if (valid & GF_SET_ATTR_CTIME && priv->ctime) {
+- /*
+- * If we got ctime payload, we override
+- * the ctime of statpost with that.
+- */
+- statpost.ia_ctime = stbuf->ia_ctime;
+- statpost.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+- }
+ posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost);
+
+ if (xdata)
+@@ -592,6 +571,7 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt statpost = {
+ 0,
+ };
++ struct posix_private *priv = NULL;
+ struct posix_fd *pfd = NULL;
+ dict_t *xattr_rsp = NULL;
+ int32_t ret = -1;
+@@ -604,6 +584,9 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
++ priv = this->private;
++ VALIDATE_OR_GOTO(priv, out);
++
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+@@ -656,6 +639,11 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ &frame->root->ctime, stbuf, valid);
+ }
+
++ if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
++ posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode,
++ &frame->root->ctime, stbuf, valid);
++ }
++
+ if (!valid) {
+ op_ret = sys_fchown(pfd->fd, -1, -1);
+ if (op_ret == -1) {
+@@ -2578,7 +2566,7 @@ _handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+
+ filler = tmp;
+
+- return posix_handle_pair(filler->this, filler->real_path, k, v,
++ return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
+ filler->flags, filler->stbuf);
+ }
+
+@@ -2641,27 +2629,27 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
++ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
++ if (!real_path) {
++ op_ret = -1;
++ op_errno = ESTALE;
++ goto out;
++ }
++
+ ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt);
+ if (ret == 0) {
+ /* This is initiated by lookup when ctime feature is enabled to create
+ * "trusted.glusterfs.mdata" xattr if not present. These are the files
+ * which were created when ctime feature is disabled.
+ */
+- ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, &mdata_iatt,
+- &op_errno);
++ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
++ &mdata_iatt, &op_errno);
+ if (ret != 0) {
+ op_ret = -1;
+ }
+ goto out;
+ }
+
+- MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+- if (!real_path) {
+- op_ret = -1;
+- op_errno = ESTALE;
+- goto out;
+- }
+-
+ posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false);
+
+ op_ret = -1;
+@@ -2796,6 +2784,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ filler.real_path = real_path;
+ filler.this = this;
+ filler.stbuf = &preop;
++ filler.loc = loc;
+
+ #ifdef GF_DARWIN_HOST_OS
+ filler.flags = map_xattr_flags(flags);
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 532daa2..9efaf99 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -56,6 +56,19 @@ posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
+ out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
+ }
+
++void
++posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in)
++{
++ out->ia_ctime = be64toh(in->ctime.tv_sec);
++ out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec);
++
++ out->ia_mtime = be64toh(in->mtime.tv_sec);
++ out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec);
++
++ out->ia_atime = be64toh(in->atime.tv_sec);
++ out->ia_atime_nsec = be64toh(in->atime.tv_nsec);
++}
++
+ /* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
+ static int
+ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
+@@ -341,6 +354,7 @@ posix_compare_timespec(struct timespec *first, struct timespec *second)
+
+ int
+ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
++ const char *realpath,
+ struct mdata_iatt *mdata_iatt, int *op_errno)
+ {
+ posix_mdata_t *mdata = NULL;
+@@ -369,8 +383,8 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+ goto unlock;
+ }
+
+- ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
+- op_errno);
++ ret = posix_fetch_mdata_xattr(this, realpath, -1, inode,
++ (void *)mdata, op_errno);
+ if (ret == 0) {
+ /* Got mdata from disk. This is a race, another client
+ * has healed the xattr during lookup. So set it in inode
+@@ -412,7 +426,7 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+ }
+ }
+
+- ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
++ ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED,
+ "gfid: %s key:%s ", uuid_utoa(inode->gfid),
+@@ -445,7 +459,8 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, time, out);
+
+- if (update_utime && (!u_atime || !u_mtime)) {
++ if (update_utime && (flag->ctime && !time) && (flag->atime && !u_atime) &&
++ (flag->mtime && !u_mtime)) {
+ goto out;
+ }
+
+@@ -652,6 +667,48 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+ return;
+ }
+
++/* posix_update_ctime_in_mdata updates the posix_mdata_t when ctime needs
++ * to be modified
++ */
++void
++posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
++ inode_t *inode, struct timespec *ctime,
++ struct iatt *stbuf, int valid)
++{
++ int32_t ret = 0;
++#if defined(HAVE_UTIMENSAT)
++ struct timespec tv_ctime = {
++ 0,
++ };
++#else
++ struct timeval tv_ctime = {
++ 0,
++ };
++#endif
++ posix_mdata_flag_t flag = {
++ 0,
++ };
++
++ struct posix_private *priv = NULL;
++ priv = this->private;
++
++ if (inode && priv->ctime) {
++ tv_ctime.tv_sec = stbuf->ia_ctime;
++ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_ctime, stbuf->ia_ctime_nsec);
++ flag.ctime = 1;
++
++ ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv_ctime, NULL,
++ NULL, NULL, &flag, _gf_true);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
++ "posix set mdata atime failed on file:"
++ " %s gfid:%s",
++ real_path, uuid_utoa(inode->gfid));
++ }
++ }
++ return;
++}
++
+ static void
+ posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag)
+ {
+diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
+index c176699..63e8771 100644
+--- a/xlators/storage/posix/src/posix-metadata.h
++++ b/xlators/storage/posix/src/posix-metadata.h
+@@ -43,6 +43,10 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+ inode_t *inode, struct timespec *ctime,
+ struct iatt *stbuf, int valid);
+ void
++posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
++ inode_t *inode, struct timespec *ctime,
++ struct iatt *stbuf, int valid);
++void
+ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+ int fd, inode_t *inode, struct iatt *stbuf);
+ void
+@@ -56,7 +60,10 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+ int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
+ int
+ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
++ const char *realpath,
+ struct mdata_iatt *mdata_iatt,
+ int *op_errno);
++void
++posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in);
+
+ #endif /* _POSIX_METADATA_H */
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 64288a7..dd51062 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -339,7 +339,7 @@ dict_t *
+ posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd,
+ int fdnum, dict_t *xattr, struct iatt *buf);
+ int
+-posix_handle_pair(xlator_t *this, const char *real_path, char *key,
++posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
+ data_t *value, int flags, struct iatt *stbuf);
+ int
+ posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key,
+@@ -352,7 +352,8 @@ int
+ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc,
+ dict_t *xattr_req);
+ int
+-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict);
++posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
++ dict_t *dict);
+
+ int
+ posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd,
+--
+1.8.3.1
+
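The new posix_mdata_iatt_from_disk() in patch 0295 exists because trusted.glusterfs.mdata stores its three timestamps as big-endian 64-bit fields on disk, so healing the xattr onto another brick (via the xattrs_to_heal list) needs a byte-order-aware decode. A simplified sketch of that conversion; the struct layout here mirrors the patch's posix_mdata_disk_t in spirit but is an assumption, not the exact definition:

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed simplified mirror of the on-disk timestamp layout used
     * by trusted.glusterfs.mdata: big-endian 64-bit fields. */
    struct disk_timespec {
        uint64_t tv_sec;
        uint64_t tv_nsec;
    };

    struct disk_mdata {
        struct disk_timespec ctime;
        struct disk_timespec mtime;
        struct disk_timespec atime;
    };

    /* Convert one timestamp from disk to host byte order, as
     * posix_mdata_iatt_from_disk does for all three timestamps. */
    static void timespec_from_disk(const struct disk_timespec *in,
                                   uint64_t *sec, uint64_t *nsec)
    {
        *sec = be64toh(in->tv_sec);
        *nsec = be64toh(in->tv_nsec);
    }

    int main(void)
    {
        struct disk_timespec ts = {htobe64(1564400000), htobe64(42)};
        uint64_t sec = 0, nsec = 0;

        timespec_from_disk(&ts, &sec, &nsec);
        printf("%llu.%09llu\n", (unsigned long long)sec,
               (unsigned long long)nsec);
        return 0;
    }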
diff --git a/0296-glusterfind-pre-command-failure-on-a-modify.patch b/0296-glusterfind-pre-command-failure-on-a-modify.patch
new file mode 100644
index 0000000..9f43ff8
--- /dev/null
+++ b/0296-glusterfind-pre-command-failure-on-a-modify.patch
@@ -0,0 +1,62 @@
+From bfb64a0e685eb5755ceda6c54690335564e135c9 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Mon, 16 Sep 2019 14:22:34 +0530
+Subject: [PATCH 296/297] glusterfind: pre command failure on a modify
+
+Label: DOWNSTREAM ONLY
+
+Upstream uses gfid_to_all_paths_using_gfid2path instead of
+gfid_to_path_using_pgfid, so this issue is not hit upstream.
+
+Problem: On a modify, the pre command runs through the find function,
+where the number of arguments sent mismatches and causes an error on
+stderr. The mismatch is because both changelog and brickfind use
+find(), but only brickfind was handled.
+
+Fix: Handle the additional argument on the changelog side as well,
+receiving it as a dummy variable for changelog.
+
+Change-Id: I5eecdd993e477b68a0e486db2ad7e56ba94bbf02
+fixes: bz#1733970
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181095
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
+Reviewed-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ tools/glusterfind/src/changelog.py | 5 +++--
+ tools/glusterfind/src/utils.py | 2 +-
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
+index 40c381b..ef982db 100644
+--- a/tools/glusterfind/src/changelog.py
++++ b/tools/glusterfind/src/changelog.py
+@@ -141,8 +141,9 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
+
+ # Length of brick path, to remove from output path
+ brick_path_len = len(brick)
+-
+- def output_callback(path, inode):
++ # is_dir is a dummy variable to make it compatible with the find
++ # used in brickfind
++ def output_callback(path, inode, is_dir):
+ # For each path found, encodes it and updates path1
+ # Also updates converted flag in inodegfid table as 1
+ path = path.strip()
+diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
+index cc09903..e226c5a 100644
+--- a/tools/glusterfind/src/utils.py
++++ b/tools/glusterfind/src/utils.py
+@@ -70,7 +70,7 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
+ else:
+ filter_result = filter_func(full_path)
+ if filter_result is not None:
+- callback_func(full_path, filter_result)
++ callback_func(full_path, filter_result, None)
+ else:
+ filter_result = filter_func(full_path)
+ if filter_result is not None:
+--
+1.8.3.1
+
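For readers following the fix above, a minimal Python sketch of the callback arity issue: find() always invokes the callback with three arguments, so both the brickfind and changelog callbacks must accept all three. Only find, filter_func and callback_func correspond to the glusterfind code in the hunk; the directory walk is a stand-in.

    # find() calls the callback with (path, filter_result, extra); every
    # caller must therefore accept three arguments, even if one is unused.
    def find(path, callback_func, filter_func):
        for full_path in (path + "/a", path + "/b"):  # stand-in for a walk
            filter_result = filter_func(full_path)
            if filter_result is not None:
                callback_func(full_path, filter_result, None)

    def brickfind_cb(path, inode, is_dir):    # uses all three arguments
        print("brickfind:", path, inode, is_dir)

    def changelog_cb(path, inode, is_dir):    # is_dir is a dummy argument
        print("changelog:", path, inode)

    find("/bricks/b0", brickfind_cb, lambda p: 42)
    find("/bricks/b0", changelog_cb, lambda p: 42)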
diff --git a/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch b/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
new file mode 100644
index 0000000..47b5da0
--- /dev/null
+++ b/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
@@ -0,0 +1,89 @@
+From 37555b6c83d3a979033111a754ee1728dab254f5 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Wed, 18 Sep 2019 17:38:52 +0530
+Subject: [PATCH 297/297] rpmbuild: fixing the build errors with 2a905a8ae
+
+Label: DOWNSTREAM ONLY
+
+Added a Makefile inside extras/quota to fix the
+"No rule to make target" error for quota/log_accounting.sh
+
+Change-Id: Ia3f6b3fa21a0de7eb3bdb31b3d205139df412aca
+fixes: bz#1719171
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181326
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ configure.ac | 1 +
+ extras/Makefile.am | 6 +-----
+ extras/quota/Makefile.am | 8 ++++++++
+ 3 files changed, 10 insertions(+), 5 deletions(-)
+ create mode 100644 extras/quota/Makefile.am
+
+diff --git a/configure.ac b/configure.ac
+index f597b86..327733e 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -232,6 +232,7 @@ AC_CONFIG_FILES([Makefile
+ extras/hook-scripts/reset/pre/Makefile
+ extras/python/Makefile
+ extras/snap_scheduler/Makefile
++ extras/quota/Makefile
+ events/Makefile
+ events/src/Makefile
+ events/src/eventsapiconf.py
+diff --git a/extras/Makefile.am b/extras/Makefile.am
+index 8cbfda1..31ccdf5 100644
+--- a/extras/Makefile.am
++++ b/extras/Makefile.am
+@@ -12,7 +12,7 @@ EditorMode_DATA = glusterfs-mode.el glusterfs.vim
+
+ SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \
+ $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \
+- ganesha
++ ganesha quota
+
+ confdir = $(sysconfdir)/glusterfs
+ if WITH_SERVER
+@@ -30,14 +30,11 @@ endif
+
+ scriptsdir = $(datadir)/glusterfs/scripts
+ scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh
+-scripts_SCRIPTS += quota/log_accounting.sh
+ scripts_SCRIPTS += collect-system-stats.sh
+ scripts_SCRIPTS += identify-hangs.sh
+ if WITH_SERVER
+ scripts_SCRIPTS += post-upgrade-script-for-quota.sh \
+ pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh
+-scripts_SCRIPTS += quota/quota_fsck.py
+-scripts_SCRIPTS += quota/xattr_analysis.py
+ if USE_SYSTEMD
+ scripts_SCRIPTS += control-cpu-load.sh
+ scripts_SCRIPTS += control-mem.sh
+@@ -56,7 +53,6 @@ EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.co
+ stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \
+ control-cpu-load.sh control-mem.sh group-distributed-virt \
+ thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh \
+- quota/xattr_analysis.py quota/quota_fsck.py quota/log_accounting.sh \
+ collect-system-stats.sh identify-hangs.sh
+
+ if WITH_SERVER
+diff --git a/extras/quota/Makefile.am b/extras/quota/Makefile.am
+new file mode 100644
+index 0000000..cdb6be1
+--- /dev/null
++++ b/extras/quota/Makefile.am
+@@ -0,0 +1,8 @@
++scriptsdir = $(datadir)/glusterfs/scripts
++scripts_SCRIPTS = log_accounting.sh
++
++if WITH_SERVER
++scripts_SCRIPTS += xattr_analysis.py quota_fsck.py
++endif
++
++EXTRA_DIST = log_accounting.sh xattr_analysis.py quota_fsck.py
+--
+1.8.3.1
+
diff --git a/0298-geo-rep-fix-sub-command-during-worker-connection.patch b/0298-geo-rep-fix-sub-command-during-worker-connection.patch
new file mode 100644
index 0000000..72daa15
--- /dev/null
+++ b/0298-geo-rep-fix-sub-command-during-worker-connection.patch
@@ -0,0 +1,56 @@
+From f65f4739914cf317da7e5eaa3b5a06fe64f338c2 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Sat, 21 Sep 2019 01:07:30 +0530
+Subject: [PATCH 298/302] geo-rep : fix sub-command during worker connection
+
+Problem:
+
+A geo-rep session for a non-root user goes faulty.
+
+Solution:
+
+During worker start we do not construct the slave url and instead use
+'args.resource_remote', which is basically just the slave hostname.
+This works for a root session but fails in a non-root session during
+the ssh command.
+Using the slave url solves this issue.
+
+Backport of:
+ >fixes: bz#1753928
+ >Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+Upstream patch:
+ https://review.gluster.org/#/c/glusterfs/+/23465/
+
+BUG: 1754407
+Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181895
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/subcmds.py | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
+index 8de7db2..f8515f2 100644
+--- a/geo-replication/syncdaemon/subcmds.py
++++ b/geo-replication/syncdaemon/subcmds.py
+@@ -73,8 +73,11 @@ def subcmd_worker(args):
+ Popen.init_errhandler()
+ fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ local = GLUSTER("localhost", args.master)
+- slavevol = args.slave.split("::")[-1]
+- slavehost = args.resource_remote
++ slave_url, slavevol = args.slave.split("::")
++ if "@" not in slave_url:
++ slavehost = args.resource_remote
++ else:
++ slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
+ remote = SSH(slavehost, slavevol)
+ remote.connect_remote()
+ local.connect()
+--
+1.8.3.1
+
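The worker-side change above reduces to a small slave-host selection rule; a hedged Python sketch follows, with pick_slavehost as a hypothetical helper (only the split and '@' logic mirrors subcmds.py):

    def pick_slavehost(slave, resource_remote):
        # slave looks like 'host::vol' (root) or 'user@host::vol' (non-root)
        slave_url, slavevol = slave.split("::")
        if "@" not in slave_url:
            slavehost = resource_remote          # root: bare hostname works
        else:
            # non-root: keep the user part so the ssh command connects
            # as the geo-rep account instead of root
            slavehost = "%s@%s" % (slave_url.split("@")[0], resource_remote)
        return slavehost, slavevol

    assert pick_slavehost("s1::gv0", "s1") == ("s1", "gv0")
    assert pick_slavehost("geoacct@s1::gv0", "s1") == ("geoacct@s1", "gv0")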
diff --git a/0299-geo-rep-performance-improvement-while-syncing-rename.patch b/0299-geo-rep-performance-improvement-while-syncing-rename.patch
new file mode 100644
index 0000000..9dea8cc
--- /dev/null
+++ b/0299-geo-rep-performance-improvement-while-syncing-rename.patch
@@ -0,0 +1,156 @@
+From f293f7ac2f75c58d81da1229b484eb530b7083b5 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Fri, 20 Sep 2019 09:39:12 +0530
+Subject: [PATCH 299/302] geo-rep: performance improvement while syncing
+ renames with existing gfid
+
+Problem:
+The fix for bug [1] addresses a data inconsistency when handling RENAME
+with an existing destination. That fix requires some performance tuning,
+since the issue occurs in rename-heavy workloads.
+
+Solution:
+If the distribution count of the master volume is one, do not verify the
+ops on the master and go ahead with the rename.
+
+The performance improvement from this patch can only be observed if the
+master volume has a distribution count of one.
+
+[1]. https://bugzilla.redhat.com/show_bug.cgi?id=1694820
+Backport of:
+
+ >fixes: bz#1753857
+ >Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+Upstream Patch:
+ https://review.gluster.org/#/c/glusterfs/+/23459/
+
+BUG: 1726000
+Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181893
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/gsyncd.conf.in | 5 +++++
+ geo-replication/syncdaemon/gsyncd.py | 2 ++
+ geo-replication/syncdaemon/monitor.py | 2 ++
+ geo-replication/syncdaemon/resource.py | 13 +++++++++++--
+ geo-replication/syncdaemon/syncdutils.py | 11 +++++++++++
+ 5 files changed, 31 insertions(+), 2 deletions(-)
+
+diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
+index 5ebd57a..9155cd8 100644
+--- a/geo-replication/gsyncd.conf.in
++++ b/geo-replication/gsyncd.conf.in
+@@ -23,6 +23,11 @@ configurable=false
+ type=int
+ value=1
+
++[master-distribution-count]
++configurable=false
++type=int
++value=1
++
+ [glusterd-workdir]
+ value = @GLUSTERD_WORKDIR@
+
+diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
+index a4c6f32..6ae5269 100644
+--- a/geo-replication/syncdaemon/gsyncd.py
++++ b/geo-replication/syncdaemon/gsyncd.py
+@@ -134,6 +134,8 @@ def main():
+ help="Directory where Gluster binaries exist on slave")
+ p.add_argument("--slave-access-mount", action="store_true",
+ help="Do not lazy umount the slave volume")
++ p.add_argument("--master-dist-count", type=int,
++ help="Master Distribution count")
+
+ # Status
+ p = sp.add_parser("status")
+diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
+index 234f3f1..236afe7 100644
+--- a/geo-replication/syncdaemon/monitor.py
++++ b/geo-replication/syncdaemon/monitor.py
+@@ -37,6 +37,8 @@ def get_subvol_num(brick_idx, vol, hot):
+ tier = vol.is_tier()
+ disperse_count = vol.disperse_count(tier, hot)
+ replica_count = vol.replica_count(tier, hot)
++ distribute_count = vol.distribution_count(tier, hot)
++ gconf.setconfig("master-distribution-count", distribute_count)
+
+ if (tier and not hot):
+ brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index b16db60..189d8a1 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -377,6 +377,7 @@ class Server(object):
+ def entry_ops(cls, entries):
+ pfx = gauxpfx()
+ logging.debug('entries: %s' % repr(entries))
++ dist_count = rconf.args.master_dist_count
+
+ def entry_purge(op, entry, gfid, e, uid, gid):
+ # This is an extremely racy code and needs to be fixed ASAP.
+@@ -686,9 +687,15 @@ class Server(object):
+ raise
+ else:
+ raise
+- elif not matching_disk_gfid(gfid, en):
++ elif not matching_disk_gfid(gfid, en) and dist_count > 1:
+ collect_failure(e, EEXIST, uid, gid, True)
+ else:
++ # We are here, which means matching_disk_gfid for
++ # both source and destination has returned false
++ # and the distribution count for the master vol is
++ # greater than one. That means both the source and
++ # destination exist and are not hardlinks,
++ # so we are safe to go ahead with the rename here.
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
+ if blob:
+@@ -1409,7 +1416,9 @@ class SSH(object):
+ '--slave-gluster-log-level',
+ gconf.get("slave-gluster-log-level"),
+ '--slave-gluster-command-dir',
+- gconf.get("slave-gluster-command-dir")]
++ gconf.get("slave-gluster-command-dir"),
++ '--master-dist-count',
++ str(gconf.get("master-distribution-count"))]
+
+ if gconf.get("slave-access-mount"):
+ args_to_slave.append('--slave-access-mount')
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index 2ee10ac..aadaebd 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -926,6 +926,14 @@ class Volinfo(object):
+ else:
+ return int(self.get('disperseCount')[0].text)
+
++ def distribution_count(self, tier, hot):
++ if (tier and hot):
++ return int(self.get('hotBricks/hotdistCount')[0].text)
++ elif (tier and not hot):
++ return int(self.get('coldBricks/colddistCount')[0].text)
++ else:
++ return int(self.get('distCount')[0].text)
++
+ @property
+ @memoize
+ def hot_bricks(self):
+@@ -994,6 +1002,9 @@ class VolinfoFromGconf(object):
+ def disperse_count(self, tier, hot):
+ return gconf.get("master-disperse-count")
+
++ def distribution_count(self, tier, hot):
++ return gconf.get("master-distribution-count")
++
+ @property
+ @memoize
+ def hot_bricks(self):
+--
+1.8.3.1
+
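The rename fast path above can be summarised in a few lines; a sketch under the assumption that a distribute count of one means a single hashed subvolume, so the extra gfid verification is unnecessary (handle_rename and its return strings are illustrative, not the dht API):

    def handle_rename(dist_count, gfid_matches_on_disk):
        if not gfid_matches_on_disk and dist_count > 1:
            # wider volumes: stay conservative and report EEXIST
            return "collect_failure(EEXIST)"
        # dist_count == 1: safe to rename without further verification
        return "rename_with_disk_gfid_confirmation"

    print(handle_rename(1, False))   # fast path taken
    print(handle_rename(4, False))   # still verified on wider volumes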
diff --git a/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch b/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
new file mode 100644
index 0000000..62bac41
--- /dev/null
+++ b/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
@@ -0,0 +1,70 @@
+From 039a3f81209706261fc809eac94564e81a3377da Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Wed, 25 Sep 2019 14:55:19 +0530
+Subject: [PATCH 300/302] cli: remove the warning displayed when remove brick
+ start issued
+
+remove-brick start command displays the below error:
+
+It is recommended that remove-brick be run with cluster.force-migration
+option disabled to prevent possible data corruption. Doing so will ensure
+that files that receive writes during migration will not be migrated and
+will need to be manually copied after the remove-brick commit operation.
+Please check the value of the option and update accordingly.
+Do you want to continue with your current cluster.force-migration settings? (y/n)
+
+As we are not qualifying cluster.force-migration for 3.5.0,
+we should not display this message. So, removing it.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1755227
+Change-Id: I409f2059d43c5e867788f19d2ccb8d6d839520f7
+fixes: bz#1755227
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182009
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 2 --
+ cli/src/cli-cmd-volume.c | 11 -----------
+ 2 files changed, 13 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 92ceb8e..4456a7b 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -2101,8 +2101,6 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+ wordcount--;
+ if (!strcmp("start", w)) {
+ command = GF_OP_CMD_START;
+- if (question)
+- *question = 1;
+ } else if (!strcmp("commit", w)) {
+ command = GF_OP_CMD_COMMIT;
+ } else if (!strcmp("stop", w)) {
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index a42e663..6b958bd 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -2088,17 +2088,6 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
+ "Remove-brick force will not migrate files from the "
+ "removed bricks, so they will no longer be available"
+ " on the volume.\nDo you want to continue?";
+- } else if (command == GF_OP_CMD_START) {
+- question =
+- "It is recommended that remove-brick be run with"
+- " cluster.force-migration option disabled to prevent"
+- " possible data corruption. Doing so will ensure that"
+- " files that receive writes during migration will not"
+- " be migrated and will need to be manually copied"
+- " after the remove-brick commit operation. Please"
+- " check the value of the option and update accordingly."
+- " \nDo you want to continue with your current"
+- " cluster.force-migration settings?";
+ }
+
+ if (!brick_count) {
+--
+1.8.3.1
+
diff --git a/0301-posix-Brick-is-going-down-unexpectedly.patch b/0301-posix-Brick-is-going-down-unexpectedly.patch
new file mode 100644
index 0000000..270a0d7
--- /dev/null
+++ b/0301-posix-Brick-is-going-down-unexpectedly.patch
@@ -0,0 +1,61 @@
+From 913a0dc8f1eaa2fb18a6ebd6fcf66f46b48039f1 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Wed, 18 Sep 2019 19:11:33 +0530
+Subject: [PATCH 301/302] posix: Brick is going down unexpectedly
+
+Problem: In a brick_mux environment, while multiple volumes are
+ created (1-1000), a brick sometimes goes down due to a
+ health_check thread failure
+
+Solution: Ignore the EAGAIN error in the health_check thread code to
+ avoid the issue
+
+> Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
+> Fixes: bz#1751907
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit c4d926900dc36f71c04b3f65ceca5150ce0e8c81)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23437/)
+
+Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
+BUG: 1731826
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182106
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-helpers.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 6a1a35c..35dd3b6 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2108,14 +2108,20 @@ out:
+ if (fd != -1) {
+ sys_close(fd);
+ }
++
+ if (ret && file_path[0]) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED,
+ "%s() on %s returned ret is %d error is %s", op, file_path, ret,
+ ret != -1 ? strerror(ret) : strerror(op_errno));
+- gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
+- "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
+- file_path, strerror(op_errno), priv->hostname, priv->base_path,
+- timeout);
++
++ if ((op_errno == EAGAIN) || (ret == EAGAIN)) {
++ ret = 0;
++ } else {
++ gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
++ "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
++ file_path, strerror(op_errno), priv->hostname,
++ priv->base_path, timeout);
++ }
+ }
+ return ret;
+ }
+--
+1.8.3.1
+
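The health-check relaxation above treats EAGAIN as transient; a minimal Python sketch of that decision (the real check lives in C in posix-helpers.c, this only shows the shape):

    import errno

    def health_check_result(ret, op_errno):
        if ret and op_errno == errno.EAGAIN:
            # transient condition: warn, but do not raise the
            # EVENT_POSIX_HEALTH_CHECK_FAILED event that kills the brick
            return 0
        return ret

    assert health_check_result(-1, errno.EAGAIN) == 0   # ignored
    assert health_check_result(-1, errno.EIO) == -1     # still fatal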
diff --git a/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch b/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
new file mode 100644
index 0000000..7972767
--- /dev/null
+++ b/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
@@ -0,0 +1,67 @@
+From fb1d503791c874296afab0cd7be59b6865340d72 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <jahernan@redhat.com>
+Date: Wed, 25 Sep 2019 11:56:35 +0200
+Subject: [PATCH 302/302] cluster/ec: prevent filling shd log with "table not
+ found" messages
+
+When the self-heal daemon receives an inodelk contention notification, it
+tries to locate the related inode using inode_find() and the inode table
+owned by the top-most xlator, which in this case doesn't have any inode
+table. This causes many messages to be logged by the inode_find() function
+because the inode table passed is NULL.
+
+This patch prevents this by making sure the inode table is not NULL before
+calling inode_find().
+
+Upstream patch:
+> Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23481
+> Fixes: bz#1755344
+> Signed-off-by: Xavi Hernandez <jahernan@redhat.com>
+
+Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
+BUG: 1754790
+Signed-off-by: Xavi Hernandez <jahernan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182207
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 19094c4..3f31c74 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -463,6 +463,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+ struct gf_upcall_cache_invalidation *ci = NULL;
+ struct gf_upcall_inodelk_contention *lc = NULL;
+ inode_t *inode;
++ inode_table_t *table;
+
+ switch (upcall->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+@@ -476,8 +477,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+ /* The lock is not owned by EC, ignore it. */
+ return _gf_true;
+ }
+- inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
+- upcall->gfid);
++ table = ((xlator_t *)ec->xl->graph->top)->itable;
++ if (table == NULL) {
++ /* Self-heal daemon doesn't have an inode table on the top
++ * xlator because it doesn't need it. In this case we should
++ * use the inode table managed by EC itself where all inodes
++ * being healed should be present. However self-heal doesn't
++ * use eager-locking and inodelk's are already released as
++ * soon as possible. In this case we can safely ignore these
++ * notifications. */
++ return _gf_false;
++ }
++ inode = inode_find(table, upcall->gfid);
+ /* If inode is not found, it means that it's already released,
+ * so we can ignore it. Probably it has been released and
+ * destroyed while the contention notification was being sent.
+--
+1.8.3.1
+
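The guard added above is a plain NULL check before the lookup; sketched in Python with hypothetical stand-ins for the C objects:

    def ec_upcall_inodelk(table, gfid, inode_find):
        if table is None:
            # self-heal daemon: no inode table on the top xlator, and
            # inodelk's are released early, so the notification is ignored
            return False
        inode = inode_find(table, gfid)
        return inode is not None

    assert ec_upcall_inodelk(None, "gfid-1", lambda t, g: None) is False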
diff --git a/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch b/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch
new file mode 100644
index 0000000..8641353
--- /dev/null
+++ b/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch
@@ -0,0 +1,45 @@
+From ae4f538065d26a277e38810c6eef18c0312cd1f3 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Thu, 26 Sep 2019 17:52:30 +0530
+Subject: [PATCH 303/304] posix: heketidbstorage bricks go down during PVC
+ creation
+
+Problem: In an OCS environment heketidbstorage is detached because
+ the health_check thread fails. Sometimes aio_write does not
+ finish successfully within the default health-check-timeout
+ limit and the brick is detached.
+
+Solution: To avoid the issue, increase the default timeout to 20s
+
+> Change-Id: Idff283d5713da571f9d20a6b296274f69c3e5b7b
+> Fixes: bz#1755900
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit c6df9e962483bac5bfcd8916318b19040387ce81)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23495/)
+
+Change-Id: Idff283d5713da571f9d20a6b296274f69c3e5b7b
+BUG: 1752713
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182387
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/storage/posix/src/posix-common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index 69857d9..2cb58ba 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -1257,7 +1257,7 @@ struct volume_options posix_options[] = {
+ {.key = {"health-check-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+- .default_value = "10",
++ .default_value = "20",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description =
+ "Interval in seconds to wait aio_write finish for health check, "
+--
+1.8.3.1
+
diff --git a/0304-cluster-dht-Correct-fd-processing-loop.patch b/0304-cluster-dht-Correct-fd-processing-loop.patch
new file mode 100644
index 0000000..5f16e0a
--- /dev/null
+++ b/0304-cluster-dht-Correct-fd-processing-loop.patch
@@ -0,0 +1,194 @@
+From ad233c1b3abdfe2bdfd1eacc83b5f84b7afa6b46 Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Tue, 1 Oct 2019 17:37:15 +0530
+Subject: [PATCH 304/304] cluster/dht: Correct fd processing loop
+
+The fd processing loops in the
+dht_migration_complete_check_task and the
+dht_rebalance_inprogress_task functions were unsafe
+and could cause an open to be sent on an already freed
+fd. This has been fixed.
+
+> Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540
+> Fixes: bz#1757399
+> Signed-off-by: N Balachandran <nbalacha@redhat.com>
+> (Cherry picked from commit 9b15867070b0cc241ab165886292ecffc3bc0aed)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23506/)
+
+Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540
+BUG: 1756325
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182826
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-helper.c | 84 ++++++++++++++++++++++++++----------
+ 1 file changed, 62 insertions(+), 22 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
+index 4c57e0d..1e9fee0 100644
+--- a/xlators/cluster/dht/src/dht-helper.c
++++ b/xlators/cluster/dht/src/dht-helper.c
+@@ -1261,6 +1261,7 @@ dht_migration_complete_check_task(void *data)
+ fd_t *tmp = NULL;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
++ gf_boolean_t skip_open = _gf_false;
+ int open_failed = 0;
+
+ this = THIS;
+@@ -1399,24 +1400,34 @@ dht_migration_complete_check_task(void *data)
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
+- {
+- if (fd_is_anonymous(iter_fd))
+- continue;
+-
+- if (dht_fd_open_on_dst(this, iter_fd, dst_node))
+- continue;
+-
++ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
++ while (&iter_fd->inode_list != (&inode->fd_list)) {
++ if (fd_is_anonymous(iter_fd) ||
++ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
++ if (!tmp) {
++ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
++ inode_list);
++ continue;
++ }
++ skip_open = _gf_true;
++ }
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
+ */
+- __fd_ref(iter_fd);
++ fd_ref(iter_fd);
+
+ UNLOCK(&inode->lock);
+
++ if (tmp) {
++ fd_unref(tmp);
++ tmp = NULL;
++ }
++ if (skip_open)
++ goto next;
++
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
+@@ -1438,9 +1449,11 @@ dht_migration_complete_check_task(void *data)
+ dht_fd_ctx_set(this, iter_fd, dst_node);
+ }
+
+- fd_unref(iter_fd);
+-
++ next:
+ LOCK(&inode->lock);
++ skip_open = _gf_false;
++ tmp = iter_fd;
++ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+ }
+
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+@@ -1453,6 +1466,10 @@ dht_migration_complete_check_task(void *data)
+
+ unlock:
+ UNLOCK(&inode->lock);
++ if (tmp) {
++ fd_unref(tmp);
++ tmp = NULL;
++ }
+
+ out:
+ if (dict) {
+@@ -1534,6 +1551,7 @@ dht_rebalance_inprogress_task(void *data)
+ int open_failed = 0;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
++ gf_boolean_t skip_open = _gf_false;
+
+ this = THIS;
+ frame = data;
+@@ -1654,24 +1672,40 @@ dht_rebalance_inprogress_task(void *data)
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
+- {
+- if (fd_is_anonymous(iter_fd))
+- continue;
+-
+- if (dht_fd_open_on_dst(this, iter_fd, dst_node))
+- continue;
+-
++ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
++ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
+ */
+- __fd_ref(iter_fd);
+
++ if (fd_is_anonymous(iter_fd) ||
++ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
++ if (!tmp) {
++ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
++ inode_list);
++ continue;
++ }
++ skip_open = _gf_true;
++ }
++
++ /* Yes, this is ugly, but there isn't a cleaner way to do this.
++ * The fd_ref is an atomic increment, so it is not too bad. We want
++ * to reduce the number of inode locks and unlocks.
++ */
++
++ fd_ref(iter_fd);
+ UNLOCK(&inode->lock);
+
++ if (tmp) {
++ fd_unref(tmp);
++ tmp = NULL;
++ }
++ if (skip_open)
++ goto next;
++
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
+@@ -1692,9 +1726,11 @@ dht_rebalance_inprogress_task(void *data)
+ dht_fd_ctx_set(this, iter_fd, dst_node);
+ }
+
+- fd_unref(iter_fd);
+-
++ next:
+ LOCK(&inode->lock);
++ skip_open = _gf_false;
++ tmp = iter_fd;
++ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+ }
+
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+@@ -1702,6 +1738,10 @@ dht_rebalance_inprogress_task(void *data)
+ unlock:
+ UNLOCK(&inode->lock);
+
++ if (tmp) {
++ fd_unref(tmp);
++ tmp = NULL;
++ }
+ if (open_failed) {
+ ret = -1;
+ goto out;
+--
+1.8.3.1
+
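The safe loop above follows a take-a-reference-before-unlocking pattern; a hedged Python analogue with an explicit refcount field standing in for fd_ref()/fd_unref() (the real code walks an intrusive linked list of fd_t objects):

    import threading

    class Fd:
        def __init__(self, name):
            self.name = name
            self.refcount = 1

    def walk_fds(fds, lock, reopen):
        tmp = None
        with lock:
            i = 0
            while i < len(fds):
                fd = fds[i]
                fd.refcount += 1        # fd_ref() before dropping the lock
                lock.release()
                if tmp:
                    tmp.refcount -= 1   # fd_unref() outside the lock
                reopen(fd)              # may block; fd cannot vanish now
                lock.acquire()
                tmp = fd                # remembered for the next unref
                i += 1
        if tmp:
            tmp.refcount -= 1           # release the last reference

    lock = threading.Lock()
    walk_fds([Fd("fd0"), Fd("fd1")], lock,
             lambda fd: print("reopen", fd.name))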
diff --git a/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch b/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch
new file mode 100644
index 0000000..a1e77c6
--- /dev/null
+++ b/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch
@@ -0,0 +1,56 @@
+From 90e52f3b44da0ed05e35ebd474e284d45794b0d6 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Thu, 10 Oct 2019 20:40:49 +0530
+Subject: [PATCH 305/307] glusterd: rebalance start should fail when quorum is
+ not met
+
+Rebalance start should not succeed if quorum is not met.
+This patch adds a condition to check whether quorum is met
+in the pre-validation stage.
+
+> fixes: bz#1760467
+> Change-Id: Ic7d0d08f69e4bc6d5e7abae713ec1881531c8ad4
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/23536/
+BUG: 1760261
+Change-Id: Ic7d0d08f69e4bc6d5e7abae713ec1881531c8ad4
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183146
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/quorum-validation.t | 2 ++
+ xlators/mgmt/glusterd/src/glusterd-mgmt.c | 3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t
+index ff46729..3cc3351 100644
+--- a/tests/bugs/glusterd/quorum-validation.t
++++ b/tests/bugs/glusterd/quorum-validation.t
+@@ -34,6 +34,8 @@ TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2
+ TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start
+ TEST ! $CLI_1 volume set $V0 barrier enable
+
++#quorum is not met, rebalance/profile start should fail
++TEST ! $CLI_1 volume rebalance $V0 start
+ TEST ! $CLI_1 volume profile $V0 start
+
+ #bug-1690753 - Volume stop when quorum not met is successful
+diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+index ec78913..a4915f3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+@@ -1059,7 +1059,8 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
+ goto out;
+ }
+
+- if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME) {
++ if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME ||
++ op == GD_OP_REBALANCE) {
+ ret = glusterd_validate_quorum(this, op, req_dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+--
+1.8.3.1
+
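The pre-validation change above simply widens the set of quorum-gated operations; a small sketch (names are hypothetical Python stand-ins for the glusterd enums):

    QUORUM_CHECKED_OPS = {"GD_OP_PROFILE_VOLUME", "GD_OP_STOP_VOLUME",
                          "GD_OP_REBALANCE"}

    def pre_validate(op, quorum_met):
        if op in QUORUM_CHECKED_OPS and not quorum_met:
            raise RuntimeError("Server quorum not met. Rejecting operation")
        return "validated"

    print(pre_validate("GD_OP_REBALANCE", quorum_met=True))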
diff --git a/0306-cli-fix-distCount-value.patch b/0306-cli-fix-distCount-value.patch
new file mode 100644
index 0000000..0e8b9f2
--- /dev/null
+++ b/0306-cli-fix-distCount-value.patch
@@ -0,0 +1,43 @@
+From 167980565e1ab56989b25fe6aa0203aeb7970c8b Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Sun, 6 Oct 2019 19:05:28 +0530
+Subject: [PATCH 306/307] cli: fix distCount value
+
+gluster volume info --xml is displaying a wrong distCount
+value. This patch addresses it.
+
+> fixes: bz#1758878
+> Change-Id: I64081597e06018361e6524587b433b0c4b2a0260
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/23521/
+
+BUG: 1758618
+Change-Id: I64081597e06018361e6524587b433b0c4b2a0260
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183147
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-xml-output.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
+index b417bb8..006e2fb 100644
+--- a/cli/src/cli-xml-output.c
++++ b/cli/src/cli-xml-output.c
+@@ -2548,8 +2548,9 @@ cli_xml_output_vol_info(cli_local_t *local, dict_t *dict)
+ ret = dict_get_int32(dict, key, &dist_count);
+ if (ret)
+ goto out;
+- ret = xmlTextWriterWriteFormatElement(
+- local->writer, (xmlChar *)"distCount", "%d", dist_count);
++ ret = xmlTextWriterWriteFormatElement(local->writer,
++ (xmlChar *)"distCount", "%d",
++ (brick_count / dist_count));
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.stripe_count", i);
+--
+1.8.3.1
+
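The one-line fix above is arithmetic: assuming the dict's dist_count holds the number of bricks per distribute subvolume (inferred from the hunk), the XML should report brick_count / dist_count subvolumes:

    def xml_dist_count(brick_count, dist_count):
        # number of distribute subvolumes, not bricks per subvolume
        return brick_count // dist_count

    # 2 x 3 distributed-replicate volume: 6 bricks, 3 bricks per subvol
    assert xml_dist_count(6, 3) == 2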
diff --git a/0307-ssl-fix-RHEL8-regression-failure.patch b/0307-ssl-fix-RHEL8-regression-failure.patch
new file mode 100644
index 0000000..7a85b50
--- /dev/null
+++ b/0307-ssl-fix-RHEL8-regression-failure.patch
@@ -0,0 +1,42 @@
+From be9695391f39fe6eb1d157f6bfd018116d1ee42b Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Mon, 30 Sep 2019 13:14:06 +0530
+Subject: [PATCH 307/307] ssl: fix RHEL8 regression failure
+
+This test is failing with
+"SSL routines:SSL_CTX_use_certificate:ee key too small"
+in RHEL8. This change is made according to
+https://access.redhat.com/solutions/4157431
+
+> updates: bz#1756900
+> Change-Id: Ib436372c3bd94bcf7324976337add7da4088b3d5
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/23501/
+
+BUG: 1704562
+Change-Id: Ib436372c3bd94bcf7324976337add7da4088b3d5
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183148
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/cli/bug-1320388.t | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
+index f5ffcbe..8e5d77b 100755
+--- a/tests/bugs/cli/bug-1320388.t
++++ b/tests/bugs/cli/bug-1320388.t
+@@ -21,7 +21,7 @@ cleanup;
+ rm -f $SSL_BASE/glusterfs.*
+ touch "$GLUSTERD_WORKDIR"/secure-access
+
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 3072
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+
+--
+1.8.3.1
+
diff --git a/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch b/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch
new file mode 100644
index 0000000..adbeb43
--- /dev/null
+++ b/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch
@@ -0,0 +1,347 @@
+From 27f799563c1c2c1986662ed4a3a83d834c04fd98 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Mon, 14 Oct 2019 15:42:31 +0530
+Subject: [PATCH 308/308] dht: Rebalance causing IO Error - File descriptor in
+ bad state
+
+Problem: When a file is migrated, dht attempts to re-open all open
+ fds on the new cached subvol. Earlier, if dht had not opened the fd,
+ the client xlator would be unable to find the remote fd and would
+ fall back to using an anon fd for the fop. That behavior changed with
+ https://review.gluster.org/#/c/glusterfs/+/15804, causing fops to fail
+ with EBADFD if the fd was not available on the cached subvol.
+ The client xlator returns EBADFD if the remote fd is not found but
+ dht only checks for EBADF before re-opening fds on the new cached subvol.
+
+Solution: Handle EBADFD in the dht code path to avoid the issue
+
+> Change-Id: I43c51995cdd48d05b12e4b2889c8dbe2bb2a72d8
+> Fixes: bz#1758579
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit 9314a9fbf487614c736cf6c4c1b93078d37bb9df)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23518/)
+
+Change-Id: I43c51995cdd48d05b12e4b2889c8dbe2bb2a72d8
+BUG: 1758432
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183370
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 27 +++++++++++++++++---
+ xlators/cluster/dht/src/dht-common.h | 19 ++++++++++++++
+ xlators/cluster/dht/src/dht-helper.c | 29 +++++++++++++++++++++
+ xlators/cluster/dht/src/dht-inode-read.c | 42 +++++++++++++++++++++++++++----
+ xlators/cluster/dht/src/dht-inode-write.c | 16 ++++++------
+ 5 files changed, 116 insertions(+), 17 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 99cccd6..37952ba 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -53,6 +53,17 @@ dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+ int
+ dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
++/* Check the xdata to make sure EBADF has been set by client xlator */
++int32_t
++dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
++{
++ if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
++ !(local->fd_checked)) {
++ return 1;
++ }
++ return 0;
++}
++
+ /* Sets the blocks and size values to fixed values. This is to be called
+ * only for dirs. The caller is responsible for checking the type
+ */
+@@ -4529,6 +4540,7 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
++ int ret = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+@@ -4537,6 +4549,13 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ conf = this->private;
+ local = frame->local;
+
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++ ret = dht_check_and_open_fd_on_subvol(this, frame);
++ if (ret)
++ goto err;
++ return 0;
++ }
++
+ LOCK(&frame->lock);
+ {
+ if (!xattr || (op_ret == -1)) {
+@@ -5204,8 +5223,8 @@ dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ local->op_errno = op_errno;
+
+- if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 &&
+- (op_errno == EBADF) && !(local->fd_checked)) {
++ if ((local->fop == GF_FOP_FSETXATTR) &&
++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -5929,8 +5948,8 @@ dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ local->op_errno = op_errno;
+
+- if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) &&
+- (op_errno == EBADF) && !(local->fd_checked)) {
++ if ((local->fop == GF_FOP_FREMOVEXATTR) &&
++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index c516271..ce11f02 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -1230,6 +1230,22 @@ dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+
+ int
++dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata);
++
++int
++dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++ int op_errno, dict_t *xattr, dict_t *xdata);
++
++int
++dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *dict,
++ dict_t *xdata);
++int
++dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
++
++int
+ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
+
+ void
+@@ -1525,4 +1541,7 @@ int
+ dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
++int32_t
++dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
++
+ #endif /* _DHT_H */
+diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
+index 1e9fee0..4f7370d 100644
+--- a/xlators/cluster/dht/src/dht-helper.c
++++ b/xlators/cluster/dht/src/dht-helper.c
+@@ -366,6 +366,23 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
+
+ break;
+
++ case GF_FOP_FXATTROP:
++ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
++ subvol->fops->fxattrop, local->fd,
++ local->rebalance.flags, local->rebalance.xattr,
++ local->xattr_req);
++ break;
++
++ case GF_FOP_FGETXATTR:
++ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
++ local->fd, local->key, NULL);
++ break;
++
++ case GF_FOP_FINODELK:
++ STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
++ local->key, local->fd, local->rebalance.lock_cmd,
++ &local->rebalance.flock, local->xattr_req);
++ break;
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
+ "Unknown FOP on fd (%p) on file %s @ %s", fd,
+@@ -429,6 +446,18 @@ handle_err:
+ DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+ break;
+
++ case GF_FOP_FXATTROP:
++ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
++ break;
++
++ case GF_FOP_FGETXATTR:
++ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
++ break;
++
++ case GF_FOP_FINODELK:
++ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
++ break;
++
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
+ "Unknown FOP on fd (%p) on file %s @ %s", fd,
+diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
+index cacfe35..0c209a5 100644
+--- a/xlators/cluster/dht/src/dht-inode-read.c
++++ b/xlators/cluster/dht/src/dht-inode-read.c
+@@ -162,8 +162,8 @@ dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ local = frame->local;
+ prev = cookie;
+
+- if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) && (op_errno == EBADF) &&
+- !(local->fd_checked)) {
++ if ((local->fop == GF_FOP_FSTAT) &&
++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -431,7 +431,7 @@ dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ if (local->call_cnt != 1)
+ goto out;
+
+- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -703,7 +703,7 @@ dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ if (local->call_cnt != 1)
+ goto out;
+
+- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -820,7 +820,7 @@ dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+
+ local->op_errno = op_errno;
+
+- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -1223,6 +1223,13 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ if (local->call_cnt != 1)
+ goto out;
+
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++ ret = dht_check_and_open_fd_on_subvol(this, frame);
++ if (ret)
++ goto out;
++ return 0;
++ }
++
+ ret = dht_read_iatt_from_xdata(this, xdata, &stbuf);
+
+ if ((!op_ret) && (ret)) {
+@@ -1535,8 +1542,26 @@ dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+ {
++ dht_local_t *local = NULL;
++ int ret = 0;
++
++ GF_VALIDATE_OR_GOTO("dht", frame, out);
++ GF_VALIDATE_OR_GOTO("dht", this, out);
++ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
++
++ local = frame->local;
++
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++ ret = dht_check_and_open_fd_on_subvol(this, frame);
++ if (ret)
++ goto out;
++ return 0;
++ }
++
++out:
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
++
+ return 0;
+ }
+
+@@ -1574,6 +1599,13 @@ dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ if (ret)
+ goto err;
+ */
++ local->rebalance.flock = *lock;
++ local->rebalance.lock_cmd = cmd;
++ local->key = gf_strdup(volume);
++
++ if (xdata)
++ local->xattr_req = dict_ref(xdata);
++
+ STACK_WIND(frame, dht_finodelk_cbk, lock_subvol,
+ lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata);
+
+diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
+index b26b705..b6b349d 100644
+--- a/xlators/cluster/dht/src/dht-inode-write.c
++++ b/xlators/cluster/dht/src/dht-inode-write.c
+@@ -49,7 +49,7 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ * We only check once as this could be a valid bad fd error.
+ */
+
+- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -262,8 +262,8 @@ dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ * We only check once as this could actually be a valid error.
+ */
+
+- if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) &&
+- ((op_errno == EBADF) || (op_errno == EINVAL)) && !(local->fd_checked)) {
++ if ((local->fop == GF_FOP_FTRUNCATE) &&
++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -489,7 +489,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ * We only check once as this could actually be a valid error.
+ */
+
+- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -666,7 +666,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -838,7 +838,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+@@ -1005,8 +1005,8 @@ dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ local->op_errno = op_errno;
+
+- if ((local->fop == GF_FOP_FSETATTR) && (op_ret == -1) &&
+- (op_errno == EBADF) && !(local->fd_checked)) {
++ if ((local->fop == GF_FOP_FSETATTR) &&
++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+--
+1.8.3.1
+
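The helper introduced above centralises the retry condition; a hedged Python shape of it (EBADFD is Linux-specific, hence the getattr fallback; remote_fd_failed is illustrative, not the C signature):

    import errno

    EBADFD = getattr(errno, "EBADFD", 77)   # not defined on every platform

    def remote_fd_failed(op_ret, op_errno, fd_checked):
        # treat both EBADF and EBADFD as "remote fd missing"; fd_checked
        # ensures the re-open on the new cached subvol is tried only once
        return (op_ret == -1
                and op_errno in (errno.EBADF, EBADFD)
                and not fd_checked)

    assert remote_fd_failed(-1, errno.EBADF, False)
    assert remote_fd_failed(-1, EBADFD, False)
    assert not remote_fd_failed(-1, EBADFD, True)   # retried only once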
diff --git a/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch b/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch
new file mode 100644
index 0000000..6ae359e
--- /dev/null
+++ b/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch
@@ -0,0 +1,240 @@
+From 2b1738402276f43d7cb64542b74cb50145e46d77 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Wed, 16 Oct 2019 14:25:47 +0530
+Subject: [PATCH 309/309] geo-rep: Fix config upgrade on non-participating node
+
+After an upgrade, config files in the old format get migrated
+to the new format. The monitor process migrates them.
+Since the monitor doesn't run on nodes where no bricks are
+hosted, the config doesn't get migrated there. So this patch
+fixes the config upgrade on nodes which don't host bricks.
+The upgrade now happens during config get/set/reset.
+
+Backport of:
+ > Patch: https://review.gluster.org/23555
+ > Change-Id: Ibade2f2310b0f3affea21a3baa1ae0eb71162cba
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+ > fixes: bz#1762220
+
+Change-Id: Ibade2f2310b0f3affea21a3baa1ae0eb71162cba
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+BUG: 1760939
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183461
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/gsyncd.py | 3 +-
+ tests/00-geo-rep/georep-config-upgrade.t | 132 +++++++++++++++++++++++++++++++
+ tests/00-geo-rep/gsyncd.conf.old | 47 +++++++++++
+ 3 files changed, 181 insertions(+), 1 deletion(-)
+ create mode 100644 tests/00-geo-rep/georep-config-upgrade.t
+ create mode 100644 tests/00-geo-rep/gsyncd.conf.old
+
+diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
+index 6ae5269..7b48d82 100644
+--- a/geo-replication/syncdaemon/gsyncd.py
++++ b/geo-replication/syncdaemon/gsyncd.py
+@@ -255,7 +255,8 @@ def main():
+ if args.subcmd == "slave":
+ override_from_args = True
+
+- if args.subcmd == "monitor":
++ if config_file is not None and \
++ args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]:
+ ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"])
+ if ret is not None:
+ gconf.config_upgrade(config_file, ret)
+diff --git a/tests/00-geo-rep/georep-config-upgrade.t b/tests/00-geo-rep/georep-config-upgrade.t
+new file mode 100644
+index 0000000..557461c
+--- /dev/null
++++ b/tests/00-geo-rep/georep-config-upgrade.t
+@@ -0,0 +1,132 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../geo-rep.rc
++. $(dirname $0)/../env.rc
++
++SCRIPT_TIMEOUT=300
++OLD_CONFIG_PATH=$(dirname $0)/gsyncd.conf.old
++WORKING_DIR=/var/lib/glusterd/geo-replication/master_127.0.0.1_slave
++
++##Cleanup and start glusterd
++cleanup;
++TEST glusterd;
++TEST pidof glusterd
++
++##Variables
++GEOREP_CLI="$CLI volume geo-replication"
++master=$GMV0
++SH0="127.0.0.1"
++slave=${SH0}::${GSV0}
++num_active=2
++num_passive=2
++master_mnt=$M0
++slave_mnt=$M1
++
++############################################################
++#SETUP VOLUMES AND GEO-REPLICATION
++############################################################
++
++##create_and_start_master_volume
++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
++TEST $CLI volume start $GMV0
++
++##create_and_start_slave_volume
++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
++TEST $CLI volume start $GSV0
++
++##Create, start and mount meta_volume
++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
++TEST $CLI volume start $META_VOL
++TEST mkdir -p $META_MNT
++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
++
++##Mount master
++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
++
++##Mount slave
++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
++
++############################################################
++#BASIC GEO-REPLICATION TESTS
++############################################################
++
++#Create geo-rep session
++TEST create_georep_session $master $slave
++
++#Config gluster-command-dir
++TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
++
++#Config gluster-command-dir
++TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
++
++#Enable_metavolume
++TEST $GEOREP_CLI $master $slave config use_meta_volume true
++
++#Wait for common secret pem file to be created
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
++
++#Verify the keys are distributed
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
++
++#Start_georep
++TEST $GEOREP_CLI $master $slave start
++
++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
++
++TEST $GEOREP_CLI $master $slave config sync-method tarssh
++
++#Stop Geo-rep
++TEST $GEOREP_CLI $master $slave stop
++
++#Copy old config file
++mv -f $WORKING_DIR/gsyncd.conf $WORKING_DIR/gsyncd.conf.org
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++
++#Check if config get all updates config_file
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave config
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++
++#Check if config get updates config_file
++rm -f $WORKING_DIR/gsyncd.conf
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave config sync-method
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++
++#Check if config set updates config_file
++rm -f $WORKING_DIR/gsyncd.conf
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave config sync-xattrs false
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++
++#Check if config reset updates config_file
++rm -f $WORKING_DIR/gsyncd.conf
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave config \!sync-xattrs
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++
++#Check if geo-rep start updates config_file
++rm -f $WORKING_DIR/gsyncd.conf
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave start
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++
++#Stop geo-rep
++TEST $GEOREP_CLI $master $slave stop
++
++#Delete Geo-rep
++TEST $GEOREP_CLI $master $slave delete
++
++#Cleanup authorized keys
++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
++
++cleanup;
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+diff --git a/tests/00-geo-rep/gsyncd.conf.old b/tests/00-geo-rep/gsyncd.conf.old
+new file mode 100644
+index 0000000..519acaf
+--- /dev/null
++++ b/tests/00-geo-rep/gsyncd.conf.old
+@@ -0,0 +1,47 @@
++[__meta__]
++version = 2.0
++
++[peersrx . .]
++remote_gsyncd = /usr/local/libexec/glusterfs/gsyncd
++georep_session_working_dir = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/
++ssh_command_tar = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/tar_ssh.pem
++changelog_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}-changes.log
++working_dir = /var/lib/misc/glusterfsd/${mastervol}/${eSlave}
++ignore_deletes = false
++pid_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid
++state_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
++gluster_command_dir = /usr/local/sbin/
++gluster_params = aux-gfid-mount acl
++ssh_command = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/secret.pem
++state_detail_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status
++state_socket_unencoded = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket
++socketdir = /var/run/gluster
++log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}.log
++gluster_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}.gluster.log
++special_sync_mode = partial
++change_detector = changelog
++pid-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid
++state-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
++
++[__section_order__]
++peersrx . . = 0
++peersrx . %5essh%3a = 2
++peersrx . = 3
++peers master slave = 4
++
++[peersrx . %5Essh%3A]
++remote_gsyncd = /nonexistent/gsyncd
++
++[peersrx .]
++gluster_command_dir = /usr/local/sbin/
++gluster_params = aux-gfid-mount acl
++log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log
++log_file_mbr = /var/log/glusterfs/geo-replication-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log
++gluster_log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log
++
++[peers master slave]
++session_owner = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
++master.stime_xattr_name = trusted.glusterfs.0732cbd1-3ec5-4920-ab0d-aa5a896d5214.07a9005c-ace4-4f67-b3c0-73938fb236c4.stime
++volume_id = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
++use_tarssh = true
++
+--
+1.8.3.1
+
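The test above relies on any geo-rep config operation rewriting an old-format gsyncd.conf into the new format (the old file carries use_tarssh = true, and the regenerated file is expected to contain sync-method). A minimal Python sketch of that migration idea, assuming the use_tarssh-to-sync-method mapping and the [vars] section name; it is illustrative only, not the actual gsyncd upgrade code:

    import configparser

    def upgrade_old_config(old_path, new_path):
        # interpolation=None: old files contain raw '%' escapes like %5Essh%3A
        old = configparser.ConfigParser(interpolation=None)
        old.read(old_path)
        new = configparser.ConfigParser(interpolation=None)
        new.add_section("vars")
        # Assumed mapping: legacy use_tarssh=true becomes sync-method=tarssh
        if old.get("peers master slave", "use_tarssh", fallback="false") == "true":
            new.set("vars", "sync-method", "tarssh")
        with open(new_path, "w") as f:
            new.write(f)

    upgrade_old_config("gsyncd.conf.org", "gsyncd.conf")
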
diff --git a/0310-tests-test-case-for-non-root-geo-rep-setup.patch b/0310-tests-test-case-for-non-root-geo-rep-setup.patch
new file mode 100644
index 0000000..a38a4aa
--- /dev/null
+++ b/0310-tests-test-case-for-non-root-geo-rep-setup.patch
@@ -0,0 +1,284 @@
+From c2decfb59bd1be7cd2b0d792fd2ca2627913638a Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Tue, 24 Sep 2019 18:22:13 +0530
+Subject: [PATCH 310/313] tests : test case for non-root geo-rep setup
+
+Added test case for non-root geo-rep setup.
+
+Backport of:
+ > Patch: https://review.gluster.org/22902
+ > Change-Id: Ib6ebee79949a9f61bdc5c7b5e11b51b262750e98
+ > fixes: bz#1717827
+ > Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+Change-Id: Ib6ebee79949a9f61bdc5c7b5e11b51b262750e98
+BUG: 1763412
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183664
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/00-geo-rep/00-georep-verify-non-root-setup.t | 251 +++++++++++++++++++++
+ 1 file changed, 251 insertions(+)
+ create mode 100644 tests/00-geo-rep/00-georep-verify-non-root-setup.t
+
+diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+new file mode 100644
+index 0000000..e753c1f
+--- /dev/null
++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+@@ -0,0 +1,251 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../geo-rep.rc
++. $(dirname $0)/../env.rc
++
++SCRIPT_TIMEOUT=500
++
++### Basic Non-root geo-rep setup test with Distribute Replicate volumes
++
++##Cleanup and start glusterd
++cleanup;
++TEST glusterd;
++TEST pidof glusterd
++
++
++##Variables
++GEOREP_CLI="$CLI volume geo-replication"
++master=$GMV0
++SH0="127.0.0.1"
++slave=${SH0}::${GSV0}
++num_active=2
++num_passive=2
++master_mnt=$M0
++slave_mnt=$M1
++
++##User and group to be used for non-root geo-rep setup
++usr="nroot"
++grp="ggroup"
++
++slave_url=$usr@$slave
++slave_vol=$GSV0
++ssh_url=$usr@$SH0
++
++############################################################
++#SETUP VOLUMES AND VARIABLES
++
++##create_and_start_master_volume
++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
++TEST $CLI volume start $GMV0
++
++##create_and_start_slave_volume
++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
++TEST $CLI volume start $GSV0
++
++##Mount master
++#TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
++
++##Mount slave
++#TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
++
++
++##########################################################
++#TEST FUNCTIONS
++
++function distribute_key_non_root()
++{
++ ${GLUSTER_LIBEXECDIR}/set_geo_rep_pem_keys.sh $usr $master $slave_vol
++ echo $?
++}
++
++
++function check_status_non_root()
++{
++ local search_key=$1
++ $GEOREP_CLI $master $slave_url status | grep -F "$search_key" | wc -l
++}
++
++
++function check_and_clean_group()
++{
++ if [ $(getent group $grp) ]
++ then
++ groupdel $grp;
++ echo $?
++ else
++ echo 0
++ fi
++}
++
++function clean_lock_files()
++{
++ if [ -f /etc/passwd.lock ];
++ then
++ rm -rf /etc/passwd.lock;
++ fi
++
++ if [ -f /etc/group.lock ];
++ then
++ rm -rf /etc/group.lock;
++ fi
++
++ if [ -f /etc/shadow.lock ];
++ then
++ rm -rf /etc/shadow.lock;
++ fi
++
++ if [ -f /etc/gshadow.lock ];
++ then
++ rm -rf /etc/gshadow.lock;
++ fi
++}
++
++
++###########################################################
++#SETUP NON-ROOT GEO REPLICATION
++
++##Create ggroup group
++##First test if group exists and then create new one
++
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
++
++##cleanup *.lock files
++
++clean_lock_files
++
++TEST /usr/sbin/groupadd $grp
++
++clean_lock_files
++##Create non-root user and assign it to newly created group
++
++TEST /usr/sbin/useradd -G $grp $usr
++
++##Modify password for non-root user to have control over distributing ssh-key
++echo "$usr:pass" | chpasswd
++
++##Set up mountbroker root
++TEST gluster-mountbroker setup /var/mountbroker-root $grp
++
++##Associate volume and non-root user to the mountbroker
++TEST gluster-mountbroker add $slave_vol $usr
++
++##Check ssh setting for clear text passwords
++sed '/^PasswordAuthentication /{s/no/yes/}' -i /etc/ssh/sshd_config && grep '^PasswordAuthentication ' /etc/ssh/sshd_config && service sshd restart
++
++
++##Restart glusterd to reflect mountbroker changes
++TEST killall_gluster;
++TEST glusterd;
++TEST pidof glusterd;
++
++
++
++##Create, start and mount meta_volume
++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
++TEST $CLI volume start $META_VOL
++TEST mkdir -p $META_MNT
++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
++
++##Mount master
++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
++
++##Mount slave
++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
++
++## Check status of mount-broker
++TEST gluster-mountbroker status
++
++
++##Setup password-less ssh for non-root user
++#sshpass -p "pass" ssh-copy-id -i ~/.ssh/id_rsa.pub $ssh_url
++##Run ssh agent
++eval "$(ssh-agent -s)"
++PASS="pass"
++
++
++##Create a temp script to echo the SSH password, used by SSH_ASKPASS
++
++SSH_ASKPASS_SCRIPT=/tmp/ssh-askpass-script
++cat > ${SSH_ASKPASS_SCRIPT} <<EOL
++#!/bin/bash
++echo "${PASS}"
++EOL
++chmod u+x ${SSH_ASKPASS_SCRIPT}
++
++##set no display, necessary for ssh to use with setsid and SSH_ASKPASS
++#export DISPLAY=:0
++
++export SSH_ASKPASS=${SSH_ASKPASS_SCRIPT}
++
++DISPLAY=: setsid ssh-copy-id -i ~/.ssh/id_rsa.pub $ssh_url
++
++##Setting up PATH for gluster binaries in case of source installation
++##ssh -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $ssh_url "echo "export PATH=$PATH:/usr/local/sbin" >> ~/.bashrc"
++
++##Creating secret pem pub file
++TEST gluster-georep-sshkey generate
++
++##Create geo-rep non-root setup
++
++TEST $GEOREP_CLI $master $slave_url create push-pem
++
++#Config gluster-command-dir
++TEST $GEOREP_CLI $master $slave_url config gluster-command-dir ${GLUSTER_CMD_DIR}
++
++#Config slave-gluster-command-dir
++TEST $GEOREP_CLI $master $slave_url config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
++
++## Test for key distribution
++
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 distribute_key_non_root
++
++##Wait for common secret pem file to be created
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
++
++#Enable_metavolume
++TEST $GEOREP_CLI $master $slave config use_meta_volume true
++
++#Start_georep
++TEST $GEOREP_CLI $master $slave_url start
++
++## Meta volume is enabled so looking for 2 Active and 2 Passive sessions
++
++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Active"
++
++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Passive"
++
++#Pause geo-replication session
++TEST $GEOREP_CLI $master $slave_url pause
++
++#Resume geo-replication session
++TEST $GEOREP_CLI $master $slave_url resume
++
++#Validate failure of volume stop when geo-rep is running
++TEST ! $CLI volume stop $GMV0
++
++#Stop Geo-rep
++TEST $GEOREP_CLI $master $slave_url stop
++
++#Delete Geo-rep
++TEST $GEOREP_CLI $master $slave_url delete
++
++#Cleanup authorized_keys
++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
++
++#clear mountbroker
++gluster-mountbroker remove --user $usr
++gluster-mountbroker remove --volume $slave_vol
++
++#delete group and user created for non-root setup
++TEST userdel -r -f $usr
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
++
++##password script cleanup
++rm -rf /tmp/ssh-askpass-script
++
++
++cleanup;
++
+--
+1.8.3.1
+
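For reference outside the harness, the SSH_ASKPASS-plus-setsid trick the test uses to push the key without a tty can be reproduced in a few lines of Python; the target and password below are the test's placeholders, and start_new_session plays the role of setsid:

    import os
    import stat
    import subprocess
    import tempfile

    def ssh_copy_id_noninteractive(target, password):
        # Throwaway askpass helper that just prints the password
        fd, helper = tempfile.mkstemp(prefix="askpass-")
        with os.fdopen(fd, "w") as f:
            f.write("#!/bin/sh\necho '%s'\n" % password)
        os.chmod(helper, stat.S_IRWXU)
        env = dict(os.environ, SSH_ASKPASS=helper, DISPLAY=":")
        # start_new_session=True is the setsid trick: with no controlling
        # tty, ssh falls back to SSH_ASKPASS for the password prompt
        subprocess.run(["ssh-copy-id", "-i",
                        os.path.expanduser("~/.ssh/id_rsa.pub"), target],
                       env=env, start_new_session=True, check=True)
        os.unlink(helper)

    # ssh_copy_id_noninteractive("nroot@127.0.0.1", "pass")
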
diff --git a/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch b/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch
new file mode 100644
index 0000000..af0206a
--- /dev/null
+++ b/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch
@@ -0,0 +1,186 @@
+From 4a2441e76f4240568093080769ede07bb7fb2016 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Sun, 20 Oct 2019 01:01:39 +0530
+Subject: [PATCH 311/313] geo-rep: Fix Permission denied traceback on non root
+ setup
+
+Problem:
+While syncing rename of directory in hybrid crawl, geo-rep
+crashes as below.
+
+Traceback (most recent call last):
+ File "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in worker
+ res = getattr(self.obj, rmeth)(*in_data[2:])
+ File "/usr/local/libexec/glusterfs/python/syncdaemon/resource.py", line 588, in entry_ops
+ src_entry = get_slv_dir_path(slv_host, slv_volume, gfid)
+ File "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 687, in get_slv_dir_path
+ [ENOENT], [ESTALE])
+ File "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 546, in errno_wrap
+ return call(*arg)
+PermissionError: [Errno 13] Permission denied: '/bricks/brick1/b1/.glusterfs/8e/c0/8ec0fcd4-d50f-4a6e-b473-a7943ab66640'
+
+Cause:
+Conversion of gfid to path for a directory uses readlink on backend
+.glusterfs gfid path. But this fails for non root user with
+permission denied.
+
+Fix:
+Use gfid2path interface to get the path from gfid
+
+Backport of:
+ > Patch: https://review.gluster.org/23570
+ > Change-Id: I9d40c713a1b32cea95144cbc0f384ada82972222
+ > fixes: bz#1763439
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+Change-Id: I9d40c713a1b32cea95144cbc0f384ada82972222
+BUG: 1763412
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183665
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/gsyncd.py | 3 +-
+ geo-replication/syncdaemon/syncdutils.py | 35 ++++++++++++++++------
+ tests/00-geo-rep/00-georep-verify-non-root-setup.t | 30 +++++++++++++++----
+ 3 files changed, 52 insertions(+), 16 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
+index 7b48d82..8940384 100644
+--- a/geo-replication/syncdaemon/gsyncd.py
++++ b/geo-replication/syncdaemon/gsyncd.py
+@@ -231,7 +231,8 @@ def main():
+ # Set default path for config file in that case
+ # If an subcmd accepts config file then it also accepts
+ # master and Slave arguments.
+- if config_file is None and hasattr(args, "config_file"):
++ if config_file is None and hasattr(args, "config_file") \
++ and args.subcmd != "slave":
+ config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
+ GLUSTERD_WORKDIR,
+ args.master,
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index aadaebd..b08098e 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -57,6 +57,7 @@ from hashlib import sha256 as sha256
+
+ # auxiliary gfid based access prefix
+ _CL_AUX_GFID_PFX = ".gfid/"
++ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+ GF_OP_RETRIES = 10
+
+ GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
+@@ -670,6 +671,7 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
+ global slv_bricks
+
+ dir_path = ENOENT
++ pfx = gauxpfx()
+
+ if not slv_bricks:
+ slv_info = Volinfo(slv_volume, slv_host, master=False)
+@@ -683,15 +685,30 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
+ gfid[2:4],
+ gfid], [ENOENT], [ESTALE])
+ if dir_path != ENOENT:
+- realpath = errno_wrap(os.readlink, [dir_path],
+- [ENOENT], [ESTALE])
+- if not isinstance(realpath, int):
+- realpath_parts = realpath.split('/')
+- pargfid = realpath_parts[-2]
+- basename = realpath_parts[-1]
+- pfx = gauxpfx()
+- dir_entry = os.path.join(pfx, pargfid, basename)
+- return dir_entry
++ try:
++ realpath = errno_wrap(os.readlink, [dir_path],
++ [ENOENT], [ESTALE])
++ if not isinstance(realpath, int):
++ realpath_parts = realpath.split('/')
++ pargfid = realpath_parts[-2]
++ basename = realpath_parts[-1]
++ dir_entry = os.path.join(pfx, pargfid, basename)
++ return dir_entry
++ except OSError:
++ # .gfid/GFID
++ gfidpath = unescape_space_newline(os.path.join(pfx, gfid))
++ realpath = errno_wrap(Xattr.lgetxattr_buf,
++ [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE])
++ if not isinstance(realpath, int):
++ basename = os.path.basename(realpath).rstrip('\x00')
++ dirpath = os.path.dirname(realpath)
++ if dirpath == "/":
++ pargfid = ROOT_GFID
++ else:
++ dirpath = dirpath.strip("/")
++ pargfid = get_gfid_from_mnt(dirpath)
++ dir_entry = os.path.join(pfx, pargfid, basename)
++ return dir_entry
+
+ return None
+
+diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+index e753c1f..c9fd8b2 100644
+--- a/tests/00-geo-rep/00-georep-verify-non-root-setup.t
++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+@@ -118,8 +118,8 @@ clean_lock_files
+ TEST /usr/sbin/groupadd $grp
+
+ clean_lock_files
+-##Create non-root user and assign it to newly created group
+-
++##Delete the user if it exists, then create the non-root user in the new group
++userdel -r -f $usr
+ TEST /usr/sbin/useradd -G $grp $usr
+
+ ##Modify password for non-root user to have control over distributing ssh-key
+@@ -140,8 +140,6 @@ TEST killall_gluster;
+ TEST glusterd;
+ TEST pidof glusterd;
+
+-
+-
+ ##Create, start and mount meta_volume
+ TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+ TEST $CLI volume start $META_VOL
+@@ -225,6 +223,26 @@ TEST $GEOREP_CLI $master $slave_url resume
+ #Validate failure of volume stop when geo-rep is running
+ TEST ! $CLI volume stop $GMV0
+
++#Hybrid directory rename test BZ#1763439
++TEST $GEOREP_CLI $master $slave_url config change_detector xsync
++mkdir ${master_mnt}/dir1
++mkdir ${master_mnt}/dir1/dir2
++mkdir ${master_mnt}/dir1/dir3
++mkdir ${master_mnt}/hybrid_d1
++
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/dir2
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/dir3
++
++mv ${master_mnt}/hybrid_d1 ${master_mnt}/hybrid_rn_d1
++mv ${master_mnt}/dir1/dir2 ${master_mnt}/rn_dir2
++mv ${master_mnt}/dir1/dir3 ${master_mnt}/dir1/rn_dir3
++
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_rn_d1
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/rn_dir2
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/rn_dir3
++
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave_url stop
+
+@@ -232,8 +250,8 @@ TEST $GEOREP_CLI $master $slave_url stop
+ TEST $GEOREP_CLI $master $slave_url delete
+
+ #Cleanup authorized_keys
+-sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+-sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' /home/$usr/.ssh/authorized_keys
++sed -i '/^command=.*gsyncd.*/d' /home/$usr/.ssh/authorized_keys
+
+ #clear mountbroker
+ gluster-mountbroker remove --user $usr
+--
+1.8.3.1
+
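In outline, the fix swaps a root-only readlink on the brick backend for the gfid2path xattr. A simplified sketch of that fallback, with the function name and trailing-NUL handling as illustrative assumptions rather than the real syncdutils code:

    import os

    AUX_PFX = ".gfid/"   # auxiliary gfid-based access prefix

    def slave_entry_from_gfid(brick, gfid):
        # Backend location of the gfid: <brick>/.glusterfs/xx/yy/<gfid>
        backend = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
        try:
            # Directory gfids are symlinks whose target ends in <pargfid>/<basename>
            pargfid, basename = os.readlink(backend).split("/")[-2:]
        except PermissionError:
            # Non-root fallback: trusted.gfid2path.* stores "<pargfid>/<basename>"
            pargfid = basename = None
            for key in os.listxattr(backend):
                if key.startswith("trusted.gfid2path."):
                    raw = os.getxattr(backend, key).decode().strip("\x00")
                    pargfid, basename = raw.split("/", 1)
                    break
        return os.path.join(AUX_PFX, pargfid, basename)
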
diff --git a/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch b/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch
new file mode 100644
index 0000000..bf8c820
--- /dev/null
+++ b/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch
@@ -0,0 +1,59 @@
+From b1d8a5ee8b2e320aaaf9b2a145fbc285178d07bb Mon Sep 17 00:00:00 2001
+From: hari gowtham <hgowtham@redhat.com>
+Date: Tue, 22 Oct 2019 15:11:03 +0530
+Subject: [PATCH 312/313] Scripts: quota_fsck script KeyError: 'contri_size'
+
+ back-port of: https://review.gluster.org/#/c/glusterfs/+/23586/
+
+Problem: In a certain code flow, we weren't handling the
+unavailability of the contri value in the dict. Trying to print
+without the value raised a KeyError.
+
+Fix: Print the whole dictionary, as the values will be
+helpful in understanding the state of the file/dir.
+
+>Fixes: bz#1764129
+>Change-Id: I99c538adb712f281ca10e4e0088f404f515b9725
+>Signed-off-by: hari gowtham <hgowtham@redhat.com>
+
+BUG: 1719171
+Change-Id: I99c538adb712f281ca10e4e0088f404f515b9725
+Signed-off-by: hari gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183720
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/quota/quota_fsck.py | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py
+index f03895d..485a37a 100755
+--- a/extras/quota/quota_fsck.py
++++ b/extras/quota/quota_fsck.py
+@@ -52,17 +52,17 @@ epilog_msg='''
+
+ def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
+ if log_type == QUOTA_VERBOSE:
+- print('%-24s %-60s\nxattr_values: %s\n%s\n' % {"Verbose", path, xattr_dict, stbuf})
++ print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf))
+ elif log_type == QUOTA_META_ABSENT:
+- print('%-24s %-60s\n%s\n' % {"Quota-Meta Absent", path, xattr_dict})
++ print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict))
+ elif log_type == QUOTA_SIZE_MISMATCH:
+ print("mismatch")
+ if dir_size is not None:
+- print('%24s %60s %12s %12s' % {"Size Mismatch", path, xattr_dict['contri_size'],
+- dir_size})
++ print('%24s %60s %12s %12s' % ("Size Mismatch", path,
++ xattr_dict, dir_size))
+ else:
+- print('%-24s %-60s %-12i %-12i' % {"Size Mismatch", path, xattr_dict['contri_size'],
+- stbuf.st_size})
++ print('%-24s %-60s %-12i %-12i' % ("Size Mismatch", path, xattr_dict,
++ stbuf.st_size))
+
+ def size_differs_lot(s1, s2):
+ '''
+--
+1.8.3.1
+
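The corrected calls fix two distinct failure modes: %-formatting against a set literal ({...} instead of (...)), and indexing a key that may be absent. A quick standalone repro:

    xattr_dict = {"parent": "00000000-0000-0000-0000-000000000001"}  # no contri_size

    # Bug 1: a set literal where %-formatting expects a tuple
    try:
        print('%-24s %-60s' % {"Size Mismatch", "/path"})
    except TypeError as e:
        print("set instead of tuple:", e)   # not enough arguments for format string

    # Bug 2: indexing a key that may be absent
    try:
        print('%24s %60s %12s' % ("Size Mismatch", "/path", xattr_dict['contri_size']))
    except KeyError as e:
        print("missing key:", e)

    # The fix: pass a tuple, and print the whole dict instead of one key
    print('%24s %60s %12s' % ("Size Mismatch", "/path", xattr_dict))
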
diff --git a/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch b/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch
new file mode 100644
index 0000000..e4887b8
--- /dev/null
+++ b/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch
@@ -0,0 +1,60 @@
+From 23091d24d34102c7938ae2890930b73c89c5a8e7 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Tue, 22 Oct 2019 18:52:25 +0530
+Subject: [PATCH 313/313] extras: Cgroup(CPU/Mem) restriction are not working
+ on gluster process
+
+Problem: After configuring Cgroup(CPU/MEM) limits for a gluster process,
+ the resource(CPU/MEM) limits are not applied to that
+ process. Cgroup limits are not applicable because not all threads
+ are moved into the newly created cgroup to apply the restriction.
+
+Solution: To move every gluster thread into the newly created cgroup,
+ change the thread-selection condition in the script
+
+> Change-Id: I8ad81c69200e4ec43a74f6052481551cf835354c
+> Fixes: bz#1764208
+> (Cherry pick from commit 38de02012948013a88597545cf49380ce97f6fa7)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23599/)
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+
+Change-Id: I8ad81c69200e4ec43a74f6052481551cf835354c
+BUG: 1764202
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183730
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/control-cpu-load.sh | 2 +-
+ extras/control-mem.sh | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh
+index b739c82..52dcf62 100755
+--- a/extras/control-cpu-load.sh
++++ b/extras/control-cpu-load.sh
+@@ -104,7 +104,7 @@ echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup."
+ echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us
+
+ if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+- for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
++ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+ do
+ echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+ done
+diff --git a/extras/control-mem.sh b/extras/control-mem.sh
+index 38aa2a0..91b36f8 100755
+--- a/extras/control-mem.sh
++++ b/extras/control-mem.sh
+@@ -116,7 +116,7 @@ else
+ fi
+
+ if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+- for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
++ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+ do
+ echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+ done
+--
+1.8.3.1
+
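The script-side fix widens the ps filter so every thread lands in the cgroup, not only those whose name matches "gluster". The same enumeration can be done straight from /proc, sketched here with an assumed cgroup path:

    import os

    def move_all_threads(daemon_pid, tasks_file):
        # /proc/<pid>/task lists every thread id regardless of the per-thread
        # name -- exactly what 'grep gluster' on ps output used to miss
        for tid in os.listdir("/proc/%d/task" % daemon_pid):
            with open(tasks_file, "w") as f:   # each write moves one thread
                f.write(tid)

    # move_all_threads(1234, "/sys/fs/cgroup/cpu/gluster_cgroup/tasks")
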
diff --git a/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch b/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
new file mode 100644
index 0000000..adde426
--- /dev/null
+++ b/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
@@ -0,0 +1,38 @@
+From 2a4f19df70276ba41db19938507297f7580286fa Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Fri, 25 Oct 2019 18:07:27 +0530
+Subject: [PATCH 314/314] glusterd/tier: is_tier_enabled inserted causing
+ checksum mismatch
+
+The volfile entry is_tier_enabled is checked against version 3.7.6 while it was
+supposed to be checked against 3.10. This fixes it downstream only by changing
+the version in the check to 3.13.1.
+
+Label: DOWNSTREAM ONLY
+BUG: 1765555
+Change-Id: Id631f3ba520b3e7b126c7607dca1bb7874532e81
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183932
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+Tested-by: Sanju Rakonde <srakonde@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 4889217..8a10eb8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1036,7 +1036,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+ if (ret)
+ goto out;
+ }
+- if (conf->op_version >= GD_OP_VERSION_3_10_0) {
++ if (conf->op_version >= GD_OP_VERSION_3_13_1) {
+ snprintf(buf, sizeof(buf), "%d", volinfo->is_tier_enabled);
+ ret = gf_store_save_value(fd, GF_TIER_ENABLED, buf);
+ if (ret)
+--
+1.8.3.1
+
diff --git a/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch b/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch
new file mode 100644
index 0000000..a0448cc
--- /dev/null
+++ b/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch
@@ -0,0 +1,52 @@
+From 4a04e1b5540921db22f1894f71eb30342127192d Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Tue, 12 Nov 2019 21:53:20 +0530
+Subject: [PATCH 315/316] geo-rep: Fix py2/py3 compatibility in repce
+
+Geo-rep fails to start on a python2-only machine like
+centos6. It fails with "ImportError no module named _io".
+This patch fixes the same.
+
+Backport of:
+ > Patch: https://review.gluster.org/23702
+ > fixes: bz#1771577
+ > Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+BUG: 1771524
+Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185377
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
+---
+ geo-replication/syncdaemon/repce.py | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
+index 6065b82..c622afa 100644
+--- a/geo-replication/syncdaemon/repce.py
++++ b/geo-replication/syncdaemon/repce.py
+@@ -8,7 +8,6 @@
+ # cases as published by the Free Software Foundation.
+ #
+
+-import _io
+ import os
+ import sys
+ import time
+@@ -58,9 +57,9 @@ def recv(inf):
+ """load an object from input stream
+ python2 and python3 compatibility, inf is sys.stdin
+ and is opened as text stream by default. Hence using the
+- buffer attribute
++ buffer attribute in python3
+ """
+- if isinstance(inf, _io.TextIOWrapper):
++ if hasattr(inf, "buffer"):
+ return pickle.load(inf.buffer)
+ else:
+ return pickle.load(inf)
+--
+1.8.3.1
+
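The pattern the fix adopts, duck-typing on the buffer attribute instead of importing _io, works unchanged on both interpreters; a self-contained sketch:

    import pickle
    import sys

    def recv(inf):
        # python3: sys.stdin is a text stream wrapping a binary .buffer;
        # python2: sys.stdin is already binary and has no such attribute
        if hasattr(inf, "buffer"):
            return pickle.load(inf.buffer)
        return pickle.load(inf)

    # obj = recv(sys.stdin)
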
diff --git a/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch b/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch
new file mode 100644
index 0000000..c2045a0
--- /dev/null
+++ b/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch
@@ -0,0 +1,51 @@
+From b9a19aef5de94eb91162448ad687f2d2d194f82c Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Thu, 14 Nov 2019 09:55:15 +0000
+Subject: [PATCH 316/316] spec: fixed python-prettytable dependency for rhel6
+
+Installing glusterfs on rhel6 was failing on the python-prettytable
+dependency, as it required python2-prettytable for glusterfs-events.
+This patch conditionally sets the prettytable dependency based on the
+rhel version and fixes the problem.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1771614
+
+Change-Id: I6288daa5d8c2d82a6d73a0d9722786a2a99b9db5
+fixes: bz#1771614
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185385
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 3c2e2dc..eeadb65 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -706,7 +706,7 @@ This package provides the translators needed on any GlusterFS client.
+ %package events
+ Summary: GlusterFS Events
+ Requires: %{name}-server%{?_isa} = %{version}-%{release}
+-Requires: python%{_pythonver} python%{_pythonver}-prettytable
++Requires: python%{_pythonver}
+ Requires: python%{_pythonver}-gluster = %{version}-%{release}
+ %if ( 0%{?rhel} && 0%{?rhel} < 8 )
+ Requires: python-requests
+@@ -714,7 +714,10 @@ Requires: python-requests
+ Requires: python%{_pythonver}-requests
+ %endif
+ %if ( 0%{?rhel} && 0%{?rhel} < 7 )
++Requires: python-prettytable
+ Requires: python-argparse
++%else
++Requires: python%{_pythonver}-prettytable
+ %endif
+ %if ( 0%{?_with_systemd:1} )
+ %{?systemd_requires}
+--
+1.8.3.1
+
diff --git a/0317-Update-rfc.sh-to-rhgs-3.5.1.patch b/0317-Update-rfc.sh-to-rhgs-3.5.1.patch
new file mode 100644
index 0000000..eccf2e3
--- /dev/null
+++ b/0317-Update-rfc.sh-to-rhgs-3.5.1.patch
@@ -0,0 +1,43 @@
+From 985ef94c63859907339c11b158e4540a5568d638 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Mon, 18 Nov 2019 02:25:25 -0500
+Subject: [PATCH 317/335] Update rfc.sh to rhgs-3.5.1
+
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ README | 9 +++++++++
+ rfc.sh | 2 +-
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+ create mode 100644 README
+
+diff --git a/README b/README
+new file mode 100644
+index 0000000..44a118b
+--- /dev/null
++++ b/README
+@@ -0,0 +1,9 @@
++
++The 'master' branch is just a dummy branch in downstream. Any reference to 'upstream'
++will point to http://git.gluster.org.
++
++You can check out the release-specific branch by running the command below:
++ bash$ git checkout -t -b rhs-x.y origin/rhs-x.y
++
++Happy Hacking!!
++
+diff --git a/rfc.sh b/rfc.sh
+index 94c92ef..69ddd2b 100755
+--- a/rfc.sh
++++ b/rfc.sh
+@@ -18,7 +18,7 @@ done
+ shift $((OPTIND-1))
+
+
+-branch="rhgs-3.5.0";
++branch="rhgs-3.5.1";
+
+ set_hooks_commit_msg()
+ {
+--
+1.8.3.1
+
diff --git a/0318-Update-rfc.sh-to-rhgs-3.5.1.patch b/0318-Update-rfc.sh-to-rhgs-3.5.1.patch
new file mode 100644
index 0000000..e65ae38
--- /dev/null
+++ b/0318-Update-rfc.sh-to-rhgs-3.5.1.patch
@@ -0,0 +1,114 @@
+From 1f03327887645be2500cd29f69f7a77a4f5d0164 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Mon, 18 Nov 2019 14:25:12 -0500
+Subject: [PATCH 318/335] Update rfc.sh to rhgs-3.5.1
+
+Removed the checks for updates and fixes from rfc.sh
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I436c959aa3b3366cd313b29f41c2466c4072efd7
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ rfc.sh | 47 ++++++++---------------------------------------
+ 1 file changed, 8 insertions(+), 39 deletions(-)
+
+diff --git a/rfc.sh b/rfc.sh
+index 69ddd2b..918fb11 100755
+--- a/rfc.sh
++++ b/rfc.sh
+@@ -129,13 +129,8 @@ editor_mode()
+
+ if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then
+ # see note above function warn_reference_missing for regex elaboration
+- # Lets first check for github issues
+- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?#[[:digit:]]+" | awk -F '#' '{print $2}');
+- if [ "x${ref}" = "x" ]; then
+- # if not found, check for bugs
+- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+bz#[[:digit:]]+" | awk -F '#' '{print $2}');
+- fi
+
++ ref=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+")
+ if [ "x${ref}" != "x" ]; then
+ return;
+ fi
+@@ -157,16 +152,6 @@ editor_mode()
+ bz_string=""
+ fi
+
+- echo "Select yes '(y)' if this patch fixes the bug/feature completely,"
+- echo -n "or is the last of the patchset which brings feature (Y/n): "
+- read fixes
+- fixes_string="fixes"
+- if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then
+- fixes_string="updates"
+- fi
+-
+- sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: ${bz_string}#${bug}/;}" $1 > $1.new && \
+- mv $1.new $1;
+ return;
+ done
+ fi
+@@ -234,8 +219,8 @@ check_patches_for_coding_style()
+ # IOW, the above helps us find the pattern with leading or training spaces
+ # or non word consituents like , or ;
+ #
+-# [fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])
+-# Finds 'fixes' OR 'updates' in any case combination
++# [bB][uU][gG]
++# Finds 'bug' in any case
+ #
+ # (:)?
+ # Followed by an optional : (colon)
+@@ -256,28 +241,11 @@ warn_reference_missing()
+ echo ""
+ echo "=== Missing a reference in commit! ==="
+ echo ""
+- echo "Gluster commits are made with a reference to a bug or a github issue"
+- echo ""
+- echo "Submissions that are enhancements (IOW, not functional"
+- echo "bug fixes, but improvements of any nature to the code) are tracked"
+- echo "using github issues [1]."
++ echo "You must give BUG: <bugid>"
+ echo ""
+- echo "Submissions that are bug fixes are tracked using Bugzilla [2]."
++ echo "for example:"
+ echo ""
+- echo "A check on the commit message, reveals that there is no bug or"
+- echo "github issue referenced in the commit message"
+- echo ""
+- echo "[1] https://github.com/gluster/glusterfs/issues/new"
+- echo "[2] https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS"
+- echo ""
+- echo "Please file an issue or a bug report and reference the same in the"
+- echo "commit message using the following tags:"
+- echo "GitHub Issues:"
+- echo "\"Fixes: gluster/glusterfs#n\" OR \"Updates: gluster/glusterfs#n\","
+- echo "\"Fixes: #n\" OR \"Updates: #n\","
+- echo "Bugzilla ID:"
+- echo "\"Fixes: bz#n\" OR \"Updates: bz#n\","
+- echo "where n is the issue or bug number"
++ echo "BUG: 1234567"
+ echo ""
+ echo "You may abort the submission choosing 'N' below and use"
+ echo "'git commit --amend' to add the issue reference before posting"
+@@ -312,7 +280,7 @@ main()
+ assert_diverge;
+
+ # see note above function warn_reference_missing for regex elaboration
+- reference=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?(bz)?#[[:digit:]]+" | awk -F '#' '{print $2}');
++ reference=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+" | awk '{print $2}')
+
+ # If this is a commit against master and does not have a bug ID or a github
+ # issue reference. Warn the contributor that one of the 2 is required
+@@ -320,6 +288,7 @@ main()
+ warn_reference_missing;
+ fi
+
++
+ # TODO: add clang-format command here. It will after the changes are done everywhere else
+ clang_format=$(clang-format --version)
+ if [ ! -z "${clang_format}" ]; then
+--
+1.8.3.1
+
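The reworked check accepts only a line beginning "BUG: <id>" in the commit body. The equivalent test expressed in Python, for illustration (the helper name is assumed):

    import re
    import subprocess

    def commit_has_bug_reference():
        body = subprocess.run(["git", "log", "-n1", "--format=%b"],
                              capture_output=True, text=True).stdout
        # Same shape as the script's regex: a line starting "BUG: <digits>"
        return re.search(r"^bug:\s+\d+", body,
                         re.IGNORECASE | re.MULTILINE) is not None

    if not commit_has_bug_reference():
        print("=== Missing a reference in commit! ===")
        print("You must give BUG: <bugid>, for example: BUG: 1234567")
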
diff --git a/0319-features-snapview-server-obtain-the-list-of-snapshot.patch b/0319-features-snapview-server-obtain-the-list-of-snapshot.patch
new file mode 100644
index 0000000..d37efaf
--- /dev/null
+++ b/0319-features-snapview-server-obtain-the-list-of-snapshot.patch
@@ -0,0 +1,48 @@
+From 659bd2a0fde9ba0cb8fc3905bcdb63d91e3dfa9d Mon Sep 17 00:00:00 2001
+From: Raghavendra Bhat <raghavendra@redhat.com>
+Date: Tue, 2 Jul 2019 16:50:23 -0400
+Subject: [PATCH 319/335] features/snapview-server: obtain the list of
+ snapshots inside the lock
+
+The current list of snapshots from priv->dirents is obtained outside
+the lock.
+
+Upstream patch:
+> Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177
+> Fixes: bz#1726783
+> Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
+> patch: https://review.gluster.org/#/c/glusterfs/+/22990/
+
+BUG: 1731513
+Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177
+Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185838
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/snapview-server/src/snapview-server-mgmt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c
+index bc415ef..3d64383 100644
+--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c
++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c
+@@ -256,7 +256,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ this = frame->this;
+ ctx = frame->this->ctx;
+ priv = this->private;
+- old_dirents = priv->dirents;
+
+ if (!ctx) {
+ errno = EINVAL;
+@@ -388,6 +387,7 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ LOCK(&priv->snaplist_lock);
+ {
+ oldcount = priv->num_snaps;
++ old_dirents = priv->dirents;
+ for (i = 0; i < priv->num_snaps; i++) {
+ for (j = 0; j < snapcount; j++) {
+ if ((!strcmp(old_dirents[i].name, dirents[j].name)) &&
+--
+1.8.3.1
+
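The pattern behind the fix, reading shared state only while holding the lock that protects its replacement, in a minimal Python illustration (class and method names are assumptions):

    import threading

    class SnapList:
        def __init__(self):
            self.lock = threading.Lock()
            self.dirents = []

        def replace(self, new_dirents):
            with self.lock:
                # Read the old list only while holding the lock, so a
                # concurrent update cannot swap it out from under us
                old_dirents = self.dirents
                self.dirents = new_dirents
            return old_dirents
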
diff --git a/0320-gf-event-Handle-unix-volfile-servers.patch b/0320-gf-event-Handle-unix-volfile-servers.patch
new file mode 100644
index 0000000..48a9cad
--- /dev/null
+++ b/0320-gf-event-Handle-unix-volfile-servers.patch
@@ -0,0 +1,58 @@
+From 7e5d8dcb4f557eaca259e8d81cf34d651907396c Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Thu, 24 Oct 2019 12:24:35 +0530
+Subject: [PATCH 320/335] gf-event: Handle unix volfile-servers
+
+Problem:
+glfsheal program uses unix-socket-based volfile server.
+volfile server will be the path to socket in this case.
+gf_event expects this to be hostname in all cases. So getaddrinfo
+will fail on the unix-socket path, events won't be sent in this case.
+
+Fix:
+In case of unix sockets, default to localhost
+
+upstream-patch: https://review.gluster.org/c/glusterfs/+/23606
+BUG: 1758923
+Change-Id: I60d27608792c29d83fb82beb5fde5ef4754bece8
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185851
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/events.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
+index 9d33783..4e2f8f9 100644
+--- a/libglusterfs/src/events.c
++++ b/libglusterfs/src/events.c
+@@ -43,6 +43,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ struct addrinfo *result = NULL;
+ xlator_t *this = THIS;
+ int sin_family = AF_INET;
++ char *volfile_server_transport = NULL;
+
+ /* Global context */
+ ctx = THIS->ctx;
+@@ -62,8 +63,16 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+
++ if (ctx) {
++ volfile_server_transport = ctx->cmd_args.volfile_server_transport;
++ }
++
++ if (!volfile_server_transport) {
++ volfile_server_transport = "tcp";
++ }
+ /* Get Host name to send message */
+- if (ctx && ctx->cmd_args.volfile_server) {
++ if (ctx && ctx->cmd_args.volfile_server &&
++ (strcmp(volfile_server_transport, "unix"))) {
+ /* If it is client code then volfile_server is set
+ use that information to push the events. */
+ if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints,
+--
+1.8.3.1
+
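A condensed sketch of the resulting host-selection logic, with the fallback to localhost for unix transports (the function shape is illustrative, not the libglusterfs API):

    import socket

    def event_host(volfile_server, transport):
        # A unix-transport "server" is a socket path; getaddrinfo on it would
        # fail, so default to localhost for event delivery in that case
        host = "localhost"
        if volfile_server and transport != "unix":
            host = volfile_server
        return socket.getaddrinfo(host, None)[0][4][0]

    print(event_host("/var/run/glusterd.socket", "unix"))   # 127.0.0.1 or ::1
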
diff --git a/0321-Adding-white-spaces-to-description-of-set-group.patch b/0321-Adding-white-spaces-to-description-of-set-group.patch
new file mode 100644
index 0000000..8dec96f
--- /dev/null
+++ b/0321-Adding-white-spaces-to-description-of-set-group.patch
@@ -0,0 +1,55 @@
+From 5e7a2ad35a174d6d0ee5ed58a3e27955e85aa47c Mon Sep 17 00:00:00 2001
+From: kshithijiyer <kshithij.ki@gmail.com>
+Date: Mon, 24 Jun 2019 20:08:48 +0530
+Subject: [PATCH 321/335] Adding white spaces to description of set group.
+
+The description of set group is missing spaces which
+leads to the description look like:
+volume set <VOLNAME> group <GROUP> - This option can be used for
+setting multiple pre-defined volume optionswhere group_name is a
+file under /var/lib/glusterd/groups containing onekey, value pair
+per line
+
+Instead of:
+volume set <VOLNAME> group <GROUP> - This option can be used for
+setting multiple pre-defined volume options where group_name is a
+file under /var/lib/glusterd/groups containing one key value
+pair per line
+
+> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22934/
+> Fixes: bz#1723455
+> Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f
+> Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
+
+BUG: 1724021
+Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185756
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-volume.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 6b958bd..66beb1b 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -3393,10 +3393,10 @@ struct cli_cmd volume_cmds[] = {
+ {"volume set <VOLNAME> <KEY> <VALUE>", cli_cmd_volume_set_cbk,
+ "set options for volume <VOLNAME>"},
+
+- {"volume set <VOLNAME> group <GROUP>", cli_cmd_volume_set_cbk,
+- "This option can be used for setting multiple pre-defined volume options"
+- "where group_name is a file under /var/lib/glusterd/groups containing one"
+- "key, value pair per line"},
++ {"volume set <VOLNAME> group <GROUP>", cli_cmd_volume_set_cbk,
++ "This option can be used for setting multiple pre-defined volume options "
++ "where group_name is a file under /var/lib/glusterd/groups containing one "
++ "key value pair per line"},
+
+ {"volume log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk,
+ "rotate the log file for corresponding volume/brick"},
+--
+1.8.3.1
+
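The underlying pitfall, adjacent string literals concatenating without an implicit separator, exists in Python too; a two-line reminder:

    # Adjacent string literals concatenate verbatim; the space must be explicit
    broken = ("setting multiple pre-defined volume options"
              "where group_name is a file")
    fixed = ("setting multiple pre-defined volume options "
             "where group_name is a file")
    print(broken)   # ...optionswhere...
    print(fixed)    # ...options where...
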
diff --git a/0322-glusterd-display-correct-rebalance-data-size-after-g.patch b/0322-glusterd-display-correct-rebalance-data-size-after-g.patch
new file mode 100644
index 0000000..35a234b
--- /dev/null
+++ b/0322-glusterd-display-correct-rebalance-data-size-after-g.patch
@@ -0,0 +1,65 @@
+From 9be255f76c78fcbbda1e3a72eb2e99d3aface53e Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Wed, 16 Oct 2019 23:26:03 +0530
+Subject: [PATCH 322/335] glusterd: display correct rebalance data size after
+ glusterd restart
+
+Problem: After completion of rebalance, if glusterd is restarted,
+rebalance status displays wrong rebalance data size in its output.
+
+Cause: While glusterd restoring the information from /var/lib/glusterd/
+into its memory, glusterd fetches rebalance_data from
+/var/lib/glusterd/vols/volname/node_state.info. This value is
+converted into an integer using atoi(), which returns an
+incorrect value for larger inputs.
+
+Solution: use sscanf() instead of atoi() to convert string to
+integer(in this case it is unsigned long)
+
+> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23560/
+> fixes: bz#1762438
+> Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1761486
+Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185752
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 8a10eb8..b3b5ee9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -2974,19 +2974,19 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
+ volinfo->rebal.op = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) {
+- volinfo->rebal.rebalance_files = atoi(value);
++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) {
+- volinfo->rebal.rebalance_data = atoi(value);
++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) {
+- volinfo->rebal.lookedup_files = atoi(value);
++ sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) {
+- volinfo->rebal.rebalance_failures = atoi(value);
++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) {
+- volinfo->rebal.skipped_files = atoi(value);
++ sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) {
+ volinfo->rebal.rebalance_time = atoi(value);
+--
+1.8.3.1
+
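To see why atoi() mangles these counters: it parses into a C int, 32 bits on common platforms, so a 64-bit byte count typically wraps. A small Python simulation of that failure mode (atoi32 is a stand-in for the C behaviour, which is strictly undefined on overflow):

    def atoi32(s):
        # Stand-in for C atoi() on a 32-bit int: the parse wraps modulo 2**32
        v = int(s) & 0xFFFFFFFF
        return v - 0x100000000 if v >= 0x80000000 else v

    rebalance_data = "109951162777600"   # ~100 TB moved; fits in a uint64
    print(atoi32(rebalance_data))        # 0 -- the wrapped, wrong answer
    print(int(rebalance_data))           # 109951162777600, as sscanf %PRIu64 reads it
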
diff --git a/0323-cli-display-detailed-rebalance-info.patch b/0323-cli-display-detailed-rebalance-info.patch
new file mode 100644
index 0000000..a00faf8
--- /dev/null
+++ b/0323-cli-display-detailed-rebalance-info.patch
@@ -0,0 +1,101 @@
+From 852c475040a599ed35798dbb388c6b59c1d0a820 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Tue, 22 Oct 2019 15:06:29 +0530
+Subject: [PATCH 323/335] cli: display detailed rebalance info
+
+Problem: When one of the node is down in cluster,
+rebalance status is not displaying detailed
+information.
+
+Cause: In glusterd_volume_rebalance_use_rsp_dict()
+we are aggregating rsp from all the nodes into a
+dictionary and sending it to cli for printing. While
+assigning an index to keys we are considering all the
+peers instead of considering only the peers which are
+up. Because of this, the index never reaches 1.
+While parsing the rsp, cli is unable to find the status-1
+key in the dictionary and exits without printing
+any information.
+
+Solution: The simplest fix for this without much
+code change is to continue to look for other keys
+when status-1 key is not found.
+
+> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23588
+> fixes: bz#1764119
+> Change-Id: I0062839933c9706119eb85416256eade97e976dc
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1761326
+Change-Id: I0062839933c9706119eb85416256eade97e976dc
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185749
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-rpc-ops.c | 21 ++++++++++++++-------
+ tests/bugs/glusterd/rebalance-in-cluster.t | 9 +++++++++
+ 2 files changed, 23 insertions(+), 7 deletions(-)
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index b167e26..4e91265 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -1597,13 +1597,20 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type,
+ goto out;
+ }
+
+- snprintf(key, sizeof(key), "status-1");
+-
+- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
+- if (ret) {
+- gf_log("cli", GF_LOG_TRACE, "count %d %d", count, 1);
+- gf_log("cli", GF_LOG_TRACE, "failed to get status");
+- goto out;
++ for (i = 1; i <= count; i++) {
++ snprintf(key, sizeof(key), "status-%d", i);
++ ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
++ /* If information from a node is missing we should skip
++ * the node and try to fetch information of other nodes.
++ * If information is not found for all nodes, we should
++ * error out.
++ */
++ if (!ret)
++ break;
++ if (ret && i == count) {
++ gf_log("cli", GF_LOG_TRACE, "failed to get status");
++ goto out;
++ }
+ }
+
+ /* Fix layout will be sent to all nodes for the volume
+diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t
+index 9565fae..469ec6c 100644
+--- a/tests/bugs/glusterd/rebalance-in-cluster.t
++++ b/tests/bugs/glusterd/rebalance-in-cluster.t
+@@ -4,6 +4,10 @@
+ . $(dirname $0)/../../cluster.rc
+ . $(dirname $0)/../../volume.rc
+
++function rebalance_status_field_1 {
++ $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p
++}
++
+ cleanup;
+ TEST launch_cluster 2;
+ TEST $CLI_1 peer probe $H2;
+@@ -29,6 +33,11 @@ TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
+ TEST $CLI_1 volume rebalance $V0 start
+ EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+
++#bug - 1764119 - rebalance status should display detailed info when any of the nodes is down
++TEST kill_glusterd 2
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0
++
++TEST start_glusterd 2
+ #bug-1245142
+
+ $CLI_1 volume rebalance $V0 start &
+--
+1.8.3.1
+
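The CLI-side loop reduces to: scan status-1 through status-<count> and take the first key present, erroring only if every node's entry is missing. In Python terms:

    def first_status(rsp_dict, count):
        # A node that is down leaves a hole in the numbering, so take
        # the first status-N key that is actually present
        for i in range(1, count + 1):
            status = rsp_dict.get("status-%d" % i)
            if status is not None:
                return status
        raise KeyError("no status-N key found for any node")

    print(first_status({"status-2": 3}, count=2))   # node 1 down -> 3
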
diff --git a/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch b/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch
new file mode 100644
index 0000000..26e1577
--- /dev/null
+++ b/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch
@@ -0,0 +1,128 @@
+From dcf3f74fa7e812dfe89667bd6219f70a8457f755 Mon Sep 17 00:00:00 2001
+From: Anoop C S <anoopcs@redhat.com>
+Date: Thu, 6 Jun 2019 18:33:19 +0530
+Subject: [PATCH 324/335] extras/hooks: Add SELinux label on new bricks during
+ add-brick
+
+Backport of https://review.gluster.org/c/glusterfs/+/22834
+
+Change-Id: Ifd8ae5eeb91b968cc1a9a9b5d15844c5233d56db
+BUG: 1686800
+Signed-off-by: Anoop C S <anoopcs@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185855
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../add-brick/post/S10selinux-label-brick.sh | 100 +++++++++++++++++++++
+ 1 file changed, 100 insertions(+)
+ create mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
+
+diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
+new file mode 100755
+index 0000000..4a17c99
+--- /dev/null
++++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
+@@ -0,0 +1,100 @@
++#!/bin/bash
++#
++# Install to hooks/<HOOKS_VER>/add-brick/post
++#
++# Add an SELinux file context for each brick using the glusterd_brick_t type.
++# This ensures that the brick is relabeled correctly on an SELinux restart or
++# restore. Subsequently, run a restore on the brick path to set the selinux
++# labels.
++#
++###
++
++PROGNAME="Sselinux"
++OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
++VOL=
++
++parse_args () {
++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
++ eval set -- "${ARGS}"
++
++ while true; do
++ case ${1} in
++ --volname)
++ shift
++ VOL=${1}
++ ;;
++ --gd-workdir)
++ shift
++ GLUSTERD_WORKDIR=$1
++ ;;
++ --version)
++ shift
++ ;;
++ --volume-op)
++ shift
++ ;;
++ *)
++ shift
++ break
++ ;;
++ esac
++ shift
++ done
++}
++
++set_brick_labels()
++{
++ local volname="${1}"
++ local fctx
++ local list=()
++
++ fctx="$(semanage fcontext --list -C)"
++
++ # wait for new brick path to be updated under
++ # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/
++ sleep 5
++
++ # grab the path for each local brick
++ brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/"
++ brickdirs=$(
++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
++ cut -d= -f 2 | \
++ sort -u
++ )
++
++ # create a list of bricks for which custom SELinux
++ # label doesn't exist
++ for b in ${brickdirs}; do
++ pattern="${b}(/.*)?"
++ echo "${fctx}" | grep "^${pattern}\s" >/dev/null
++ if [[ $? -ne 0 ]]; then
++ list+=("${pattern}")
++ fi
++ done
++
++ # Add a file context for each brick path in the list and associate with the
++ # glusterd_brick_t SELinux type.
++ for p in ${list[@]}
++ do
++ semanage fcontext --add -t glusterd_brick_t -r s0 "${p}"
++ done
++
++ # Set the labels for which SELinux label was added above
++ for b in ${brickdirs}
++ do
++ echo "${list[@]}" | grep "${b}" >/dev/null
++ if [[ $? -eq 0 ]]; then
++ restorecon -R "${b}"
++ fi
++ done
++}
++
++SELINUX_STATE=$(which getenforce && getenforce)
++[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
++
++parse_args "$@"
++[ -z "${VOL}" ] && exit 1
++
++set_brick_labels "${VOL}"
++
++exit 0
+--
+1.8.3.1
+
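The two external commands the hook depends on can also be driven from Python; a sketch using the same semanage/restorecon invocations as the script, with a placeholder brick path:

    import subprocess

    def label_brick(brick_dir):
        pattern = "%s(/.*)?" % brick_dir
        # Record a persistent file context so relabels keep the brick type
        subprocess.run(["semanage", "fcontext", "--add", "-t",
                        "glusterd_brick_t", "-r", "s0", pattern], check=True)
        # Apply the new context to everything already under the brick
        subprocess.run(["restorecon", "-R", brick_dir], check=True)

    # label_brick("/bricks/brick1/b1")   # hypothetical brick path
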
diff --git a/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch b/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch
new file mode 100644
index 0000000..8e5a5fa
--- /dev/null
+++ b/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch
@@ -0,0 +1,52 @@
+From 27d69d8927a946562aef08a6edfee38b9998f96d Mon Sep 17 00:00:00 2001
+From: Anoop C S <anoopcs@redhat.com>
+Date: Wed, 12 Jun 2019 15:41:27 +0530
+Subject: [PATCH 325/335] extras/hooks: Install and package newly added post
+ add-brick hook script
+
+Previously a new SELinux hook script was added as a post add-brick
+operation to label new brick paths. But the change failed to install
+and package new script. Therefore making necessary changes to Makefile
+and spec file to get it installed and packaged.
+
+Backport of https://review.gluster.org/c/glusterfs/+/22856
+
+Change-Id: I67b8f4982c2783c34a4bc749fb4387c19a038225
+BUG: 1686800
+Signed-off-by: Anoop C S <anoopcs@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185856
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/hook-scripts/add-brick/post/Makefile.am | 4 ++--
+ glusterfs.spec.in | 1 +
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am
+index bfc0c1c..9b236df 100644
+--- a/extras/hook-scripts/add-brick/post/Makefile.am
++++ b/extras/hook-scripts/add-brick/post/Makefile.am
+@@ -1,6 +1,6 @@
+-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh
++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
+
+ hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/
+ if WITH_SERVER
+-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh
++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
+ endif
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index eeadb65..91180db 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1447,6 +1447,7 @@ exit 0
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh
++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+--
+1.8.3.1
+
diff --git a/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch b/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch
new file mode 100644
index 0000000..b0afcc7
--- /dev/null
+++ b/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch
@@ -0,0 +1,51 @@
+From a4f01ad90a0c0dfd0655da509c5ed2a11a507cc3 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Mon, 17 Jun 2019 11:10:42 +0530
+Subject: [PATCH 326/335] tests: subdir-mount.t is failing for brick_mux
+ regression
+
+To avoid the failure, wait for the hook script S13create-subdir-mounts.sh
+to run after the test case executes the add-brick command.
+
+This is required as a dependency for the bz referenced below.
+
+Backport of https://review.gluster.org/c/glusterfs/+/22877
+
+Change-Id: I063b6d0f86a550ed0a0527255e4dfbe8f0a8c02e
+BUG: 1686800
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185857
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/features/subdir-mount.t | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t
+index 8401946..a02bd6b 100644
+--- a/tests/features/subdir-mount.t
++++ b/tests/features/subdir-mount.t
+@@ -85,12 +85,17 @@ TEST $CLI volume start $V0
+ TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2
+ TEST stat $M2
+
++initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+ # mount shouldn't fail even after add-brick
+ TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6};
+
+-# Give time for client process to get notified and use the new
+-# volfile after add-brick
+-sleep 1
++# Wait to execute create-subdir-mounts.sh script by glusterd
++newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
++while [ $newcnt -eq $initcnt ]
++do
++ newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
++ sleep 1
++done
+
+ # Existing mount should still be active
+ mount_inode=$(stat --format "%i" "$M2")
+--
+1.8.3.1
+
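The wait pattern the test adopts, counting hook-script hits in glusterd.log before and after the operation instead of a fixed sleep, looks like this in Python (the log path and timeout are assumptions):

    import time

    def count_matches(logfile, needle):
        with open(logfile) as f:
            return sum(needle in line.lower() for line in f)

    def wait_for_hook(logfile, baseline, needle="create-subdir-mounts", timeout=60):
        deadline = time.time() + timeout
        while time.time() < deadline:
            if count_matches(logfile, needle) > baseline:
                return True   # hook has run since the baseline was taken
            time.sleep(1)
        return False

    # base = count_matches(LOG, "create-subdir-mounts")  # before add-brick
    # ...run 'gluster volume add-brick ...'
    # assert wait_for_hook(LOG, base)
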
diff --git a/0327-glusterfind-integrate-with-gfid2path.patch b/0327-glusterfind-integrate-with-gfid2path.patch
new file mode 100644
index 0000000..e3e42fa
--- /dev/null
+++ b/0327-glusterfind-integrate-with-gfid2path.patch
@@ -0,0 +1,93 @@
+From f89242132dc4756c827113154cc6ad18ad6bde88 Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Tue, 19 Feb 2019 12:49:12 +0530
+Subject: [PATCH 327/335] glusterfind: integrate with gfid2path
+
+Integration with gfid2path helps avoid a file-system crawl and saves
+precious time. Extended attributes starting with "trusted.gfid2path."
+are read, the <PGFID>/<BN> values are extracted, and the <PGFID> is
+iteratively resolved from the brick backend to arrive at the full path.
+
+>Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0
+>fixes: #529
+>Signed-off-by: Milind Changire <mchangir@redhat.com>
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/22225/
+BUG: 1599802
+Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0
+Signed-off-by: Milind Changire <mchangir@redhat.com>
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185706
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tools/glusterfind/src/changelog.py | 45 ++++++++++++++++++++++++++++++++++----
+ 1 file changed, 41 insertions(+), 4 deletions(-)
+
+diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
+index ef982db..d8f97e0 100644
+--- a/tools/glusterfind/src/changelog.py
++++ b/tools/glusterfind/src/changelog.py
+@@ -114,6 +114,43 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
+ continue
+
+
++def enum_hard_links_using_gfid2path(brick, gfid, args):
++ hardlinks = []
++ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
++ if not os.path.isdir(p):
++ # we have a symlink or a normal file
++ try:
++ file_xattrs = xattr.list(p)
++ for x in file_xattrs:
++ if x.startswith("trusted.gfid2path."):
++ # get the value for the xattr i.e. <PGFID>/<BN>
++ v = xattr.getxattr(p, x)
++ pgfid, bn = v.split(os.sep)
++ try:
++ path = symlink_gfid_to_path(brick, pgfid)
++ fullpath = os.path.join(path, bn)
++ fullpath = output_path_prepare(fullpath, args)
++ hardlinks.append(fullpath)
++ except (IOError, OSError) as e:
++ logger.warn("Error converting to path: %s" % e)
++ continue
++ except (IOError, OSError):
++ pass
++ return hardlinks
++
++
++def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args):
++ path = ""
++ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
++ gfid = row[3].strip()
++ logger.debug("Processing gfid %s" % gfid)
++ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args)
++
++ path = ",".join(hardlinks)
++
++ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
++
++
+ def gfid_to_path_using_pgfid(brick, changelog_data, args):
+ """
+ For all the pgfids collected, Converts to Path and
+@@ -314,11 +351,11 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
+ changelog_data.commit()
+ logger.info("[2/4] Finished 'pgfid to path' conversions.")
+
+- # Convert all GFIDs for which no other additional details available
+- logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...")
+- gfid_to_path_using_pgfid(brick, changelog_data, args)
++ # Convert all gfids recorded for data and metadata to all hardlink paths
++ logger.info("[3/4] Starting 'gfid2path' conversions ...")
++ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args)
+ changelog_data.commit()
+- logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.")
++ logger.info("[3/4] Finished 'gfid2path' conversions.")
+
+ # If some GFIDs fail to get converted from previous step,
+ # convert using find
+--
+1.8.3.1
+
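The on-disk contract the patch relies on: each hard link of a file is recorded on the brick as an xattr named trusted.gfid2path.<hash> on the file's .glusterfs backend entry, whose value is "<PGFID>/<basename>". A small stand-alone C sketch that dumps those xattrs for a given gfid (Linux-only, error handling trimmed; the brick path and gfid string are command-line arguments, not anything the patch defines):

    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/xattr.h>

    /* Usage: ./gfid2path <brick-path> <gfid-string> */
    int main(int argc, char *argv[])
    {
        char path[4096], names[4096], value[4096];
        ssize_t len, vlen;
        char *name;

        if (argc != 3) {
            fprintf(stderr, "usage: %s <brick> <gfid>\n", argv[0]);
            return 1;
        }

        /* Backend layout: <brick>/.glusterfs/<g[0:2]>/<g[2:4]>/<gfid> */
        snprintf(path, sizeof(path), "%s/.glusterfs/%.2s/%.2s/%s",
                 argv[1], argv[2], argv[2] + 2, argv[2]);

        len = listxattr(path, names, sizeof(names));
        if (len < 0)
            return 1;

        /* `names` is a packed list of NUL-terminated xattr names. */
        for (name = names; name < names + len; name += strlen(name) + 1) {
            if (strncmp(name, "trusted.gfid2path.", 18))
                continue;
            vlen = getxattr(path, name, value, sizeof(value) - 1);
            if (vlen < 0)
                continue;
            value[vlen] = '\0';
            /* Value is "<parent-gfid>/<basename>"; resolving the
             * parent gfid level by level yields the full path. */
            printf("%s = %s\n", name, value);
        }
        return 0;
    }

The Python code above does the same through xattr.list()/xattr.getxattr() and then resolves the parent gfid with symlink_gfid_to_path().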
diff --git a/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch b/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch
new file mode 100644
index 0000000..0d12daa
--- /dev/null
+++ b/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch
@@ -0,0 +1,55 @@
+From a8d8fc91af226fbf49e9dd1d7d91ad287707c4fe Mon Sep 17 00:00:00 2001
+From: Vishal Pandey <vpandey@redhat.com>
+Date: Wed, 7 Aug 2019 12:53:06 +0530
+Subject: [PATCH 328/335] glusterd: Add warning and abort in case of failures
+ in migration during remove-brick commit
+
+Problem -
+Currently remove-brick commit goes through even when files failed to
+migrate or were skipped, and no warning is raised to the user.
+Solution -
+Add a check in the remove-brick staging phase: if the rebalance process
+has completed but there were failures or skipped files during migration,
+the user is warned and the remove-brick commit is aborted. The user then
+needs to use the force option to remove the bricks.
+
+> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23171/
+> Fixes: bz#1514683
+> Signed-off-by: Vishal Pandey <vpandey@redhat.com>
+> Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5
+
+BUG: 1344758
+Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5
+Signed-off-by: Vishal Pandey <vpandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185831
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+index ad9a572..c5141de 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+@@ -2191,6 +2191,17 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
+ goto out;
+ }
+
++ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) {
++ if (volinfo->rebal.rebalance_failures > 0 ||
++ volinfo->rebal.skipped_files > 0) {
++ errstr = gf_strdup(
++ "use 'force' option as migration "
++ "of some files might have been skipped or "
++ "has failed");
++ goto out;
++ }
++ }
++
+ ret = glusterd_remove_brick_validate_bricks(
+ cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
+ if (ret)
+--
+1.8.3.1
+
diff --git a/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch b/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch
new file mode 100644
index 0000000..935824d
--- /dev/null
+++ b/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch
@@ -0,0 +1,165 @@
+From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Wed, 20 Nov 2019 12:26:11 +0530
+Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
+ healed_sinks
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
+
+Problem:
+In a situation where B1 blames B2, B2 blames B1, and B3 doesn't blame
+anything for entry heal, heal will not complete even though we have a
+clear source and sinks. This happens because in
+afr_selfheal_find_direction() only the bricks which are blamed by
+non-accused bricks are considered as sinks. Later, in
+__afr_selfheal_entry_finalize_source(), when it tries to mark all the
+non-sources as sinks, it fails to do so because there won't be any
+healed_sinks marked, no witness present, and there will be a source.
+
+Fix:
+If there is a source and no healed_sinks, then reset all the locked
+sources to 0 and healed sinks to 1 to do conservative merge.
+
+Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+BUG: 1764095
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bug-1749322-entry-heal-not-happening.t | 89 ++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 15 ++++
+ 2 files changed, 104 insertions(+)
+ create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+
+diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+new file mode 100644
+index 0000000..9627908
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+@@ -0,0 +1,89 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup
++
++function check_gfid_and_link_count
++{
++ local file=$1
++
++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
++ TEST [ ! -z $file_gfid_b0 ]
++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
++ EXPECT $file_gfid_b0 echo $file_gfid_b1
++ EXPECT $file_gfid_b0 echo $file_gfid_b2
++
++ EXPECT "2" stat -c %h $B0/${V0}0/$file
++ EXPECT "2" stat -c %h $B0/${V0}1/$file
++ EXPECT "2" stat -c %h $B0/${V0}2/$file
++}
++TESTS_EXPECTED_IN_LOOP=18
++
++################################################################################
++## Start and create a volume
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume start $V0;
++TEST $CLI volume set $V0 cluster.heal-timeout 5
++TEST $CLI volume heal $V0 disable
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST mkdir $M0/dir
++TEST `echo "File 1 " > $M0/dir/file1`
++TEST touch $M0/dir/file{2..4}
++
++# Remove file2 from 1st & 3rd bricks
++TEST rm -f $B0/$V0"0"/dir/file2
++TEST rm -f $B0/$V0"2"/dir/file2
++
++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
++TEST rm -f $B0/$V0"0"/dir/file3
++TEST rm -f $B0/$V0"1"/dir/file3
++
++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
++
++# B0 and B2 blame each other
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++
++# Add entry to xattrop dir on first brick.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++
++EXPECT "^1$" get_pending_heal_count $V0
++
++# Launch heal
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++# All the files must be present on all the bricks after conservative merge and
++# should have the gfid xattr and the .glusterfs hardlink.
++check_gfid_and_link_count dir/file1
++check_gfid_and_link_count dir/file2
++check_gfid_and_link_count dir/file3
++check_gfid_and_link_count dir/file4
++
++cleanup
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index 35b600f..3ce882e 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
++ int i = 0;
+
+ priv = this->private;
+
+@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
+ }
+
+ source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
++
++ /*If the selected source does not blame any other brick, then mark
++ * everything as sink to trigger conservative merge.
++ */
++ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
++ for (i = 0; i < priv->child_count; i++) {
++ if (locked_on[i]) {
++ sources[i] = 0;
++ healed_sinks[i] = 1;
++ }
++ }
++ return -1;
++ }
++
+ return source;
+ }
+
+--
+1.8.3.1
+
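The accounting that produces the stuck state is easy to reproduce outside afr. A toy C model, not the real data structures: with two bricks blaming each other and a third blaming nobody, the third brick is the only non-accused one (so a source exists), yet it blames no one, so no brick is ever marked a healed sink; the fix then flips everything to sink for a conservative merge:

    #include <stdio.h>

    #define N 3 /* replica 3: indices 0..2 stand for B1, B2, B3 */

    int main(void)
    {
        /* blame[i][j] = 1 if brick i's pending xattrs accuse brick j:
         * B1 and B2 blame each other, B3 blames nothing. */
        int blame[N][N] = {{0, 1, 0}, {1, 0, 0}, {0, 0, 0}};
        int accused[N] = {0}, sources[N], healed_sinks[N] = {0};
        int i, j, have_source = 0, sink_count = 0;

        for (i = 0; i < N; i++)
            for (j = 0; j < N; j++)
                if (blame[i][j])
                    accused[j] = 1;

        /* Bricks nobody accuses are sources; only bricks blamed by a
         * non-accused brick become healed sinks, as the message above
         * describes for afr_selfheal_find_direction(). */
        for (i = 0; i < N; i++) {
            sources[i] = !accused[i];
            have_source |= sources[i];
        }
        for (i = 0; i < N; i++)
            if (!accused[i])
                for (j = 0; j < N; j++)
                    if (blame[i][j])
                        healed_sinks[j] = 1;
        for (i = 0; i < N; i++)
            sink_count += healed_sinks[i];

        /* Prints "source: 1 sinks: 0", the stuck combination. */
        printf("source: %d sinks: %d\n", have_source, sink_count);

        if (have_source && !sink_count) {
            /* The fix: reset the locked sources to 0 and mark every
             * locked brick a healed sink, triggering a conservative
             * merge. */
            for (i = 0; i < N; i++) {
                sources[i] = 0;
                healed_sinks[i] = 1;
            }
        }
        return 0;
    }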
diff --git a/0330-mount.glusterfs-change-the-error-message.patch b/0330-mount.glusterfs-change-the-error-message.patch
new file mode 100644
index 0000000..b64f0c6
--- /dev/null
+++ b/0330-mount.glusterfs-change-the-error-message.patch
@@ -0,0 +1,59 @@
+From 72168245761592a2cd0ebec05dd9bd9bc00745ca Mon Sep 17 00:00:00 2001
+From: Amar Tumballi <amarts@redhat.com>
+Date: Wed, 13 Mar 2019 08:51:31 +0530
+Subject: [PATCH 330/335] mount.glusterfs: change the error message
+
+In scenarios where a mount fails before the log file is created, it
+makes no sense to tell the user to 'check the log file'. See below:
+
+```
+ERROR: failed to create logfile "/var/log/glusterfs/mnt.log" (No space left on device)
+ERROR: failed to open logfile /var/log/glusterfs/mnt.log
+Mount failed. Please check the log file for more details.
+```
+
+>upstream patch: https://review.gluster.org/#/c/glusterfs/+/22346/
+>Fixes: bz#1688068
+>Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f
+>Signed-off-by: Amar Tumballi <amarts@redhat.com>
+
+BUG: 1685406
+Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185828
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mount/fuse/utils/mount.glusterfs.in | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
+index 3f5d76d..cbde42d 100755
+--- a/xlators/mount/fuse/utils/mount.glusterfs.in
++++ b/xlators/mount/fuse/utils/mount.glusterfs.in
+@@ -361,7 +361,10 @@ start_glusterfs ()
+ cmd_line=$(echo "$cmd_line $mount_point");
+ $cmd_line;
+ if [ $? -ne 0 ]; then
+- warn "Mount failed. Please check the log file for more details."
++ # If this is true, then the glusterfs process returned an error
++ # without getting daemonized. We have made sure such errors go to
++ # 'stderr', so there is no need to point the user at the log file.
++ warn "Mounting glusterfs on $mount_point failed."
+ exit 1;
+ fi
+
+@@ -369,7 +372,9 @@ start_glusterfs ()
+ inode=$( ${getinode} $mount_point 2>/dev/null);
+ # this is required if the stat returns error
+ if [ $? -ne 0 ]; then
+- warn "Mount failed. Please check the log file for more details."
++ # At this time, glusterfs got daemonized and then later exited.
++ # These failures are only logged in the log file.
++ warn "Mount failed. Check the log file ${log_file} for more details."
+ umount $mount_point > /dev/null 2>&1;
+ exit 1;
+ fi
+--
+1.8.3.1
+
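The distinction behind the two messages is when the failure happens relative to daemonization: before the glusterfs client daemonizes, its errors still land on the invoking terminal's stderr; afterwards, stderr has been redirected and failures are visible only in the log file. A minimal C sketch of that life cycle (the log path is purely illustrative):

    #define _DEFAULT_SOURCE
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* Before daemonizing: this reaches the caller's terminal, so
         * a wrapper script need not say "check the log file". */
        fprintf(stderr, "early failure goes to the caller's stderr\n");

        if (daemon(0, 0) != 0) /* fork, setsid, redirect to /dev/null */
            return 1;

        /* A real daemon reopens stderr onto its log file; from here
         * on, failures are only visible there, which is why the
         * second message names ${log_file} explicitly. */
        if (!freopen("/tmp/example-daemon.log", "a", stderr))
            return 1;

        fprintf(stderr, "late failure goes to the log file\n");
        return 0;
    }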
diff --git a/0331-features-locks-Do-special-handling-for-op-version-3..patch b/0331-features-locks-Do-special-handling-for-op-version-3..patch
new file mode 100644
index 0000000..6eb15b0
--- /dev/null
+++ b/0331-features-locks-Do-special-handling-for-op-version-3..patch
@@ -0,0 +1,44 @@
+From 147cff762b307bf60519bae4cdefc62f655119a7 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Wed, 30 Oct 2019 10:47:17 +0530
+Subject: [PATCH 331/335] features/locks: Do special handling for op-version <
+ 3.12.0
+
+Problem:
+Patch https://code.engineering.redhat.com/gerrit/#/c/140080/ diverges from
+its upstream patch (https://review.gluster.org/c/glusterfs/+/20031) in op-version:
+upstream does the special handling for versions < 3.10.0, whereas downstream
+does it for versions < 3.12.0.
+When the code was rebased from upstream for 3.5.0, this downstream-specific
+change was missed, as no downstream-only patch tracked the difference.
+This leads to I/O errors on upgrade from 3.3.1 -> 3.5.0.
+
+Fix:
+Do special handling for op-version < 3.12.0 as in 3.4.x
+
+Change-Id: I72fec058bdfb3cd30d017d205c90aa61aec86c5d
+Label: DOWNSTREAM ONLY
+BUG: 1766640
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185835
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ xlators/features/locks/src/posix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 9db5ac6..4592240 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -57,7 +57,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
+ do { \
+ pl_local_t *__local = NULL; \
+ if (frame->root->client && \
+- (frame->root->client->opversion < GD_OP_VERSION_3_10_0)) { \
++ (frame->root->client->opversion < GD_OP_VERSION_3_12_0)) { \
+ __local = frame->local; \
+ PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \
+ } else { \
+--
+1.8.3.1
+
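For context, gluster op-versions encode a release X.Y.Z roughly as X*10000 + Y*100 + Z, so 3.12.0 becomes 31200; treat the exact constants below as an assumption for illustration. The macro patched above picks a per-client compatibility path by comparing the connected client's op-version against such a threshold, which is why a threshold that silently differs between downstream (3.12.0) and upstream (3.10.0) breaks mixed-version clusters. A stripped-down sketch of the gating pattern, with hypothetical names:

    #include <stdio.h>

    /* Assumed encoding: X*10000 + Y*100 + Z. */
    #define OP_VERSION_3_10_0 31000
    #define OP_VERSION_3_12_0 31200

    struct client { /* hypothetical stand-in for frame->root->client */
        int opversion;
    };

    static void pl_stack_setup(const struct client *c)
    {
        /* Downstream must gate at 3.12.0: its 3.4.x-era clients keep
         * the old behaviour up to that version, so gating at the
         * upstream constant (3.10.0) hands them replies they cannot
         * handle, surfacing as I/O errors after a 3.3.1 -> 3.5.0
         * upgrade. */
        if (c->opversion < OP_VERSION_3_12_0)
            printf("op-version %d: compatibility path\n", c->opversion);
        else
            printf("op-version %d: current path\n", c->opversion);
    }

    int main(void)
    {
        struct client old_client = {31000}, new_client = {31302};
        pl_stack_setup(&old_client); /* compatibility path */
        pl_stack_setup(&new_client); /* current path */
        return 0;
    }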
diff --git a/0332-Removing-one-top-command-from-gluster-v-help.patch b/0332-Removing-one-top-command-from-gluster-v-help.patch
new file mode 100644
index 0000000..c9b2b56
--- /dev/null
+++ b/0332-Removing-one-top-command-from-gluster-v-help.patch
@@ -0,0 +1,57 @@
+From 808f311bd4f38f06b8afc49fc8d2c65fc4797431 Mon Sep 17 00:00:00 2001
+From: kshithijiyer <kshithij.ki@gmail.com>
+Date: Fri, 28 Jun 2019 15:32:31 +0530
+Subject: [PATCH 332/335] Removing one top command from gluster v help
+
+The current help shows 2 different top commands
+instead of one single top command, which can be
+easily observed when the "# gluster v help" command
+is issued. Remove the duplicate "volume top <VOLNAME>"
+and club the two into a single command.
+
+Current help:
+volume top <VOLNAME> {open|read|write|opendir|readdir|clear}
+[nfs|brick <brick>] [list-cnt <value>] |
+volume top <VOLNAME> {read-perf|write-perf}
+[bs <size> count <count>] [brick <brick>]
+[list-cnt <value>] - volume top operations
+
+Expected help:
+volume top <VOLNAME> {open|read|write|opendir|readdir|clear}
+[nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf}
+[bs <size> count <count>] [brick <brick>] [list-cnt <value>]
+- volume top operations
+
+> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22972/
+> fixes: bz#1725034
+> Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d
+> Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
+
+BUG: 1726058
+Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185757
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-volume.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 66beb1b..754d333 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -3427,8 +3427,8 @@ struct cli_cmd volume_cmds[] = {
+ cli_cmd_volume_profile_cbk, "volume profile operations"},
+
+ {"volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick "
+- "<brick>] [list-cnt <value>] |\n"
+- "volume top <VOLNAME> {read-perf|write-perf} [bs <size> count <count>] "
++ "<brick>] [list-cnt <value>] | "
++ "{read-perf|write-perf} [bs <size> count <count>] "
+ "[brick <brick>] [list-cnt <value>]",
+ cli_cmd_volume_top_cbk, "volume top operations"},
+
+--
+1.8.3.1
+
diff --git a/0333-rpc-Synchronize-slot-allocation-code.patch b/0333-rpc-Synchronize-slot-allocation-code.patch
new file mode 100644
index 0000000..b1d94b4
--- /dev/null
+++ b/0333-rpc-Synchronize-slot-allocation-code.patch
@@ -0,0 +1,195 @@
+From f199094cb61341a47c98a8ed91b293446182b5a9 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Thu, 3 Oct 2019 14:06:52 +0530
+Subject: [PATCH 333/335] rpc: Synchronize slot allocation code
+
+Problem: The current slot allocation/deallocation code path is not
+         synchronized. There are scenarios where, due to a race
+         condition in that code path, the brick crashes.
+
+Solution: Synchronize the slot allocation/deallocation code path to
+          avoid the issue.
+
+> Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25
+> Fixes: bz#1763036
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23508/)
+> (Cherry pick from commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2)
+
+Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25
+BUG: 1741193
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185827
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/event-epoll.c | 74 +++++++++++++++++++++++-------------------
+ 1 file changed, 41 insertions(+), 33 deletions(-)
+
+diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
+index 0cec47e..65f5efd 100644
+--- a/libglusterfs/src/event-epoll.c
++++ b/libglusterfs/src/event-epoll.c
+@@ -69,15 +69,27 @@ __event_newtable(struct event_pool *event_pool, int table_idx)
+ }
+
+ static int
++event_slot_ref(struct event_slot_epoll *slot)
++{
++ if (!slot)
++ return -1;
++
++ return GF_ATOMIC_INC(slot->ref);
++}
++
++static int
+ __event_slot_alloc(struct event_pool *event_pool, int fd,
+- char notify_poller_death)
++ char notify_poller_death, struct event_slot_epoll **slot)
+ {
+ int i = 0;
++ int j = 0;
+ int table_idx = -1;
+ int gen = -1;
+ struct event_slot_epoll *table = NULL;
+
+- for (i = 0; i < EVENT_EPOLL_TABLES; i++) {
++retry:
++
++ while (i < EVENT_EPOLL_TABLES) {
+ switch (event_pool->slots_used[i]) {
+ case EVENT_EPOLL_SLOTS:
+ continue;
+@@ -98,6 +110,7 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
+ if (table)
+ /* break out of the loop */
+ break;
++ i++;
+ }
+
+ if (!table)
+@@ -105,20 +118,20 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
+
+ table_idx = i;
+
+- for (i = 0; i < EVENT_EPOLL_SLOTS; i++) {
+- if (table[i].fd == -1) {
++ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) {
++ if (table[j].fd == -1) {
+ /* wipe everything except bump the generation */
+- gen = table[i].gen;
+- memset(&table[i], 0, sizeof(table[i]));
+- table[i].gen = gen + 1;
++ gen = table[j].gen;
++ memset(&table[j], 0, sizeof(table[j]));
++ table[j].gen = gen + 1;
+
+- LOCK_INIT(&table[i].lock);
+- INIT_LIST_HEAD(&table[i].poller_death);
++ LOCK_INIT(&table[j].lock);
++ INIT_LIST_HEAD(&table[j].poller_death);
+
+- table[i].fd = fd;
++ table[j].fd = fd;
+ if (notify_poller_death) {
+- table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i;
+- list_add_tail(&table[i].poller_death,
++ table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j;
++ list_add_tail(&table[j].poller_death,
+ &event_pool->poller_death);
+ }
+
+@@ -128,18 +141,26 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
+ }
+ }
+
+- return table_idx * EVENT_EPOLL_SLOTS + i;
++ if (j == EVENT_EPOLL_SLOTS) {
++ table = NULL;
++ i++;
++ goto retry;
++ } else {
++ (*slot) = &table[j];
++ event_slot_ref(*slot);
++ return table_idx * EVENT_EPOLL_SLOTS + j;
++ }
+ }
+
+ static int
+ event_slot_alloc(struct event_pool *event_pool, int fd,
+- char notify_poller_death)
++ char notify_poller_death, struct event_slot_epoll **slot)
+ {
+ int idx = -1;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+- idx = __event_slot_alloc(event_pool, fd, notify_poller_death);
++ idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot);
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+@@ -153,6 +174,7 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx)
+ int offset = 0;
+ struct event_slot_epoll *table = NULL;
+ struct event_slot_epoll *slot = NULL;
++ int fd = -1;
+
+ table_idx = idx / EVENT_EPOLL_SLOTS;
+ offset = idx % EVENT_EPOLL_SLOTS;
+@@ -164,11 +186,13 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx)
+ slot = &table[offset];
+ slot->gen++;
+
++ fd = slot->fd;
+ slot->fd = -1;
+ slot->handled_error = 0;
+ slot->in_handler = 0;
+ list_del_init(&slot->poller_death);
+- event_pool->slots_used[table_idx]--;
++ if (fd != -1)
++ event_pool->slots_used[table_idx]--;
+
+ return;
+ }
+@@ -185,15 +209,6 @@ event_slot_dealloc(struct event_pool *event_pool, int idx)
+ return;
+ }
+
+-static int
+-event_slot_ref(struct event_slot_epoll *slot)
+-{
+- if (!slot)
+- return -1;
+-
+- return GF_ATOMIC_INC(slot->ref);
+-}
+-
+ static struct event_slot_epoll *
+ event_slot_get(struct event_pool *event_pool, int idx)
+ {
+@@ -379,20 +394,13 @@ event_register_epoll(struct event_pool *event_pool, int fd,
+ if (destroy == 1)
+ goto out;
+
+- idx = event_slot_alloc(event_pool, fd, notify_poller_death);
++ idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot);
+ if (idx == -1) {
+ gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
+ "could not find slot for fd=%d", fd);
+ return -1;
+ }
+
+- slot = event_slot_get(event_pool, idx);
+- if (!slot) {
+- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
+- "could not find slot for fd=%d idx=%d", fd, idx);
+- return -1;
+- }
+-
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+--
+1.8.3.1
+
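The race the patch closes: __event_slot_alloc() used to return only an index while holding the pool mutex, and the caller then looked the slot up and took its reference after the lock was dropped, leaving a window in which another thread could deallocate or recycle the slot. The fix hands back the slot pointer with its reference already taken inside the critical section. A condensed C sketch of that pattern, using a plain mutex and an int refcount instead of the real event-pool structures:

    #include <pthread.h>
    #include <stdio.h>

    #define SLOTS 4

    struct slot {
        int fd;  /* -1 when free */
        int ref;
    };

    static struct slot table[SLOTS] = {
        {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}};
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Allocate a slot AND take the first reference inside a single
     * critical section, so no other thread can observe and free the
     * slot between allocation and ref. */
    static int slot_alloc(int fd, struct slot **out)
    {
        int i, idx = -1;

        pthread_mutex_lock(&lock);
        for (i = 0; i < SLOTS; i++) {
            if (table[i].fd == -1) {
                table[i].fd = fd;
                table[i].ref = 1; /* ref taken under the same lock */
                *out = &table[i];
                idx = i;
                break;
            }
        }
        pthread_mutex_unlock(&lock);
        return idx;
    }

    static void slot_unref(int idx)
    {
        pthread_mutex_lock(&lock);
        if (--table[idx].ref == 0)
            table[idx].fd = -1; /* slot is free again */
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        struct slot *s = NULL;
        int idx = slot_alloc(42, &s);

        printf("fd 42 -> slot %d (ref %d)\n", idx, s ? s->ref : 0);
        slot_unref(idx);
        return 0;
    }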
diff --git a/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch b/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch
new file mode 100644
index 0000000..48f927f
--- /dev/null
+++ b/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch
@@ -0,0 +1,54 @@
+From 17940583c4d991a568582581f68dcbf08463ccaf Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Tue, 16 Jul 2019 10:31:46 +0530
+Subject: [PATCH 334/335] dht: log getxattr failure for node-uuid at "DEBUG"
+
+There are two ways to fetch node-uuid information from dht.
+
+1 - #define GF_XATTR_LIST_NODE_UUIDS_KEY "trusted.glusterfs.list-node-uuids"
+This key is used by AFR.
+
+2 - #define GF_REBAL_FIND_LOCAL_SUBVOL "glusterfs.find-local-subvol"
+This key is used for non-afr volume type.
+
+We do two getxattr operations: first on the #1 key, followed by one on
+the #2 key if getxattr on the #1 key fails.
+
+Since the parent function "dht_init_local_subvols_and_nodeuuids" already
+logs the failure, move the log level to DEBUG in dht_find_local_subvol_cbk.
+
+>fixes: bz#1730175
+>Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14
+>Signed-off-by: Susant Palai <spalai@redhat.com>
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23053/
+
+BUG: 1727755
+Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185876
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 37952ba..d0b5287 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -4253,8 +4253,11 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
+- "getxattr err for dir");
++ if (op_errno == ENODATA)
++ gf_msg_debug(this->name, 0, "failed to get node-uuid");
++ else
++ gf_msg(this->name, GF_LOG_ERROR, op_errno,
++ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid");
+ goto post_unlock;
+ }
+
+--
+1.8.3.1
+
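The pattern here, demote only the expected errno to DEBUG and keep everything else at ERROR, is worth isolating: ENODATA just means the xattr is not set for this volume type, and the caller already logs the overall failure. A self-contained C sketch with a plain logger standing in for gf_msg()/gf_msg_debug():

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/xattr.h>

    /* Expected errnos are demoted to DEBUG so they don't spam the
     * log; anything else stays at ERROR. */
    static void log_getxattr_failure(int err)
    {
        if (err == ENODATA)
            printf("DEBUG: failed to get node-uuid\n");
        else
            fprintf(stderr, "ERROR: failed to get node-uuid: %s\n",
                    strerror(err));
    }

    int main(void)
    {
        char buf[64];

        /* Probe an xattr that normally does not exist on /tmp; the
         * miss sets errno to ENODATA on Linux. */
        if (getxattr("/tmp", "trusted.glusterfs.list-node-uuids",
                     buf, sizeof(buf)) < 0)
            log_getxattr_failure(errno);
        return 0;
    }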
diff --git a/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch b/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch
new file mode 100644
index 0000000..c3341df
--- /dev/null
+++ b/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch
@@ -0,0 +1,15991 @@
+From 39523fd6c1b4789b12c8db81f4e08a3eb0c6a65c Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <sheggodu@redhat.com>
+Date: Thu, 17 Oct 2019 13:03:56 +0530
+Subject: [PATCH 335/335] tests: RHEL8 test failure fixes for RHGS
+
+- tests/bugs/shard/bug-1272986.t
+ https://review.gluster.org/#/c/glusterfs/+/23499/
+ https://review.gluster.org/#/c/glusterfs/+/23551/
+
+- tests/basic/posix/shared-statfs.t
+ https://review.gluster.org/c/glusterfs/+/23550
+
+- tests/basic/fops-sanity.t
+ https://review.gluster.org/c/glusterfs/+/22210/
+
+- tests/bugs/transport/bug-873367.t
+- tests/features/ssl-authz.t
+- tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
+ https://review.gluster.org/#/c/glusterfs/+/23587/
+
+- remove gnfs related tests
+
+- tests/bugs/shard/unlinks-and-renames.t
+ https://review.gluster.org/#/c/glusterfs/+/23585/
+
+- tests/bugs/rpc/bug-954057.t
+- tests/bugs/glusterfs-server/bug-887145.t
+ https://review.gluster.org/#/c/glusterfs/+/23710/
+
+- tests/features/ssl-ciphers.t
+ https://review.gluster.org/#/c/glusterfs/+/23703/
+
+- tests/bugs/fuse/bug-985074.t
+ https://review.gluster.org/#/c/glusterfs/+/23734/
+
+BUG: 1762180
+Change-Id: I97b344a632b49ca9ca332a5a463756b160aee5bd
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185716
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ tests/basic/fops-sanity.c | 1862 ++--
+ tests/basic/posix/shared-statfs.t | 11 +-
+ tests/bugs/cli/bug-1320388.t | 2 +-
+ tests/bugs/fuse/bug-985074.t | 4 +-
+ tests/bugs/glusterd/quorum-value-check.t | 35 -
+ tests/bugs/glusterfs-server/bug-887145.t | 14 +-
+ tests/bugs/nfs/bug-1053579.t | 114 -
+ tests/bugs/nfs/bug-1116503.t | 47 -
+ tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t | 24 -
+ tests/bugs/nfs/bug-1157223-symlink-mounting.t | 126 -
+ tests/bugs/nfs/bug-1161092-nfs-acls.t | 39 -
+ tests/bugs/nfs/bug-1166862.t | 69 -
+ tests/bugs/nfs/bug-1210338.c | 31 -
+ tests/bugs/nfs/bug-1210338.t | 30 -
+ tests/bugs/nfs/bug-1302948.t | 13 -
+ tests/bugs/nfs/bug-847622.t | 39 -
+ tests/bugs/nfs/bug-877885.t | 39 -
+ tests/bugs/nfs/bug-904065.t | 100 -
+ tests/bugs/nfs/bug-915280.t | 54 -
+ tests/bugs/nfs/bug-970070.t | 13 -
+ tests/bugs/nfs/bug-974972.t | 41 -
+ tests/bugs/nfs/showmount-many-clients.t | 41 -
+ tests/bugs/nfs/socket-as-fifo.py | 33 -
+ tests/bugs/nfs/socket-as-fifo.t | 25 -
+ tests/bugs/nfs/subdir-trailing-slash.t | 32 -
+ tests/bugs/nfs/zero-atime.t | 33 -
+ tests/bugs/rpc/bug-954057.t | 10 +-
+ tests/bugs/shard/bug-1272986.t | 6 +-
+ tests/bugs/transport/bug-873367.t | 2 +-
+ tests/features/ssl-authz.t | 2 +-
+ tests/features/ssl-ciphers.t | 61 +-
+ tests/ssl.rc | 2 +-
+ xlators/features/shard/src/shard.c | 11754 ++++++++++----------
+ 33 files changed, 6638 insertions(+), 8070 deletions(-)
+ delete mode 100755 tests/bugs/glusterd/quorum-value-check.t
+ delete mode 100755 tests/bugs/nfs/bug-1053579.t
+ delete mode 100644 tests/bugs/nfs/bug-1116503.t
+ delete mode 100644 tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
+ delete mode 100644 tests/bugs/nfs/bug-1157223-symlink-mounting.t
+ delete mode 100644 tests/bugs/nfs/bug-1161092-nfs-acls.t
+ delete mode 100755 tests/bugs/nfs/bug-1166862.t
+ delete mode 100644 tests/bugs/nfs/bug-1210338.c
+ delete mode 100644 tests/bugs/nfs/bug-1210338.t
+ delete mode 100755 tests/bugs/nfs/bug-1302948.t
+ delete mode 100755 tests/bugs/nfs/bug-847622.t
+ delete mode 100755 tests/bugs/nfs/bug-877885.t
+ delete mode 100755 tests/bugs/nfs/bug-904065.t
+ delete mode 100755 tests/bugs/nfs/bug-915280.t
+ delete mode 100755 tests/bugs/nfs/bug-970070.t
+ delete mode 100755 tests/bugs/nfs/bug-974972.t
+ delete mode 100644 tests/bugs/nfs/showmount-many-clients.t
+ delete mode 100755 tests/bugs/nfs/socket-as-fifo.py
+ delete mode 100644 tests/bugs/nfs/socket-as-fifo.t
+ delete mode 100644 tests/bugs/nfs/subdir-trailing-slash.t
+ delete mode 100755 tests/bugs/nfs/zero-atime.t
+
+diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c
+index aff72d8..171d003 100644
+--- a/tests/basic/fops-sanity.c
++++ b/tests/basic/fops-sanity.c
+@@ -17,15 +17,16 @@
+
+ /* Filesystem basic sanity check, tests all (almost) fops. */
+
+-#include <stdio.h>
++#include <dirent.h>
++#include <errno.h>
+ #include <fcntl.h>
+-#include <unistd.h>
+-#include <sys/types.h>
++#include <stdio.h>
++#include <string.h>
+ #include <sys/stat.h>
++#include <sys/sysmacros.h>
++#include <sys/types.h>
+ #include <sys/xattr.h>
+-#include <errno.h>
+-#include <string.h>
+-#include <dirent.h>
++#include <unistd.h>
+
+ #ifndef linux
+ #include <sys/socket.h>
+@@ -34,904 +35,880 @@
+ #endif
+
+ /* for fd based fops after unlink */
+-int
+-fd_based_fops_1(char *filename);
++int fd_based_fops_1(char *filename);
+ /* for fd based fops before unlink */
+-int
+-fd_based_fops_2(char *filename);
++int fd_based_fops_2(char *filename);
+ /* fops based on fd after dup */
+-int
+-dup_fd_based_fops(char *filename);
++int dup_fd_based_fops(char *filename);
+ /* for fops based on path */
+-int
+-path_based_fops(char *filename);
++int path_based_fops(char *filename);
+ /* for fops which operate on directory */
+-int
+-dir_based_fops(char *filename);
++int dir_based_fops(char *filename);
+ /* for fops which operate in link files (symlinks) */
+-int
+-link_based_fops(char *filename);
++int link_based_fops(char *filename);
+ /* to test open syscall with open modes available. */
+-int
+-test_open_modes(char *filename);
++int test_open_modes(char *filename);
+ /* generic function which does open write and read. */
+-int
+-generic_open_read_write(char *filename, int flag, mode_t mode);
++int generic_open_read_write(char *filename, int flag, mode_t mode);
+
+ #define OPEN_MODE 0666
+
+-int
+-main(int argc, char *argv[])
+-{
+- int ret = -1;
+- int result = 0;
+- char filename[255] = {
+- 0,
+- };
+-
+- if (argc > 1)
+- strcpy(filename, argv[1]);
+- else
+- strcpy(filename, "temp-xattr-test-file");
+-
+- ret = fd_based_fops_1(strcat(filename, "_1"));
+- if (ret < 0) {
+- fprintf(stderr, "fd based file operation 1 failed\n");
+- result |= ret;
+- } else {
+- fprintf(stdout, "fd based file operation 1 passed\n");
+- }
+-
+- ret = fd_based_fops_2(strcat(filename, "_2"));
+- if (ret < 0) {
+- result |= ret;
+- fprintf(stderr, "fd based file operation 2 failed\n");
+- } else {
+- fprintf(stdout, "fd based file operation 2 passed\n");
+- }
+-
+- ret = dup_fd_based_fops(strcat(filename, "_3"));
+- if (ret < 0) {
+- result |= ret;
+- fprintf(stderr, "dup fd based file operation failed\n");
+- } else {
+- fprintf(stdout, "dup fd based file operation passed\n");
+- }
+-
+- ret = path_based_fops(strcat(filename, "_4"));
+- if (ret < 0) {
+- result |= ret;
+- fprintf(stderr, "path based file operation failed\n");
+- } else {
+- fprintf(stdout, "path based file operation passed\n");
+- }
+-
+- ret = dir_based_fops(strcat(filename, "_5"));
+- if (ret < 0) {
+- result |= ret;
+- fprintf(stderr, "directory based file operation failed\n");
+- } else {
+- fprintf(stdout, "directory based file operation passed\n");
+- }
+-
+- ret = link_based_fops(strcat(filename, "_5"));
+- if (ret < 0) {
+- result |= ret;
+- fprintf(stderr, "link based file operation failed\n");
+- } else {
+- fprintf(stdout, "link based file operation passed\n");
+- }
+-
+- ret = test_open_modes(strcat(filename, "_5"));
+- if (ret < 0) {
+- result |= ret;
+- fprintf(stderr, "testing modes of `open' call failed\n");
+- } else {
+- fprintf(stdout, "testing modes of `open' call passed\n");
+- }
+- return result;
++int main(int argc, char *argv[]) {
++ int ret = -1;
++ int result = 0;
++ char filename[255] = {
++ 0,
++ };
++
++ if (argc > 1)
++ strcpy(filename, argv[1]);
++ else
++ strcpy(filename, "temp-xattr-test-file");
++
++ ret = fd_based_fops_1(strcat(filename, "_1"));
++ if (ret < 0) {
++ fprintf(stderr, "fd based file operation 1 failed\n");
++ result |= ret;
++ } else {
++ fprintf(stdout, "fd based file operation 1 passed\n");
++ }
++
++ ret = fd_based_fops_2(strcat(filename, "_2"));
++ if (ret < 0) {
++ result |= ret;
++ fprintf(stderr, "fd based file operation 2 failed\n");
++ } else {
++ fprintf(stdout, "fd based file operation 2 passed\n");
++ }
++
++ ret = dup_fd_based_fops(strcat(filename, "_3"));
++ if (ret < 0) {
++ result |= ret;
++ fprintf(stderr, "dup fd based file operation failed\n");
++ } else {
++ fprintf(stdout, "dup fd based file operation passed\n");
++ }
++
++ ret = path_based_fops(strcat(filename, "_4"));
++ if (ret < 0) {
++ result |= ret;
++ fprintf(stderr, "path based file operation failed\n");
++ } else {
++ fprintf(stdout, "path based file operation passed\n");
++ }
++
++ ret = dir_based_fops(strcat(filename, "_5"));
++ if (ret < 0) {
++ result |= ret;
++ fprintf(stderr, "directory based file operation failed\n");
++ } else {
++ fprintf(stdout, "directory based file operation passed\n");
++ }
++
++ ret = link_based_fops(strcat(filename, "_5"));
++ if (ret < 0) {
++ result |= ret;
++ fprintf(stderr, "link based file operation failed\n");
++ } else {
++ fprintf(stdout, "link based file operation passed\n");
++ }
++
++ ret = test_open_modes(strcat(filename, "_5"));
++ if (ret < 0) {
++ result |= ret;
++ fprintf(stderr, "testing modes of `open' call failed\n");
++ } else {
++ fprintf(stdout, "testing modes of `open' call passed\n");
++ }
++ return result;
+ }
+
+ /* Execute all possible fops on a fd which is unlinked */
+-int
+-fd_based_fops_1(char *filename)
+-{
+- int fd = 0;
+- int ret = -1;
+- int result = 0;
+- struct stat stbuf = {
+- 0,
+- };
+- char wstr[50] = {
+- 0,
+- };
+- char rstr[50] = {
+- 0,
+- };
+-
+- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+- if (fd < 0) {
+- fprintf(stderr, "open failed : %s\n", strerror(errno));
+- return ret;
+- }
+-
+- ret = unlink(filename);
+- if (ret < 0) {
+- fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- strcpy(wstr, "This is my string\n");
+- ret = write(fd, wstr, strlen(wstr));
+- if (ret <= 0) {
+- fprintf(stderr, "write failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = lseek(fd, 0, SEEK_SET);
+- if (ret < 0) {
+- fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = read(fd, rstr, strlen(wstr));
+- if (ret <= 0) {
+- fprintf(stderr, "read failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = memcmp(rstr, wstr, strlen(wstr));
+- if (ret != 0) {
+- fprintf(stderr, "read returning junk\n");
+- result |= ret;
+- }
+-
+- ret = ftruncate(fd, 0);
+- if (ret < 0) {
+- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fstat(fd, &stbuf);
+- if (ret < 0) {
+- fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fsync(fd);
+- if (ret < 0) {
+- fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fdatasync(fd);
+- if (ret < 0) {
+- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- /*
+- * These metadata operations fail at the moment because kernel doesn't
+- * pass the client fd in the operation.
+- * The following bug tracks this change.
+- * https://bugzilla.redhat.com/show_bug.cgi?id=1084422
+- * ret = fchmod (fd, 0640);
+- * if (ret < 0) {
+- * fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+- * result |= ret;
+- * }
+-
+- * ret = fchown (fd, 10001, 10001);
+- * if (ret < 0) {
+- * fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+- * result |= ret;
+- * }
+-
+- * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
+- * if (ret < 0) {
+- * fprintf (stderr, "fsetxattr failed : %s\n", strerror
+- (errno));
+- * result |= ret;
+- * }
+-
+- * ret = flistxattr (fd, NULL, 0);
+- * if (ret <= 0) {
+- * fprintf (stderr, "flistxattr failed : %s\n", strerror
+- (errno));
+- * result |= ret;
+- * }
+-
+- * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
+- * if (ret <= 0) {
+- * fprintf (stderr, "fgetxattr failed : %s\n", strerror
+- (errno));
+- * result |= ret;
+- * }
+-
+- * ret = fremovexattr (fd, "trusted.xattr-test");
+- * if (ret < 0) {
+- * fprintf (stderr, "fremovexattr failed : %s\n", strerror
+- (errno));
+- * result |= ret;
+- * }
+- */
+-
+- if (fd)
+- close(fd);
+- return result;
++int fd_based_fops_1(char *filename) {
++ int fd = 0;
++ int ret = -1;
++ int result = 0;
++ struct stat stbuf = {
++ 0,
++ };
++ char wstr[50] = {
++ 0,
++ };
++ char rstr[50] = {
++ 0,
++ };
++
++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
++ if (fd < 0) {
++ fprintf(stderr, "open failed : %s\n", strerror(errno));
++ return ret;
++ }
++
++ ret = unlink(filename);
++ if (ret < 0) {
++ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ strcpy(wstr, "This is my string\n");
++ ret = write(fd, wstr, strlen(wstr));
++ if (ret <= 0) {
++ fprintf(stderr, "write failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = lseek(fd, 0, SEEK_SET);
++ if (ret < 0) {
++ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = read(fd, rstr, strlen(wstr));
++ if (ret <= 0) {
++ fprintf(stderr, "read failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = memcmp(rstr, wstr, strlen(wstr));
++ if (ret != 0) {
++ fprintf(stderr, "read returning junk\n");
++ result |= ret;
++ }
++
++ ret = ftruncate(fd, 0);
++ if (ret < 0) {
++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fstat(fd, &stbuf);
++ if (ret < 0) {
++ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fsync(fd);
++ if (ret < 0) {
++ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fdatasync(fd);
++ if (ret < 0) {
++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ /*
++ * These metadata operations fail at the moment because kernel doesn't
++ * pass the client fd in the operation.
++ * The following bug tracks this change.
++ * https://bugzilla.redhat.com/show_bug.cgi?id=1084422
++ * ret = fchmod (fd, 0640);
++ * if (ret < 0) {
++ * fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
++ * result |= ret;
++ * }
++
++ * ret = fchown (fd, 10001, 10001);
++ * if (ret < 0) {
++ * fprintf (stderr, "fchown failed : %s\n", strerror (errno));
++ * result |= ret;
++ * }
++
++ * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
++ * if (ret < 0) {
++ * fprintf (stderr, "fsetxattr failed : %s\n", strerror
++ (errno));
++ * result |= ret;
++ * }
++
++ * ret = flistxattr (fd, NULL, 0);
++ * if (ret <= 0) {
++ * fprintf (stderr, "flistxattr failed : %s\n", strerror
++ (errno));
++ * result |= ret;
++ * }
++
++ * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
++ * if (ret <= 0) {
++ * fprintf (stderr, "fgetxattr failed : %s\n", strerror
++ (errno));
++ * result |= ret;
++ * }
++
++ * ret = fremovexattr (fd, "trusted.xattr-test");
++ * if (ret < 0) {
++ * fprintf (stderr, "fremovexattr failed : %s\n", strerror
++ (errno));
++ * result |= ret;
++ * }
++ */
++
++ if (fd)
++ close(fd);
++ return result;
+ }
+
+-int
+-fd_based_fops_2(char *filename)
+-{
+- int fd = 0;
+- int ret = -1;
+- int result = 0;
+- struct stat stbuf = {
+- 0,
+- };
+- char wstr[50] = {
+- 0,
+- };
+- char rstr[50] = {
+- 0,
+- };
+-
+- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+- if (fd < 0) {
+- fprintf(stderr, "open failed : %s\n", strerror(errno));
+- return ret;
+- }
+-
+- ret = ftruncate(fd, 0);
+- if (ret < 0) {
+- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- strcpy(wstr, "This is my second string\n");
+- ret = write(fd, wstr, strlen(wstr));
+- if (ret < 0) {
+- fprintf(stderr, "write failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- lseek(fd, 0, SEEK_SET);
+- if (ret < 0) {
+- fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = read(fd, rstr, strlen(wstr));
+- if (ret <= 0) {
+- fprintf(stderr, "read failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = memcmp(rstr, wstr, strlen(wstr));
+- if (ret != 0) {
+- fprintf(stderr, "read returning junk\n");
+- result |= ret;
+- }
+-
+- ret = fstat(fd, &stbuf);
+- if (ret < 0) {
+- fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fchmod(fd, 0640);
+- if (ret < 0) {
+- fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fchown(fd, 10001, 10001);
+- if (ret < 0) {
+- fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fsync(fd);
+- if (ret < 0) {
+- fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
+- if (ret < 0) {
+- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fdatasync(fd);
+- if (ret < 0) {
+- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = flistxattr(fd, NULL, 0);
+- if (ret <= 0) {
+- fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
+- if (ret <= 0) {
+- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fremovexattr(fd, "trusted.xattr-test");
+- if (ret < 0) {
+- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- if (fd)
+- close(fd);
+- unlink(filename);
++int fd_based_fops_2(char *filename) {
++ int fd = 0;
++ int ret = -1;
++ int result = 0;
++ struct stat stbuf = {
++ 0,
++ };
++ char wstr[50] = {
++ 0,
++ };
++ char rstr[50] = {
++ 0,
++ };
++
++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
++ if (fd < 0) {
++ fprintf(stderr, "open failed : %s\n", strerror(errno));
++ return ret;
++ }
++
++ ret = ftruncate(fd, 0);
++ if (ret < 0) {
++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ strcpy(wstr, "This is my second string\n");
++ ret = write(fd, wstr, strlen(wstr));
++ if (ret < 0) {
++ fprintf(stderr, "write failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ lseek(fd, 0, SEEK_SET);
++ if (ret < 0) {
++ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = read(fd, rstr, strlen(wstr));
++ if (ret <= 0) {
++ fprintf(stderr, "read failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = memcmp(rstr, wstr, strlen(wstr));
++ if (ret != 0) {
++ fprintf(stderr, "read returning junk\n");
++ result |= ret;
++ }
++
++ ret = fstat(fd, &stbuf);
++ if (ret < 0) {
++ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fchmod(fd, 0640);
++ if (ret < 0) {
++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fchown(fd, 10001, 10001);
++ if (ret < 0) {
++ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fsync(fd);
++ if (ret < 0) {
++ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
++ if (ret < 0) {
++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fdatasync(fd);
++ if (ret < 0) {
++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = flistxattr(fd, NULL, 0);
++ if (ret <= 0) {
++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
++ if (ret <= 0) {
++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fremovexattr(fd, "trusted.xattr-test");
++ if (ret < 0) {
++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ if (fd)
++ close(fd);
++ unlink(filename);
+
+- return result;
++ return result;
+ }
+
+-int
+-path_based_fops(char *filename)
+-{
+- int ret = -1;
+- int fd = 0;
+- int result = 0;
+- struct stat stbuf = {
+- 0,
+- };
+- char newfilename[255] = {
+- 0,
+- };
+- char *hardlink = "linkfile-hard.txt";
+- char *symlnk = "linkfile-soft.txt";
+- char buf[1024] = {
+- 0,
+- };
+-
+- fd = creat(filename, 0644);
+- if (fd < 0) {
+- fprintf(stderr, "creat failed: %s\n", strerror(errno));
+- return ret;
+- }
+-
+- ret = truncate(filename, 0);
+- if (ret < 0) {
+- fprintf(stderr, "truncate failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = stat(filename, &stbuf);
+- if (ret < 0) {
+- fprintf(stderr, "stat failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = chmod(filename, 0640);
+- if (ret < 0) {
+- fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = chown(filename, 10001, 10001);
+- if (ret < 0) {
+- fprintf(stderr, "chown failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
+- if (ret < 0) {
+- fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = listxattr(filename, NULL, 0);
+- if (ret <= 0) {
+- ret = -1;
+- fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = getxattr(filename, "trusted.xattr-test", NULL, 0);
+- if (ret <= 0) {
+- fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = removexattr(filename, "trusted.xattr-test");
+- if (ret < 0) {
+- fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = access(filename, R_OK | W_OK);
+- if (ret < 0) {
+- fprintf(stderr, "access failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = link(filename, hardlink);
+- if (ret < 0) {
+- fprintf(stderr, "link failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+- unlink(hardlink);
+-
+- ret = symlink(filename, symlnk);
+- if (ret < 0) {
+- fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = readlink(symlnk, buf, sizeof(buf));
+- if (ret < 0) {
+- fprintf(stderr, "readlink failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+- unlink(symlnk);
+-
+- /* Create a character special file */
+- ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3));
+- if (ret < 0) {
+- fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+- unlink("cspecial");
+-
+- ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5));
+- if (ret < 0) {
+- fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+- unlink("bspecial");
++int path_based_fops(char *filename) {
++ int ret = -1;
++ int fd = 0;
++ int result = 0;
++ struct stat stbuf = {
++ 0,
++ };
++ char newfilename[255] = {
++ 0,
++ };
++ char *hardlink = "linkfile-hard.txt";
++ char *symlnk = "linkfile-soft.txt";
++ char buf[1024] = {
++ 0,
++ };
++
++ fd = creat(filename, 0644);
++ if (fd < 0) {
++ fprintf(stderr, "creat failed: %s\n", strerror(errno));
++ return ret;
++ }
++
++ ret = truncate(filename, 0);
++ if (ret < 0) {
++ fprintf(stderr, "truncate failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = stat(filename, &stbuf);
++ if (ret < 0) {
++ fprintf(stderr, "stat failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = chmod(filename, 0640);
++ if (ret < 0) {
++ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = chown(filename, 10001, 10001);
++ if (ret < 0) {
++ fprintf(stderr, "chown failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
++ if (ret < 0) {
++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = listxattr(filename, NULL, 0);
++ if (ret <= 0) {
++ ret = -1;
++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = getxattr(filename, "trusted.xattr-test", NULL, 0);
++ if (ret <= 0) {
++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = removexattr(filename, "trusted.xattr-test");
++ if (ret < 0) {
++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = access(filename, R_OK | W_OK);
++ if (ret < 0) {
++ fprintf(stderr, "access failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = link(filename, hardlink);
++ if (ret < 0) {
++ fprintf(stderr, "link failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++ unlink(hardlink);
++
++ ret = symlink(filename, symlnk);
++ if (ret < 0) {
++ fprintf(stderr, "symlink failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = readlink(symlnk, buf, sizeof(buf));
++ if (ret < 0) {
++ fprintf(stderr, "readlink failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++ unlink(symlnk);
++
++ /* Create a character special file */
++ ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3));
++ if (ret < 0) {
++ fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++ unlink("cspecial");
++
++ ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5));
++ if (ret < 0) {
++ fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++ unlink("bspecial");
+
+ #ifdef linux
+- ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0);
++ ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0);
+ #else
+- ret = mkfifo("fifo", 0);
++ ret = mkfifo("fifo", 0);
+ #endif
+- if (ret < 0) {
+- fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+- unlink("fifo");
++ if (ret < 0) {
++ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++ unlink("fifo");
+
+ #ifdef linux
+- ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0);
+- if (ret < 0) {
+- fprintf(stderr, "sock mknod failed: %s\n", strerror(errno));
+- result |= ret;
+- }
++ ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0);
++ if (ret < 0) {
++ fprintf(stderr, "sock mknod failed: %s\n", strerror(errno));
++ result |= ret;
++ }
+ #else
+- {
+- int s;
+- const char *pathname = "sock";
+- struct sockaddr_un addr;
+-
+- s = socket(PF_LOCAL, SOCK_STREAM, 0);
+- memset(&addr, 0, sizeof(addr));
+- strncpy(addr.sun_path, pathname, sizeof(addr.sun_path));
+- ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr));
+- if (ret < 0) {
+- fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+- close(s);
+- }
+-#endif
+- unlink("sock");
++ {
++ int s;
++ const char *pathname = "sock";
++ struct sockaddr_un addr;
+
+- strcpy(newfilename, filename);
+- strcat(newfilename, "_new");
+- ret = rename(filename, newfilename);
++ s = socket(PF_LOCAL, SOCK_STREAM, 0);
++ memset(&addr, 0, sizeof(addr));
++ strncpy(addr.sun_path, pathname, sizeof(addr.sun_path));
++ ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr));
+ if (ret < 0) {
+- fprintf(stderr, "rename failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+- unlink(newfilename);
+-
+- if (fd)
+- close(fd);
+-
+- unlink(filename);
+- return result;
+-}
+-
+-int
+-dup_fd_based_fops(char *filename)
+-{
+- int fd = 0;
+- int result = 0;
+- int newfd = 0;
+- int ret = -1;
+- struct stat stbuf = {
+- 0,
+- };
+- char wstr[50] = {
+- 0,
+- };
+- char rstr[50] = {
+- 0,
+- };
+-
+- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+- if (fd < 0) {
+- fprintf(stderr, "open failed : %s\n", strerror(errno));
+- return ret;
+- }
+-
+- newfd = dup(fd);
+- if (newfd < 0) {
+- fprintf(stderr, "dup failed: %s\n", strerror(errno));
+- result |= ret;
++ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
++ result |= ret;
+ }
+-
++ close(s);
++ }
++#endif
++ unlink("sock");
++
++ strcpy(newfilename, filename);
++ strcat(newfilename, "_new");
++ ret = rename(filename, newfilename);
++ if (ret < 0) {
++ fprintf(stderr, "rename failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++ unlink(newfilename);
++
++ if (fd)
+ close(fd);
+
+- strcpy(wstr, "This is my string\n");
+- ret = write(newfd, wstr, strlen(wstr));
+- if (ret <= 0) {
+- fprintf(stderr, "write failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = lseek(newfd, 0, SEEK_SET);
+- if (ret < 0) {
+- fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = read(newfd, rstr, strlen(wstr));
+- if (ret <= 0) {
+- fprintf(stderr, "read failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = memcmp(rstr, wstr, strlen(wstr));
+- if (ret != 0) {
+- fprintf(stderr, "read returning junk\n");
+- result |= ret;
+- }
+-
+- ret = ftruncate(newfd, 0);
+- if (ret < 0) {
+- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fstat(newfd, &stbuf);
+- if (ret < 0) {
+- fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fchmod(newfd, 0640);
+- if (ret < 0) {
+- fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fchown(newfd, 10001, 10001);
+- if (ret < 0) {
+- fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fsync(newfd);
+- if (ret < 0) {
+- fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0);
+- if (ret < 0) {
+- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fdatasync(newfd);
+- if (ret < 0) {
+- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = flistxattr(newfd, NULL, 0);
+- if (ret <= 0) {
+- fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0);
+- if (ret <= 0) {
+- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = fremovexattr(newfd, "trusted.xattr-test");
+- if (ret < 0) {
+- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- if (newfd)
+- close(newfd);
+- ret = unlink(filename);
+- if (ret < 0) {
+- fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+- result |= ret;
+- }
+- return result;
++ unlink(filename);
++ return result;
+ }
+
+-int
+-dir_based_fops(char *dirname)
+-{
+- int ret = -1;
+- int result = 0;
+- DIR *dp = NULL;
+- char buff[255] = {
+- 0,
+- };
+- struct dirent *dbuff = {
+- 0,
+- };
+- struct stat stbuff = {
+- 0,
+- };
+- char newdname[255] = {
+- 0,
+- };
+- char *cwd = NULL;
+-
+- ret = mkdir(dirname, 0755);
+- if (ret < 0) {
+- fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
+- return ret;
+- }
+-
+- dp = opendir(dirname);
+- if (dp == NULL) {
+- fprintf(stderr, "opendir failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- dbuff = readdir(dp);
+- if (NULL == dbuff) {
+- fprintf(stderr, "readdir failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = closedir(dp);
+- if (ret < 0) {
+- fprintf(stderr, "closedir failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = stat(dirname, &stbuff);
+- if (ret < 0) {
+- fprintf(stderr, "stat failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = chmod(dirname, 0744);
+- if (ret < 0) {
+- fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = chown(dirname, 10001, 10001);
+- if (ret < 0) {
+- fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0);
+- if (ret < 0) {
+- fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = listxattr(dirname, NULL, 0);
+- if (ret <= 0) {
+- ret = -1;
+- fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = getxattr(dirname, "trusted.xattr-test", NULL, 0);
+- if (ret <= 0) {
+- ret = -1;
+- fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = removexattr(dirname, "trusted.xattr-test");
+- if (ret < 0) {
+- fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- strcpy(newdname, dirname);
+- strcat(newdname, "/../");
+- ret = chdir(newdname);
+- if (ret < 0) {
+- fprintf(stderr, "chdir failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- cwd = getcwd(buff, 255);
+- if (NULL == cwd) {
+- fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- strcpy(newdname, dirname);
+- strcat(newdname, "new");
+- ret = rename(dirname, newdname);
+- if (ret < 0) {
+- fprintf(stderr, "rename failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = rmdir(newdname);
+- if (ret < 0) {
+- fprintf(stderr, "rmdir failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- rmdir(dirname);
+- return result;
++int dup_fd_based_fops(char *filename) {
++ int fd = 0;
++ int result = 0;
++ int newfd = 0;
++ int ret = -1;
++ struct stat stbuf = {
++ 0,
++ };
++ char wstr[50] = {
++ 0,
++ };
++ char rstr[50] = {
++ 0,
++ };
++
++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
++ if (fd < 0) {
++ fprintf(stderr, "open failed : %s\n", strerror(errno));
++ return ret;
++ }
++
++ newfd = dup(fd);
++ if (newfd < 0) {
++ fprintf(stderr, "dup failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ close(fd);
++
++ strcpy(wstr, "This is my string\n");
++ ret = write(newfd, wstr, strlen(wstr));
++ if (ret <= 0) {
++ fprintf(stderr, "write failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = lseek(newfd, 0, SEEK_SET);
++ if (ret < 0) {
++ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = read(newfd, rstr, strlen(wstr));
++ if (ret <= 0) {
++ fprintf(stderr, "read failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = memcmp(rstr, wstr, strlen(wstr));
++ if (ret != 0) {
++ fprintf(stderr, "read returning junk\n");
++ result |= ret;
++ }
++
++ ret = ftruncate(newfd, 0);
++ if (ret < 0) {
++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fstat(newfd, &stbuf);
++ if (ret < 0) {
++ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fchmod(newfd, 0640);
++ if (ret < 0) {
++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fchown(newfd, 10001, 10001);
++ if (ret < 0) {
++ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fsync(newfd);
++ if (ret < 0) {
++ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0);
++ if (ret < 0) {
++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fdatasync(newfd);
++ if (ret < 0) {
++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = flistxattr(newfd, NULL, 0);
++ if (ret <= 0) {
++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0);
++ if (ret <= 0) {
++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = fremovexattr(newfd, "trusted.xattr-test");
++ if (ret < 0) {
++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ if (newfd)
++ close(newfd);
++ ret = unlink(filename);
++ if (ret < 0) {
++ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
++ result |= ret;
++ }
++ return result;
+ }
+
+-int
+-link_based_fops(char *filename)
+-{
+- int ret = -1;
+- int result = 0;
+- int fd = 0;
+- char newname[255] = {
+- 0,
+- };
+- char linkname[255] = {
+- 0,
+- };
+- struct stat lstbuf = {
+- 0,
+- };
+-
+- fd = creat(filename, 0644);
+- if (fd < 0) {
+- fd = 0;
+- fprintf(stderr, "creat failed: %s\n", strerror(errno));
+- return ret;
+- }
+-
+- strcpy(newname, filename);
+- strcat(newname, "_hlink");
+- ret = link(filename, newname);
+- if (ret < 0) {
+- fprintf(stderr, "link failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = unlink(filename);
+- if (ret < 0) {
+- fprintf(stderr, "unlink failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- strcpy(linkname, filename);
+- strcat(linkname, "_slink");
+- ret = symlink(newname, linkname);
+- if (ret < 0) {
+- fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = lstat(linkname, &lstbuf);
+- if (ret < 0) {
+- fprintf(stderr, "lstbuf failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = lchown(linkname, 10001, 10001);
+- if (ret < 0) {
+- fprintf(stderr, "lchown failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0);
+- if (ret < 0) {
+- fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = llistxattr(linkname, NULL, 0);
+- if (ret < 0) {
+- ret = -1;
+- fprintf(stderr, "llistxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0);
+- if (ret < 0) {
+- ret = -1;
+- fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- ret = lremovexattr(linkname, "trusted.lxattr-test");
+- if (ret < 0) {
+- fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno));
+- result |= ret;
+- }
+-
+- if (fd)
+- close(fd);
+- unlink(linkname);
+- unlink(newname);
+- return result;
++int dir_based_fops(char *dirname) {
++ int ret = -1;
++ int result = 0;
++ DIR *dp = NULL;
++ char buff[255] = {
++ 0,
++ };
++ struct dirent *dbuff = {
++ 0,
++ };
++ struct stat stbuff = {
++ 0,
++ };
++ char newdname[255] = {
++ 0,
++ };
++ char *cwd = NULL;
++
++ ret = mkdir(dirname, 0755);
++ if (ret < 0) {
++ fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
++ return ret;
++ }
++
++ dp = opendir(dirname);
++ if (dp == NULL) {
++ fprintf(stderr, "opendir failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ dbuff = readdir(dp);
++ if (NULL == dbuff) {
++ fprintf(stderr, "readdir failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = closedir(dp);
++ if (ret < 0) {
++ fprintf(stderr, "closedir failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = stat(dirname, &stbuff);
++ if (ret < 0) {
++ fprintf(stderr, "stat failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = chmod(dirname, 0744);
++ if (ret < 0) {
++ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = chown(dirname, 10001, 10001);
++ if (ret < 0) {
++ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0);
++ if (ret < 0) {
++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = listxattr(dirname, NULL, 0);
++ if (ret <= 0) {
++ ret = -1;
++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0);
++ if (ret <= 0) {
++ ret = -1;
++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = removexattr(dirname, "trusted.xattr-test");
++ if (ret < 0) {
++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ strcpy(newdname, dirname);
++ strcat(newdname, "/../");
++ ret = chdir(newdname);
++ if (ret < 0) {
++ fprintf(stderr, "chdir failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ cwd = getcwd(buff, 255);
++ if (NULL == cwd) {
++ fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ strcpy(newdname, dirname);
++ strcat(newdname, "new");
++ ret = rename(dirname, newdname);
++ if (ret < 0) {
++ fprintf(stderr, "rename failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = rmdir(newdname);
++ if (ret < 0) {
++ fprintf(stderr, "rmdir failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ rmdir(dirname);
++ return result;
+ }
+
+-int
+-test_open_modes(char *filename)
+-{
+- int ret = -1;
+- int result = 0;
+-
+- ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE);
+- if (ret != 0) {
+- fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n");
+- result |= ret;
+- }
+-
+- ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE);
+- if (ret != 0) {
+- fprintf(stderr, "flag O_CREAT|O_RDWR failed\n");
+- result |= ret;
+- }
+-
+- ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE);
+- if (ret != 0) {
+- fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n");
+- result |= ret;
+- }
+-
+- ret = creat(filename, 0644);
+- close(ret);
+- ret = generic_open_read_write(filename, O_WRONLY, 0);
+- if (ret != 0) {
+- fprintf(stderr, "flag O_WRONLY failed\n");
+- result |= ret;
+- }
+-
+- ret = creat(filename, 0644);
+- close(ret);
+- ret = generic_open_read_write(filename, O_RDWR, 0);
+- if (0 != ret) {
+- fprintf(stderr, "flag O_RDWR failed\n");
+- result |= ret;
+- }
+-
+- ret = creat(filename, 0644);
+- close(ret);
+- ret = generic_open_read_write(filename, O_RDONLY, 0);
+- if (0 != ret) {
+- fprintf(stderr, "flag O_RDONLY failed\n");
+- result |= ret;
+- }
++int link_based_fops(char *filename) {
++ int ret = -1;
++ int result = 0;
++ int fd = 0;
++ char newname[255] = {
++ 0,
++ };
++ char linkname[255] = {
++ 0,
++ };
++ struct stat lstbuf = {
++ 0,
++ };
++
++ fd = creat(filename, 0644);
++ if (fd < 0) {
++ fd = 0;
++ fprintf(stderr, "creat failed: %s\n", strerror(errno));
++ return ret;
++ }
++
++ strcpy(newname, filename);
++ strcat(newname, "_hlink");
++ ret = link(filename, newname);
++ if (ret < 0) {
++ fprintf(stderr, "link failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = unlink(filename);
++ if (ret < 0) {
++ fprintf(stderr, "unlink failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ strcpy(linkname, filename);
++ strcat(linkname, "_slink");
++ ret = symlink(newname, linkname);
++ if (ret < 0) {
++ fprintf(stderr, "symlink failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = lstat(linkname, &lstbuf);
++ if (ret < 0) {
++ fprintf(stderr, "lstbuf failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = lchown(linkname, 10001, 10001);
++ if (ret < 0) {
++ fprintf(stderr, "lchown failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0);
++ if (ret < 0) {
++ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = llistxattr(linkname, NULL, 0);
++ if (ret < 0) {
++ ret = -1;
++ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0);
++ if (ret < 0) {
++ ret = -1;
++ fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ ret = lremovexattr(linkname, "trusted.lxattr-test");
++ if (ret < 0) {
++ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno));
++ result |= ret;
++ }
++
++ if (fd)
++ close(fd);
++ unlink(linkname);
++ unlink(newname);
++ return result;
++}
+
+- ret = creat(filename, 0644);
+- close(ret);
+- ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0);
+- if (0 != ret) {
+- fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n");
+- result |= ret;
+- }
++int test_open_modes(char *filename) {
++ int ret = -1;
++ int result = 0;
++
++ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE);
++ if (ret != 0) {
++ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n");
++ result |= ret;
++ }
++
++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE);
++ if (ret != 0) {
++ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n");
++ result |= ret;
++ }
++
++ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE);
++ if (ret != 0) {
++ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n");
++ result |= ret;
++ }
++
++ ret = creat(filename, 0644);
++ close(ret);
++ ret = generic_open_read_write(filename, O_WRONLY, 0);
++ if (ret != 0) {
++ fprintf(stderr, "flag O_WRONLY failed\n");
++ result |= ret;
++ }
++
++ ret = creat(filename, 0644);
++ close(ret);
++ ret = generic_open_read_write(filename, O_RDWR, 0);
++ if (0 != ret) {
++ fprintf(stderr, "flag O_RDWR failed\n");
++ result |= ret;
++ }
++
++ ret = creat(filename, 0644);
++ close(ret);
++ ret = generic_open_read_write(filename, O_RDONLY, 0);
++ if (0 != ret) {
++ fprintf(stderr, "flag O_RDONLY failed\n");
++ result |= ret;
++ }
++
++ ret = creat(filename, 0644);
++ close(ret);
++ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0);
++ if (0 != ret) {
++ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n");
++ result |= ret;
++ }
+
+ #if 0 /* undefined behaviour, unable to reliably test */
+ ret = creat (filename, 0644);
+@@ -943,90 +920,87 @@ test_open_modes(char *filename)
+ }
+ #endif
+
+- ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC,
+- OPEN_MODE);
+- if (0 != ret) {
+- fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
+- result |= ret;
+- }
+-
+- ret = creat(filename, 0644);
+- close(ret);
+- ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE);
+- if (0 != ret) {
+- fprintf(stderr, "flag O_CREAT|O_EXCL failed\n");
+- result |= ret;
+- }
+-
+- return result;
++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, OPEN_MODE);
++ if (0 != ret) {
++ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
++ result |= ret;
++ }
++
++ ret = creat(filename, 0644);
++ close(ret);
++ ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE);
++ if (0 != ret) {
++ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n");
++ result |= ret;
++ }
++
++ return result;
+ }
+
+-int
+-generic_open_read_write(char *filename, int flag, mode_t mode)
+-{
+- int fd = 0;
+- int ret = -1;
+- char wstring[50] = {
+- 0,
+- };
+- char rstring[50] = {
+- 0,
+- };
+-
+- fd = open(filename, flag, mode);
+- if (fd < 0) {
+- if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) {
+- unlink(filename);
+- return 0;
+- } else {
+- fprintf(stderr, "open failed: %s\n", strerror(errno));
+- return -1;
+- }
+- }
+-
+- strcpy(wstring, "My string to write\n");
+- ret = write(fd, wstring, strlen(wstring));
+- if (ret <= 0) {
+- if (errno != EBADF) {
+- fprintf(stderr, "write failed: %s\n", strerror(errno));
+- close(fd);
+- unlink(filename);
+- return ret;
+- }
+- }
+-
+- ret = lseek(fd, 0, SEEK_SET);
+- if (ret < 0) {
+- close(fd);
+- unlink(filename);
+- return ret;
++int generic_open_read_write(char *filename, int flag, mode_t mode) {
++ int fd = 0;
++ int ret = -1;
++ char wstring[50] = {
++ 0,
++ };
++ char rstring[50] = {
++ 0,
++ };
++
++ fd = open(filename, flag, mode);
++ if (fd < 0) {
++ if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) {
++ unlink(filename);
++ return 0;
++ } else {
++ fprintf(stderr, "open failed: %s\n", strerror(errno));
++ return -1;
+ }
++ }
+
+- ret = read(fd, rstring, strlen(wstring));
+- if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY &&
+- flag != (O_TRUNC | O_WRONLY)) {
+- close(fd);
+- unlink(filename);
+- return ret;
++ strcpy(wstring, "My string to write\n");
++ ret = write(fd, wstring, strlen(wstring));
++ if (ret <= 0) {
++ if (errno != EBADF) {
++ fprintf(stderr, "write failed: %s\n", strerror(errno));
++ close(fd);
++ unlink(filename);
++ return ret;
+ }
++ }
+
+- /* Compare the rstring with wstring. But we do not want to return
+- * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
+- * O_TRUNC|O_RDONLY. Because in that case we are not writing
+- * anything to the file.*/
+-
+- ret = memcmp(wstring, rstring, strlen(wstring));
+- if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY &&
+- flag != (O_CREAT | O_WRONLY) &&
+- !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY ||
+- flag == (O_TRUNC | O_RDONLY))) {
+- fprintf(stderr, "read is returning junk\n");
+- close(fd);
+- unlink(filename);
+- return ret;
+- }
++ ret = lseek(fd, 0, SEEK_SET);
++ if (ret < 0) {
++ close(fd);
++ unlink(filename);
++ return ret;
++ }
+
++ ret = read(fd, rstring, strlen(wstring));
++ if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY &&
++ flag != (O_TRUNC | O_WRONLY)) {
++ close(fd);
++ unlink(filename);
++ return ret;
++ }
++
++ /* Compare the rstring with wstring. But we do not want to return
++ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
++ * O_TRUNC|O_RDONLY. Because in that case we are not writing
++ * anything to the file.*/
++
++ ret = memcmp(wstring, rstring, strlen(wstring));
++ if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY &&
++ flag != (O_CREAT | O_WRONLY) &&
++ !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY ||
++ flag == (O_TRUNC | O_RDONLY))) {
++ fprintf(stderr, "read is returning junk\n");
+ close(fd);
+ unlink(filename);
+- return 0;
++ return ret;
++ }
++
++ close(fd);
++ unlink(filename);
++ return 0;
+ }
+diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t
+index 3343956..0e4a1bb 100644
+--- a/tests/basic/posix/shared-statfs.t
++++ b/tests/basic/posix/shared-statfs.t
+@@ -20,15 +20,18 @@ TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+ TEST MOUNT_LOOP $LO1 $B0/${V0}1
+ TEST MOUNT_LOOP $LO2 $B0/${V0}2
+
++total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}')
++#Account for rounding error
++brick_blocks_two_percent_less=$((total_brick_blocks*98/100))
+ # Create a subdir in mountpoint and use that for volume.
+ TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1;
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
+ TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
+ # Keeping the size less than 200M mainly because XFS will use
+ # some storage in brick to keep its own metadata.
+-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
+
+
+ TEST force_umount $M0
+@@ -41,8 +44,8 @@ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
+ TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
+-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
+
+ TEST force_umount $M0
+ TEST $CLI volume stop $V0
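The change above stops asserting a hard-coded lower bound of 194000 blocks and instead derives it from the bricks themselves, with 2% slack for filesystem metadata and rounding. The same check sketched in C with statvfs(); the brick and mount paths are placeholders, and only the lower bound is mirrored:

    #include <stdio.h>
    #include <sys/statvfs.h>

    int main(void)
    {
        struct statvfs b1, b2, m;

        if (statvfs("/bricks/b1", &b1) || statvfs("/bricks/b2", &b2) ||
            statvfs("/mnt/glusterfs", &m))
            return 1;

        /* Normalize to 1K blocks, like `df -P`. */
        unsigned long long brick_kb =
            ((unsigned long long)b1.f_blocks * b1.f_frsize +
             (unsigned long long)b2.f_blocks * b2.f_frsize) / 1024;
        unsigned long long mount_kb =
            (unsigned long long)m.f_blocks * m.f_frsize / 1024;

        /* Allow 2% slack, as brick_blocks_two_percent_less does. */
        if (mount_kb >= brick_kb * 98 / 100)
            printf("mount capacity matches brick sum: %llu KB\n", mount_kb);
        else
            printf("mount smaller than expected: %llu < %llu KB\n",
                   mount_kb, brick_kb);
        return 0;
    }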
+diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
+index 8e5d77b..e719fc5 100755
+--- a/tests/bugs/cli/bug-1320388.t
++++ b/tests/bugs/cli/bug-1320388.t
+@@ -21,7 +21,7 @@ cleanup;
+ rm -f $SSL_BASE/glusterfs.*
+ touch "$GLUSTERD_WORKDIR"/secure-access
+
+-TEST openssl genrsa -out $SSL_KEY 3072
++TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+
+diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t
+index d10fd9f..26d196e 100644
+--- a/tests/bugs/fuse/bug-985074.t
++++ b/tests/bugs/fuse/bug-985074.t
+@@ -30,7 +30,7 @@ TEST glusterd
+
+ TEST $CLI volume create $V0 $H0:$B0/$V0
+ TEST $CLI volume start $V0
+-TEST $CLI volume set $V0 md-cache-timeout 3
++TEST $CLI volume set $V0 performance.stat-prefetch off
+
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0
+@@ -40,8 +40,6 @@ TEST ln $M0/file $M0/file.link
+ TEST ls -ali $M0 $M1
+ TEST rm -f $M1/file.link
+ TEST ls -ali $M0 $M1
+-# expire the md-cache timeout
+-sleep 3
+ TEST mv $M0/file $M0/file.link
+ TEST stat $M0/file.link
+ TEST ! stat $M0/file
+diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/bugs/glusterd/quorum-value-check.t
+deleted file mode 100755
+index aaf6362..0000000
+--- a/tests/bugs/glusterd/quorum-value-check.t
++++ /dev/null
+@@ -1,35 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-
+-function check_quorum_nfs() {
+- local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')"
+- local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')"
+-
+- if [ $qnfs = $qinfo ]; then
+- echo "Y"
+- else
+- echo "N"
+- fi
+-}
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+-TEST $CLI volume set $V0 nfs.disable off
+-TEST $CLI volume set $V0 performance.write-behind off
+-TEST $CLI volume set $V0 cluster.self-heal-daemon off
+-TEST $CLI volume set $V0 cluster.quorum-type fixed
+-TEST $CLI volume start $V0
+-
+-TEST $CLI volume set $V0 cluster.quorum-count 1
+-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+-TEST $CLI volume set $V0 cluster.quorum-count 2
+-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+-TEST $CLI volume set $V0 cluster.quorum-count 3
+-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+-
+-cleanup;
+diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t
+index 82f7cca..f65b1bd 100755
+--- a/tests/bugs/glusterfs-server/bug-887145.t
++++ b/tests/bugs/glusterfs-server/bug-887145.t
+@@ -29,7 +29,15 @@ chmod 600 $M0/file;
+
+ TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+-chown -R nfsnobody:nfsnobody $M0/dir;
++grep nfsnobody /etc/passwd > /dev/null
++if [ $? -eq 1 ]; then
++usr=nobody
++grp=nobody
++else
++usr=nfsnobody
++grp=nfsnobody
++fi
++chown -R $usr:$grp $M0/dir;
+ chown -R tmp_user:tmp_user $M0/other;
+
+ TEST $CLI volume set $V0 server.root-squash on;
+@@ -38,7 +46,7 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+ # create files and directories in the root of the glusterfs and nfs mount
+ # which is owned by root and hence the right behavior is getting EACCESS
+-# as the fops are executed as nfsnobody.
++# as the fops are executed as nfsnobody/nobody.
+ touch $M0/foo 2>/dev/null;
+ TEST [ $? -ne 0 ]
+ touch $N0/foo 2>/dev/null;
+@@ -61,7 +69,7 @@ cat $N0/passwd 1>/dev/null;
+ TEST [ $? -eq 0 ]
+
+ # create files and directories should succeed as the fops are being executed
+-# inside the directory owned by nfsnobody
++# inside the directory owned by nfsnobody/nobody
+ TEST touch $M0/dir/file;
+ TEST touch $N0/dir/foo;
+ TEST mkdir $M0/dir/new;
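The hunk above makes the test portable to distributions that ship only a nobody account: it probes /etc/passwd for nfsnobody and falls back to nobody. The equivalent lookup in C, a sketch using getpwnam():

    #include <stdio.h>
    #include <pwd.h>

    int main(void)
    {
        /* Prefer nfsnobody (RHEL-style systems), else fall back. */
        const char *user = "nfsnobody";
        struct passwd *pw = getpwnam(user);

        if (pw == NULL) {
            user = "nobody";
            pw = getpwnam(user);
        }
        if (pw == NULL) {
            fprintf(stderr, "neither nfsnobody nor nobody exists\n");
            return 1;
        }
        printf("squashed user: %s (uid=%u, gid=%u)\n", user,
               (unsigned)pw->pw_uid, (unsigned)pw->pw_gid);
        return 0;
    }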
+diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t
+deleted file mode 100755
+index 2f53172..0000000
+--- a/tests/bugs/nfs/bug-1053579.t
++++ /dev/null
+@@ -1,114 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup
+-
+-# prepare the users and groups
+-NEW_USER=bug1053579
+-NEW_UID=1053579
+-NEW_GID=1053579
+-LAST_GID=1053779
+-NEW_GIDS=${NEW_GID}
+-
+-# OS-specific overrides
+-case $OSTYPE in
+-NetBSD|Darwin)
+- # only NGROUPS_MAX=16 secondary groups are supported
+- LAST_GID=1053593
+- ;;
+-FreeBSD)
+- # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups
+- ;;
+-Linux)
+- # NGROUPS_MAX=65536, we can afford 200 groups
+- ;;
+-*)
+- ;;
+-esac
+-
+-# create a user that belongs to many groups
+-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+-do
+- groupadd -o -g ${GID} ${NEW_USER}-${GID}
+- NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}"
+-done
+-TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+-
+-# preparation done, start the tests
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/${V0}1
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume set $V0 nfs.server-aux-gids on
+-TEST $CLI volume start $V0
+-
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+-
+-# mount the volume
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+-
+-# the actual test, this used to crash
+-su -m ${NEW_USER} -c "stat $N0/. > /dev/null"
+-TEST [ $? -eq 0 ]
+-
+-# create a file that only a user in a high-group can access
+-echo 'Hello World!' > $N0/README
+-chgrp ${LAST_GID} $N0/README
+-chmod 0640 $N0/README
+-
+-#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null"
+-su -m ${NEW_USER} -c "cat $N0/README"
+-ret=$?
+-
+-case $OSTYPE in
+-Linux) # Linux NFS fails with big GID
+- if [ $ret -ne 0 ] ; then
+- res="Y"
+- else
+- res="N"
+- fi
+- ;;
+-*) # Other systems should cope better
+- if [ $ret -eq 0 ] ; then
+- res="Y"
+- else
+- res="N"
+- fi
+- ;;
+-esac
+-TEST [ "x$res" = "xY" ]
+-
+-# This passes only on build.gluster.org, not reproducible on other machines?!
+-#su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null"
+-#TEST [ $? -ne 0 ]
+-
+-# enable server.manage-gids and things should work
+-TEST $CLI volume set $V0 server.manage-gids on
+-
+-su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null"
+-TEST [ $? -eq 0 ]
+-su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null"
+-TEST [ $? -eq 0 ]
+-
+-# cleanup
+-userdel --force ${NEW_USER}
+-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+-do
+- groupdel ${NEW_USER}-${GID}
+-done
+-
+-rm -f $N0/README
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+-
+-TEST $CLI volume stop $V0
+-TEST $CLI volume delete $V0
+-
+-cleanup
+diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/bugs/nfs/bug-1116503.t
+deleted file mode 100644
+index dd3998d..0000000
+--- a/tests/bugs/nfs/bug-1116503.t
++++ /dev/null
+@@ -1,47 +0,0 @@
+-#!/bin/bash
+-#
+-# Verify that mounting NFS over UDP (MOUNT service only) works.
+-#
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume set $V0 nfs.mount-udp on
+-
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-
+-TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp;
+-TEST mkdir -p $N0/foo/bar
+-TEST ls $N0/foo
+-TEST ls $N0/foo/bar
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-
+-TEST $CLI volume set $V0 nfs.addr-namelookup on
+-TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-
+-TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+-
+-cleanup;
+diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
+deleted file mode 100644
+index c360db4..0000000
+--- a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
++++ /dev/null
+@@ -1,24 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume set $V0 performance.open-behind off
+-TEST $CLI volume start $V0
+-
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-TEST mkdir -p $N0/foo
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-TEST mount_nfs $H0:/$V0/foo $N0 nolock
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-cleanup
+diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t
+deleted file mode 100644
+index dea609e..0000000
+--- a/tests/bugs/nfs/bug-1157223-symlink-mounting.t
++++ /dev/null
+@@ -1,126 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-
+-## Start and create a volume
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume info;
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0;
+-
+-## Wait for volume to register with rpc.mountd
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-
+-## Mount NFS
+-TEST mount_nfs $H0:/$V0 $N0 nolock;
+-
+-mkdir $N0/dir1;
+-mkdir $N0/dir2;
+-pushd $N0/ ;
+-
+-##link created using relative path
+-ln -s dir1 symlink1;
+-
+-##relative path contains ".."
+-ln -s ../dir1 dir2/symlink2;
+-
+-##link created using absolute path
+-ln -s $N0/dir1 symlink3;
+-
+-##link pointing to another symlinks
+-ln -s symlink1 symlink4
+-ln -s symlink3 symlink5
+-
+-##dead links
+-ln -s does/not/exist symlink6
+-
+-##link which contains ".." points out of glusterfs
+-ln -s ../../ symlink7
+-
+-##links pointing to unauthorized area
+-ln -s .glusterfs symlink8
+-
+-popd ;
+-
+-##Umount the volume
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount and umount NFS via directory
+-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount and umount NFS via symlink1
+-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount and umount NFS via symlink2
+-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount NFS via symlink3 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock;
+-
+-## Mount and umount NFS via symlink4
+-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount NFS via symlink5 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock;
+-
+-## Mount NFS via symlink6 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock;
+-
+-## Mount NFS via symlink7 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink7 $N0 nolock;
+-
+-## Mount NFS via symlink8 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock;
+-
+-##Similar check for udp mount
+-$CLI volume stop $V0
+-TEST $CLI volume set $V0 nfs.mount-udp on
+-$CLI volume start $V0
+-
+-## Wait for volume to register with rpc.mountd
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-
+-## Mount and umount NFS via directory
+-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount and umount NFS via symlink1
+-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount and umount NFS via symlink2
+-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount NFS via symlink3 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp;
+-
+-## Mount and umount NFS via symlink4
+-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp;
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+-
+-## Mount NFS via symlink5 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp;
+-
+-## Mount NFS via symlink6 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp;
+-
+-##symlink7 is not check here, because in udp mount ../../ resolves into root '/'
+-
+-## Mount NFS via symlink8 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp;
+-
+-rm -rf $H0:$B0/
+-cleanup;
+diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t
+deleted file mode 100644
+index 45a22e7..0000000
+--- a/tests/bugs/nfs/bug-1161092-nfs-acls.t
++++ /dev/null
+@@ -1,39 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume info
+-
+-TEST $CLI volume create $V0 $H0:$B0/brick1;
+-EXPECT 'Created' volinfo_field $V0 'Status';
+-TEST $CLI volume set $V0 nfs.disable false
+-
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status';
+-
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-TEST mount_nfs $H0:/$V0 $N0
+-
+-TEST touch $N0/file1
+-TEST chmod 700 $N0/file1
+-TEST getfacl $N0/file1
+-
+-TEST $CLI volume set $V0 root-squash on
+-TEST getfacl $N0/file1
+-
+-TEST umount_nfs $H0:/$V0 $N0
+-TEST mount_nfs $H0:/$V0 $N0
+-TEST getfacl $N0/file1
+-
+-## Before killing daemon to avoid deadlocks
+-umount_nfs $N0
+-
+-cleanup;
+-
+diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t
+deleted file mode 100755
+index c4f51a2..0000000
+--- a/tests/bugs/nfs/bug-1166862.t
++++ /dev/null
+@@ -1,69 +0,0 @@
+-#!/bin/bash
+-#
+-# When nfs.mount-rmtab is disabled, it should not get updated.
+-#
+-# Based on: bug-904065.t
+-#
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-# count the lines of a file, return 0 if the file does not exist
+-function count_lines()
+-{
+- if [ -n "$1" ]
+- then
+- $@ 2>/dev/null | wc -l
+- else
+- echo 0
+- fi
+-}
+-
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-
+-cleanup
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/brick1
+-EXPECT 'Created' volinfo_field $V0 'Status'
+-TEST $CLI volume set $V0 nfs.disable false
+-
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status'
+-
+-# glusterfs/nfs needs some time to start up in the background
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-
+-# disable the rmtab by settting it to the magic "/-" value
+-TEST $CLI volume set $V0 nfs.mount-rmtab /-
+-
+-# before mounting the rmtab should be empty
+-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-# showmount should list one client
+-EXPECT '1' count_lines showmount --no-headers $H0
+-
+-# unmount
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-
+-# after resetting the option, the rmtab should get updated again
+-TEST $CLI volume reset $V0 nfs.mount-rmtab
+-
+-# before mounting the rmtab should be empty
+-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-# removing a mount
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-cleanup
+diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c
+deleted file mode 100644
+index d409924..0000000
+--- a/tests/bugs/nfs/bug-1210338.c
++++ /dev/null
+@@ -1,31 +0,0 @@
+-#include <stdio.h>
+-#include <stdlib.h>
+-#include <unistd.h>
+-#include <string.h>
+-#include <errno.h>
+-#include <sys/types.h>
+-#include <fcntl.h>
+-#include <sys/stat.h>
+-
+-int
+-main(int argc, char *argv[])
+-{
+- int ret = -1;
+- int fd = -1;
+-
+- fd = open(argv[1], O_CREAT | O_EXCL, 0644);
+-
+- if (fd == -1) {
+- fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1],
+- strerror(errno));
+- goto out;
+- }
+-
+- ret = 0;
+-
+-out:
+- if (fd > 0)
+- close(fd);
+-
+- return ret;
+-}
+diff --git a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t
+deleted file mode 100644
+index b5c9245..0000000
+--- a/tests/bugs/nfs/bug-1210338.t
++++ /dev/null
+@@ -1,30 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-
+-NFS_SOURCE=$(dirname $0)/bug-1210338.c
+-NFS_EXEC=$(dirname $0)/excl_create
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-
+-build_tester $NFS_SOURCE -o $NFS_EXEC
+-TEST [ -e $NFS_EXEC ]
+-
+-TEST $NFS_EXEC $N0/my_file
+-
+-rm -f $NFS_EXEC;
+-
+-cleanup
+diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t
+deleted file mode 100755
+index a2fb0e6..0000000
+--- a/tests/bugs/nfs/bug-1302948.t
++++ /dev/null
+@@ -1,13 +0,0 @@
+-#!/bin/bash
+-# TEST the nfs.rdirplus option
+-. $(dirname $0)/../../include.rc
+-
+-cleanup
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume start $V0
+-TEST $CLI volume set $V0 nfs.rdirplus off
+-TEST $CLI volume set $V0 nfs.rdirplus on
+-cleanup
+diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t
+deleted file mode 100755
+index 5ccee72..0000000
+--- a/tests/bugs/nfs/bug-847622.t
++++ /dev/null
+@@ -1,39 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-case $OSTYPE in
+-NetBSD)
+- echo "Skip test on ACL which are not available on NetBSD" >&2
+- SKIP_TESTS
+- exit 0
+- ;;
+-*)
+- ;;
+-esac
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/brick0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-cd $N0
+-
+-# simple getfacl setfacl commands
+-TEST touch testfile
+-TEST setfacl -m u:14:r testfile
+-TEST getfacl testfile
+-
+-cd
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-cleanup
+-
+diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t
+deleted file mode 100755
+index dca315a..0000000
+--- a/tests/bugs/nfs/bug-877885.t
++++ /dev/null
+@@ -1,39 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-
+-## Mount FUSE with caching disabled
+-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+-$M0;
+-
+-TEST touch $M0/file
+-TEST mkdir $M0/dir
+-
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-cd $N0
+-
+-rm -rf * &
+-
+-TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock;
+-
+-cd;
+-
+-kill %1;
+-
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1
+-
+-cleanup
+diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t
+deleted file mode 100755
+index 0eba86e..0000000
+--- a/tests/bugs/nfs/bug-904065.t
++++ /dev/null
+@@ -1,100 +0,0 @@
+-#!/bin/bash
+-#
+-# This test does not use 'showmount' from the nfs-utils package, it would
+-# require setting up a portmapper (either rpcbind or portmap, depending on the
+-# Linux distribution used for testing). The persistancy of the rmtab should not
+-# affect the current showmount outputs, so existing regression tests should be
+-# sufficient.
+-#
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-# count the lines of a file, return 0 if the file does not exist
+-function count_lines()
+-{
+- if [ -e "$1" ]
+- then
+- wc -l < $1
+- else
+- echo 0
+- fi
+-}
+-
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-
+-cleanup
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/brick1
+-EXPECT 'Created' volinfo_field $V0 'Status'
+-TEST $CLI volume set $V0 nfs.disable false
+-
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status'
+-
+-# glusterfs/nfs needs some time to start up in the background
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-
+-# before mounting the rmtab should be empty
+-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-# the output would looks similar to:
+-#
+-# hostname-0=172.31.122.104
+-# mountpoint-0=/ufo
+-#
+-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-# duplicate mounts should not be recorded (client could have crashed)
+-TEST mount_nfs $H0:/$V0 $N1 nolock
+-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-# removing a mount should (even if there are two) should remove the entry
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1
+-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-# unmounting the other mount should work flawlessly
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-
+-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0
+-
+-# we'll create a fake rmtab here, similar to how an other storage server would do
+-# using an invalid IP address to prevent (unlikely) collisions on the test-machine
+-cat << EOF > $M0/rmtab
+-hostname-0=127.0.0.256
+-mountpoint-0=/ufo
+-EOF
+-EXPECT '2' count_lines $M0/rmtab
+-
+-# reconfigure merges the rmtab with the one on the volume
+-TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab
+-
+-# glusterfs/nfs needs some time to restart
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-
+-# Apparently "is_nfs_export_available" might return even if the export is
+-# not, in fact, available. (eyeroll) Give it a bit of extra time.
+-#
+-# TBD: fix the broken shell function instead of working around it here
+-sleep 5
+-
+-# a new mount should be added to the rmtab, not overwrite exiting ones
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab
+-
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-EXPECT '2' count_lines $M0/rmtab
+-
+-# TODO: nfs/reconfigure() is never called and is therefor disabled. When the
+-# NFS-server supports reloading and does not get restarted anymore, we should
+-# add a test that includes the merging of entries in the old rmtab with the new
+-# rmtab.
+-
+-cleanup
+diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t
+deleted file mode 100755
+index bd27915..0000000
+--- a/tests/bugs/nfs/bug-915280.t
++++ /dev/null
+@@ -1,54 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-function volinfo_field()
+-{
+- local vol=$1;
+- local field=$2;
+-
+- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+-}
+-
+-TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+-EXPECT 'Created' volinfo_field $V0 'Status';
+-TEST $CLI volume set $V0 nfs.disable false
+-
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status';
+-
+-MOUNTDIR=$N0;
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1
+-TEST touch $N0/testfile
+-
+-TEST $CLI volume set $V0 debug.error-gen client
+-TEST $CLI volume set $V0 debug.error-fops stat
+-TEST $CLI volume set $V0 debug.error-failure 100
+-
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-
+-pid_file=$(read_nfs_pidfile);
+-
+-getfacl $N0/testfile 2>/dev/null
+-
+-nfs_pid=$(get_nfs_pid);
+-if [ ! $nfs_pid ]
+-then
+- nfs_pid=0;
+-fi
+-
+-TEST [ $nfs_pid -eq $pid_file ]
+-
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+-
+-cleanup;
+diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t
+deleted file mode 100755
+index 61be484..0000000
+--- a/tests/bugs/nfs/bug-970070.t
++++ /dev/null
+@@ -1,13 +0,0 @@
+-#!/bin/bash
+-# TEST the nfs.acl option
+-. $(dirname $0)/../../include.rc
+-
+-cleanup
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume start $V0
+-TEST $CLI volume set $V0 nfs.acl off
+-TEST $CLI volume set $V0 nfs.acl on
+-cleanup
+diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t
+deleted file mode 100755
+index 975c46f..0000000
+--- a/tests/bugs/nfs/bug-974972.t
++++ /dev/null
+@@ -1,41 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-#This script checks that nfs mount does not fail lookup on files with split-brain
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+-TEST $CLI volume set $V0 self-heal-daemon off
+-TEST $CLI volume set $V0 cluster.eager-lock off
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0
+-TEST touch $N0/1
+-TEST kill_brick ${V0} ${H0} ${B0}/${V0}1
+-echo abc > $N0/1
+-TEST $CLI volume start $V0 force
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1
+-
+-TEST kill_brick ${V0} ${H0} ${B0}/${V0}0
+-echo def > $N0/1
+-TEST $CLI volume start $V0 force
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1
+-
+-#Lookup should not fail
+-TEST ls $N0/1
+-TEST ! cat $N0/1
+-
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-cleanup
+diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t
+deleted file mode 100644
+index f1b6859..0000000
+--- a/tests/bugs/nfs/showmount-many-clients.t
++++ /dev/null
+@@ -1,41 +0,0 @@
+-#!/bin/bash
+-#
+-# The nfs.rpc-auth-allow volume option is used to generate the list of clients
+-# that are displayed as able to mount the export. The "group" in the export
+-# should be a list of all clients, identified by "name". In previous versions,
+-# the "name" was the copied string from nfs.rpc-auth-allow. This is not
+-# correct, as the volume option should be parsed and split into different
+-# groups.
+-#
+-# When the single string is passed, this testcase fails when the
+-# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting
+-# the groups into their own structures, this testcase passes.
+-#
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-
+-cleanup
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/brick1
+-EXPECT 'Created' volinfo_field $V0 'Status'
+-TEST $CLI volume set $V0 nfs.disable false
+-
+-CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,)
+-TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS}
+-TEST $CLI volume set $V0 nfs.rpc-auth-reject all
+-
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status'
+-
+-# glusterfs/nfs needs some time to start up in the background
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-
+-# showmount should not timeout (no reply is sent on error)
+-TEST showmount -e $H0
+-
+-cleanup
+diff --git a/tests/bugs/nfs/socket-as-fifo.py b/tests/bugs/nfs/socket-as-fifo.py
+deleted file mode 100755
+index eb507e1..0000000
+--- a/tests/bugs/nfs/socket-as-fifo.py
++++ /dev/null
+@@ -1,33 +0,0 @@
+-#
+-# Create a unix domain socket and test if it is a socket (and not a fifo/pipe).
+-#
+-# Author: Niels de Vos <ndevos@redhat.com>
+-#
+-
+-from __future__ import print_function
+-import os
+-import stat
+-import sys
+-import socket
+-
+-ret = 1
+-
+-if len(sys.argv) != 2:
+- print('Usage: %s <socket>' % (sys.argv[0]))
+- sys.exit(ret)
+-
+-path = sys.argv[1]
+-
+-sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+-sock.bind(path)
+-
+-stbuf = os.stat(path)
+-mode = stbuf.st_mode
+-
+-if stat.S_ISSOCK(mode):
+- ret = 0
+-
+-sock.close()
+-os.unlink(path)
+-
+-sys.exit(ret)
+diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t
+deleted file mode 100644
+index d9b9e95..0000000
+--- a/tests/bugs/nfs/socket-as-fifo.t
++++ /dev/null
+@@ -1,25 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-
+-# this is the actual test
+-TEST $PYTHON $(dirname $0)/socket-as-fifo.py $N0/not-a-fifo.socket
+-
+-TEST umount_nfs $N0
+-
+-cleanup
+diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t
+deleted file mode 100644
+index 6a11487..0000000
+--- a/tests/bugs/nfs/subdir-trailing-slash.t
++++ /dev/null
+@@ -1,32 +0,0 @@
+-#!/bin/bash
+-#
+-# Verify that mounting a subdir over NFS works, even with a trailing /
+-#
+-# For example:
+-# mount -t nfs server.example.com:/volume/subdir/
+-#
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup;
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+-
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-TEST mkdir -p $N0/subdir
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-
+-TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock
+-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+-
+-cleanup
+diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t
+deleted file mode 100755
+index 2a94009..0000000
+--- a/tests/bugs/nfs/zero-atime.t
++++ /dev/null
+@@ -1,33 +0,0 @@
+-#!/bin/bash
+-#
+-# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If
+-# not set, these values are 0 and cause a atime/mtime set to the Epoch.
+-#
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-
+-# create a file for testing
+-TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+-
+-# timezone in UTC results in atime=0 if not set correctly
+-TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat
+-TEST [ "$(stat --format=%X $M0/small)" != "0" ]
+-
+-TEST rm $M0/small
+-
+-cleanup
+diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t
+index 65af274..9ad0ab2 100755
+--- a/tests/bugs/rpc/bug-954057.t
++++ b/tests/bugs/rpc/bug-954057.t
+@@ -25,7 +25,15 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+ TEST mkdir $M0/dir
+ TEST mkdir $M0/nobody
+-TEST chown nfsnobody:nfsnobody $M0/nobody
++grep nfsnobody /etc/passwd > /dev/null
++if [ $? -eq 1 ]; then
++usr=nobody
++grp=nobody
++else
++usr=nfsnobody
++grp=nfsnobody
++fi
++TEST chown $usr:$grp $M0/nobody
+ TEST `echo "file" >> $M0/file`
+ TEST cp $M0/file $M0/new
+ TEST chmod 700 $M0/new
+diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t
+index 7628870..66e896a 100644
+--- a/tests/bugs/shard/bug-1272986.t
++++ b/tests/bugs/shard/bug-1272986.t
+@@ -16,16 +16,16 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
+
+ # Write some data into a file, such that its size crosses the shard block size.
+-TEST dd if=/dev/zero of=$M1/file bs=1M count=5 conv=notrunc
++TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct
+
+ md5sum1_reader=$(md5sum $M0/file | awk '{print $1}')
+
+ EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'`
+
+ # Append some more data into the file.
+-TEST `echo "abcdefg" >> $M1/file`
++TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct
+
+-md5sum2_reader=$(md5sum $M0/file | awk '{print $1}')
++md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}')
+
+ # Test to see if the reader refreshes its cache correctly as part of the reads
+ # triggered through md5sum. If it does, then the md5sum on the reader and writer
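The rewritten test uses dd with oflag=direct/iflag=direct so the writer and the reader both bypass their page caches, and the reader's checksum genuinely exercises shard cache refresh. In C the same effect comes from O_DIRECT, which on Linux requires the buffer, offset and length to be suitably aligned, hence posix_memalign(). A sketch with placeholder path and sizes:

    #define _GNU_SOURCE /* for O_DIRECT */
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <fcntl.h>

    int main(void)
    {
        const size_t align = 4096, len = 256 * 1024;
        void *buf = NULL;

        if (posix_memalign(&buf, align, len) != 0)
            return 1;

        int fd = open("/mnt/glusterfs/file", O_RDONLY | O_DIRECT);
        if (fd < 0) {
            perror("open(O_DIRECT)");
            free(buf);
            return 1;
        }

        /* Served from the backend, not the local page cache. */
        ssize_t n = read(fd, buf, len);
        if (n < 0)
            perror("direct read");
        else
            printf("read %zd uncached bytes\n", n);

        close(fd);
        free(buf);
        return 0;
    }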
+diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t
+index d4c0702..8070bc1 100755
+--- a/tests/bugs/transport/bug-873367.t
++++ b/tests/bugs/transport/bug-873367.t
+@@ -13,7 +13,7 @@ rm -f $SSL_BASE/glusterfs.*
+ mkdir -p $B0/1
+ mkdir -p $M0
+
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+
+diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
+index 3cb45b5..cae010c 100755
+--- a/tests/features/ssl-authz.t
++++ b/tests/features/ssl-authz.t
+@@ -41,7 +41,7 @@ function valid_ciphers {
+ -e '/:$/s///'
+ }
+
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+
+diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t
+index 7e1e199..e4bcdf5 100644
+--- a/tests/features/ssl-ciphers.t
++++ b/tests/features/ssl-ciphers.t
+@@ -33,18 +33,26 @@ wait_mount() {
+ openssl_connect() {
+ ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA"
+ ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR"
+- #echo openssl s_client $ssl_opt $@ > /dev/tty
+- #read -p "Continue? " nothing
+- CIPHER=`echo "" |
+- openssl s_client $ssl_opt $@ 2>/dev/null |
+- awk '/^ Cipher/{print $3}'`
+- if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" ] ; then
++ cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null"
++ CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
++ if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+ }
+
++# Probe the cipher first so the EXPECT test knows what openssl_connect should report
++check_cipher() {
++ cmd="echo "" | openssl s_client $@ 2> /dev/null"
++ cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
++ if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then
++ echo "N"
++ else
++ echo "Y"
++ fi
++}
++
+ cleanup;
+ mkdir -p $B0
+ mkdir -p $M0
+@@ -65,7 +73,7 @@ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume info;
+
+-TEST openssl genrsa -out $SSL_KEY 1024 2>/dev/null
++TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null
+ TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \
+ -subj /CN=CA -out $SSL_CA
+ TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \
+@@ -106,28 +114,36 @@ EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT
+ EXPECT "N" openssl_connect -tls1 -connect $H0:$BRICK_PORT
+
+ # Test a HIGH CBC cipher
+-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+
+ # Test EECDH
+-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+ # test MD5 fails
+-EXPECT "N" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
+
+ # test RC4 fails
+-EXPECT "N" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
+
+ # test eNULL fails
+-EXPECT "N" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT
+
+ # test SHA2
+-EXPECT "Y" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT
+
+ # test GCM
+-EXPECT "Y" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT
+
+ # Test DH fails without DH params
+-EXPECT "N" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
+
+ # Test DH with DH params
+ TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem
+@@ -145,8 +161,10 @@ TEST $CLI volume stop $V0
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+ BRICK_PORT=`brick_port $V0`
+-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+-EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
+
+ # Test the ec-curve option
+ TEST $CLI volume set $V0 ssl.cipher-list EECDH:EDH:!TLSv1
+@@ -155,8 +173,10 @@ TEST $CLI volume stop $V0
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+ BRICK_PORT=`brick_port $V0`
+-EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+ TEST $CLI volume set $V0 ssl.ec-curve invalid
+ EXPECT invalid volume_option $V0 ssl.ec-curve
+@@ -164,7 +184,8 @@ TEST $CLI volume stop $V0
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+ BRICK_PORT=`brick_port $V0`
+-EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+ TEST $CLI volume set $V0 ssl.ec-curve secp521r1
+ EXPECT secp521r1 volume_option $V0 ssl.ec-curve
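+
Note on the changes above: the hard-coded "Y"/"N" expectations are replaced by
a probe, so the test tracks whichever ciphers the local OpenSSL build still
offers (modern builds drop RC4, MD5 and eNULL outright). What the parsing
inside check_cipher and openssl_connect extracts, shown against a sample
s_client status line (the input string is illustrative):

    echo "New, TLSv1.2, Cipher is AES256-GCM-SHA384" |
        awk -F "Cipher is" '{print $2}' | tr -d '[:space:]'
    # Prints AES256-GCM-SHA384. A failed handshake leaves "", "0000" or
    # "(NONE)" instead, which both helpers map to "N".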
+diff --git a/tests/ssl.rc b/tests/ssl.rc
+index 127f83f..b1ccc4c 100644
+--- a/tests/ssl.rc
++++ b/tests/ssl.rc
+@@ -20,7 +20,7 @@ SSL_CA=$SSL_BASE/glusterfs.ca
+
+ # Create self-signed certificates
+ function create_self_signed_certs (){
+- openssl genrsa -out $SSL_KEY 1024
++ openssl genrsa -out $SSL_KEY 2048
+ openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+ return $?
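+
Note: every genrsa call in the suite moves from 1024- to 2048-bit keys because
distributions that raise OpenSSL's default security level to 2 (for example
RHEL 8's DEFAULT crypto policy) reject RSA keys shorter than 2048 bits,
typically failing the handshake with an error like "ee key too small". The
updated helper boils down to the following, with placeholder file names:

    openssl genrsa -out test.key 2048
    openssl req -new -x509 -key test.key -subj /CN=Anyone -out test.crt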
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index b248767..b224abd 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -10,6883 +10,6417 @@
+
+ #include <unistd.h>
+
+-#include "shard.h"
+ #include "shard-mem-types.h"
++#include "shard.h"
+ #include <glusterfs/byte-order.h>
+ #include <glusterfs/defaults.h>
+ #include <glusterfs/statedump.h>
+
+-static gf_boolean_t
+-__is_shard_dir(uuid_t gfid)
+-{
+- shard_priv_t *priv = THIS->private;
++static gf_boolean_t __is_shard_dir(uuid_t gfid) {
++ shard_priv_t *priv = THIS->private;
+
+- if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0)
+- return _gf_true;
++ if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0)
++ return _gf_true;
+
+- return _gf_false;
++ return _gf_false;
+ }
+
+-static gf_boolean_t
+-__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc)
+-{
+- if (frame->root->pid == GF_CLIENT_PID_GSYNCD &&
+- (__is_shard_dir(loc->pargfid) ||
+- (loc->parent && __is_shard_dir(loc->parent->gfid))))
+- return _gf_true;
++static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) {
++ if (frame->root->pid == GF_CLIENT_PID_GSYNCD &&
++ (__is_shard_dir(loc->pargfid) ||
++ (loc->parent && __is_shard_dir(loc->parent->gfid))))
++ return _gf_true;
+
+- return _gf_false;
++ return _gf_false;
+ }
+
+-void
+-shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len)
+-{
+- char gfid_str[GF_UUID_BUF_SIZE] = {
+- 0,
+- };
++void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) {
++ char gfid_str[GF_UUID_BUF_SIZE] = {
++ 0,
++ };
+
+- gf_uuid_unparse(gfid, gfid_str);
+- snprintf(buf, len, "%s.%d", gfid_str, block_num);
++ gf_uuid_unparse(gfid, gfid_str);
++ snprintf(buf, len, "%s.%d", gfid_str, block_num);
+ }
+
+-void
+-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len)
+-{
+- char gfid_str[GF_UUID_BUF_SIZE] = {
+- 0,
+- };
++void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath,
++ size_t len) {
++ char gfid_str[GF_UUID_BUF_SIZE] = {
++ 0,
++ };
+
+- gf_uuid_unparse(gfid, gfid_str);
+- snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
++ gf_uuid_unparse(gfid, gfid_str);
++ snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
+ }
+
+-int
+-__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+-{
+- int ret = -1;
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx_p = NULL;
++int __shard_inode_ctx_get(inode_t *inode, xlator_t *this,
++ shard_inode_ctx_t **ctx) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx_p = NULL;
+
+- ret = __inode_ctx_get(inode, this, &ctx_uint);
+- if (ret == 0) {
+- *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+- return ret;
+- }
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret == 0) {
++ *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++ return ret;
++ }
+
+- ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t);
+- if (!ctx_p)
+- return ret;
++ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t);
++ if (!ctx_p)
++ return ret;
+
+- INIT_LIST_HEAD(&ctx_p->ilist);
+- INIT_LIST_HEAD(&ctx_p->to_fsync_list);
++ INIT_LIST_HEAD(&ctx_p->ilist);
++ INIT_LIST_HEAD(&ctx_p->to_fsync_list);
+
+- ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p);
+- if (ret < 0) {
+- GF_FREE(ctx_p);
+- return ret;
+- }
++ ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p);
++ if (ret < 0) {
++ GF_FREE(ctx_p);
++ return ret;
++ }
+
+- *ctx = ctx_p;
++ *ctx = ctx_p;
+
+- return ret;
++ return ret;
+ }
+
+-int
+-shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+-{
+- int ret = 0;
++int shard_inode_ctx_get(inode_t *inode, xlator_t *this,
++ shard_inode_ctx_t **ctx) {
++ int ret = 0;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_get(inode, this, ctx);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { ret = __shard_inode_ctx_get(inode, this, ctx); }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+
+-int
+-__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
+- uint64_t block_size, int32_t valid)
+-{
+- int ret = -1;
+- shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
++ uint64_t block_size, int32_t valid) {
++ int ret = -1;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __shard_inode_ctx_get(inode, this, &ctx);
+- if (ret)
+- return ret;
++ ret = __shard_inode_ctx_get(inode, this, &ctx);
++ if (ret)
++ return ret;
+
+- if (valid & SHARD_MASK_BLOCK_SIZE)
+- ctx->block_size = block_size;
++ if (valid & SHARD_MASK_BLOCK_SIZE)
++ ctx->block_size = block_size;
+
+- if (valid & SHARD_MASK_PROT)
+- ctx->stat.ia_prot = stbuf->ia_prot;
++ if (valid & SHARD_MASK_PROT)
++ ctx->stat.ia_prot = stbuf->ia_prot;
+
+- if (valid & SHARD_MASK_NLINK)
+- ctx->stat.ia_nlink = stbuf->ia_nlink;
++ if (valid & SHARD_MASK_NLINK)
++ ctx->stat.ia_nlink = stbuf->ia_nlink;
+
+- if (valid & SHARD_MASK_UID)
+- ctx->stat.ia_uid = stbuf->ia_uid;
++ if (valid & SHARD_MASK_UID)
++ ctx->stat.ia_uid = stbuf->ia_uid;
+
+- if (valid & SHARD_MASK_GID)
+- ctx->stat.ia_gid = stbuf->ia_gid;
++ if (valid & SHARD_MASK_GID)
++ ctx->stat.ia_gid = stbuf->ia_gid;
+
+- if (valid & SHARD_MASK_SIZE)
+- ctx->stat.ia_size = stbuf->ia_size;
++ if (valid & SHARD_MASK_SIZE)
++ ctx->stat.ia_size = stbuf->ia_size;
+
+- if (valid & SHARD_MASK_BLOCKS)
+- ctx->stat.ia_blocks = stbuf->ia_blocks;
++ if (valid & SHARD_MASK_BLOCKS)
++ ctx->stat.ia_blocks = stbuf->ia_blocks;
+
+- if (valid & SHARD_MASK_TIMES) {
+- SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
+- stbuf->ia_mtime, stbuf->ia_mtime_nsec);
+- SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
+- stbuf->ia_ctime, stbuf->ia_ctime_nsec);
+- SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
+- stbuf->ia_atime, stbuf->ia_atime_nsec);
+- }
++ if (valid & SHARD_MASK_TIMES) {
++ SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
++ stbuf->ia_mtime, stbuf->ia_mtime_nsec);
++ SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
++ stbuf->ia_ctime, stbuf->ia_ctime_nsec);
++ SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
++ stbuf->ia_atime, stbuf->ia_atime_nsec);
++ }
+
+- if (valid & SHARD_MASK_OTHERS) {
+- ctx->stat.ia_ino = stbuf->ia_ino;
+- gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid);
+- ctx->stat.ia_dev = stbuf->ia_dev;
+- ctx->stat.ia_type = stbuf->ia_type;
+- ctx->stat.ia_rdev = stbuf->ia_rdev;
+- ctx->stat.ia_blksize = stbuf->ia_blksize;
+- }
++ if (valid & SHARD_MASK_OTHERS) {
++ ctx->stat.ia_ino = stbuf->ia_ino;
++ gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid);
++ ctx->stat.ia_dev = stbuf->ia_dev;
++ ctx->stat.ia_type = stbuf->ia_type;
++ ctx->stat.ia_rdev = stbuf->ia_rdev;
++ ctx->stat.ia_blksize = stbuf->ia_blksize;
++ }
+
+- if (valid & SHARD_MASK_REFRESH_RESET)
+- ctx->refresh = _gf_false;
++ if (valid & SHARD_MASK_REFRESH_RESET)
++ ctx->refresh = _gf_false;
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
+- uint64_t block_size, int32_t valid)
+-{
+- int ret = -1;
++int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
++ uint64_t block_size, int32_t valid) {
++ int ret = -1;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+
+-int
+-__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this)
+-{
+- int ret = -1;
+- shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) {
++ int ret = -1;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __shard_inode_ctx_get(inode, this, &ctx);
+- if (ret)
+- return ret;
++ ret = __shard_inode_ctx_get(inode, this, &ctx);
++ if (ret)
++ return ret;
+
+- ctx->refresh = _gf_true;
++ ctx->refresh = _gf_true;
+
+- return 0;
++ return 0;
+ }
+-int
+-shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this)
+-{
+- int ret = -1;
++int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) {
++ int ret = -1;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_set_refresh_flag(inode, this);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { ret = __shard_inode_ctx_set_refresh_flag(inode, this); }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+
+-int
+-__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this)
+-{
+- int ret = -1;
+- shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) {
++ int ret = -1;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __shard_inode_ctx_get(inode, this, &ctx);
+- if (ret)
+- return ret;
++ ret = __shard_inode_ctx_get(inode, this, &ctx);
++ if (ret)
++ return ret;
+
+- ctx->refreshed = _gf_true;
+- return 0;
++ ctx->refreshed = _gf_true;
++ return 0;
+ }
+
+-int
+-shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this)
+-{
+- int ret = -1;
++int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) {
++ int ret = -1;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_mark_dir_refreshed(inode, this);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+
+-int
+-__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
+- inode_t *shard_inode)
+-{
+- int ret = -1;
+- shard_inode_ctx_t *base_ictx = NULL;
+- shard_inode_ctx_t *shard_ictx = NULL;
+-
+- ret = __shard_inode_ctx_get(base_inode, this, &base_ictx);
+- if (ret)
+- return ret;
++int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
++ inode_t *shard_inode) {
++ int ret = -1;
++ shard_inode_ctx_t *base_ictx = NULL;
++ shard_inode_ctx_t *shard_ictx = NULL;
+
+- ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx);
+- if (ret)
+- return ret;
++ ret = __shard_inode_ctx_get(base_inode, this, &base_ictx);
++ if (ret)
++ return ret;
+
+- if (shard_ictx->fsync_needed) {
+- shard_ictx->fsync_needed++;
+- return 1;
+- }
++ ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx);
++ if (ret)
++ return ret;
+
+- list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list);
+- shard_ictx->inode = shard_inode;
++ if (shard_ictx->fsync_needed) {
+ shard_ictx->fsync_needed++;
+- base_ictx->fsync_count++;
+- shard_ictx->base_inode = base_inode;
++ return 1;
++ }
+
+- return 0;
++ list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list);
++ shard_ictx->inode = shard_inode;
++ shard_ictx->fsync_needed++;
++ base_ictx->fsync_count++;
++ shard_ictx->base_inode = base_inode;
++
++ return 0;
+ }
+
+-int
+-shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
+- inode_t *shard_inode)
+-{
+- int ret = -1;
++int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
++ inode_t *shard_inode) {
++ int ret = -1;
+
+- /* This ref acts as a refkeepr on the base inode. We
+- * need to keep this inode alive as it holds the head
+- * of the to_fsync_list.
+- */
+- inode_ref(base_inode);
+- inode_ref(shard_inode);
++ * This ref acts as a ref-keeper on the base inode. We
++ * need to keep this inode alive as it holds the head
++ * of the to_fsync_list.
++ */
++ inode_ref(base_inode);
++ inode_ref(shard_inode);
+
+- LOCK(&base_inode->lock);
+- LOCK(&shard_inode->lock);
+- {
+- ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this,
+- shard_inode);
+- }
+- UNLOCK(&shard_inode->lock);
+- UNLOCK(&base_inode->lock);
++ LOCK(&base_inode->lock);
++ LOCK(&shard_inode->lock);
++ { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); }
++ UNLOCK(&shard_inode->lock);
++ UNLOCK(&base_inode->lock);
+
+- /* Unref the base inode corresponding to the ref above, if the shard is
+- * found to be already part of the fsync list.
+- */
+- if (ret != 0) {
+- inode_unref(base_inode);
+- inode_unref(shard_inode);
+- }
+- return ret;
++ /* Unref the base inode corresponding to the ref above, if the shard is
++ * found to be already part of the fsync list.
++ */
++ if (ret != 0) {
++ inode_unref(base_inode);
++ inode_unref(shard_inode);
++ }
++ return ret;
+ }
+
+-gf_boolean_t
+-__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this)
+-{
+- int ret = -1;
+- shard_inode_ctx_t *ctx = NULL;
++gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) {
++ int ret = -1;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __shard_inode_ctx_get(inode, this, &ctx);
+- /* If inode ctx get fails, better to err on the side of caution and
+- * try again? Unless the failure is due to mem-allocation.
+- */
+- if (ret)
+- return _gf_true;
++ ret = __shard_inode_ctx_get(inode, this, &ctx);
++ /* If inode ctx get fails, better to err on the side of caution and
++ * try again? Unless the failure is due to mem-allocation.
++ */
++ if (ret)
++ return _gf_true;
+
+- return !ctx->refreshed;
++ return !ctx->refreshed;
+ }
+
+-gf_boolean_t
+-shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this)
+-{
+- gf_boolean_t flag = _gf_false;
++gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) {
++ gf_boolean_t flag = _gf_false;
+
+- LOCK(&inode->lock);
+- {
+- flag = __shard_inode_ctx_needs_lookup(inode, this);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { flag = __shard_inode_ctx_needs_lookup(inode, this); }
++ UNLOCK(&inode->lock);
+
+- return flag;
++ return flag;
+ }
+-int
+-__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf)
+-{
+- int ret = -1;
+- shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this,
++ struct iatt *stbuf) {
++ int ret = -1;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __shard_inode_ctx_get(inode, this, &ctx);
+- if (ret)
+- return ret;
++ ret = __shard_inode_ctx_get(inode, this, &ctx);
++ if (ret)
++ return ret;
+
+- if ((stbuf->ia_size != ctx->stat.ia_size) ||
+- (stbuf->ia_blocks != ctx->stat.ia_blocks))
+- ctx->refresh = _gf_true;
++ if ((stbuf->ia_size != ctx->stat.ia_size) ||
++ (stbuf->ia_blocks != ctx->stat.ia_blocks))
++ ctx->refresh = _gf_true;
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf)
+-{
+- int ret = -1;
++int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this,
++ struct iatt *stbuf) {
++ int ret = -1;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_invalidate(inode, this, stbuf);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+
+-int
+-__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
+- uint64_t *block_size)
+-{
+- int ret = -1;
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
++ uint64_t *block_size) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __inode_ctx_get(inode, this, &ctx_uint);
+- if (ret < 0)
+- return ret;
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret < 0)
++ return ret;
+
+- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- *block_size = ctx->block_size;
++ *block_size = ctx->block_size;
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
+- uint64_t *block_size)
+-{
+- int ret = -1;
++int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
++ uint64_t *block_size) {
++ int ret = -1;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_get_block_size(inode, this, block_size);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+
+-int
+-__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
+- int *fsync_count)
+-{
+- int ret = -1;
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
++ int *fsync_count) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __inode_ctx_get(inode, this, &ctx_uint);
+- if (ret < 0)
+- return ret;
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret < 0)
++ return ret;
+
+- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- *fsync_count = ctx->fsync_needed;
++ *fsync_count = ctx->fsync_needed;
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
+- int *fsync_count)
+-{
+- int ret = -1;
++int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
++ int *fsync_count) {
++ int ret = -1;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+-int
+-__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+- shard_inode_ctx_t *ctx_out)
+-{
+- int ret = -1;
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
++ shard_inode_ctx_t *ctx_out) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __inode_ctx_get(inode, this, &ctx_uint);
+- if (ret < 0)
+- return ret;
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret < 0)
++ return ret;
+
+- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t));
+- return 0;
++ memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t));
++ return 0;
+ }
+
+-int
+-shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+- shard_inode_ctx_t *ctx_out)
+-{
+- int ret = -1;
++int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
++ shard_inode_ctx_t *ctx_out) {
++ int ret = -1;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_get_all(inode, this, ctx_out);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+
+-int
+-__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
+- struct iatt *buf,
+- gf_boolean_t *need_refresh)
+-{
+- int ret = -1;
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
++ struct iatt *buf,
++ gf_boolean_t *need_refresh) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
+
+- ret = __inode_ctx_get(inode, this, &ctx_uint);
+- if (ret < 0)
+- return ret;
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret < 0)
++ return ret;
+
+- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- if (ctx->refresh == _gf_false)
+- *buf = ctx->stat;
+- else
+- *need_refresh = _gf_true;
++ if (ctx->refresh == _gf_false)
++ *buf = ctx->stat;
++ else
++ *need_refresh = _gf_true;
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
+- struct iatt *buf,
+- gf_boolean_t *need_refresh)
+-{
+- int ret = -1;
++int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
++ struct iatt *buf,
++ gf_boolean_t *need_refresh) {
++ int ret = -1;
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf,
+- need_refresh);
+- }
+- UNLOCK(&inode->lock);
++ LOCK(&inode->lock);
++ {
++ ret =
++ __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh);
++ }
++ UNLOCK(&inode->lock);
+
+- return ret;
++ return ret;
+ }
+
+-void
+-shard_local_wipe(shard_local_t *local)
+-{
+- int i = 0;
+- int count = 0;
+-
+- count = local->num_blocks;
+-
+- syncbarrier_destroy(&local->barrier);
+- loc_wipe(&local->loc);
+- loc_wipe(&local->dot_shard_loc);
+- loc_wipe(&local->dot_shard_rm_loc);
+- loc_wipe(&local->loc2);
+- loc_wipe(&local->tmp_loc);
+- loc_wipe(&local->int_inodelk.loc);
+- loc_wipe(&local->int_entrylk.loc);
+- loc_wipe(&local->newloc);
+-
+- if (local->int_entrylk.basename)
+- GF_FREE(local->int_entrylk.basename);
+- if (local->fd)
+- fd_unref(local->fd);
+-
+- if (local->xattr_req)
+- dict_unref(local->xattr_req);
+- if (local->xattr_rsp)
+- dict_unref(local->xattr_rsp);
+-
+- for (i = 0; i < count; i++) {
+- if (!local->inode_list)
+- break;
+-
+- if (local->inode_list[i])
+- inode_unref(local->inode_list[i]);
+- }
+-
+- GF_FREE(local->inode_list);
+-
+- GF_FREE(local->vector);
+- if (local->iobref)
+- iobref_unref(local->iobref);
+- if (local->list_inited)
+- gf_dirent_free(&local->entries_head);
+- if (local->inodelk_frame)
+- SHARD_STACK_DESTROY(local->inodelk_frame);
+- if (local->entrylk_frame)
+- SHARD_STACK_DESTROY(local->entrylk_frame);
+-}
+-
+-int
+-shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict)
+-{
+- int ret = -1;
+- void *size_attr = NULL;
+- uint64_t size_array[4];
+-
+- ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+- if (ret) {
+- gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
+- SHARD_MSG_INTERNAL_XATTR_MISSING,
+- "Failed to "
+- "get " GF_XATTR_SHARD_FILE_SIZE " for %s",
+- uuid_utoa(stbuf->ia_gfid));
+- return ret;
+- }
++void shard_local_wipe(shard_local_t *local) {
++ int i = 0;
++ int count = 0;
+
+- memcpy(size_array, size_attr, sizeof(size_array));
++ count = local->num_blocks;
+
+- stbuf->ia_size = ntoh64(size_array[0]);
+- stbuf->ia_blocks = ntoh64(size_array[2]);
++ syncbarrier_destroy(&local->barrier);
++ loc_wipe(&local->loc);
++ loc_wipe(&local->dot_shard_loc);
++ loc_wipe(&local->dot_shard_rm_loc);
++ loc_wipe(&local->loc2);
++ loc_wipe(&local->tmp_loc);
++ loc_wipe(&local->int_inodelk.loc);
++ loc_wipe(&local->int_entrylk.loc);
++ loc_wipe(&local->newloc);
+
+- return 0;
+-}
++ if (local->int_entrylk.basename)
++ GF_FREE(local->int_entrylk.basename);
++ if (local->fd)
++ fd_unref(local->fd);
+
+-int
+-shard_call_count_return(call_frame_t *frame)
+-{
+- int call_count = 0;
+- shard_local_t *local = NULL;
++ if (local->xattr_req)
++ dict_unref(local->xattr_req);
++ if (local->xattr_rsp)
++ dict_unref(local->xattr_rsp);
+
+- local = frame->local;
++ for (i = 0; i < count; i++) {
++ if (!local->inode_list)
++ break;
++
++ if (local->inode_list[i])
++ inode_unref(local->inode_list[i]);
++ }
++
++ GF_FREE(local->inode_list);
++
++ GF_FREE(local->vector);
++ if (local->iobref)
++ iobref_unref(local->iobref);
++ if (local->list_inited)
++ gf_dirent_free(&local->entries_head);
++ if (local->inodelk_frame)
++ SHARD_STACK_DESTROY(local->inodelk_frame);
++ if (local->entrylk_frame)
++ SHARD_STACK_DESTROY(local->entrylk_frame);
++}
++
++int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) {
++ int ret = -1;
++ void *size_attr = NULL;
++ uint64_t size_array[4];
++
++ ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
++ if (ret) {
++ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
++ SHARD_MSG_INTERNAL_XATTR_MISSING,
++ "Failed to "
++ "get " GF_XATTR_SHARD_FILE_SIZE " for %s",
++ uuid_utoa(stbuf->ia_gfid));
++ return ret;
++ }
++
++ memcpy(size_array, size_attr, sizeof(size_array));
++
++ stbuf->ia_size = ntoh64(size_array[0]);
++ stbuf->ia_blocks = ntoh64(size_array[2]);
++
++ return 0;
++}
++
++int shard_call_count_return(call_frame_t *frame) {
++ int call_count = 0;
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ LOCK(&frame->lock);
++ { call_count = --local->call_count; }
++ UNLOCK(&frame->lock);
++
++ return call_count;
++}
++
++static char *shard_internal_dir_string(shard_internal_dir_type_t type) {
++ char *str = NULL;
++
++ switch (type) {
++ case SHARD_INTERNAL_DIR_DOT_SHARD:
++ str = GF_SHARD_DIR;
++ break;
++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++ str = GF_SHARD_REMOVE_ME_DIR;
++ break;
++ default:
++ break;
++ }
++ return str;
++}
++
++static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local,
++ shard_internal_dir_type_t type) {
++ int ret = -1;
++ char *bname = NULL;
++ inode_t *parent = NULL;
++ loc_t *internal_dir_loc = NULL;
++ shard_priv_t *priv = NULL;
++
++ priv = this->private;
++ if (!local)
++ return -1;
++
++ switch (type) {
++ case SHARD_INTERNAL_DIR_DOT_SHARD:
++ internal_dir_loc = &local->dot_shard_loc;
++ bname = GF_SHARD_DIR;
++ parent = inode_ref(this->itable->root);
++ break;
++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++ internal_dir_loc = &local->dot_shard_rm_loc;
++ bname = GF_SHARD_REMOVE_ME_DIR;
++ parent = inode_ref(priv->dot_shard_inode);
++ break;
++ default:
++ break;
++ }
++
++ internal_dir_loc->inode = inode_new(this->itable);
++ internal_dir_loc->parent = parent;
++ ret = inode_path(internal_dir_loc->parent, bname,
++ (char **)&internal_dir_loc->path);
++ if (ret < 0 || !(internal_dir_loc->inode)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed on %s", bname);
++ goto out;
++ }
++
++ internal_dir_loc->name = strrchr(internal_dir_loc->path, '/');
++ if (internal_dir_loc->name)
++ internal_dir_loc->name++;
++
++ ret = 0;
++out:
++ return ret;
++}
++
++inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
++ inode_t *base_inode, int block_num,
++ uuid_t gfid) {
++ char block_bname[256] = {
++ 0,
++ };
++ inode_t *lru_inode = NULL;
++ shard_priv_t *priv = NULL;
++ shard_inode_ctx_t *ctx = NULL;
++ shard_inode_ctx_t *lru_inode_ctx = NULL;
++ shard_inode_ctx_t *lru_base_inode_ctx = NULL;
++ inode_t *fsync_inode = NULL;
++ inode_t *lru_base_inode = NULL;
++ gf_boolean_t do_fsync = _gf_false;
++
++ priv = this->private;
++
++ shard_inode_ctx_get(linked_inode, this, &ctx);
++
++ if (list_empty(&ctx->ilist)) {
++ if (priv->inode_count + 1 <= priv->lru_limit) {
++ /* If this inode was linked here for the first time (indicated
++ * by empty list), and if there is still space in the priv list,
++ * add this ctx to the tail of the list.
++ */
++ /* For as long as an inode is in lru list, we try to
++ * keep it alive by holding a ref on it.
++ */
++ inode_ref(linked_inode);
++ if (base_inode)
++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
++ else
++ gf_uuid_copy(ctx->base_gfid, gfid);
++ ctx->block_num = block_num;
++ list_add_tail(&ctx->ilist, &priv->ilist_head);
++ priv->inode_count++;
++ ctx->base_inode = inode_ref(base_inode);
++ } else {
++ /* If, on the other hand, there is no available slot for this inode
++ * in the list, delete the lru inode from the head of the list,
++ * unlink it. And in its place add this new inode into the list.
++ */
++ lru_inode_ctx =
++ list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist);
++ GF_ASSERT(lru_inode_ctx->block_num > 0);
++ lru_base_inode = lru_inode_ctx->base_inode;
++ list_del_init(&lru_inode_ctx->ilist);
++ lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid);
++ /* If the lru inode was part of the pending-fsync list,
++ * the base inode needs to be unref'd, the lru inode
++ * deleted from fsync list and fsync'd in a new frame,
++ * and then unlinked in memory and forgotten.
++ */
++ if (!lru_base_inode)
++ goto after_fsync_check;
++ LOCK(&lru_base_inode->lock);
++ LOCK(&lru_inode->lock);
++ {
++ if (!list_empty(&lru_inode_ctx->to_fsync_list)) {
++ list_del_init(&lru_inode_ctx->to_fsync_list);
++ lru_inode_ctx->fsync_needed = 0;
++ do_fsync = _gf_true;
++ __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx);
++ lru_base_inode_ctx->fsync_count--;
++ }
++ }
++ UNLOCK(&lru_inode->lock);
++ UNLOCK(&lru_base_inode->lock);
++
++ after_fsync_check:
++ if (!do_fsync) {
++ shard_make_block_bname(lru_inode_ctx->block_num,
++ lru_inode_ctx->base_gfid, block_bname,
++ sizeof(block_bname));
++ /* The following unref corresponds to the ref held at
++ * the time the shard was added to the lru list.
++ */
++ inode_unref(lru_inode);
++ inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
++ inode_forget(lru_inode, 0);
++ } else {
++ /* The following unref corresponds to the ref
++ * held when the shard was added to fsync list.
++ */
++ inode_unref(lru_inode);
++ fsync_inode = lru_inode;
++ if (lru_base_inode)
++ inode_unref(lru_base_inode);
++ }
++ /* The following unref corresponds to the ref
++ * held by inode_find() above.
++ */
++ inode_unref(lru_inode);
++
++ /* The following unref corresponds to the ref held on the base shard
++ * at the time of adding shard inode to lru list
++ */
++ if (lru_base_inode)
++ inode_unref(lru_base_inode);
++
++ /* For as long as an inode is in lru list, we try to
++ * keep it alive by holding a ref on it.
++ */
++ inode_ref(linked_inode);
++ if (base_inode)
++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
++ else
++ gf_uuid_copy(ctx->base_gfid, gfid);
++ ctx->block_num = block_num;
++ ctx->base_inode = inode_ref(base_inode);
++ list_add_tail(&ctx->ilist, &priv->ilist_head);
++ }
++ } else {
++ /* If this is not the first time this inode is being operated on, move
++ * it to the most recently used end of the list.
++ */
++ list_move_tail(&ctx->ilist, &priv->ilist_head);
++ }
++ return fsync_inode;
++}
++
++int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame,
++ int32_t op_ret, int32_t op_errno) {
++ switch (fop) {
++ case GF_FOP_LOOKUP:
++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL);
++ break;
++ case GF_FOP_STAT:
++ SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL);
++ break;
++ case GF_FOP_FSTAT:
++ SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL);
++ break;
++ case GF_FOP_TRUNCATE:
++ SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_FTRUNCATE:
++ SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_MKNOD:
++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
++ NULL);
++ break;
++ case GF_FOP_LINK:
++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
++ NULL);
++ break;
++ case GF_FOP_CREATE:
++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
++ NULL, NULL);
++ break;
++ case GF_FOP_UNLINK:
++ SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_RENAME:
++ SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
++ NULL, NULL);
++ break;
++ case GF_FOP_WRITE:
++ SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_FALLOCATE:
++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_ZEROFILL:
++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_DISCARD:
++ SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_READ:
++ SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL,
++ NULL);
++ break;
++ case GF_FOP_FSYNC:
++ SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_REMOVEXATTR:
++ SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL);
++ break;
++ case GF_FOP_FREMOVEXATTR:
++ SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL);
++ break;
++ case GF_FOP_FGETXATTR:
++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
++ break;
++ case GF_FOP_GETXATTR:
++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL);
++ break;
++ case GF_FOP_FSETXATTR:
++ SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
++ break;
++ case GF_FOP_SETXATTR:
++ SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
++ break;
++ case GF_FOP_SETATTR:
++ SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_FSETATTR:
++ SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL);
++ break;
++ case GF_FOP_SEEK:
++ SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL);
++ break;
++ default:
++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++ "Invalid fop id = %d", fop);
++ break;
++ }
++ return 0;
++}
++
++int shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
++ call_frame_t *frame,
++ int32_t op_ret) {
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ switch (fop) {
++ case GF_FOP_WRITE:
++ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf,
++ &local->postbuf, local->xattr_rsp);
++ break;
++ case GF_FOP_FALLOCATE:
++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf,
++ &local->postbuf, local->xattr_rsp);
++ break;
++ case GF_FOP_ZEROFILL:
++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf,
++ &local->postbuf, local->xattr_rsp);
++ break;
++ case GF_FOP_DISCARD:
++ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf,
++ &local->postbuf, local->xattr_rsp);
++ break;
++ default:
++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++ "Invalid fop id = %d", fop);
++ break;
++ }
++ return 0;
++}
++
++int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, struct iatt *prebuf,
++ struct iatt *postbuf, dict_t *xdata) {
++ char block_bname[256] = {
++ 0,
++ };
++ fd_t *anon_fd = cookie;
++ inode_t *shard_inode = NULL;
++ shard_inode_ctx_t *ctx = NULL;
++ shard_priv_t *priv = NULL;
++
++ priv = this->private;
++
++ if (anon_fd == NULL || op_ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED,
++ "fsync failed on shard");
++ goto out;
++ }
++ shard_inode = anon_fd->inode;
++
++ LOCK(&priv->lock);
++ LOCK(&shard_inode->lock);
++ {
++ __shard_inode_ctx_get(shard_inode, this, &ctx);
++ if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
++ shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname,
++ sizeof(block_bname));
++ inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
++ /* The following unref corresponds to the ref held by
++ * inode_link() at the time the shard was created or
++ * looked up
++ */
++ inode_unref(shard_inode);
++ inode_forget(shard_inode, 0);
++ }
++ }
++ UNLOCK(&shard_inode->lock);
++ UNLOCK(&priv->lock);
+
+- LOCK(&frame->lock);
+- {
+- call_count = --local->call_count;
++out:
++ if (anon_fd)
++ fd_unref(anon_fd);
++ STACK_DESTROY(frame->root);
++ return 0;
++}
++
++int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) {
++ fd_t *anon_fd = NULL;
++ call_frame_t *fsync_frame = NULL;
++
++ fsync_frame = create_frame(this, this->ctx->pool);
++ if (!fsync_frame) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++ "Failed to create new frame "
++ "to fsync shard");
++ return -1;
++ }
++
++ anon_fd = fd_anonymous(inode);
++ if (!anon_fd) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++ "Failed to create anon fd to"
++ " fsync shard");
++ STACK_DESTROY(fsync_frame->root);
++ return -1;
++ }
++
++ STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd,
++ 1, NULL);
++ return 0;
++}
++
++int shard_common_resolve_shards(
++ call_frame_t *frame, xlator_t *this,
++ shard_post_resolve_fop_handler_t post_res_handler) {
++ int i = -1;
++ uint32_t shard_idx_iter = 0;
++ char path[PATH_MAX] = {
++ 0,
++ };
++ uuid_t gfid = {
++ 0,
++ };
++ inode_t *inode = NULL;
++ inode_t *res_inode = NULL;
++ inode_t *fsync_inode = NULL;
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
++
++ priv = this->private;
++ local = frame->local;
++ local->call_count = 0;
++ shard_idx_iter = local->first_block;
++ res_inode = local->resolver_base_inode;
++ if (res_inode)
++ gf_uuid_copy(gfid, res_inode->gfid);
++ else
++ gf_uuid_copy(gfid, local->base_gfid);
++
++ if ((local->op_ret < 0) || (local->resolve_not))
++ goto out;
++
++ while (shard_idx_iter <= local->last_block) {
++ i++;
++ if (shard_idx_iter == 0) {
++ local->inode_list[i] = inode_ref(res_inode);
++ shard_idx_iter++;
++ continue;
++ }
++
++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
++
++ inode = NULL;
++ inode = inode_resolve(this->itable, path);
++ if (inode) {
++ gf_msg_debug(this->name, 0, "Shard %d already "
++ "present. gfid=%s. Saving inode for future.",
++ shard_idx_iter, uuid_utoa(inode->gfid));
++ local->inode_list[i] = inode;
++ /* Let the ref on the inodes that are already present
++ * in inode table still be held so that they don't get
++ * forgotten by the time the fop reaches the actual
++ * write stage.
++ */
++ LOCK(&priv->lock);
++ {
++ fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode,
++ shard_idx_iter, gfid);
++ }
++ UNLOCK(&priv->lock);
++ shard_idx_iter++;
++ if (fsync_inode)
++ shard_initiate_evicted_inode_fsync(this, fsync_inode);
++ continue;
++ } else {
++ local->call_count++;
++ shard_idx_iter++;
+ }
+- UNLOCK(&frame->lock);
++ }
++out:
++ post_res_handler(frame, this);
++ return 0;
++}
++
++int shard_update_file_size_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret, int32_t op_errno,
++ dict_t *dict, dict_t *xdata) {
++ inode_t *inode = NULL;
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ if ((local->fd) && (local->fd->inode))
++ inode = local->fd->inode;
++ else if (local->loc.inode)
++ inode = local->loc.inode;
++
++ if (op_ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, op_errno,
++ SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size"
++ " xattr failed on %s",
++ uuid_utoa(inode->gfid));
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto err;
++ }
+
+- return call_count;
++ if (shard_modify_size_and_block_count(&local->postbuf, dict)) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++err:
++ local->post_update_size_handler(frame, this);
++ return 0;
+ }
+
+-static char *
+-shard_internal_dir_string(shard_internal_dir_type_t type)
+-{
+- char *str = NULL;
++int shard_set_size_attrs(int64_t size, int64_t block_count,
++ int64_t **size_attr_p) {
++ int ret = -1;
++ int64_t *size_attr = NULL;
+
+- switch (type) {
+- case SHARD_INTERNAL_DIR_DOT_SHARD:
+- str = GF_SHARD_DIR;
+- break;
+- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+- str = GF_SHARD_REMOVE_ME_DIR;
+- break;
+- default:
+- break;
+- }
+- return str;
+-}
+-
+-static int
+-shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local,
+- shard_internal_dir_type_t type)
+-{
+- int ret = -1;
+- char *bname = NULL;
+- inode_t *parent = NULL;
+- loc_t *internal_dir_loc = NULL;
+- shard_priv_t *priv = NULL;
+-
+- priv = this->private;
+- if (!local)
+- return -1;
+-
+- switch (type) {
+- case SHARD_INTERNAL_DIR_DOT_SHARD:
+- internal_dir_loc = &local->dot_shard_loc;
+- bname = GF_SHARD_DIR;
+- parent = inode_ref(this->itable->root);
+- break;
+- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+- internal_dir_loc = &local->dot_shard_rm_loc;
+- bname = GF_SHARD_REMOVE_ME_DIR;
+- parent = inode_ref(priv->dot_shard_inode);
+- break;
+- default:
+- break;
+- }
++ if (!size_attr_p)
++ goto out;
+
+- internal_dir_loc->inode = inode_new(this->itable);
+- internal_dir_loc->parent = parent;
+- ret = inode_path(internal_dir_loc->parent, bname,
+- (char **)&internal_dir_loc->path);
+- if (ret < 0 || !(internal_dir_loc->inode)) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed on %s", bname);
+- goto out;
+- }
++ size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t);
++ if (!size_attr)
++ goto out;
+
+- internal_dir_loc->name = strrchr(internal_dir_loc->path, '/');
+- if (internal_dir_loc->name)
+- internal_dir_loc->name++;
++ size_attr[0] = hton64(size);
++ /* As sharding evolves, it _may_ be necessary to embed more pieces of
++ * information within the same xattr. So allocating slots for them in
++ * advance. For now, only bits 0-63 and 128-191, which hold the current
++ * size and block count of the file respectively, are valid.
++ */
++ size_attr[2] = hton64(block_count);
+
+- ret = 0;
++ *size_attr_p = size_attr;
++
++ ret = 0;
+ out:
+- return ret;
++ return ret;
+ }
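+
Note on shard_set_size_attrs() above: the xattr value is four big-endian
(hton64) 64-bit words, of which word 0 carries the file size and word 2 the
block count; words 1 and 3 are reserved. A sketch of reading it back from a
brick, assuming the key behind GF_XATTR_SHARD_FILE_SIZE is
trusted.glusterfs.shard.file-size (the path is a placeholder):

    getfattr -n trusted.glusterfs.shard.file-size -e hex /brick/path/to/file
    # bytes 0-7   (word 0): file size in bytes, big-endian
    # bytes 16-23 (word 2): block count; words 1 and 3 are reserved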
+
+-inode_t *
+-__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
+- inode_t *base_inode, int block_num,
+- uuid_t gfid)
+-{
+- char block_bname[256] = {
+- 0,
+- };
+- inode_t *lru_inode = NULL;
+- shard_priv_t *priv = NULL;
+- shard_inode_ctx_t *ctx = NULL;
+- shard_inode_ctx_t *lru_inode_ctx = NULL;
+- shard_inode_ctx_t *lru_base_inode_ctx = NULL;
+- inode_t *fsync_inode = NULL;
+- inode_t *lru_base_inode = NULL;
+- gf_boolean_t do_fsync = _gf_false;
+-
+- priv = this->private;
+-
+- shard_inode_ctx_get(linked_inode, this, &ctx);
+-
+- if (list_empty(&ctx->ilist)) {
+- if (priv->inode_count + 1 <= priv->lru_limit) {
+- /* If this inode was linked here for the first time (indicated
+- * by empty list), and if there is still space in the priv list,
+- * add this ctx to the tail of the list.
+- */
+- /* For as long as an inode is in lru list, we try to
+- * keep it alive by holding a ref on it.
+- */
+- inode_ref(linked_inode);
+- if (base_inode)
+- gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+- else
+- gf_uuid_copy(ctx->base_gfid, gfid);
+- ctx->block_num = block_num;
+- list_add_tail(&ctx->ilist, &priv->ilist_head);
+- priv->inode_count++;
+- ctx->base_inode = inode_ref(base_inode);
+- } else {
+- /*If on the other hand there is no available slot for this inode
+- * in the list, delete the lru inode from the head of the list,
+- * unlink it. And in its place add this new inode into the list.
+- */
+- lru_inode_ctx = list_first_entry(&priv->ilist_head,
+- shard_inode_ctx_t, ilist);
+- GF_ASSERT(lru_inode_ctx->block_num > 0);
+- lru_base_inode = lru_inode_ctx->base_inode;
+- list_del_init(&lru_inode_ctx->ilist);
+- lru_inode = inode_find(linked_inode->table,
+- lru_inode_ctx->stat.ia_gfid);
+- /* If the lru inode was part of the pending-fsync list,
+- * the base inode needs to be unref'd, the lru inode
+- * deleted from fsync list and fsync'd in a new frame,
+- * and then unlinked in memory and forgotten.
+- */
+- if (!lru_base_inode)
+- goto after_fsync_check;
+- LOCK(&lru_base_inode->lock);
+- LOCK(&lru_inode->lock);
+- {
+- if (!list_empty(&lru_inode_ctx->to_fsync_list)) {
+- list_del_init(&lru_inode_ctx->to_fsync_list);
+- lru_inode_ctx->fsync_needed = 0;
+- do_fsync = _gf_true;
+- __shard_inode_ctx_get(lru_base_inode, this,
+- &lru_base_inode_ctx);
+- lru_base_inode_ctx->fsync_count--;
+- }
+- }
+- UNLOCK(&lru_inode->lock);
+- UNLOCK(&lru_base_inode->lock);
+-
+- after_fsync_check:
+- if (!do_fsync) {
+- shard_make_block_bname(lru_inode_ctx->block_num,
+- lru_inode_ctx->base_gfid, block_bname,
+- sizeof(block_bname));
+- /* The following unref corresponds to the ref held at
+- * the time the shard was added to the lru list.
+- */
+- inode_unref(lru_inode);
+- inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
+- inode_forget(lru_inode, 0);
+- } else {
+- /* The following unref corresponds to the ref
+- * held when the shard was added to fsync list.
+- */
+- inode_unref(lru_inode);
+- fsync_inode = lru_inode;
+- if (lru_base_inode)
+- inode_unref(lru_base_inode);
+- }
+- /* The following unref corresponds to the ref
+- * held by inode_find() above.
+- */
+- inode_unref(lru_inode);
+-
+- /* The following unref corresponds to the ref held on the base shard
+- * at the time of adding shard inode to lru list
+- */
+- if (lru_base_inode)
+- inode_unref(lru_base_inode);
+-
+- /* For as long as an inode is in lru list, we try to
+- * keep it alive by holding a ref on it.
+- */
+- inode_ref(linked_inode);
+- if (base_inode)
+- gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+- else
+- gf_uuid_copy(ctx->base_gfid, gfid);
+- ctx->block_num = block_num;
+- ctx->base_inode = inode_ref(base_inode);
+- list_add_tail(&ctx->ilist, &priv->ilist_head);
+- }
+- } else {
+- /* If this is not the first time this inode is being operated on, move
+- * it to the most recently used end of the list.
+- */
+- list_move_tail(&ctx->ilist, &priv->ilist_head);
+- }
+- return fsync_inode;
+-}
++int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ loc_t *loc,
++ shard_post_update_size_fop_handler_t handler) {
++ int ret = -1;
++ int64_t *size_attr = NULL;
++ int64_t delta_blocks = 0;
++ inode_t *inode = NULL;
++ shard_local_t *local = NULL;
++ dict_t *xattr_req = NULL;
+
+-int
+-shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame,
+- int32_t op_ret, int32_t op_errno)
+-{
+- switch (fop) {
+- case GF_FOP_LOOKUP:
+- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL,
+- NULL, NULL);
+- break;
+- case GF_FOP_STAT:
+- SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL);
+- break;
+- case GF_FOP_FSTAT:
+- SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL);
+- break;
+- case GF_FOP_TRUNCATE:
+- SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_FTRUNCATE:
+- SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_MKNOD:
+- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL,
+- NULL, NULL);
+- break;
+- case GF_FOP_LINK:
+- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL,
+- NULL, NULL);
+- break;
+- case GF_FOP_CREATE:
+- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL,
+- NULL, NULL, NULL, NULL);
+- break;
+- case GF_FOP_UNLINK:
+- SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_RENAME:
+- SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL,
+- NULL, NULL, NULL, NULL);
+- break;
+- case GF_FOP_WRITE:
+- SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_FALLOCATE:
+- SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_ZEROFILL:
+- SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_DISCARD:
+- SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_READ:
+- SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL,
+- NULL, NULL);
+- break;
+- case GF_FOP_FSYNC:
+- SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_REMOVEXATTR:
+- SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL);
+- break;
+- case GF_FOP_FREMOVEXATTR:
+- SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL);
+- break;
+- case GF_FOP_FGETXATTR:
+- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+- break;
+- case GF_FOP_GETXATTR:
+- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL);
+- break;
+- case GF_FOP_FSETXATTR:
+- SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
+- break;
+- case GF_FOP_SETXATTR:
+- SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
+- break;
+- case GF_FOP_SETATTR:
+- SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_FSETATTR:
+- SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL,
+- NULL);
+- break;
+- case GF_FOP_SEEK:
+- SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL);
+- break;
+- default:
+- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+- "Invalid fop id = %d", fop);
+- break;
+- }
+- return 0;
+-}
++ local = frame->local;
++ local->post_update_size_handler = handler;
+
+-int
+-shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
+- call_frame_t *frame, int32_t op_ret)
+-{
+- shard_local_t *local = NULL;
++ xattr_req = dict_new();
++ if (!xattr_req) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto out;
++ }
++
++ if (fd)
++ inode = fd->inode;
++ else
++ inode = loc->inode;
++
++ /* If both size and block count have not changed, then skip the xattrop.
++ */
++ delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
++ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
++ goto out;
++ }
++
++ ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks,
++ &size_attr);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
++ "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto out;
++ }
++
++ ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set key %s into dict. gfid=%s", GF_XATTR_SHARD_FILE_SIZE,
++ uuid_utoa(inode->gfid));
++ GF_FREE(size_attr);
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto out;
++ }
+
+- local = frame->local;
++ if (fd)
++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64,
++ xattr_req, NULL);
++ else
++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64,
++ xattr_req, NULL);
+
+- switch (fop) {
+- case GF_FOP_WRITE:
+- SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf,
+- &local->postbuf, local->xattr_rsp);
+- break;
+- case GF_FOP_FALLOCATE:
+- SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf,
+- &local->postbuf, local->xattr_rsp);
+- break;
+- case GF_FOP_ZEROFILL:
+- SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf,
+- &local->postbuf, local->xattr_rsp);
+- break;
+- case GF_FOP_DISCARD:
+- SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf,
+- &local->postbuf, local->xattr_rsp);
+- break;
+- default:
+- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+- "Invalid fop id = %d", fop);
+- break;
+- }
+- return 0;
+-}
++ dict_unref(xattr_req);
++ return 0;
+
+-int
+-shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno,
+- struct iatt *prebuf, struct iatt *postbuf,
+- dict_t *xdata)
+-{
+- char block_bname[256] = {
+- 0,
+- };
+- fd_t *anon_fd = cookie;
+- inode_t *shard_inode = NULL;
+- shard_inode_ctx_t *ctx = NULL;
+- shard_priv_t *priv = NULL;
++out:
++ if (xattr_req)
++ dict_unref(xattr_req);
++ handler(frame, this);
++ return 0;
++}
++
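For orientation: shard_update_file_size() above never writes an absolute size. shard_set_size_attrs() packs the size and block-count deltas into the four-slot GF_XATTR_SHARD_FILE_SIZE array (the slot layout is spelled out in the comment inside shard_set_size_attrs() further down), and GF_XATTROP_ADD_ARRAY64 makes the brick add each slot to the stored value atomically. A minimal sketch of the packing, assuming glusterfs's hton64():

/* Sketch, not part of the patch: pack the two deltas into the
 * four-slot array carried in GF_XATTR_SHARD_FILE_SIZE. Slots 1 and
 * 3 are reserved; slot 0 holds the size delta, slot 2 the
 * block-count delta, both in network byte order. */
static void
pack_shard_size_xattr(int64_t delta_size, int64_t delta_blocks,
                      int64_t slots[4])
{
    slots[0] = hton64(delta_size);
    slots[1] = 0;
    slots[2] = hton64(delta_blocks);
    slots[3] = 0;
}

Passing negative deltas is deliberate: truncation shrinks the stored size through the same additive path.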
++static inode_t *shard_link_internal_dir_inode(shard_local_t *local,
++ inode_t *inode, struct iatt *buf,
++ shard_internal_dir_type_t type) {
++ inode_t *linked_inode = NULL;
++ shard_priv_t *priv = NULL;
++ char *bname = NULL;
++ inode_t **priv_inode = NULL;
++ inode_t *parent = NULL;
++
++ priv = THIS->private;
++
++ switch (type) {
++ case SHARD_INTERNAL_DIR_DOT_SHARD:
++ bname = GF_SHARD_DIR;
++ priv_inode = &priv->dot_shard_inode;
++ parent = inode->table->root;
++ break;
++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++ bname = GF_SHARD_REMOVE_ME_DIR;
++ priv_inode = &priv->dot_shard_rm_inode;
++ parent = priv->dot_shard_inode;
++ break;
++ default:
++ break;
++ }
++
++ linked_inode = inode_link(inode, parent, bname, buf);
++ inode_lookup(linked_inode);
++ *priv_inode = linked_inode;
++ return linked_inode;
++}
++
++int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode,
++ struct iatt *buf, dict_t *xdata,
++ struct iatt *postparent) {
++ shard_local_t *local = NULL;
++ inode_t *linked_inode = NULL;
++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++
++ local = frame->local;
++
++ if (op_ret) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto out;
++ }
++
++ /* To-Do: Fix refcount increment per call to
++ * shard_link_internal_dir_inode().
++ */
++ linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++ shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
++out:
++ shard_common_resolve_shards(frame, this, local->post_res_handler);
++ return 0;
++}
++
++int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this,
++ shard_internal_dir_type_t type) {
++ loc_t loc = {
++ 0,
++ };
++ inode_t *inode = NULL;
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
++ uuid_t gfid = {
++ 0,
++ };
++
++ local = frame->local;
++ priv = this->private;
++
++ switch (type) {
++ case SHARD_INTERNAL_DIR_DOT_SHARD:
++ gf_uuid_copy(gfid, priv->dot_shard_gfid);
++ break;
++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
++ break;
++ default:
++ break;
++ }
++
++ inode = inode_find(this->itable, gfid);
++
++ if (!shard_inode_ctx_needs_lookup(inode, this)) {
++ local->op_ret = 0;
++ goto out;
++ }
+
+- priv = this->private;
++ /* Plain assignment because the ref is already taken above through
++ * call to inode_find()
++ */
++ loc.inode = inode;
++ gf_uuid_copy(loc.gfid, gfid);
+
+- if (anon_fd == NULL || op_ret < 0) {
+- gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED,
+- "fsync failed on shard");
+- goto out;
+- }
+- shard_inode = anon_fd->inode;
++ STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc,
++ NULL);
++ loc_wipe(&loc);
+
+- LOCK(&priv->lock);
+- LOCK(&shard_inode->lock);
+- {
+- __shard_inode_ctx_get(shard_inode, this, &ctx);
+- if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
+- shard_make_block_bname(ctx->block_num, shard_inode->gfid,
+- block_bname, sizeof(block_bname));
+- inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
+- /* The following unref corresponds to the ref held by
+- * inode_link() at the time the shard was created or
+- * looked up
+- */
+- inode_unref(shard_inode);
+- inode_forget(shard_inode, 0);
+- }
+- }
+- UNLOCK(&shard_inode->lock);
+- UNLOCK(&priv->lock);
++ return 0;
+
+ out:
+- if (anon_fd)
+- fd_unref(anon_fd);
+- STACK_DESTROY(frame->root);
+- return 0;
++ shard_common_resolve_shards(frame, this, local->post_res_handler);
++ return 0;
+ }
+
+-int
+-shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
+-{
+- fd_t *anon_fd = NULL;
+- call_frame_t *fsync_frame = NULL;
++int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode,
++ struct iatt *buf, dict_t *xdata,
++ struct iatt *postparent) {
++ inode_t *link_inode = NULL;
++ shard_local_t *local = NULL;
++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+
+- fsync_frame = create_frame(this, this->ctx->pool);
+- if (!fsync_frame) {
+- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+- "Failed to create new frame "
+- "to fsync shard");
+- return -1;
+- }
++ local = frame->local;
+
+- anon_fd = fd_anonymous(inode);
+- if (!anon_fd) {
+- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+- "Failed to create anon fd to"
+- " fsync shard");
+- STACK_DESTROY(fsync_frame->root);
+- return -1;
+- }
++ if (op_ret) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto unwind;
++ }
++
++ if (!IA_ISDIR(buf->ia_type)) {
++ gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR,
++ "%s already exists and "
++ "is not a directory. Please remove it from all bricks "
++ "and try again",
++ shard_internal_dir_string(type));
++ local->op_ret = -1;
++ local->op_errno = EIO;
++ goto unwind;
++ }
++
++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++ if (link_inode != inode) {
++ shard_refresh_internal_dir(frame, this, type);
++ } else {
++ shard_inode_ctx_mark_dir_refreshed(link_inode, this);
++ shard_common_resolve_shards(frame, this, local->post_res_handler);
++ }
++ return 0;
+
+- STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+- anon_fd, 1, NULL);
+- return 0;
+-}
++unwind:
++ local->post_res_handler(frame, this);
++ return 0;
++}
++
++int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this,
++ shard_post_resolve_fop_handler_t post_res_handler,
++ shard_internal_dir_type_t type) {
++ int ret = -1;
++ dict_t *xattr_req = NULL;
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
++ uuid_t *gfid = NULL;
++ loc_t *loc = NULL;
++ gf_boolean_t free_gfid = _gf_true;
++
++ local = frame->local;
++ priv = this->private;
++ local->post_res_handler = post_res_handler;
++
++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
++ if (!gfid)
++ goto err;
++
++ xattr_req = dict_new();
++ if (!xattr_req) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++
++ switch (type) {
++ case SHARD_INTERNAL_DIR_DOT_SHARD:
++ gf_uuid_copy(*gfid, priv->dot_shard_gfid);
++ loc = &local->dot_shard_loc;
++ break;
++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
++ loc = &local->dot_shard_rm_loc;
++ break;
++ default:
++ bzero(*gfid, sizeof(uuid_t));
++ break;
++ }
++
++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set gfid of %s into dict",
++ shard_internal_dir_string(type));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ } else {
++ free_gfid = _gf_false;
++ }
+
+-int
+-shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
+- shard_post_resolve_fop_handler_t post_res_handler)
+-{
+- int i = -1;
+- uint32_t shard_idx_iter = 0;
+- char path[PATH_MAX] = {
+- 0,
+- };
+- uuid_t gfid = {
+- 0,
+- };
+- inode_t *inode = NULL;
+- inode_t *res_inode = NULL;
+- inode_t *fsync_inode = NULL;
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+-
+- priv = this->private;
+- local = frame->local;
+- local->call_count = 0;
+- shard_idx_iter = local->first_block;
+- res_inode = local->resolver_base_inode;
+- if (res_inode)
+- gf_uuid_copy(gfid, res_inode->gfid);
+- else
+- gf_uuid_copy(gfid, local->base_gfid);
++ STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc,
++ xattr_req);
+
+- if ((local->op_ret < 0) || (local->resolve_not))
+- goto out;
++ dict_unref(xattr_req);
++ return 0;
+
+- while (shard_idx_iter <= local->last_block) {
+- i++;
+- if (shard_idx_iter == 0) {
+- local->inode_list[i] = inode_ref(res_inode);
+- shard_idx_iter++;
+- continue;
+- }
++err:
++ if (xattr_req)
++ dict_unref(xattr_req);
++ if (free_gfid)
++ GF_FREE(gfid);
++ post_res_handler(frame, this);
++ return 0;
++}
++
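A note on memory ownership in shard_lookup_internal_dir() above: the internal directories use fixed, well-known gfids from priv, and dict_set_gfuuid() is called with is_static == false, so the dict takes ownership of the heap-allocated gfid on success. The free_gfid flag exists solely so the error path can free it. Reduced to its essentials:

/* Sketch of the ownership handoff used above. */
uuid_t *gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
gf_boolean_t free_gfid = _gf_true;

if (dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false) == 0)
    free_gfid = _gf_false; /* the dict frees it when unref'd */

if (free_gfid)
    GF_FREE(gfid); /* error path only */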
++static void shard_inode_ctx_update(inode_t *inode, xlator_t *this,
++ dict_t *xdata, struct iatt *buf) {
++ int ret = 0;
++ uint64_t size = 0;
++ void *bsize = NULL;
++
++ if (shard_inode_ctx_get_block_size(inode, this, &size)) {
++ /* Fresh lookup */
++ ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
++ if (!ret)
++ size = ntoh64(*((uint64_t *)bsize));
++ /* If the file is sharded, set its block size, otherwise just
++ * set 0.
++ */
+
+- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+-
+- inode = NULL;
+- inode = inode_resolve(this->itable, path);
+- if (inode) {
+- gf_msg_debug(this->name, 0,
+- "Shard %d already "
+- "present. gfid=%s. Saving inode for future.",
+- shard_idx_iter, uuid_utoa(inode->gfid));
+- local->inode_list[i] = inode;
+- /* Let the ref on the inodes that are already present
+- * in inode table still be held so that they don't get
+- * forgotten by the time the fop reaches the actual
+- * write stage.
+- */
+- LOCK(&priv->lock);
+- {
+- fsync_inode = __shard_update_shards_inode_list(
+- inode, this, res_inode, shard_idx_iter, gfid);
+- }
+- UNLOCK(&priv->lock);
+- shard_idx_iter++;
+- if (fsync_inode)
+- shard_initiate_evicted_inode_fsync(this, fsync_inode);
+- continue;
+- } else {
+- local->call_count++;
+- shard_idx_iter++;
+- }
+- }
+-out:
+- post_res_handler(frame, this);
+- return 0;
++ shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE);
++ }
++ /* If the file is sharded, also set the remaining attributes,
++ * except for ia_size and ia_blocks.
++ */
++ if (size) {
++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
++ (void)shard_inode_ctx_invalidate(inode, this, buf);
++ }
++}
++
++int shard_delete_shards(void *opaque);
++
++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data);
++
++int shard_start_background_deletion(xlator_t *this) {
++ int ret = 0;
++ gf_boolean_t i_cleanup = _gf_true;
++ shard_priv_t *priv = NULL;
++ call_frame_t *cleanup_frame = NULL;
++
++ priv = this->private;
++
++ LOCK(&priv->lock);
++ {
++ switch (priv->bg_del_state) {
++ case SHARD_BG_DELETION_NONE:
++ i_cleanup = _gf_true;
++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
++ break;
++ case SHARD_BG_DELETION_LAUNCHING:
++ i_cleanup = _gf_false;
++ break;
++ case SHARD_BG_DELETION_IN_PROGRESS:
++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
++ i_cleanup = _gf_false;
++ break;
++ default:
++ break;
++ }
++ }
++ UNLOCK(&priv->lock);
++ if (!i_cleanup)
++ return 0;
++
++ cleanup_frame = create_frame(this, this->ctx->pool);
++ if (!cleanup_frame) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++ "Failed to create "
++ "new frame to delete shards");
++ ret = -ENOMEM;
++ goto err;
++ }
++
++ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
++
++ ret = synctask_new(this->ctx->env, shard_delete_shards,
++ shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED,
++ "failed to create task to do background "
++ "cleanup of shards");
++ STACK_DESTROY(cleanup_frame->root);
++ goto err;
++ }
++ return 0;
++
++err:
++ LOCK(&priv->lock);
++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; }
++ UNLOCK(&priv->lock);
++ return ret;
+ }
+
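The LOCK'd switch in shard_start_background_deletion() above is a single-launcher gate: only the caller that wins the NONE to LAUNCHING transition spawns the synctask, and a caller racing with an in-progress run flips the state back to LAUNCHING to request one more pass. Isolated as a helper, with names taken from the patch:

static int /* returns 1 if the caller must spawn the cleanup task */
bg_del_try_launch(shard_priv_t *priv)
{
    int i_cleanup = 0;

    LOCK(&priv->lock);
    {
        if (priv->bg_del_state == SHARD_BG_DELETION_NONE) {
            priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
            i_cleanup = 1;
        } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
            /* a run is active: ask for another pass, don't spawn */
            priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
        }
    }
    UNLOCK(&priv->lock);
    return i_cleanup;
}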
+-int
+-shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *dict,
+- dict_t *xdata)
+-{
+- inode_t *inode = NULL;
+- shard_local_t *local = NULL;
++int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, inode_t *inode,
++ struct iatt *buf, dict_t *xdata, struct iatt *postparent) {
++ int ret = -1;
++ shard_priv_t *priv = NULL;
++ gf_boolean_t i_start_cleanup = _gf_false;
+
+- local = frame->local;
++ priv = this->private;
+
+- if ((local->fd) && (local->fd->inode))
+- inode = local->fd->inode;
+- else if (local->loc.inode)
+- inode = local->loc.inode;
++ if (op_ret < 0)
++ goto unwind;
+
+- if (op_ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, op_errno,
+- SHARD_MSG_UPDATE_FILE_SIZE_FAILED,
+- "Update to file size"
+- " xattr failed on %s",
+- uuid_utoa(inode->gfid));
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto err;
+- }
++ if (IA_ISDIR(buf->ia_type))
++ goto unwind;
+
+- if (shard_modify_size_and_block_count(&local->postbuf, dict)) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- }
+-err:
+- local->post_update_size_handler(frame, this);
+- return 0;
+-}
++ /* Also, if the file is sharded, get the file size and block cnt xattr,
++ * and store them in the stbuf appropriately.
++ */
+
+-int
+-shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p)
+-{
+- int ret = -1;
+- int64_t *size_attr = NULL;
++ if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) &&
++ frame->root->pid != GF_CLIENT_PID_GSYNCD)
++ shard_modify_size_and_block_count(buf, xdata);
+
+- if (!size_attr_p)
+- goto out;
++ /* If this was a fresh lookup, there are two possibilities:
++ * 1) If the file is sharded (indicated by the presence of block size
++ * xattr), store this block size, along with rdev and mode in its
++ * inode ctx.
++ * 2) If the file is not sharded, store size along with rdev and mode
++ * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is
++ * already initialised to all zeroes, nothing more needs to be done.
++ */
+
+- size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t);
+- if (!size_attr)
+- goto out;
++ (void)shard_inode_ctx_update(inode, this, xdata, buf);
+
+- size_attr[0] = hton64(size);
+- /* As sharding evolves, it _may_ be necessary to embed more pieces of
+- * information within the same xattr. So allocating slots for them in
+- * advance. For now, only bytes 0-63 and 128-191 which would make up the
+- * current size and block count respectively of the file are valid.
+- */
+- size_attr[2] = hton64(block_count);
++ LOCK(&priv->lock);
++ {
++ if (priv->first_lookup_done == _gf_false) {
++ priv->first_lookup_done = _gf_true;
++ i_start_cleanup = _gf_true;
++ }
++ }
++ UNLOCK(&priv->lock);
+
+- *size_attr_p = size_attr;
++ if (!i_start_cleanup)
++ goto unwind;
+
+- ret = 0;
+-out:
+- return ret;
++ ret = shard_start_background_deletion(this);
++ if (ret < 0) {
++ LOCK(&priv->lock);
++ { priv->first_lookup_done = _gf_false; }
++ UNLOCK(&priv->lock);
++ }
++
++unwind:
++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
++ postparent);
++ return 0;
+ }
+
+-int
+-shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- loc_t *loc, shard_post_update_size_fop_handler_t handler)
+-{
+- int ret = -1;
+- int64_t *size_attr = NULL;
+- int64_t delta_blocks = 0;
+- inode_t *inode = NULL;
+- shard_local_t *local = NULL;
+- dict_t *xattr_req = NULL;
++int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ dict_t *xattr_req) {
++ int ret = -1;
++ int32_t op_errno = ENOMEM;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- local = frame->local;
+- local->post_update_size_handler = handler;
++ this->itable = loc->inode->table;
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
++ }
+
+- xattr_req = dict_new();
+- if (!xattr_req) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto out;
+- }
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- if (fd)
+- inode = fd->inode;
+- else
+- inode = loc->inode;
++ frame->local = local;
+
+- /* If both size and block count have not changed, then skip the xattrop.
+- */
+- delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
+- if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
+- goto out;
+- }
++ loc_copy(&local->loc, loc);
+
+- ret = shard_set_size_attrs(local->delta_size + local->hole_size,
+- delta_blocks, &size_attr);
++ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++
++ if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) {
++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+ if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
+- "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto out;
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set dict"
++ " value: key:%s for path %s",
++ GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
++ goto err;
+ }
++ }
+
+- ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4);
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
+ if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set key %s into dict. gfid=%s",
+- GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid));
+- GF_FREE(size_attr);
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto out;
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set dict value: key:%s for path %s.",
++ GF_XATTR_SHARD_FILE_SIZE, loc->path);
++ goto err;
+ }
++ }
+
+- if (fd)
+- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fxattrop, fd,
+- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+- else
+- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->xattrop, loc,
+- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+-
+- dict_unref(xattr_req);
+- return 0;
+-
+-out:
+- if (xattr_req)
+- dict_unref(xattr_req);
+- handler(frame, this);
+- return 0;
+-}
+-
+-static inode_t *
+-shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode,
+- struct iatt *buf, shard_internal_dir_type_t type)
+-{
+- inode_t *linked_inode = NULL;
+- shard_priv_t *priv = NULL;
+- char *bname = NULL;
+- inode_t **priv_inode = NULL;
+- inode_t *parent = NULL;
+-
+- priv = THIS->private;
+-
+- switch (type) {
+- case SHARD_INTERNAL_DIR_DOT_SHARD:
+- bname = GF_SHARD_DIR;
+- priv_inode = &priv->dot_shard_inode;
+- parent = inode->table->root;
+- break;
+- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+- bname = GF_SHARD_REMOVE_ME_DIR;
+- priv_inode = &priv->dot_shard_rm_inode;
+- parent = priv->dot_shard_inode;
+- break;
+- default:
+- break;
+- }
++ if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY)))
++ dict_del(xattr_req, GF_CONTENT_KEY);
+
+- linked_inode = inode_link(inode, parent, bname, buf);
+- inode_lookup(linked_inode);
+- *priv_inode = linked_inode;
+- return linked_inode;
++ STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno);
++ return 0;
+ }
+
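The request dict built in shard_lookup() above does three things: it asks for GF_XATTR_SHARD_BLOCK_SIZE whenever the block size is not yet cached in the inode ctx, asks for the GF_XATTR_SHARD_FILE_SIZE array, and deletes GF_CONTENT_KEY, since content fetched on lookup would come from the base file alone, which holds only the first shard. The 8 * 4 literal is the expected xattr value length, matching the four int64 slots:

/* Sketch: request the two shard xattrs on lookup. The file-size key
 * carries its expected value length: 8 * 4 == sizeof(int64_t) * 4,
 * i.e. the four-slot size array. */
dict_set_uint64(xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
dict_set_uint64(xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);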
+-int
+-shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
++int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
+- struct iatt *postparent)
+-{
+- shard_local_t *local = NULL;
+- inode_t *linked_inode = NULL;
+- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+-
+- local = frame->local;
+-
+- if (op_ret) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto out;
+- }
++ struct iatt *postparent) {
++ int ret = -1;
++ int32_t mask = SHARD_INODE_WRITE_MASK;
++ shard_local_t *local = NULL;
++ shard_inode_ctx_t ctx = {
++ 0,
++ };
++
++ local = frame->local;
++
++ if (op_ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, op_errno,
++ SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file"
++ " failed : %s",
++ loc_gfid_utoa(&(local->loc)));
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto unwind;
++ }
++
++ local->prebuf = *buf;
++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++ local->op_ret = -1;
++ local->op_errno = EINVAL;
++ goto unwind;
++ }
++
++ if (shard_inode_ctx_get_all(inode, this, &ctx))
++ mask = SHARD_ALL_MASK;
++
++ ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0,
++ (mask | SHARD_MASK_REFRESH_RESET));
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
++ "Failed to set inode"
++ " write params into inode ctx for %s",
++ uuid_utoa(buf->ia_gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto unwind;
++ }
++
++unwind:
++ local->handler(frame, this);
++ return 0;
++}
++
++int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ shard_post_fop_handler_t handler) {
++ int ret = -1;
++ shard_local_t *local = NULL;
++ dict_t *xattr_req = NULL;
++ gf_boolean_t need_refresh = _gf_false;
++
++ local = frame->local;
++ local->handler = handler;
++
++ ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
++ &need_refresh);
++ /* By this time, inode ctx should have been created either in create,
++ * mknod, readdirp or lookup. If not, it is a bug!
++ */
++ if ((ret == 0) && (need_refresh == _gf_false)) {
++ gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s"
++ "Serving prebuf off the inode ctx cache",
++ uuid_utoa(loc->gfid));
++ goto out;
++ }
++
++ xattr_req = dict_new();
++ if (!xattr_req) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto out;
++ }
++
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
++
++ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
++
++ dict_unref(xattr_req);
++ return 0;
+
+- /* To-Do: Fix refcount increment per call to
+- * shard_link_internal_dir_inode().
+- */
+- linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+- shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
+ out:
+- shard_common_resolve_shards(frame, this, local->post_res_handler);
+- return 0;
++ if (xattr_req)
++ dict_unref(xattr_req);
++ handler(frame, this);
++ return 0;
+ }
+
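shard_lookup_base_file() above is a read-through cache: shard_inode_ctx_fill_iatt_from_cache() serves prebuf straight from the inode ctx while it is fresh, and only a stale or missing entry winds a real lookup, whose cbk re-primes the ctx with SHARD_MASK_REFRESH_RESET. The gate reduces to:

/* Sketch of the cache-or-wind gate used above. */
gf_boolean_t need_refresh = _gf_false;

if (shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
                                         &need_refresh) == 0 &&
    !need_refresh) {
    handler(frame, this); /* cache hit: no network round trip */
    return 0;
}
/* miss or stale: wind the lookup; the cbk repopulates the ctx */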
+-int
+-shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this,
+- shard_internal_dir_type_t type)
+-{
+- loc_t loc = {
+- 0,
+- };
+- inode_t *inode = NULL;
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+- uuid_t gfid = {
+- 0,
+- };
++int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- local = frame->local;
+- priv = this->private;
+-
+- switch (type) {
+- case SHARD_INTERNAL_DIR_DOT_SHARD:
+- gf_uuid_copy(gfid, priv->dot_shard_gfid);
+- break;
+- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
+- break;
+- default:
+- break;
+- }
++ local = frame->local;
+
+- inode = inode_find(this->itable, gfid);
++ if (local->op_ret >= 0)
++ shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0,
++ SHARD_LOOKUP_MASK);
+
+- if (!shard_inode_ctx_needs_lookup(inode, this)) {
+- local->op_ret = 0;
+- goto out;
+- }
++ SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
++ &local->prebuf, local->xattr_rsp);
++ return 0;
++}
+
+- /* Plain assignment because the ref is already taken above through
+- * call to inode_find()
+- */
+- loc.inode = inode;
+- gf_uuid_copy(loc.gfid, gfid);
++int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc,
+- NULL);
+- loc_wipe(&loc);
++ local = frame->local;
+
+- return 0;
++ if (local->op_ret >= 0)
++ shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0,
++ SHARD_LOOKUP_MASK);
+
+-out:
+- shard_common_resolve_shards(frame, this, local->post_res_handler);
+- return 0;
++ SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
++ &local->prebuf, local->xattr_rsp);
++ return 0;
+ }
+
+-int
+-shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, dict_t *xdata,
+- struct iatt *postparent)
+-{
+- inode_t *link_inode = NULL;
+- shard_local_t *local = NULL;
+- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *buf,
++ dict_t *xdata) {
++ inode_t *inode = NULL;
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- if (op_ret) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto unwind;
+- }
++ if (op_ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED,
++ "stat failed: %s", local->fd ? uuid_utoa(local->fd->inode->gfid)
++ : uuid_utoa((local->loc.inode)->gfid));
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto unwind;
++ }
+
+- if (!IA_ISDIR(buf->ia_type)) {
+- gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR,
+- "%s already exists and "
+- "is not a directory. Please remove it from all bricks "
+- "and try again",
+- shard_internal_dir_string(type));
+- local->op_ret = -1;
+- local->op_errno = EIO;
+- goto unwind;
+- }
++ local->prebuf = *buf;
++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++ local->op_ret = -1;
++ local->op_errno = EINVAL;
++ goto unwind;
++ }
++ local->xattr_rsp = dict_ref(xdata);
+
+- link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+- if (link_inode != inode) {
+- shard_refresh_internal_dir(frame, this, type);
+- } else {
+- shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+- shard_common_resolve_shards(frame, this, local->post_res_handler);
+- }
+- return 0;
++ if (local->loc.inode)
++ inode = local->loc.inode;
++ else
++ inode = local->fd->inode;
++
++ shard_inode_ctx_invalidate(inode, this, &local->prebuf);
+
+ unwind:
+- local->post_res_handler(frame, this);
+- return 0;
++ local->handler(frame, this);
++ return 0;
+ }
+
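shard_common_stat_cbk() above depends on shard_modify_size_and_block_count() to overwrite ia_size and ia_blocks with the values from the size xattr, because the backend iatt describes only the base file. A sketch of what that helper is assumed to do, given the four-slot layout:

int64_t *arr = NULL;

if (dict_get_ptr(xdata, GF_XATTR_SHARD_FILE_SIZE, (void **)&arr) == 0) {
    buf->ia_size = ntoh64(arr[0]);   /* true file size */
    buf->ia_blocks = ntoh64(arr[2]); /* true block count */
}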
+-int
+-shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this,
+- shard_post_resolve_fop_handler_t post_res_handler,
+- shard_internal_dir_type_t type)
+-{
+- int ret = -1;
+- dict_t *xattr_req = NULL;
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+- uuid_t *gfid = NULL;
+- loc_t *loc = NULL;
+- gf_boolean_t free_gfid = _gf_true;
+-
+- local = frame->local;
+- priv = this->private;
+- local->post_res_handler = post_res_handler;
+-
+- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+- if (!gfid)
+- goto err;
+-
+- xattr_req = dict_new();
+- if (!xattr_req) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- }
+-
+- switch (type) {
+- case SHARD_INTERNAL_DIR_DOT_SHARD:
+- gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+- loc = &local->dot_shard_loc;
+- break;
+- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+- loc = &local->dot_shard_rm_loc;
+- break;
+- default:
+- bzero(*gfid, sizeof(uuid_t));
+- break;
+- }
++int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set gfid of %s into dict",
+- shard_internal_dir_string(type));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- } else {
+- free_gfid = _gf_false;
+- }
++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->stat, loc, xdata);
++ return 0;
++ }
+
+- STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc,
+- xattr_req);
++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size from inode ctx of %s",
++ uuid_utoa(loc->inode->gfid));
++ goto err;
++ }
+
+- dict_unref(xattr_req);
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
++ }
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
++ frame->local = local;
++
++ local->handler = shard_post_stat_handler;
++ loc_copy(&local->loc, loc);
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
++ local, err);
++
++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
++ return 0;
+ err:
+- if (xattr_req)
+- dict_unref(xattr_req);
+- if (free_gfid)
+- GF_FREE(gfid);
+- post_res_handler(frame, this);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-static void
+-shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata,
+- struct iatt *buf)
+-{
+- int ret = 0;
+- uint64_t size = 0;
+- void *bsize = NULL;
+-
+- if (shard_inode_ctx_get_block_size(inode, this, &size)) {
+- /* Fresh lookup */
+- ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+- if (!ret)
+- size = ntoh64(*((uint64_t *)bsize));
+- /* If the file is sharded, set its block size, otherwise just
+- * set 0.
+- */
++int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE);
+- }
+- /* If the file is sharded, also set the remaining attributes,
+- * except for ia_size and ia_blocks.
+- */
+- if (size) {
+- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
+- (void)shard_inode_ctx_invalidate(inode, this, buf);
+- }
+-}
++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fstat, fd, xdata);
++ return 0;
++ }
+
+-int
+-shard_delete_shards(void *opaque);
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size from inode ctx of %s",
++ uuid_utoa(fd->inode->gfid));
++ goto err;
++ }
+
+-int
+-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data);
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fstat, fd, xdata);
++ return 0;
++ }
+
+-int
+-shard_start_background_deletion(xlator_t *this)
+-{
+- int ret = 0;
+- gf_boolean_t i_cleanup = _gf_true;
+- shard_priv_t *priv = NULL;
+- call_frame_t *cleanup_frame = NULL;
++ if (!this->itable)
++ this->itable = fd->inode->table;
+
+- priv = this->private;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- LOCK(&priv->lock);
+- {
+- switch (priv->bg_del_state) {
+- case SHARD_BG_DELETION_NONE:
+- i_cleanup = _gf_true;
+- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+- break;
+- case SHARD_BG_DELETION_LAUNCHING:
+- i_cleanup = _gf_false;
+- break;
+- case SHARD_BG_DELETION_IN_PROGRESS:
+- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+- i_cleanup = _gf_false;
+- break;
+- default:
+- break;
+- }
+- }
+- UNLOCK(&priv->lock);
+- if (!i_cleanup)
+- return 0;
+-
+- cleanup_frame = create_frame(this, this->ctx->pool);
+- if (!cleanup_frame) {
+- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+- "Failed to create "
+- "new frame to delete shards");
+- ret = -ENOMEM;
+- goto err;
+- }
++ frame->local = local;
+
+- set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
++ local->handler = shard_post_fstat_handler;
++ local->fd = fd_ref(fd);
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
+
+- ret = synctask_new(this->ctx->env, shard_delete_shards,
+- shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_WARNING, errno,
+- SHARD_MSG_SHARDS_DELETION_FAILED,
+- "failed to create task to do background "
+- "cleanup of shards");
+- STACK_DESTROY(cleanup_frame->root);
+- goto err;
+- }
+- return 0;
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++ local, err);
+
++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
++ return 0;
+ err:
+- LOCK(&priv->lock);
+- {
+- priv->bg_del_state = SHARD_BG_DELETION_NONE;
+- }
+- UNLOCK(&priv->lock);
+- return ret;
++ shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+-{
+- int ret = -1;
+- shard_priv_t *priv = NULL;
+- gf_boolean_t i_start_cleanup = _gf_false;
++int shard_post_update_size_truncate_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- priv = this->private;
++ local = frame->local;
+
+- if (op_ret < 0)
+- goto unwind;
++ if (local->fop == GF_FOP_TRUNCATE)
++ SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->postbuf, NULL);
++ else
++ SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->postbuf, NULL);
++ return 0;
++}
+
+- if (IA_ISDIR(buf->ia_type))
+- goto unwind;
++int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, struct iatt *prebuf,
++ struct iatt *postbuf, dict_t *xdata) {
++ inode_t *inode = NULL;
++ int64_t delta_blocks = 0;
++ shard_local_t *local = NULL;
+
+- /* Also, if the file is sharded, get the file size and block cnt xattr,
+- * and store them in the stbuf appropriately.
+- */
++ local = frame->local;
+
+- if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) &&
+- frame->root->pid != GF_CLIENT_PID_GSYNCD)
+- shard_modify_size_and_block_count(buf, xdata);
+-
+- /* If this was a fresh lookup, there are two possibilities:
+- * 1) If the file is sharded (indicated by the presence of block size
+- * xattr), store this block size, along with rdev and mode in its
+- * inode ctx.
+- * 2) If the file is not sharded, store size along with rdev and mode
+- * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is
+- * already initialised to all zeroes, nothing more needs to be done.
+- */
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
+
+- (void)shard_inode_ctx_update(inode, this, xdata, buf);
++ inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode;
++ if (op_ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, op_errno,
++ SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last"
++ " shard failed : %s",
++ uuid_utoa(inode->gfid));
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto err;
++ }
++
++ local->postbuf.ia_size = local->offset;
++ /* Let the delta be negative. We want xattrop to do subtraction */
++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
++ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
++ postbuf->ia_blocks - prebuf->ia_blocks);
++ GF_ASSERT(delta_blocks <= 0);
++ local->postbuf.ia_blocks += delta_blocks;
++ local->hole_size = 0;
++
++ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
++ shard_update_file_size(frame, this, NULL, &local->loc,
++ shard_post_update_size_truncate_handler);
++ return 0;
++err:
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++}
++
++int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this,
++ inode_t *inode) {
++ size_t last_shard_size_after = 0;
++ loc_t loc = {
++ 0,
++ };
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ /* A NULL inode could be due to the fact that the last shard which
++ * needs to be truncated does not exist due to it lying in a hole
++ * region. So the only thing left to do in that case would be an
++ * update to file size xattr.
++ */
++ if (!inode) {
++ gf_msg_debug(this->name, 0,
++ "Last shard to be truncated absent"
++ " in backend: %s. Directly proceeding to update "
++ "file size",
++ uuid_utoa(inode->gfid));
++ shard_update_file_size(frame, this, NULL, &local->loc,
++ shard_post_update_size_truncate_handler);
++ return 0;
++ }
+
+- LOCK(&priv->lock);
+- {
+- if (priv->first_lookup_done == _gf_false) {
+- priv->first_lookup_done = _gf_true;
+- i_start_cleanup = _gf_true;
+- }
+- }
+- UNLOCK(&priv->lock);
++ SHARD_SET_ROOT_FS_ID(frame, local);
+
+- if (!i_start_cleanup)
+- goto unwind;
++ loc.inode = inode_ref(inode);
++ gf_uuid_copy(loc.gfid, inode->gfid);
+
+- ret = shard_start_background_deletion(this);
+- if (ret < 0) {
+- LOCK(&priv->lock);
+- {
+- priv->first_lookup_done = _gf_false;
+- }
+- UNLOCK(&priv->lock);
+- }
++ last_shard_size_after = (local->offset % local->block_size);
+
+-unwind:
+- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+- postparent);
+- return 0;
++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after,
++ NULL);
++ loc_wipe(&loc);
++ return 0;
+ }
+
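The size handed to the final truncate above is local->offset % local->block_size: the portion of the last participant shard that survives. A worked example with hypothetical sizes:

/* block_size = 64 MiB, truncate to offset = 200 MiB: the new last
 * byte falls in shard 3 (0-based), which must keep
 * 200 MiB % 64 MiB = 8 MiB. */
uint64_t block_size = 64ULL << 20;
uint64_t offset = 200ULL << 20;
uint64_t last_shard_size_after = offset % block_size; /* 8 MiB */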
+-int
+-shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+-{
+- int ret = -1;
+- int32_t op_errno = ENOMEM;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- this->itable = loc->inode->table;
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
+- }
++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num);
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno,
++ struct iatt *preparent, struct iatt *postparent,
++ dict_t *xdata) {
++ int ret = 0;
++ int call_count = 0;
++ int shard_block_num = (long)cookie;
++ uint64_t block_count = 0;
++ shard_local_t *local = NULL;
+
+- frame->local = local;
++ local = frame->local;
+
+- loc_copy(&local->loc, loc);
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto done;
++ }
++ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
++ if (!ret) {
++ GF_ATOMIC_SUB(local->delta_blocks, block_count);
++ } else {
++ /* dict_get failed possibly due to a heterogeneous cluster? */
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to get key %s from dict during truncate of gfid %s",
++ GF_GET_FILE_BLOCK_COUNT,
++ uuid_utoa(local->resolver_base_inode->gfid));
++ }
++
++ shard_unlink_block_inode(local, shard_block_num);
++done:
++ call_count = shard_call_count_return(frame);
++ if (call_count == 0) {
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++ shard_truncate_last_shard(frame, this, local->inode_list[0]);
++ }
++ return 0;
++}
++
++int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) {
++ int i = 1;
++ int ret = -1;
++ int call_count = 0;
++ uint32_t cur_block = 0;
++ uint32_t last_block = 0;
++ char path[PATH_MAX] = {
++ 0,
++ };
++ char *bname = NULL;
++ loc_t loc = {
++ 0,
++ };
++ gf_boolean_t wind_failed = _gf_false;
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
++ dict_t *xdata_req = NULL;
++
++ local = frame->local;
++ priv = this->private;
++
++ cur_block = local->first_block + 1;
++ last_block = local->last_block;
++
++ /* Determine call count */
++ for (i = 1; i < local->num_blocks; i++) {
++ if (!local->inode_list[i])
++ continue;
++ call_count++;
++ }
++
++ if (!call_count) {
++ /* A call count of 0 implies that none of the shards that need
++ * to be unlinked exist. So the shard xlator now proceeds to
++ * do the final truncate + size updates.
++ */
++ gf_msg_debug(this->name, 0, "Shards to be unlinked as part of "
++ "truncate absent in backend: %s. Directly "
++ "proceeding to update file size",
++ uuid_utoa(inode->gfid));
++ local->postbuf.ia_size = local->offset;
++ local->postbuf.ia_blocks = local->prebuf.ia_blocks;
++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
++ local->hole_size = 0;
++ shard_update_file_size(frame, this, local->fd, &local->loc,
++ shard_post_update_size_truncate_handler);
++ return 0;
++ }
+
+- local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+- if (!local->xattr_req)
+- goto err;
++ local->call_count = call_count;
++ i = 1;
++ xdata_req = dict_new();
++ if (!xdata_req) {
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
++ }
++ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set key %s into dict during truncate of %s",
++ GF_GET_FILE_BLOCK_COUNT,
++ uuid_utoa(local->resolver_base_inode->gfid));
++ dict_unref(xdata_req);
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
++ }
+
+- if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) {
+- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set dict"
+- " value: key:%s for path %s",
+- GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
+- goto err;
+- }
++ SHARD_SET_ROOT_FS_ID(frame, local);
++ while (cur_block <= last_block) {
++ if (!local->inode_list[i]) {
++ cur_block++;
++ i++;
++ continue;
++ }
++ if (wind_failed) {
++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ goto next;
+ }
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE,
+- 8 * 4);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set dict value: key:%s for path %s.",
+- GF_XATTR_SHARD_FILE_SIZE, loc->path);
+- goto err;
+- }
++ shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
++ bname = strrchr(path, '/') + 1;
++ loc.parent = inode_ref(priv->dot_shard_inode);
++ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed"
++ " on %s. Base file gfid = %s",
++ bname, uuid_utoa(inode->gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ loc_wipe(&loc);
++ wind_failed = _gf_true;
++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ goto next;
+ }
++ loc.name = strrchr(loc.path, '/');
++ if (loc.name)
++ loc.name++;
++ loc.inode = inode_ref(local->inode_list[i]);
+
+- if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY)))
+- dict_del(xattr_req, GF_CONTENT_KEY);
++ STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc,
++ 0, xdata_req);
++ loc_wipe(&loc);
++ next:
++ i++;
++ cur_block++;
++ if (!--call_count)
++ break;
++ }
++ dict_unref(xdata_req);
++ return 0;
++}
+
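shard_truncate_htol() above (higher-to-lower) unlinks shards first_block + 1 through last_block, asking the brick via GF_GET_FILE_BLOCK_COUNT to report each shard's block count so the cbk can subtract it from delta_blocks; the surviving shard is truncated only after the last unlink returns. The index bookkeeping pairs list slot and shard number:

/* Sketch: inode_list[] is indexed from the first participant block,
 * so slot i corresponds to shard first_block + i. Slot 0, the shard
 * that gets truncated rather than unlinked, is skipped. */
for (i = 1, cur_block = local->first_block + 1;
     cur_block <= local->last_block; i++, cur_block++) {
    if (!local->inode_list[i])
        continue; /* shard lies in a hole: nothing to unlink */
    /* wind an unlink on /.shard/<base-gfid>.<cur_block> */
}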
+- STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req);
+- return 0;
+-err:
+- shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno);
++int shard_truncate_do(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ if (local->num_blocks == 1) {
++ /* This means that there are no shards to be unlinked.
++ * The fop boils down to truncating the last shard, updating
++ * the size and unwinding.
++ */
++ shard_truncate_last_shard(frame, this, local->inode_list[0]);
+ return 0;
++ } else {
++ shard_truncate_htol(frame, this, local->loc.inode);
++ }
++ return 0;
+ }
+
+-int
+-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, dict_t *xdata,
+- struct iatt *postparent)
+-{
+- int ret = -1;
+- int32_t mask = SHARD_INODE_WRITE_MASK;
+- shard_local_t *local = NULL;
+- shard_inode_ctx_t ctx = {
+- 0,
+- };
+-
+- local = frame->local;
++int shard_post_lookup_shards_truncate_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- if (op_ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, op_errno,
+- SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
+- "Lookup on base file"
+- " failed : %s",
+- loc_gfid_utoa(&(local->loc)));
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto unwind;
+- }
++ local = frame->local;
+
+- local->prebuf = *buf;
+- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+- local->op_ret = -1;
+- local->op_errno = EINVAL;
+- goto unwind;
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
++
++ shard_truncate_do(frame, this);
++ return 0;
++}
++
++void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
++ struct iatt *buf) {
++ int list_index = 0;
++ char block_bname[256] = {
++ 0,
++ };
++ uuid_t gfid = {
++ 0,
++ };
++ inode_t *linked_inode = NULL;
++ xlator_t *this = NULL;
++ inode_t *fsync_inode = NULL;
++ shard_priv_t *priv = NULL;
++ inode_t *base_inode = NULL;
++
++ this = THIS;
++ priv = this->private;
++ if (local->loc.inode) {
++ gf_uuid_copy(gfid, local->loc.inode->gfid);
++ base_inode = local->loc.inode;
++ } else if (local->resolver_base_inode) {
++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++ base_inode = local->resolver_base_inode;
++ } else {
++ gf_uuid_copy(gfid, local->base_gfid);
++ }
++
++ shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
++
++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
++ linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf);
++ inode_lookup(linked_inode);
++ list_index = block_num - local->first_block;
++ local->inode_list[list_index] = linked_inode;
++
++ LOCK(&priv->lock);
++ {
++ fsync_inode = __shard_update_shards_inode_list(linked_inode, this,
++ base_inode, block_num, gfid);
++ }
++ UNLOCK(&priv->lock);
++ if (fsync_inode)
++ shard_initiate_evicted_inode_fsync(this, fsync_inode);
++}
++
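shard_link_block_inode() above links each resolved shard into the inode table under /.shard and then updates the priv LRU list; if that update evicts another shard inode, it is queued for fsync via shard_initiate_evicted_inode_fsync(). The block name format, assumed from the shard_make_block_bname() call sites in this patch:

/* Sketch: shard block files are named "<base-gfid>.<block-num>",
 * so block 5 of a file lives at /.shard/<base-gfid>.5. */
snprintf(block_bname, sizeof(block_bname), "%s.%d", uuid_utoa(gfid),
         block_num);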
++int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode,
++ struct iatt *buf, dict_t *xdata,
++ struct iatt *postparent) {
++ int call_count = 0;
++ int shard_block_num = (long)cookie;
++ uuid_t gfid = {
++ 0,
++ };
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++ if (local->resolver_base_inode)
++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++ else
++ gf_uuid_copy(gfid, local->base_gfid);
++
++ if (op_ret < 0) {
++ /* Ignore absence of shards in the backend in truncate fop. */
++ switch (local->fop) {
++ case GF_FOP_TRUNCATE:
++ case GF_FOP_FTRUNCATE:
++ case GF_FOP_RENAME:
++ case GF_FOP_UNLINK:
++ if (op_errno == ENOENT)
++ goto done;
++ break;
++ case GF_FOP_WRITE:
++ case GF_FOP_READ:
++ case GF_FOP_ZEROFILL:
++ case GF_FOP_DISCARD:
++ case GF_FOP_FALLOCATE:
++ if ((!local->first_lookup_done) && (op_errno == ENOENT)) {
++ LOCK(&frame->lock);
++ { local->create_count++; }
++ UNLOCK(&frame->lock);
++ goto done;
++ }
++ break;
++ default:
++ break;
+ }
+
+- if (shard_inode_ctx_get_all(inode, this, &ctx))
+- mask = SHARD_ALL_MASK;
++ /* else */
++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED,
++ "Lookup on shard %d "
++ "failed. Base file gfid = %s",
++ shard_block_num, uuid_utoa(gfid));
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto done;
++ }
+
+- ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0,
+- (mask | SHARD_MASK_REFRESH_RESET));
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0,
+- "Failed to set inode"
+- " write params into inode ctx for %s",
+- uuid_utoa(buf->ia_gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto unwind;
+- }
++ shard_link_block_inode(local, shard_block_num, inode, buf);
+
+-unwind:
+- local->handler(frame, this);
++done:
++ if (local->lookup_shards_barriered) {
++ syncbarrier_wake(&local->barrier);
+ return 0;
++ } else {
++ call_count = shard_call_count_return(frame);
++ if (call_count == 0) {
++ if (!local->first_lookup_done)
++ local->first_lookup_done = _gf_true;
++ local->pls_fop_handler(frame, this);
++ }
++ }
++ return 0;
+ }
+
+-int
+-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- shard_post_fop_handler_t handler)
+-{
+- int ret = -1;
+- shard_local_t *local = NULL;
+- dict_t *xattr_req = NULL;
+- gf_boolean_t need_refresh = _gf_false;
++dict_t *shard_create_gfid_dict(dict_t *dict) {
++ int ret = 0;
++ dict_t *new = NULL;
++ unsigned char *gfid = NULL;
+
+- local = frame->local;
+- local->handler = handler;
++ new = dict_copy_with_ref(dict, NULL);
++ if (!new)
++ return NULL;
+
+- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
+- &need_refresh);
+- /* By this time, inode ctx should have been created either in create,
+- * mknod, readdirp or lookup. If not it is a bug!
+- */
+- if ((ret == 0) && (need_refresh == _gf_false)) {
+- gf_msg_debug(this->name, 0,
+- "Skipping lookup on base file: %s"
+- "Serving prebuf off the inode ctx cache",
+- uuid_utoa(loc->gfid));
+- goto out;
++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
++ if (!gfid) {
++ ret = -1;
++ goto out;
++ }
++
++ gf_uuid_generate(gfid);
++
++ ret = dict_set_gfuuid(new, "gfid-req", gfid, false);
++
++out:
++ if (ret) {
++ dict_unref(new);
++ new = NULL;
++ GF_FREE(gfid);
++ }
++
++ return new;
++}
++
++int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this,
++ inode_t *inode,
++ shard_post_lookup_shards_fop_handler_t handler) {
++ int i = 0;
++ int ret = 0;
++ int count = 0;
++ int call_count = 0;
++ int32_t shard_idx_iter = 0;
++ int last_block = 0;
++ char path[PATH_MAX] = {
++ 0,
++ };
++ char *bname = NULL;
++ uuid_t gfid = {
++ 0,
++ };
++ loc_t loc = {
++ 0,
++ };
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
++ gf_boolean_t wind_failed = _gf_false;
++ dict_t *xattr_req = NULL;
++
++ priv = this->private;
++ local = frame->local;
++ count = call_count = local->call_count;
++ shard_idx_iter = local->first_block;
++ last_block = local->last_block;
++ local->pls_fop_handler = handler;
++ if (local->lookup_shards_barriered)
++ local->barrier.waitfor = local->call_count;
++
++ if (inode)
++ gf_uuid_copy(gfid, inode->gfid);
++ else
++ gf_uuid_copy(gfid, local->base_gfid);
++
++ while (shard_idx_iter <= last_block) {
++ if (local->inode_list[i]) {
++ i++;
++ shard_idx_iter++;
++ continue;
++ }
++
++ if (wind_failed) {
++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
++ -1, ENOMEM, NULL, NULL, NULL, NULL);
++ goto next;
++ }
++
++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
++
++ bname = strrchr(path, '/') + 1;
++ loc.inode = inode_new(this->itable);
++ loc.parent = inode_ref(priv->dot_shard_inode);
++ gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
++ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++ if (ret < 0 || !(loc.inode)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed"
++ " on %s, base file gfid = %s",
++ bname, uuid_utoa(gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ loc_wipe(&loc);
++ wind_failed = _gf_true;
++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
++ -1, ENOMEM, NULL, NULL, NULL, NULL);
++ goto next;
+ }
+
+- xattr_req = dict_new();
++ loc.name = strrchr(loc.path, '/');
++ if (loc.name)
++ loc.name++;
++
++ xattr_req = shard_create_gfid_dict(local->xattr_req);
+ if (!xattr_req) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto out;
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ wind_failed = _gf_true;
++ loc_wipe(&loc);
++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
++ -1, ENOMEM, NULL, NULL, NULL, NULL);
++ goto next;
++ }
++
++ STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk,
++ (void *)(long)shard_idx_iter, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->lookup, &loc, xattr_req);
++ loc_wipe(&loc);
++ dict_unref(xattr_req);
++ next:
++ shard_idx_iter++;
++ i++;
++
++ if (!--call_count)
++ break;
++ }
++ if (local->lookup_shards_barriered) {
++ syncbarrier_wait(&local->barrier, count);
++ local->pls_fop_handler(frame, this);
++ }
++ return 0;
++}
++
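shard_common_lookup_shards() above completes in one of two modes: by default each cbk counts down call_count and the last reply invokes pls_fop_handler, while the barriered mode (used by synctask-driven callers) parks the winder on a syncbarrier until all replies arrive and then calls the handler inline. The pairing is:

/* Sketch of the barriered mode. */
local->barrier.waitfor = local->call_count; /* before winding */
/* ... wind call_count lookups ... */
syncbarrier_wait(&local->barrier, count); /* winder blocks here */
/* and in every cbk: */
syncbarrier_wake(&local->barrier);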
++int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ if (local->op_ret < 0) {
++ if (local->op_errno == ENOENT) {
++ /* If lookup on /.shard fails with ENOENT, it means that
++ * the file was 0-byte in size but truncated sometime in
++ * the past to a higher size which is reflected in the
++ * size xattr, and now being truncated to a lower size.
++ * In this case, the only thing that needs to be done is
++ * to update the size xattr of the file and unwind.
++ */
++ local->first_block = local->last_block = 0;
++ local->num_blocks = 1;
++ local->call_count = 0;
++ local->op_ret = 0;
++ local->postbuf.ia_size = local->offset;
++ shard_update_file_size(frame, this, local->fd, &local->loc,
++ shard_post_update_size_truncate_handler);
++ return 0;
++ } else {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
+ }
++ }
+
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
++ if (!local->call_count)
++ shard_truncate_do(frame, this);
++ else
++ shard_common_lookup_shards(frame, this, local->loc.inode,
++ shard_post_lookup_shards_truncate_handler);
++
++ return 0;
++}
++
++int shard_truncate_begin(call_frame_t *frame, xlator_t *this) {
++ int ret = 0;
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
++
++ priv = this->private;
++ local = frame->local;
++
++ /* First participant block here is the lowest numbered block that would
++ * hold the last byte of the file post successful truncation.
++ * Last participant block is the block that contains the last byte in
++ * the current state of the file.
++ * If (first block == last_block):
++ * then that means that the file only needs truncation of the
++ * first (or last since both are same) block.
++ * Else
++ * if (new_size % block_size == 0)
++ * then that means there is no truncate to be done with
++ * only shards from first_block + 1 through the last
++ * block needing to be unlinked.
++ * else
++ * both truncate of the first block and unlink of the
++ * remaining shards until end of file is required.
++ */
++ local->first_block =
++ (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1,
++ local->block_size);
++ local->last_block =
++ get_highest_block(0, local->prebuf.ia_size, local->block_size);
++
++ local->num_blocks = local->last_block - local->first_block + 1;
++ local->resolver_base_inode =
++ (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode;
++
++ if ((local->first_block == 0) && (local->num_blocks == 1)) {
++ if (local->fop == GF_FOP_TRUNCATE)
++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset,
++ local->xattr_req);
++ else
++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset,
++ local->xattr_req);
++ return 0;
++ }
+
+- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
++ local->inode_list =
++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
++ if (!local->inode_list)
++ goto err;
+
+- dict_unref(xattr_req);
+- return 0;
++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++ if (!local->dot_shard_loc.inode) {
++ ret =
++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
++ if (ret)
++ goto err;
++ shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler,
++ SHARD_INTERNAL_DIR_DOT_SHARD);
++ } else {
++ local->post_res_handler = shard_post_resolve_truncate_handler;
++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++ }
++ return 0;
+
+-out:
+- if (xattr_req)
+- dict_unref(xattr_req);
+- handler(frame, this);
+- return 0;
++err:
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_post_fstat_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
++ struct iatt tmp_stbuf = {
++ 0,
++ };
+
+- local = frame->local;
+-
+- if (local->op_ret >= 0)
+- shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0,
+- SHARD_LOOKUP_MASK);
++ local = frame->local;
+
+- SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
+- &local->prebuf, local->xattr_rsp);
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
+ return 0;
++ }
++
++ local->postbuf = tmp_stbuf = local->prebuf;
++
++ if (local->prebuf.ia_size == local->offset) {
++    /* If the file size is the same as the requested size, unwind the
++     * call immediately.
++     */
++ if (local->fop == GF_FOP_TRUNCATE)
++ SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf,
++ NULL);
++ else
++ SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf,
++ &local->postbuf, NULL);
++ } else if (local->offset > local->prebuf.ia_size) {
++ /* If the truncate is from a lower to a higher size, set the
++ * new size xattr and unwind.
++ */
++ local->hole_size = local->offset - local->prebuf.ia_size;
++ local->delta_size = 0;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
++ local->postbuf.ia_size = local->offset;
++ tmp_stbuf.ia_size = local->offset;
++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
++ SHARD_INODE_WRITE_MASK);
++ shard_update_file_size(frame, this, NULL, &local->loc,
++ shard_post_update_size_truncate_handler);
++ } else {
++    /* ... else:
++     * i. unlink all shards that need to be unlinked,
++     * ii. truncate the last of the shards,
++     * iii. update the new size in the size xattr,
++     * and unwind the fop.
++     */
++ local->hole_size = 0;
++ local->delta_size = (local->offset - local->prebuf.ia_size);
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
++ tmp_stbuf.ia_size = local->offset;
++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
++ SHARD_INODE_WRITE_MASK);
++ shard_truncate_begin(frame, this);
++ }
++ return 0;
+ }
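++/* Summary of the three cases above (a sketch, not authoritative):
++ *   offset == ia_size : nothing to do; unwind immediately.
++ *   offset  > ia_size : expanding truncate; the new region is a hole,
++ *                       so only the size xattr is updated.
++ *   offset  < ia_size : shrinking truncate; trailing shards are
++ *                       unlinked and the new last shard truncated via
++ *                       shard_truncate_begin().
++ */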
+
+-int
+-shard_post_stat_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++/* TO-DO:
++ * Fix updates to size and block count with racing write(s) and truncate(s).
++ */
+
+- local = frame->local;
++int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ off_t offset, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- if (local->op_ret >= 0)
+- shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0,
+- SHARD_LOOKUP_MASK);
++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size from inode ctx of %s",
++ uuid_utoa(loc->inode->gfid));
++ goto err;
++ }
+
+- SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
+- &local->prebuf, local->xattr_rsp);
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+-}
++ }
+
+-int
+-shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, struct iatt *buf,
+- dict_t *xdata)
+-{
+- inode_t *inode = NULL;
+- shard_local_t *local = NULL;
++ if (!this->itable)
++ this->itable = loc->inode->table;
+
+- local = frame->local;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
++
++ frame->local = local;
++
++ ret = syncbarrier_init(&local->barrier);
++ if (ret)
++ goto err;
++ loc_copy(&local->loc, loc);
++ local->offset = offset;
++ local->block_size = block_size;
++ local->fop = GF_FOP_TRUNCATE;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++ local->resolver_base_inode = loc->inode;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
++
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_truncate_handler);
++ return 0;
+
+- if (op_ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED,
+- "stat failed: %s",
+- local->fd ? uuid_utoa(local->fd->inode->gfid)
+- : uuid_utoa((local->loc.inode)->gfid));
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto unwind;
+- }
++err:
++ shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM);
++ return 0;
++}
++
++int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++ dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
++
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size from inode ctx of %s",
++ uuid_utoa(fd->inode->gfid));
++ goto err;
++ }
++
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
++ return 0;
++ }
++
++ if (!this->itable)
++ this->itable = fd->inode->table;
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
++
++ frame->local = local;
++ ret = syncbarrier_init(&local->barrier);
++ if (ret)
++ goto err;
++ local->fd = fd_ref(fd);
++ local->offset = offset;
++ local->block_size = block_size;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++ local->fop = GF_FOP_FTRUNCATE;
++
++ local->loc.inode = inode_ref(fd->inode);
++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++ local->resolver_base_inode = fd->inode;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
++
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_truncate_handler);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
++ return 0;
++}
+
+- local->prebuf = *buf;
+- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+- local->op_ret = -1;
+- local->op_errno = EINVAL;
+- goto unwind;
+- }
+- local->xattr_rsp = dict_ref(xdata);
++int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, inode_t *inode,
++ struct iatt *buf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata) {
++ int ret = -1;
++ shard_local_t *local = NULL;
+
+- if (local->loc.inode)
+- inode = local->loc.inode;
+- else
+- inode = local->fd->inode;
++ local = frame->local;
+
+- shard_inode_ctx_invalidate(inode, this, &local->prebuf);
++ if (op_ret == -1)
++ goto unwind;
++
++ ret =
++ shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
++ "Failed to set inode "
++ "ctx for %s",
++ uuid_utoa(inode->gfid));
+
+ unwind:
+- local->handler(frame, this);
+- return 0;
+-}
++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
++ postparent, xdata);
+
+-int
+-shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
++ return 0;
++}
+
+- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->stat, loc, xdata);
+- return 0;
+- }
++int shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
++ dev_t rdev, mode_t umask, dict_t *xdata) {
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
+
+- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size from inode ctx of %s",
+- uuid_utoa(loc->inode->gfid));
+- goto err;
+- }
++ priv = this->private;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->stat, loc, xdata);
+- return 0;
+- }
++ frame->local = local;
++ local->block_size = priv->block_size;
++ if (!__is_gsyncd_on_shard_dir(frame, loc)) {
++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
++ }
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++ STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM);
++ return 0;
++}
+
+- frame->local = local;
++int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, inode_t *inode,
++ struct iatt *buf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata) {
++ shard_local_t *local = NULL;
+
+- local->handler = shard_post_stat_handler;
+- loc_copy(&local->loc, loc);
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
++ local = frame->local;
++ if (op_ret < 0)
++ goto err;
+
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+- local, err);
++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES);
++ buf->ia_size = local->prebuf.ia_size;
++ buf->ia_blocks = local->prebuf.ia_blocks;
+
+- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
+- return 0;
++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
++ postparent, xdata);
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno);
++ return 0;
+ }
+
+-int
+-shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
++int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fstat, fd, xdata);
+- return 0;
+- }
++ local = frame->local;
+
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size from inode ctx of %s",
+- uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
++ if (local->op_ret < 0) {
++ SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL,
++ NULL, NULL, NULL);
++ return 0;
++ }
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fstat, fd, xdata);
+- return 0;
+- }
++ STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2,
++ local->xattr_req);
++ return 0;
++}
+
+- if (!this->itable)
+- this->itable = fd->inode->table;
++int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
++ loc_t *newloc, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size from inode ctx of %s",
++ uuid_utoa(oldloc->inode->gfid));
++ goto err;
++ }
+
+- frame->local = local;
++ if (!block_size) {
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
++ oldloc, newloc, xdata);
++ return 0;
++ }
+
+- local->handler = shard_post_fstat_handler;
+- local->fd = fd_ref(fd);
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
++ if (!this->itable)
++ this->itable = oldloc->inode->table;
+
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+- local, err);
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
+- return 0;
++ frame->local = local;
++
++ loc_copy(&local->loc, oldloc);
++ loc_copy(&local->loc2, newloc);
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_link_handler);
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+- local = frame->local;
++int shard_post_lookup_shards_unlink_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- if (local->fop == GF_FOP_TRUNCATE)
+- SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->postbuf, NULL);
+- else
+- SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->postbuf, NULL);
++ local = frame->local;
++
++ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
++ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
++ "failed to delete shards of %s",
++ uuid_utoa(local->resolver_base_inode->gfid));
+ return 0;
+-}
++ }
++ local->op_ret = 0;
++ local->op_errno = 0;
+
+-int
+-shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno,
+- struct iatt *prebuf, struct iatt *postbuf,
+- dict_t *xdata)
+-{
+- inode_t *inode = NULL;
+- int64_t delta_blocks = 0;
+- shard_local_t *local = NULL;
++ shard_unlink_shards_do(frame, this, local->resolver_base_inode);
++ return 0;
++}
+
+- local = frame->local;
++int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
++ local = frame->local;
++ local->lookup_shards_barriered = _gf_true;
+
+- inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode
+- : local->fd->inode;
+- if (op_ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, op_errno,
+- SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED,
+- "truncate on last"
+- " shard failed : %s",
+- uuid_utoa(inode->gfid));
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto err;
+- }
++ if (!local->call_count)
++ shard_unlink_shards_do(frame, this, local->resolver_base_inode);
++ else
++ shard_common_lookup_shards(frame, this, local->resolver_base_inode,
++ shard_post_lookup_shards_unlink_handler);
++ return 0;
++}
++
++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) {
++ char block_bname[256] = {
++ 0,
++ };
++ uuid_t gfid = {
++ 0,
++ };
++ inode_t *inode = NULL;
++ inode_t *base_inode = NULL;
++ xlator_t *this = NULL;
++ shard_priv_t *priv = NULL;
++ shard_inode_ctx_t *ctx = NULL;
++ shard_inode_ctx_t *base_ictx = NULL;
++ int unref_base_inode = 0;
++ int unref_shard_inode = 0;
++
++ this = THIS;
++ priv = this->private;
++
++ inode = local->inode_list[shard_block_num - local->first_block];
++ shard_inode_ctx_get(inode, this, &ctx);
++ base_inode = ctx->base_inode;
++ if (base_inode)
++ gf_uuid_copy(gfid, base_inode->gfid);
++ else
++ gf_uuid_copy(gfid, ctx->base_gfid);
++ shard_make_block_bname(shard_block_num, gfid, block_bname,
++ sizeof(block_bname));
++
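++  /* Note on lock ordering, as read from the code below: priv->lock is
++   * taken first, then base_inode->lock, then the shard inode's lock,
++   * and they are released in the reverse order. Any other path taking
++   * these locks together presumably must follow the same order to
++   * avoid deadlocks.
++   */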
++ LOCK(&priv->lock);
++ if (base_inode)
++ LOCK(&base_inode->lock);
++ LOCK(&inode->lock);
++ {
++ __shard_inode_ctx_get(inode, this, &ctx);
++ if (!list_empty(&ctx->ilist)) {
++ list_del_init(&ctx->ilist);
++ priv->inode_count--;
++ unref_base_inode++;
++ unref_shard_inode++;
++ GF_ASSERT(priv->inode_count >= 0);
++ }
++ if (ctx->fsync_needed) {
++ unref_base_inode++;
++ unref_shard_inode++;
++ list_del_init(&ctx->to_fsync_list);
++ if (base_inode) {
++ __shard_inode_ctx_get(base_inode, this, &base_ictx);
++ base_ictx->fsync_count--;
++ }
++ }
++ }
++ UNLOCK(&inode->lock);
++ if (base_inode)
++ UNLOCK(&base_inode->lock);
+
+- local->postbuf.ia_size = local->offset;
+- /* Let the delta be negative. We want xattrop to do subtraction */
+- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+- delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
+- postbuf->ia_blocks - prebuf->ia_blocks);
+- GF_ASSERT(delta_blocks <= 0);
+- local->postbuf.ia_blocks += delta_blocks;
+- local->hole_size = 0;
++ inode_unlink(inode, priv->dot_shard_inode, block_bname);
++ inode_ref_reduce_by_n(inode, unref_shard_inode);
++ inode_forget(inode, 0);
+
+- shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
+- shard_update_file_size(frame, this, NULL, &local->loc,
+- shard_post_update_size_truncate_handler);
+- return 0;
+-err:
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
++ if (base_inode && unref_base_inode)
++ inode_ref_reduce_by_n(base_inode, unref_base_inode);
++ UNLOCK(&priv->lock);
+ }
+
+-int
+-shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode)
+-{
+- size_t last_shard_size_after = 0;
+- loc_t loc = {
+- 0,
+- };
+- shard_local_t *local = NULL;
++int shard_rename_cbk(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- /* A NULL inode could be due to the fact that the last shard which
+- * needs to be truncated does not exist due to it lying in a hole
+- * region. So the only thing left to do in that case would be an
+- * update to file size xattr.
+- */
+- if (!inode) {
+- gf_msg_debug(this->name, 0,
+- "Last shard to be truncated absent"
+- " in backend: %s. Directly proceeding to update "
+- "file size",
+- uuid_utoa(inode->gfid));
+- shard_update_file_size(frame, this, NULL, &local->loc,
+- shard_post_update_size_truncate_handler);
+- return 0;
+- }
++ SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->preoldparent,
++ &local->postoldparent, &local->prenewparent,
++ &local->postnewparent, local->xattr_rsp);
++ return 0;
++}
+
+- SHARD_SET_ROOT_FS_ID(frame, local);
++int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = frame->local;
+
+- loc.inode = inode_ref(inode);
+- gf_uuid_copy(loc.gfid, inode->gfid);
++ SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
++ &local->preoldparent, &local->postoldparent,
++ local->xattr_rsp);
++ return 0;
++}
+
+- last_shard_size_after = (local->offset % local->block_size);
++int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret, int32_t op_errno,
++ struct iatt *preparent, struct iatt *postparent,
++ dict_t *xdata) {
++ int shard_block_num = (long)cookie;
++ shard_local_t *local = NULL;
+
+- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after,
+- NULL);
+- loc_wipe(&loc);
+- return 0;
+-}
++ local = frame->local;
+
+-void
+-shard_unlink_block_inode(shard_local_t *local, int shard_block_num);
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto done;
++ }
+
+-int
+-shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno,
+- struct iatt *preparent, struct iatt *postparent,
+- dict_t *xdata)
+-{
+- int ret = 0;
+- int call_count = 0;
+- int shard_block_num = (long)cookie;
+- uint64_t block_count = 0;
+- shard_local_t *local = NULL;
++ shard_unlink_block_inode(local, shard_block_num);
++done:
++ syncbarrier_wake(&local->barrier);
++ return 0;
++}
++
++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this,
++ inode_t *inode) {
++ int i = 0;
++ int ret = -1;
++ int count = 0;
++ uint32_t cur_block = 0;
++  uint32_t cur_block_idx = 0; /* index into the inode_list[] array */
++ char *bname = NULL;
++ char path[PATH_MAX] = {
++ 0,
++ };
++ uuid_t gfid = {
++ 0,
++ };
++ loc_t loc = {
++ 0,
++ };
++ gf_boolean_t wind_failed = _gf_false;
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
++
++ priv = this->private;
++ local = frame->local;
++
++ if (inode)
++ gf_uuid_copy(gfid, inode->gfid);
++ else
++ gf_uuid_copy(gfid, local->base_gfid);
++
++ for (i = 0; i < local->num_blocks; i++) {
++ if (!local->inode_list[i])
++ continue;
++ count++;
++ }
++
++ if (!count) {
++    /* count == 0 implies that all of the shards that need to be
++     * unlinked are non-existent (in other words, the file is full of
++     * holes).
++     */
++ gf_msg_debug(this->name, 0, "All shards that need to be "
++ "unlinked are non-existent: %s",
++ uuid_utoa(gfid));
++ return 0;
++ }
+
+- local = frame->local;
++ SHARD_SET_ROOT_FS_ID(frame, local);
++ local->barrier.waitfor = count;
++ cur_block = cur_block_idx + local->first_block;
+
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto done;
+- }
+- ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
+- if (!ret) {
+- GF_ATOMIC_SUB(local->delta_blocks, block_count);
+- } else {
+- /* dict_get failed possibly due to a heterogeneous cluster? */
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to get key %s from dict during truncate of gfid %s",
+- GF_GET_FILE_BLOCK_COUNT,
+- uuid_utoa(local->resolver_base_inode->gfid));
+- }
+-
+- shard_unlink_block_inode(local, shard_block_num);
+-done:
+- call_count = shard_call_count_return(frame);
+- if (call_count == 0) {
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
+- shard_truncate_last_shard(frame, this, local->inode_list[0]);
+- }
+- return 0;
+-}
+-
+-int
+-shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+-{
+- int i = 1;
+- int ret = -1;
+- int call_count = 0;
+- uint32_t cur_block = 0;
+- uint32_t last_block = 0;
+- char path[PATH_MAX] = {
+- 0,
+- };
+- char *bname = NULL;
+- loc_t loc = {
+- 0,
+- };
+- gf_boolean_t wind_failed = _gf_false;
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
+- dict_t *xdata_req = NULL;
+-
+- local = frame->local;
+- priv = this->private;
+-
+- cur_block = local->first_block + 1;
+- last_block = local->last_block;
+-
+- /* Determine call count */
+- for (i = 1; i < local->num_blocks; i++) {
+- if (!local->inode_list[i])
+- continue;
+- call_count++;
+- }
++ while (cur_block_idx < local->num_blocks) {
++ if (!local->inode_list[cur_block_idx])
++ goto next;
+
+- if (!call_count) {
+- /* Call count = 0 implies that all of the shards that need to be
+- * unlinked do not exist. So shard xlator would now proceed to
+- * do the final truncate + size updates.
+- */
+- gf_msg_debug(this->name, 0,
+- "Shards to be unlinked as part of "
+- "truncate absent in backend: %s. Directly "
+- "proceeding to update file size",
+- uuid_utoa(inode->gfid));
+- local->postbuf.ia_size = local->offset;
+- local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+- GF_ATOMIC_INIT(local->delta_blocks, 0);
+- local->hole_size = 0;
+- shard_update_file_size(frame, this, local->fd, &local->loc,
+- shard_post_update_size_truncate_handler);
+- return 0;
++ if (wind_failed) {
++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
++ ENOMEM, NULL, NULL, NULL);
++ goto next;
+ }
+
+- local->call_count = call_count;
+- i = 1;
+- xdata_req = dict_new();
+- if (!xdata_req) {
+- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+- return 0;
+- }
+- ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set key %s into dict during truncate of %s",
+- GF_GET_FILE_BLOCK_COUNT,
+- uuid_utoa(local->resolver_base_inode->gfid));
+- dict_unref(xdata_req);
+- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+- return 0;
++ shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
++ bname = strrchr(path, '/') + 1;
++ loc.parent = inode_ref(priv->dot_shard_inode);
++ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed"
++ " on %s, base file gfid = %s",
++ bname, uuid_utoa(gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ loc_wipe(&loc);
++ wind_failed = _gf_true;
++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
++ ENOMEM, NULL, NULL, NULL);
++ goto next;
+ }
+
+- SHARD_SET_ROOT_FS_ID(frame, local);
+- while (cur_block <= last_block) {
+- if (!local->inode_list[i]) {
+- cur_block++;
+- i++;
+- continue;
+- }
+- if (wind_failed) {
+- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- goto next;
+- }
+-
+- shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
+- bname = strrchr(path, '/') + 1;
+- loc.parent = inode_ref(priv->dot_shard_inode);
+- ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed"
+- " on %s. Base file gfid = %s",
+- bname, uuid_utoa(inode->gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- loc_wipe(&loc);
+- wind_failed = _gf_true;
+- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- goto next;
+- }
+- loc.name = strrchr(loc.path, '/');
+- if (loc.name)
+- loc.name++;
+- loc.inode = inode_ref(local->inode_list[i]);
+-
+- STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
+- (void *)(long)cur_block, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
+- loc_wipe(&loc);
+- next:
+- i++;
+- cur_block++;
+- if (!--call_count)
+- break;
+- }
+- dict_unref(xdata_req);
+- return 0;
+-}
++ loc.name = strrchr(loc.path, '/');
++ if (loc.name)
++ loc.name++;
++ loc.inode = inode_ref(local->inode_list[cur_block_idx]);
+
+-int
+-shard_truncate_do(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++ STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk,
++ (void *)(long)cur_block, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->unlink, &loc, local->xflag,
++ local->xattr_req);
++ loc_wipe(&loc);
++ next:
++ cur_block++;
++ cur_block_idx++;
++ }
++ syncbarrier_wait(&local->barrier, count);
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++ return 0;
++}
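++/* The wind/barrier pattern above, sketched under the assumption of
++ * standard syncbarrier semantics: barrier.waitfor is set to the number
++ * of unlinks that will be wound, each shard_unlink_shards_do_cbk()
++ * invocation calls syncbarrier_wake(), and syncbarrier_wait(count)
++ * blocks the synctask until all callbacks have fired, so the function
++ * returns only after every wound unlink has completed.
++ */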
++
++int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this,
++ int now, int first_block,
++ gf_dirent_t *entry) {
++ int i = 0;
++ int ret = 0;
++ shard_local_t *local = NULL;
++ uuid_t gfid = {
++ 0,
++ };
++
++ local = cleanup_frame->local;
++
++ local->inode_list = GF_CALLOC(now, sizeof(inode_t *), gf_shard_mt_inode_list);
++ if (!local->inode_list)
++ return -ENOMEM;
++
++ local->first_block = first_block;
++ local->last_block = first_block + now - 1;
++ local->num_blocks = now;
++ gf_uuid_parse(entry->d_name, gfid);
++ gf_uuid_copy(local->base_gfid, gfid);
++ local->resolver_base_inode = inode_find(this->itable, gfid);
++ local->call_count = 0;
++ ret = syncbarrier_init(&local->barrier);
++ if (ret) {
++ GF_FREE(local->inode_list);
++ local->inode_list = NULL;
++ inode_unref(local->resolver_base_inode);
++ local->resolver_base_inode = NULL;
++ return -errno;
++ }
++ shard_common_resolve_shards(cleanup_frame, this,
++ shard_post_resolve_unlink_handler);
++
++ for (i = 0; i < local->num_blocks; i++) {
++ if (local->inode_list[i])
++ inode_unref(local->inode_list[i]);
++ }
++ GF_FREE(local->inode_list);
++ local->inode_list = NULL;
++ if (local->op_ret)
++ ret = -local->op_errno;
++ syncbarrier_destroy(&local->barrier);
++ inode_unref(local->resolver_base_inode);
++ local->resolver_base_inode = NULL;
++ STACK_RESET(cleanup_frame->root);
++ return ret;
++}
++
++int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
++ gf_dirent_t *entry, inode_t *inode) {
++ int ret = 0;
++ int shard_count = 0;
++ int first_block = 0;
++ int now = 0;
++ uint64_t size = 0;
++ uint64_t block_size = 0;
++ uint64_t size_array[4] = {
++ 0,
++ };
++ void *bsize = NULL;
++ void *size_attr = NULL;
++ dict_t *xattr_rsp = NULL;
++ loc_t loc = {
++ 0,
++ };
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
++
++ priv = this->private;
++ local = cleanup_frame->local;
++ ret = dict_reset(local->xattr_req);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to reset dict");
++ ret = -ENOMEM;
++ goto err;
++ }
++
++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
++ ret = -ENOMEM;
++ goto err;
++ }
++
++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
++ ret = -ENOMEM;
++ goto err;
++ }
++
++ loc.inode = inode_ref(inode);
++ loc.parent = inode_ref(priv->dot_shard_rm_inode);
++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed on %s", entry->d_name);
++ ret = -ENOMEM;
++ goto err;
++ }
++
++ loc.name = strrchr(loc.path, '/');
++ if (loc.name)
++ loc.name++;
++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req,
++ &xattr_rsp);
++ if (ret)
++ goto err;
++
++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
++ goto err;
++ }
++ block_size = ntoh64(*((uint64_t *)bsize));
++
++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
++ goto err;
++ }
++
++ memcpy(size_array, size_attr, sizeof(size_array));
++ size = ntoh64(size_array[0]);
++
++ shard_count = (size / block_size) - 1;
++ if (shard_count < 0) {
++ gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond "
++ "its shard-block-size. Nothing to delete. "
++ "Returning",
++ entry->d_name);
++ /* File size < shard-block-size, so nothing to delete */
++ ret = 0;
++ goto delete_marker;
++ }
++ if ((size % block_size) > 0)
++ shard_count++;
++
++ if (shard_count == 0) {
++ gf_msg_debug(this->name, 0, "Size of %s is exactly equal to "
++ "its shard-block-size. Nothing to delete. "
++ "Returning",
++ entry->d_name);
++ ret = 0;
++ goto delete_marker;
++ }
++ gf_msg_debug(this->name, 0,
++ "base file = %s, "
++ "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", "
++ "shard_count=%d",
++ entry->d_name, block_size, size, shard_count);
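++  /* Worked example of the shard_count arithmetic above (illustrative
++   * numbers): size = 9MB and block_size = 4MB give
++   *   shard_count = (9 / 4) - 1 = 1, bumped to 2 since 9 % 4 > 0,
++   * i.e. blocks 1 and 2 under /.shard are to be deleted, while block 0
++   * lives in the base file itself.
++   */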
++
++ /* Perform a gfid-based lookup to see if gfid corresponding to marker
++ * file's base name exists.
++ */
++ loc_wipe(&loc);
++ loc.inode = inode_new(this->itable);
++ if (!loc.inode) {
++ ret = -ENOMEM;
++ goto err;
++ }
++ gf_uuid_parse(entry->d_name, loc.gfid);
++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
++ if (!ret) {
++ gf_msg_debug(this->name, 0, "Base shard corresponding to gfid "
++ "%s is present. Skipping shard deletion. "
++ "Returning",
++ entry->d_name);
++ ret = 0;
++ goto delete_marker;
++ }
+
+- local = frame->local;
++ first_block = 1;
+
+- if (local->num_blocks == 1) {
+- /* This means that there are no shards to be unlinked.
+- * The fop boils down to truncating the last shard, updating
+- * the size and unwinding.
+- */
+- shard_truncate_last_shard(frame, this, local->inode_list[0]);
+- return 0;
++ while (shard_count) {
++ if (shard_count < local->deletion_rate) {
++ now = shard_count;
++ shard_count = 0;
+ } else {
+- shard_truncate_htol(frame, this, local->loc.inode);
+- }
+- return 0;
+-}
+-
+-int
+-shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
++ now = local->deletion_rate;
++ shard_count -= local->deletion_rate;
+ }
+
+- shard_truncate_do(frame, this);
+- return 0;
+-}
++ gf_msg_debug(this->name, 0, "deleting %d shards starting from "
++ "block %d of gfid %s",
++ now, first_block, entry->d_name);
++ ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block,
++ entry);
++ if (ret)
++ goto err;
++ first_block += now;
++ }
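++  /* Illustration of the batching above (values assumed): with
++   * shard_count = 250 and deletion_rate = 100, the loop issues three
++   * rounds of shard_regulated_shards_deletion() covering blocks 1-100,
++   * 101-200 and 201-250, throttling how many shards are resolved and
++   * unlinked at a time.
++   */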
+
+-void
+-shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
+- struct iatt *buf)
+-{
+- int list_index = 0;
+- char block_bname[256] = {
+- 0,
+- };
+- uuid_t gfid = {
+- 0,
+- };
+- inode_t *linked_inode = NULL;
+- xlator_t *this = NULL;
+- inode_t *fsync_inode = NULL;
+- shard_priv_t *priv = NULL;
+- inode_t *base_inode = NULL;
+-
+- this = THIS;
+- priv = this->private;
+- if (local->loc.inode) {
+- gf_uuid_copy(gfid, local->loc.inode->gfid);
+- base_inode = local->loc.inode;
+- } else if (local->resolver_base_inode) {
+- gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+- base_inode = local->resolver_base_inode;
++delete_marker:
++ loc_wipe(&loc);
++ loc.inode = inode_ref(inode);
++ loc.parent = inode_ref(priv->dot_shard_rm_inode);
++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed on %s", entry->d_name);
++ ret = -ENOMEM;
++ goto err;
++ }
++ loc.name = strrchr(loc.path, '/');
++ if (loc.name)
++ loc.name++;
++ ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL);
++ if (ret)
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED,
++ "Failed to delete %s "
++ "from /%s",
++ entry->d_name, GF_SHARD_REMOVE_ME_DIR);
++err:
++ if (xattr_rsp)
++ dict_unref(xattr_rsp);
++ loc_wipe(&loc);
++ return ret;
++}
++
++int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
++ gf_dirent_t *entry, inode_t *inode) {
++ int ret = -1;
++ loc_t loc = {
++ 0,
++ };
++ shard_priv_t *priv = NULL;
++
++ priv = this->private;
++ loc.inode = inode_ref(priv->dot_shard_rm_inode);
++
++ ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
++ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
++ if (ret < 0) {
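++    /* -EAGAIN from the non-blocking entrylk above presumably means
++     * another client is already deleting this entry's shards, so it is
++     * treated as success and the entry is skipped here.
++     */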
++ if (ret == -EAGAIN) {
++ ret = 0;
++ }
++ goto out;
++ }
++  {
++    ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode);
++  }
++ syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
++ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL);
++out:
++ loc_wipe(&loc);
++ return ret;
++}
++
++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) {
++ SHARD_STACK_DESTROY(frame);
++ return 0;
++}
++
++int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
++ shard_internal_dir_type_t type) {
++ int ret = 0;
++ char *bname = NULL;
++ loc_t *loc = NULL;
++ shard_priv_t *priv = NULL;
++ uuid_t gfid = {
++ 0,
++ };
++ struct iatt stbuf = {
++ 0,
++ };
++
++ priv = this->private;
++
++ switch (type) {
++ case SHARD_INTERNAL_DIR_DOT_SHARD:
++ loc = &local->dot_shard_loc;
++ gf_uuid_copy(gfid, priv->dot_shard_gfid);
++ bname = GF_SHARD_DIR;
++ break;
++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++ loc = &local->dot_shard_rm_loc;
++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
++ bname = GF_SHARD_REMOVE_ME_DIR;
++ break;
++ default:
++ break;
++ }
++
++ loc->inode = inode_find(this->itable, gfid);
++ if (!loc->inode) {
++ ret = shard_init_internal_dir_loc(this, local, type);
++ if (ret)
++ goto err;
++ ret = dict_reset(local->xattr_req);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to reset "
++ "dict");
++ ret = -ENOMEM;
++ goto err;
++ }
++ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true);
++ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req,
++ NULL);
++ if (ret < 0) {
++ if (ret != -ENOENT)
++ gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED,
++ "Lookup on %s failed, exiting", bname);
++ goto err;
+ } else {
+- gf_uuid_copy(gfid, local->base_gfid);
++ shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
+ }
++ }
++ ret = 0;
++err:
++ return ret;
++}
++
++int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local,
++ gf_dirent_t *entry) {
++ int ret = 0;
++ loc_t loc = {
++ 0,
++ };
++
++ loc.inode = inode_new(this->itable);
++ if (!loc.inode) {
++ ret = -ENOMEM;
++ goto err;
++ }
++ loc.parent = inode_ref(local->fd->inode);
++
++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed on %s", entry->d_name);
++ ret = -ENOMEM;
++ goto err;
++ }
++
++ loc.name = strrchr(loc.path, '/');
++ if (loc.name)
++ loc.name++;
++
++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
++ if (ret < 0) {
++ goto err;
++ }
++ entry->inode = inode_ref(loc.inode);
++ ret = 0;
++err:
++ loc_wipe(&loc);
++ return ret;
++}
++
++int shard_delete_shards(void *opaque) {
++ int ret = 0;
++ off_t offset = 0;
++ loc_t loc = {
++ 0,
++ };
++ inode_t *link_inode = NULL;
++ xlator_t *this = NULL;
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
++ gf_dirent_t entries;
++ gf_dirent_t *entry = NULL;
++ call_frame_t *cleanup_frame = NULL;
++ gf_boolean_t done = _gf_false;
++
++ this = THIS;
++ priv = this->private;
++ INIT_LIST_HEAD(&entries.list);
++
++ cleanup_frame = opaque;
++
++ local = mem_get0(this->local_pool);
++ if (!local) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++ "Failed to create local to "
++ "delete shards");
++ ret = -ENOMEM;
++ goto err;
++ }
++ cleanup_frame->local = local;
++ local->fop = GF_FOP_UNLINK;
++
++ local->xattr_req = dict_new();
++ if (!local->xattr_req) {
++ ret = -ENOMEM;
++ goto err;
++ }
++ local->deletion_rate = priv->deletion_rate;
++
++ ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
++ if (ret == -ENOENT) {
++ gf_msg_debug(this->name, 0, ".shard absent. Nothing to"
++ " delete. Exiting");
++ ret = 0;
++ goto err;
++ } else if (ret < 0) {
++ goto err;
++ }
+
+- shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
+-
+- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
+- linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf);
+- inode_lookup(linked_inode);
+- list_index = block_num - local->first_block;
+- local->inode_list[list_index] = linked_inode;
+-
++ ret = shard_resolve_internal_dir(this, local,
++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++ if (ret == -ENOENT) {
++ gf_msg_debug(this->name, 0, ".remove_me absent. "
++ "Nothing to delete. Exiting");
++ ret = 0;
++ goto err;
++ } else if (ret < 0) {
++ goto err;
++ }
++
++ local->fd = fd_anonymous(local->dot_shard_rm_loc.inode);
++ if (!local->fd) {
++ ret = -ENOMEM;
++ goto err;
++ }
++
++ for (;;) {
++ offset = 0;
+ LOCK(&priv->lock);
+ {
+- fsync_inode = __shard_update_shards_inode_list(
+- linked_inode, this, base_inode, block_num, gfid);
++ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
++ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
++ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
++ priv->bg_del_state = SHARD_BG_DELETION_NONE;
++ done = _gf_true;
++ }
+ }
+ UNLOCK(&priv->lock);
+- if (fsync_inode)
+- shard_initiate_evicted_inode_fsync(this, fsync_inode);
+-}
+-
+-int
+-shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie,
+- xlator_t *this, int32_t op_ret, int32_t op_errno,
+- inode_t *inode, struct iatt *buf, dict_t *xdata,
+- struct iatt *postparent)
+-{
+- int call_count = 0;
+- int shard_block_num = (long)cookie;
+- uuid_t gfid = {
+- 0,
+- };
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+- if (local->resolver_base_inode)
+- gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+- else
+- gf_uuid_copy(gfid, local->base_gfid);
+-
+- if (op_ret < 0) {
+- /* Ignore absence of shards in the backend in truncate fop. */
+- switch (local->fop) {
+- case GF_FOP_TRUNCATE:
+- case GF_FOP_FTRUNCATE:
+- case GF_FOP_RENAME:
+- case GF_FOP_UNLINK:
+- if (op_errno == ENOENT)
+- goto done;
+- break;
+- case GF_FOP_WRITE:
+- case GF_FOP_READ:
+- case GF_FOP_ZEROFILL:
+- case GF_FOP_DISCARD:
+- case GF_FOP_FALLOCATE:
+- if ((!local->first_lookup_done) && (op_errno == ENOENT)) {
+- LOCK(&frame->lock);
+- {
+- local->create_count++;
+- }
+- UNLOCK(&frame->lock);
+- goto done;
+- }
+- break;
+- default:
+- break;
+- }
+-
+- /* else */
+- gf_msg(this->name, GF_LOG_ERROR, op_errno,
+- SHARD_MSG_LOOKUP_SHARD_FAILED,
+- "Lookup on shard %d "
+- "failed. Base file gfid = %s",
+- shard_block_num, uuid_utoa(gfid));
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto done;
+- }
+-
+- shard_link_block_inode(local, shard_block_num, inode, buf);
+-
+-done:
+- if (local->lookup_shards_barriered) {
+- syncbarrier_wake(&local->barrier);
+- return 0;
+- } else {
+- call_count = shard_call_count_return(frame);
+- if (call_count == 0) {
+- if (!local->first_lookup_done)
+- local->first_lookup_done = _gf_true;
+- local->pls_fop_handler(frame, this);
+- }
+- }
+- return 0;
+-}
+-
+-dict_t *
+-shard_create_gfid_dict(dict_t *dict)
+-{
+- int ret = 0;
+- dict_t *new = NULL;
+- unsigned char *gfid = NULL;
+-
+- new = dict_copy_with_ref(dict, NULL);
+- if (!new)
+- return NULL;
+-
+- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
+- if (!gfid) {
+- ret = -1;
+- goto out;
+- }
+-
+- gf_uuid_generate(gfid);
+-
+- ret = dict_set_gfuuid(new, "gfid-req", gfid, false);
+-
+-out:
+- if (ret) {
+- dict_unref(new);
+- new = NULL;
+- GF_FREE(gfid);
+- }
+-
+- return new;
+-}
++ if (done)
++ break;
++ while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
++ &entries, local->xattr_req, NULL))) {
++ if (ret > 0)
++ ret = 0;
++ list_for_each_entry(entry, &entries.list, list) {
++ offset = entry->d_off;
+
+-int
+-shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+- shard_post_lookup_shards_fop_handler_t handler)
+-{
+- int i = 0;
+- int ret = 0;
+- int count = 0;
+- int call_count = 0;
+- int32_t shard_idx_iter = 0;
+- int last_block = 0;
+- char path[PATH_MAX] = {
+- 0,
+- };
+- char *bname = NULL;
+- uuid_t gfid = {
+- 0,
+- };
+- loc_t loc = {
+- 0,
+- };
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
+- gf_boolean_t wind_failed = _gf_false;
+- dict_t *xattr_req = NULL;
+-
+- priv = this->private;
+- local = frame->local;
+- count = call_count = local->call_count;
+- shard_idx_iter = local->first_block;
+- last_block = local->last_block;
+- local->pls_fop_handler = handler;
+- if (local->lookup_shards_barriered)
+- local->barrier.waitfor = local->call_count;
+-
+- if (inode)
+- gf_uuid_copy(gfid, inode->gfid);
+- else
+- gf_uuid_copy(gfid, local->base_gfid);
++ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
++ continue;
+
+- while (shard_idx_iter <= last_block) {
+- if (local->inode_list[i]) {
+- i++;
+- shard_idx_iter++;
++ if (!entry->inode) {
++ ret = shard_lookup_marker_entry(this, local, entry);
++ if (ret < 0)
+ continue;
+ }
++ link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name,
++ &entry->d_stat);
+
+- if (wind_failed) {
+- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+- this, -1, ENOMEM, NULL, NULL, NULL,
+- NULL);
+- goto next;
+- }
+-
+- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+-
+- bname = strrchr(path, '/') + 1;
+- loc.inode = inode_new(this->itable);
+- loc.parent = inode_ref(priv->dot_shard_inode);
+- gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
+- ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+- if (ret < 0 || !(loc.inode)) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed"
+- " on %s, base file gfid = %s",
+- bname, uuid_utoa(gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- loc_wipe(&loc);
+- wind_failed = _gf_true;
+- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+- this, -1, ENOMEM, NULL, NULL, NULL,
+- NULL);
+- goto next;
+- }
+-
+- loc.name = strrchr(loc.path, '/');
+- if (loc.name)
+- loc.name++;
+-
+- xattr_req = shard_create_gfid_dict(local->xattr_req);
+- if (!xattr_req) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- wind_failed = _gf_true;
+- loc_wipe(&loc);
+- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+- this, -1, ENOMEM, NULL, NULL, NULL,
+- NULL);
+- goto next;
++ gf_msg_debug(this->name, 0, "Initiating deletion of "
++ "shards of gfid %s",
++ entry->d_name);
++ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
++ link_inode);
++ inode_unlink(link_inode, local->fd->inode, entry->d_name);
++ inode_unref(link_inode);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, -ret,
++ SHARD_MSG_SHARDS_DELETION_FAILED,
++ "Failed to clean up shards of gfid %s", entry->d_name);
++ continue;
+ }
++ gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED,
++ "Deleted "
++ "shards of gfid=%s from backend",
++ entry->d_name);
++ }
++ gf_dirent_free(&entries);
++ if (ret)
++ break;
++ }
++ }
++ ret = 0;
++ loc_wipe(&loc);
++ return ret;
+
+- STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk,
+- (void *)(long)shard_idx_iter, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->lookup, &loc, xattr_req);
+- loc_wipe(&loc);
+- dict_unref(xattr_req);
+- next:
+- shard_idx_iter++;
+- i++;
+-
+- if (!--call_count)
+- break;
+- }
+- if (local->lookup_shards_barriered) {
+- syncbarrier_wait(&local->barrier, count);
+- local->pls_fop_handler(frame, this);
+- }
+- return 0;
++err:
++ LOCK(&priv->lock);
++  {
++    priv->bg_del_state = SHARD_BG_DELETION_NONE;
++  }
++ UNLOCK(&priv->lock);
++ loc_wipe(&loc);
++ return ret;
++}
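++/* A hedged reading of the bg_del_state handling above (the launcher
++ * side is not shown here): each pass of the for(;;) loop flips
++ * SHARD_BG_DELETION_LAUNCHING to _IN_PROGRESS before scanning
++ * /.shard/.remove_me, and a pass that starts in _IN_PROGRESS resets
++ * the state to _NONE and exits, so a re-launch that races with an
++ * in-flight scan appears to schedule exactly one more pass.
++ */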
++
++int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++ if (op_ret)
++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++ "Unlock failed. Please check brick logs for "
++ "more details");
++ SHARD_STACK_DESTROY(frame);
++ return 0;
++}
++
++int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) {
++ loc_t *loc = NULL;
++ call_frame_t *lk_frame = NULL;
++ shard_local_t *local = NULL;
++ shard_local_t *lk_local = NULL;
++ shard_inodelk_t *lock = NULL;
++
++ local = frame->local;
++ lk_frame = local->inodelk_frame;
++ lk_local = lk_frame->local;
++ local->inodelk_frame = NULL;
++ loc = &local->int_inodelk.loc;
++ lock = &lk_local->int_inodelk;
++ lock->flock.l_type = F_UNLCK;
++
++ STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK,
++ &lock->flock, NULL);
++ local->int_inodelk.acquired_lock = _gf_false;
++ return 0;
++}
++
++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *buf,
++ struct iatt *preoldparent, struct iatt *postoldparent,
++ struct iatt *prenewparent, struct iatt *postnewparent,
++ dict_t *xdata);
++int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) {
++ int ret = 0;
++ loc_t *dst_loc = NULL;
++ loc_t tmp_loc = {
++ 0,
++ };
++ shard_local_t *local = frame->local;
++
++ if (local->dst_block_size) {
++ tmp_loc.parent = inode_ref(local->loc2.parent);
++ ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed"
++ " on pargfid=%s bname=%s",
++ uuid_utoa(tmp_loc.parent->gfid), local->loc2.name);
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++
++ tmp_loc.name = strrchr(tmp_loc.path, '/');
++ if (tmp_loc.name)
++ tmp_loc.name++;
++ dst_loc = &tmp_loc;
++ } else {
++ dst_loc = &local->loc2;
++ }
++
++ /* To-Do: Request open-fd count on dst base file */
++ STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc,
++ local->xattr_req);
++ loc_wipe(&tmp_loc);
++ return 0;
++err:
++ loc_wipe(&tmp_loc);
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++}
++
++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this);
++
++int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, dict_t *dict,
++ dict_t *xdata) {
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
++
++ priv = this->private;
++ local = frame->local;
++ if (op_ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++ "Xattrop on marker file failed "
++ "while performing %s; entry gfid=%s",
++ gf_fop_string(local->fop), local->newloc.name);
++ goto err;
++ }
++
++ inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode,
++ local->newloc.name);
++
++ if (local->fop == GF_FOP_UNLINK)
++ shard_unlink_base_file(frame, this);
++ else if (local->fop == GF_FOP_RENAME)
++ shard_rename_src_base_file(frame, this);
++ return 0;
++err:
++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
++ return 0;
++}
++
++int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) {
++ int op_errno = ENOMEM;
++ uint64_t bs = 0;
++ dict_t *xdata = NULL;
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++ xdata = dict_new();
++ if (!xdata)
++ goto err;
++
++ if (local->fop == GF_FOP_UNLINK)
++ bs = local->block_size;
++ else if (local->fop == GF_FOP_RENAME)
++ bs = local->dst_block_size;
++ SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc,
++ local->prebuf.ia_size, 0, err);
++ STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->xattrop, &local->newloc,
++ GF_XATTROP_GET_AND_SET, xdata, NULL);
++ dict_unref(xdata);
++ return 0;
++err:
++ if (xdata)
++ dict_unref(xdata);
++ shard_common_failure_unwind(local->fop, frame, -1, op_errno);
++ return 0;
+ }
+
+-int
+-shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- if (local->op_errno == ENOENT) {
+- /* If lookup on /.shard fails with ENOENT, it means that
+- * the file was 0-byte in size but truncated sometime in
+- * the past to a higher size which is reflected in the
+- * size xattr, and now being truncated to a lower size.
+- * In this case, the only thing that needs to be done is
+- * to update the size xattr of the file and unwind.
+- */
+- local->first_block = local->last_block = 0;
+- local->num_blocks = 1;
+- local->call_count = 0;
+- local->op_ret = 0;
+- local->postbuf.ia_size = local->offset;
+- shard_update_file_size(frame, this, local->fd, &local->loc,
+- shard_post_update_size_truncate_handler);
+- return 0;
+- } else {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
+- }
+-
+- if (!local->call_count)
+- shard_truncate_do(frame, this);
+- else
+- shard_common_lookup_shards(frame, this, local->loc.inode,
+- shard_post_lookup_shards_truncate_handler);
+-
+- return 0;
++int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode,
++ struct iatt *buf, dict_t *xdata,
++ struct iatt *postparent) {
++ inode_t *linked_inode = NULL;
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++ priv = this->private;
++
++ if (op_ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++ "Lookup on marker file failed "
++ "while performing %s; entry gfid=%s",
++ gf_fop_string(local->fop), local->newloc.name);
++ goto err;
++ }
++
++ linked_inode =
++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf);
++ inode_unref(local->newloc.inode);
++ local->newloc.inode = linked_inode;
++ shard_set_size_attrs_on_marker_file(frame, this);
++ return 0;
++err:
++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
++ return 0;
+ }
+
+-int
+-shard_truncate_begin(call_frame_t *frame, xlator_t *this)
+-{
+- int ret = 0;
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
+-
+- priv = this->private;
+- local = frame->local;
+-
+- /* First participant block here is the lowest numbered block that would
+- * hold the last byte of the file post successful truncation.
+- * Last participant block is the block that contains the last byte in
+- * the current state of the file.
+- * If (first block == last_block):
+- * then that means that the file only needs truncation of the
+- * first (or last since both are same) block.
+- * Else
+- * if (new_size % block_size == 0)
+- * then that means there is no truncate to be done with
+- * only shards from first_block + 1 through the last
+- * block needing to be unlinked.
+- * else
+- * both truncate of the first block and unlink of the
+- * remaining shards until end of file is required.
+- */
+- local->first_block = (local->offset == 0)
+- ? 0
+- : get_lowest_block(local->offset - 1,
+- local->block_size);
+- local->last_block = get_highest_block(0, local->prebuf.ia_size,
+- local->block_size);
+-
+- local->num_blocks = local->last_block - local->first_block + 1;
+- local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE)
+- ? local->loc.inode
+- : local->fd->inode;
+-
+- if ((local->first_block == 0) && (local->num_blocks == 1)) {
+- if (local->fop == GF_FOP_TRUNCATE)
+- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->truncate, &local->loc,
+- local->offset, local->xattr_req);
+- else
+- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->ftruncate, local->fd,
+- local->offset, local->xattr_req);
+- return 0;
+- }
++int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) {
++ int op_errno = ENOMEM;
++ dict_t *xattr_req = NULL;
++ shard_local_t *local = NULL;
+
+- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+- gf_shard_mt_inode_list);
+- if (!local->inode_list)
+- goto err;
++ local = frame->local;
+
+- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+- if (!local->dot_shard_loc.inode) {
+- ret = shard_init_internal_dir_loc(this, local,
+- SHARD_INTERNAL_DIR_DOT_SHARD);
+- if (ret)
+- goto err;
+- shard_lookup_internal_dir(frame, this,
+- shard_post_resolve_truncate_handler,
+- SHARD_INTERNAL_DIR_DOT_SHARD);
+- } else {
+- local->post_res_handler = shard_post_resolve_truncate_handler;
+- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+- }
+- return 0;
++ xattr_req = shard_create_gfid_dict(local->xattr_req);
++ if (!xattr_req)
++ goto err;
+
++ STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req);
++ dict_unref(xattr_req);
++ return 0;
+ err:
+- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+- return 0;
++ shard_common_failure_unwind(local->fop, frame, -1, op_errno);
++ return 0;
+ }
+
+-int
+-shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+- struct iatt tmp_stbuf = {
+- 0,
+- };
+-
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
+-
+- local->postbuf = tmp_stbuf = local->prebuf;
+-
+- if (local->prebuf.ia_size == local->offset) {
+- /* If the file size is the same as the requested size, unwind the
+- * call immediately.
+- */
+- if (local->fop == GF_FOP_TRUNCATE)
+- SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf,
+- &local->postbuf, NULL);
+- else
+- SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf,
+- &local->postbuf, NULL);
+- } else if (local->offset > local->prebuf.ia_size) {
+- /* If the truncate is from a lower to a higher size, set the
+- * new size xattr and unwind.
+- */
+- local->hole_size = local->offset - local->prebuf.ia_size;
+- local->delta_size = 0;
+- GF_ATOMIC_INIT(local->delta_blocks, 0);
+- local->postbuf.ia_size = local->offset;
+- tmp_stbuf.ia_size = local->offset;
+- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+- SHARD_INODE_WRITE_MASK);
+- shard_update_file_size(frame, this, NULL, &local->loc,
+- shard_post_update_size_truncate_handler);
++int shard_create_marker_file_under_remove_me_cbk(
++ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata) {
++ inode_t *linked_inode = NULL;
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++ priv = this->private;
++
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++ if (op_ret < 0) {
++ if ((op_errno != EEXIST) && (op_errno != ENODATA)) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++ "Marker file creation "
++ "failed while performing %s; entry gfid=%s",
++ gf_fop_string(local->fop), local->newloc.name);
++ goto err;
+ } else {
+- /* ... else
+- * i. unlink all shards that need to be unlinked.
+- * ii. truncate the last of the shards.
+- * iii. update the new size using setxattr.
+- * and unwind the fop.
+- */
+- local->hole_size = 0;
+- local->delta_size = (local->offset - local->prebuf.ia_size);
+- GF_ATOMIC_INIT(local->delta_blocks, 0);
+- tmp_stbuf.ia_size = local->offset;
+- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+- SHARD_INODE_WRITE_MASK);
+- shard_truncate_begin(frame, this);
+- }
+- return 0;
+-}
+-
+-/* TO-DO:
+- * Fix updates to size and block count with racing write(s) and truncate(s).
+- */
+-
+-int
+-shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+- dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size from inode ctx of %s",
+- uuid_utoa(loc->inode->gfid));
+- goto err;
++ shard_lookup_marker_file(frame, this);
++ return 0;
+ }
++ }
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+- return 0;
+- }
+-
+- if (!this->itable)
+- this->itable = loc->inode->table;
+-
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
+-
+- frame->local = local;
+-
+- ret = syncbarrier_init(&local->barrier);
+- if (ret)
+- goto err;
+- loc_copy(&local->loc, loc);
+- local->offset = offset;
+- local->block_size = block_size;
+- local->fop = GF_FOP_TRUNCATE;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
+- local->resolver_base_inode = loc->inode;
+- GF_ATOMIC_INIT(local->delta_blocks, 0);
+-
+- shard_lookup_base_file(frame, this, &local->loc,
+- shard_post_lookup_truncate_handler);
+- return 0;
++ linked_inode =
++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf);
++ inode_unref(local->newloc.inode);
++ local->newloc.inode = linked_inode;
+
++ if (local->fop == GF_FOP_UNLINK)
++ shard_unlink_base_file(frame, this);
++ else if (local->fop == GF_FOP_RENAME)
++ shard_rename_src_base_file(frame, this);
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM);
+- return 0;
+-}
+-
+-int
+-shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+- dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size from inode ctx of %s",
+- uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
+-
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+- return 0;
+- }
+-
+- if (!this->itable)
+- this->itable = fd->inode->table;
+-
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
+-
+- frame->local = local;
+- ret = syncbarrier_init(&local->barrier);
+- if (ret)
+- goto err;
+- local->fd = fd_ref(fd);
+- local->offset = offset;
+- local->block_size = block_size;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
+- local->fop = GF_FOP_FTRUNCATE;
++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
++ return 0;
++}
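
In the callback above, EEXIST and ENODATA from the mknod are treated as recoverable: the marker may survive from an earlier interrupted attempt, so the code falls back to a lookup instead of failing the fop. The predicate, restated as a standalone sketch:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Recoverable mknod outcomes for the .remove_me marker; anything
     * else fails the fop, these two are retried via lookup. */
    static bool
    marker_create_recoverable(int32_t op_errno)
    {
        return (op_errno == EEXIST) || (op_errno == ENODATA);
    }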
++
++int shard_create_marker_file_under_remove_me(call_frame_t *frame,
++ xlator_t *this, loc_t *loc) {
++ int ret = 0;
++ int op_errno = ENOMEM;
++ uint64_t bs = 0;
++ char g1[64] = {
++ 0,
++ };
++ char g2[64] = {
++ 0,
++ };
++ dict_t *xattr_req = NULL;
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
++
++ priv = this->private;
++ local = frame->local;
++
++ SHARD_SET_ROOT_FS_ID(frame, local);
++
++ xattr_req = shard_create_gfid_dict(local->xattr_req);
++ if (!xattr_req)
++ goto err;
++
++ local->newloc.inode = inode_new(this->itable);
++ local->newloc.parent = inode_ref(priv->dot_shard_rm_inode);
++ ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid),
++ (char **)&local->newloc.path);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++ "Inode path failed on "
++ "pargfid=%s bname=%s",
++ uuid_utoa_r(priv->dot_shard_rm_gfid, g1),
++ uuid_utoa_r(loc->inode->gfid, g2));
++ goto err;
++ }
++ local->newloc.name = strrchr(local->newloc.path, '/');
++ if (local->newloc.name)
++ local->newloc.name++;
++
++ if (local->fop == GF_FOP_UNLINK)
++ bs = local->block_size;
++ else if (local->fop == GF_FOP_RENAME)
++ bs = local->dst_block_size;
++
++ SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
++ local->prebuf.ia_size, 0, err);
++
++ STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc,
++ 0, 0, 0644, xattr_req);
++ dict_unref(xattr_req);
++ return 0;
+
+- local->loc.inode = inode_ref(fd->inode);
+- gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+- local->resolver_base_inode = fd->inode;
+- GF_ATOMIC_INIT(local->delta_blocks, 0);
+-
+- shard_lookup_base_file(frame, this, &local->loc,
+- shard_post_lookup_truncate_handler);
+- return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
+- return 0;
++ if (xattr_req)
++ dict_unref(xattr_req);
++ shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno,
++ NULL, NULL, NULL, NULL, NULL);
++ return 0;
+ }
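
shard_create_marker_file_under_remove_me() names the marker after the base file's gfid and derives loc.name by pointing just past the last '/' of the path returned by inode_path(). The same idiom recurs throughout this patch; as a standalone sketch:

    #include <string.h>

    /* Sketch: basename extraction as done on local->newloc.path above.
     * inode_path() yields an absolute path, so strrchr() should always
     * succeed; the fallback merely keeps the helper total. */
    static const char *
    path_basename(const char *path)
    {
        const char *name = strrchr(path, '/');
        return name ? name + 1 : path;
    }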
+
+-int
+-shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, struct iatt *preparent,
+- struct iatt *postparent, dict_t *xdata)
+-{
+- int ret = -1;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (op_ret == -1)
+- goto unwind;
+-
+- ret = shard_inode_ctx_set(inode, this, buf, local->block_size,
+- SHARD_ALL_MASK);
+- if (ret)
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
+- "Failed to set inode "
+- "ctx for %s",
+- uuid_utoa(inode->gfid));
+-
+-unwind:
+- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+- postparent, xdata);
++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+
+- return 0;
+-}
++int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret, int32_t op_errno,
++ struct iatt *preparent, struct iatt *postparent,
++ dict_t *xdata) {
++ int ret = 0;
++ shard_local_t *local = NULL;
+
+-int
+-shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+- dev_t rdev, mode_t umask, dict_t *xdata)
+-{
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
++ local = frame->local;
+
+- priv = this->private;
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ } else {
++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
++ local->preoldparent = *preparent;
++ local->postoldparent = *postparent;
++ if (xdata)
++ local->xattr_rsp = dict_ref(xdata);
++ if (local->cleanup_required)
++ shard_start_background_deletion(this);
++ }
+
+- frame->local = local;
+- local->block_size = priv->block_size;
+- if (!__is_gsyncd_on_shard_dir(frame, loc)) {
+- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
++ if (local->entrylk_frame) {
++ ret = shard_unlock_entrylk(frame, this);
++ if (ret < 0) {
++ local->op_ret = -1;
++ local->op_errno = -ret;
+ }
++ }
+
+- STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+- return 0;
+-err:
+- shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM);
+- return 0;
+-}
+-
+-int32_t
+-shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, struct iatt *preparent,
+- struct iatt *postparent, dict_t *xdata)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+- if (op_ret < 0)
+- goto err;
+-
+- shard_inode_ctx_set(inode, this, buf, 0,
+- SHARD_MASK_NLINK | SHARD_MASK_TIMES);
+- buf->ia_size = local->prebuf.ia_size;
+- buf->ia_blocks = local->prebuf.ia_blocks;
+-
+- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+- postparent, xdata);
+- return 0;
++ ret = shard_unlock_inodelk(frame, this);
++ if (ret < 0) {
++ local->op_ret = -1;
++ local->op_errno = -ret;
++ }
++
++ shard_unlink_cbk(frame, this);
++ return 0;
++}
++
++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = frame->local;
++
++ /* To-Do: Request open-fd count on base file */
++ STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
++ local->xattr_req);
++ return 0;
++}
++
++int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++ if (op_ret)
++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++ "Unlock failed. Please check brick logs for "
++ "more details");
++ SHARD_STACK_DESTROY(frame);
++ return 0;
++}
++
++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) {
++ loc_t *loc = NULL;
++ call_frame_t *lk_frame = NULL;
++ shard_local_t *local = NULL;
++ shard_local_t *lk_local = NULL;
++ shard_entrylk_t *lock = NULL;
++
++ local = frame->local;
++ lk_frame = local->entrylk_frame;
++ lk_local = lk_frame->local;
++ local->entrylk_frame = NULL;
++ lock = &lk_local->int_entrylk;
++ loc = &lock->loc;
++
++ STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->entrylk, this->name, loc,
++ lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK,
++ NULL);
++ local->int_entrylk.acquired_lock = _gf_false;
++ return 0;
++}
++
++int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ switch (local->fop) {
++ case GF_FOP_UNLINK:
++ case GF_FOP_RENAME:
++ shard_create_marker_file_under_remove_me(frame, this,
++ &local->int_inodelk.loc);
++ break;
++ default:
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++ "post-entrylk handler not defined. This case should not"
++ " be hit");
++ break;
++ }
++ return 0;
++}
++
++int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++ call_frame_t *main_frame = NULL;
++ shard_local_t *local = NULL;
++ shard_local_t *main_local = NULL;
++
++ local = frame->local;
++ main_frame = local->main_frame;
++ main_local = main_frame->local;
++
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno);
++ return 0;
++ }
++ main_local->int_entrylk.acquired_lock = _gf_true;
++ shard_post_entrylk_fop_handler(main_frame, this);
++ return 0;
++}
++
++int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
++ uuid_t gfid) {
++ char gfid_str[GF_UUID_BUF_SIZE] = {
++ 0,
++ };
++ shard_local_t *local = NULL;
++ shard_local_t *entrylk_local = NULL;
++ shard_entrylk_t *int_entrylk = NULL;
++ call_frame_t *entrylk_frame = NULL;
++
++ local = frame->local;
++ entrylk_frame = create_frame(this, this->ctx->pool);
++ if (!entrylk_frame) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++ "Failed to create new frame "
++ "to lock marker file");
++ goto err;
++ }
++
++ entrylk_local = mem_get0(this->local_pool);
++ if (!entrylk_local) {
++ STACK_DESTROY(entrylk_frame->root);
++ goto err;
++ }
++
++ entrylk_frame->local = entrylk_local;
++ entrylk_local->main_frame = frame;
++ int_entrylk = &entrylk_local->int_entrylk;
++
++ int_entrylk->loc.inode = inode_ref(inode);
++ set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root);
++ local->entrylk_frame = entrylk_frame;
++ gf_uuid_unparse(gfid, gfid_str);
++ int_entrylk->basename = gf_strdup(gfid_str);
++
++ STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc,
++ int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno);
+- return 0;
+-}
+-
+-int
+-shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL,
+- NULL, NULL, NULL, NULL);
+- return 0;
+- }
+-
+- STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2,
+- local->xattr_req);
+- return 0;
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
+ }
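
shard_acquire_entrylk() serializes unlink/rename against the background deleter by locking the .remove_me directory on the base file's canonical gfid string; shard_unlock_entrylk() later releases it from the same dedicated frame. A sketch of how that basename is rendered, assuming gf_uuid_unparse() produces the standard 36-character UUID form:

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch: canonical gfid string used as the entrylk basename. */
    static void
    gfid_to_basename(const uint8_t g[16], char out[37])
    {
        snprintf(out, 37,
                 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
                 "%02x%02x%02x%02x%02x%02x",
                 g[0], g[1], g[2], g[3], g[4], g[5], g[6], g[7], g[8],
                 g[9], g[10], g[11], g[12], g[13], g[14], g[15]);
    }

The deleter takes the same entrylk via syncop_entrylk() on entry->d_name (further down in this patch), so a marker cannot be created and reaped concurrently.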
+
+-int32_t
+-shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+- dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size from inode ctx of %s",
+- uuid_utoa(oldloc->inode->gfid));
+- goto err;
+- }
+-
+- if (!block_size) {
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+- oldloc, newloc, xdata);
+- return 0;
+- }
+-
+- if (!this->itable)
+- this->itable = oldloc->inode->table;
+-
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
+-
+- frame->local = local;
+-
+- loc_copy(&local->loc, oldloc);
+- loc_copy(&local->loc2, newloc);
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
+-
+- shard_lookup_base_file(frame, this, &local->loc,
+- shard_post_lookup_link_handler);
+- return 0;
+-err:
+- shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
+- return 0;
+-}
+-
+-int
+-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+-
+-int
+-shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+- gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+- "failed to delete shards of %s",
+- uuid_utoa(local->resolver_base_inode->gfid));
+- return 0;
+- }
+- local->op_ret = 0;
+- local->op_errno = 0;
+-
+- shard_unlink_shards_do(frame, this, local->resolver_base_inode);
+- return 0;
+-}
+-
+-int
+-shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+- local->lookup_shards_barriered = _gf_true;
+-
+- if (!local->call_count)
+- shard_unlink_shards_do(frame, this, local->resolver_base_inode);
+- else
+- shard_common_lookup_shards(frame, this, local->resolver_base_inode,
+- shard_post_lookup_shards_unlink_handler);
+- return 0;
+-}
+-
+-void
+-shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
+-{
+- char block_bname[256] = {
+- 0,
+- };
+- uuid_t gfid = {
+- 0,
+- };
+- inode_t *inode = NULL;
+- inode_t *base_inode = NULL;
+- xlator_t *this = NULL;
+- shard_priv_t *priv = NULL;
+- shard_inode_ctx_t *ctx = NULL;
+- shard_inode_ctx_t *base_ictx = NULL;
+- int unref_base_inode = 0;
+- int unref_shard_inode = 0;
+-
+- this = THIS;
+- priv = this->private;
+-
+- inode = local->inode_list[shard_block_num - local->first_block];
+- shard_inode_ctx_get(inode, this, &ctx);
+- base_inode = ctx->base_inode;
+- if (base_inode)
+- gf_uuid_copy(gfid, base_inode->gfid);
+- else
+- gf_uuid_copy(gfid, ctx->base_gfid);
+- shard_make_block_bname(shard_block_num, gfid, block_bname,
+- sizeof(block_bname));
+-
+- LOCK(&priv->lock);
+- if (base_inode)
+- LOCK(&base_inode->lock);
+- LOCK(&inode->lock);
+- {
+- __shard_inode_ctx_get(inode, this, &ctx);
+- if (!list_empty(&ctx->ilist)) {
+- list_del_init(&ctx->ilist);
+- priv->inode_count--;
+- unref_base_inode++;
+- unref_shard_inode++;
+- GF_ASSERT(priv->inode_count >= 0);
+- }
+- if (ctx->fsync_needed) {
+- unref_base_inode++;
+- unref_shard_inode++;
+- list_del_init(&ctx->to_fsync_list);
+- if (base_inode) {
+- __shard_inode_ctx_get(base_inode, this, &base_ictx);
+- base_ictx->fsync_count--;
+- }
+- }
+- }
+- UNLOCK(&inode->lock);
+- if (base_inode)
+- UNLOCK(&base_inode->lock);
+-
+- inode_unlink(inode, priv->dot_shard_inode, block_bname);
+- inode_ref_reduce_by_n(inode, unref_shard_inode);
+- inode_forget(inode, 0);
+-
+- if (base_inode && unref_base_inode)
+- inode_ref_reduce_by_n(base_inode, unref_base_inode);
+- UNLOCK(&priv->lock);
+-}
+-
+-int
+-shard_rename_cbk(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->preoldparent,
+- &local->postoldparent, &local->prenewparent,
+- &local->postnewparent, local->xattr_rsp);
+- return 0;
+-}
+-
+-int32_t
+-shard_unlink_cbk(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = frame->local;
+-
+- SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+- &local->preoldparent, &local->postoldparent,
+- local->xattr_rsp);
+- return 0;
+-}
+-
+-int
+-shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno,
+- struct iatt *preparent, struct iatt *postparent,
+- dict_t *xdata)
+-{
+- int shard_block_num = (long)cookie;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto done;
+- }
+-
+- shard_unlink_block_inode(local, shard_block_num);
+-done:
+- syncbarrier_wake(&local->barrier);
+- return 0;
+-}
+-
+-int
+-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
+-{
+- int i = 0;
+- int ret = -1;
+- int count = 0;
+- uint32_t cur_block = 0;
+- uint32_t cur_block_idx = 0; /* index into the inode_list[] array */
+- char *bname = NULL;
+- char path[PATH_MAX] = {
+- 0,
+- };
+- uuid_t gfid = {
+- 0,
+- };
+- loc_t loc = {
+- 0,
+- };
+- gf_boolean_t wind_failed = _gf_false;
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
+-
+- priv = this->private;
+- local = frame->local;
+-
+- if (inode)
+- gf_uuid_copy(gfid, inode->gfid);
+- else
+- gf_uuid_copy(gfid, local->base_gfid);
+-
+- for (i = 0; i < local->num_blocks; i++) {
+- if (!local->inode_list[i])
+- continue;
+- count++;
+- }
+-
+- if (!count) {
+- /* count == 0 implies that all of the shards that need to be
+- * unlinked are non-existent (in other words the file is full of
+- * holes).
+- */
+- gf_msg_debug(this->name, 0,
+- "All shards that need to be "
+- "unlinked are non-existent: %s",
+- uuid_utoa(gfid));
+- return 0;
+- }
+-
+- SHARD_SET_ROOT_FS_ID(frame, local);
+- local->barrier.waitfor = count;
+- cur_block = cur_block_idx + local->first_block;
+-
+- while (cur_block_idx < local->num_blocks) {
+- if (!local->inode_list[cur_block_idx])
+- goto next;
+-
+- if (wind_failed) {
+- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- goto next;
+- }
+-
+- shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
+- bname = strrchr(path, '/') + 1;
+- loc.parent = inode_ref(priv->dot_shard_inode);
+- ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed"
+- " on %s, base file gfid = %s",
+- bname, uuid_utoa(gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- loc_wipe(&loc);
+- wind_failed = _gf_true;
+- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- goto next;
+- }
+-
+- loc.name = strrchr(loc.path, '/');
+- if (loc.name)
+- loc.name++;
+- loc.inode = inode_ref(local->inode_list[cur_block_idx]);
+-
+- STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk,
+- (void *)(long)cur_block, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->unlink, &loc, local->xflag,
+- local->xattr_req);
+- loc_wipe(&loc);
+- next:
+- cur_block++;
+- cur_block_idx++;
+- }
+- syncbarrier_wait(&local->barrier, count);
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
+- return 0;
+-}
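
shard_unlink_shards_do() is a fan-out/fan-in: count the resolved shards, wind one unlink per shard, then block on the syncbarrier for exactly that many completions. The shard basenames follow the conventional "<gfid>.<block>" form (an assumption here; shard_make_block_abspath() itself is not shown in this hunk):

    #include <stddef.h>
    #include <stdio.h>

    /* Sketch, assuming the "<gfid>.<block>" naming convention of
     * shard_make_block_bname()/shard_make_block_abspath(). */
    static void
    make_block_bname(char *buf, size_t len, const char *gfid_str, int block)
    {
        snprintf(buf, len, "%s.%d", gfid_str, block);
    }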
+-
+-int
+-shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this,
+- int now, int first_block, gf_dirent_t *entry)
+-{
+- int i = 0;
+- int ret = 0;
+- shard_local_t *local = NULL;
+- uuid_t gfid = {
+- 0,
+- };
+-
+- local = cleanup_frame->local;
+-
+- local->inode_list = GF_CALLOC(now, sizeof(inode_t *),
+- gf_shard_mt_inode_list);
+- if (!local->inode_list)
+- return -ENOMEM;
+-
+- local->first_block = first_block;
+- local->last_block = first_block + now - 1;
+- local->num_blocks = now;
+- gf_uuid_parse(entry->d_name, gfid);
+- gf_uuid_copy(local->base_gfid, gfid);
+- local->resolver_base_inode = inode_find(this->itable, gfid);
+- local->call_count = 0;
+- ret = syncbarrier_init(&local->barrier);
+- if (ret) {
+- GF_FREE(local->inode_list);
+- local->inode_list = NULL;
+- inode_unref(local->resolver_base_inode);
+- local->resolver_base_inode = NULL;
+- return -errno;
+- }
+- shard_common_resolve_shards(cleanup_frame, this,
+- shard_post_resolve_unlink_handler);
+-
+- for (i = 0; i < local->num_blocks; i++) {
+- if (local->inode_list[i])
+- inode_unref(local->inode_list[i]);
+- }
+- GF_FREE(local->inode_list);
+- local->inode_list = NULL;
+- if (local->op_ret)
+- ret = -local->op_errno;
+- syncbarrier_destroy(&local->barrier);
+- inode_unref(local->resolver_base_inode);
+- local->resolver_base_inode = NULL;
+- STACK_RESET(cleanup_frame->root);
+- return ret;
+-}
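
shard_regulated_shards_deletion() is driven in batches of at most deletion_rate blocks per pass (see the while loop in __shard_delete_shards_of_entry() below). Reduced to its arithmetic, with delete_range() as a hypothetical stand-in for the call above:

    /* Sketch of the rate-limited batching: blocks are deleted starting
     * from block 1, at most 'rate' per pass. */
    extern int delete_range(int first_block, int count); /* hypothetical */

    static int
    delete_in_batches(int shard_count, int rate)
    {
        int first_block = 1;
        while (shard_count > 0) {
            int now = (shard_count < rate) ? shard_count : rate;
            int ret = delete_range(first_block, now);
            if (ret)
                return ret;
            first_block += now;
            shard_count -= now;
        }
        return 0;
    }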
+-
+-int
+-__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
+- gf_dirent_t *entry, inode_t *inode)
+-{
+- int ret = 0;
+- int shard_count = 0;
+- int first_block = 0;
+- int now = 0;
+- uint64_t size = 0;
+- uint64_t block_size = 0;
+- uint64_t size_array[4] = {
+- 0,
+- };
+- void *bsize = NULL;
+- void *size_attr = NULL;
+- dict_t *xattr_rsp = NULL;
+- loc_t loc = {
+- 0,
+- };
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
+-
+- priv = this->private;
+- local = cleanup_frame->local;
+- ret = dict_reset(local->xattr_req);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to reset dict");
+- ret = -ENOMEM;
+- goto err;
+- }
+-
+- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
+- ret = -ENOMEM;
+- goto err;
+- }
+-
+- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
+- ret = -ENOMEM;
+- goto err;
+- }
+-
+- loc.inode = inode_ref(inode);
+- loc.parent = inode_ref(priv->dot_shard_rm_inode);
+- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed on %s", entry->d_name);
+- ret = -ENOMEM;
+- goto err;
+- }
+-
+- loc.name = strrchr(loc.path, '/');
+- if (loc.name)
+- loc.name++;
+- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req,
+- &xattr_rsp);
+- if (ret)
+- goto err;
+-
+- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
+- goto err;
+- }
+- block_size = ntoh64(*((uint64_t *)bsize));
+-
+- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
+- goto err;
+- }
+-
+- memcpy(size_array, size_attr, sizeof(size_array));
+- size = ntoh64(size_array[0]);
+-
+- shard_count = (size / block_size) - 1;
+- if (shard_count < 0) {
+- gf_msg_debug(this->name, 0,
+- "Size of %s hasn't grown beyond "
+- "its shard-block-size. Nothing to delete. "
+- "Returning",
+- entry->d_name);
+- /* File size < shard-block-size, so nothing to delete */
+- ret = 0;
+- goto delete_marker;
+- }
+- if ((size % block_size) > 0)
+- shard_count++;
+-
+- if (shard_count == 0) {
+- gf_msg_debug(this->name, 0,
+- "Size of %s is exactly equal to "
+- "its shard-block-size. Nothing to delete. "
+- "Returning",
+- entry->d_name);
+- ret = 0;
+- goto delete_marker;
+- }
+- gf_msg_debug(this->name, 0,
+- "base file = %s, "
+- "shard-block-size=%" PRIu64 ", file-size=%" PRIu64
+- ", "
+- "shard_count=%d",
+- entry->d_name, block_size, size, shard_count);
+-
+- /* Perform a gfid-based lookup to see if the gfid corresponding to
+- * the marker file's base name exists.
+- */
+- loc_wipe(&loc);
+- loc.inode = inode_new(this->itable);
+- if (!loc.inode) {
+- ret = -ENOMEM;
+- goto err;
+- }
+- gf_uuid_parse(entry->d_name, loc.gfid);
+- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+- if (!ret) {
+- gf_msg_debug(this->name, 0,
+- "Base shard corresponding to gfid "
+- "%s is present. Skipping shard deletion. "
+- "Returning",
+- entry->d_name);
+- ret = 0;
+- goto delete_marker;
+- }
+-
+- first_block = 1;
+-
+- while (shard_count) {
+- if (shard_count < local->deletion_rate) {
+- now = shard_count;
+- shard_count = 0;
+- } else {
+- now = local->deletion_rate;
+- shard_count -= local->deletion_rate;
+- }
+-
+- gf_msg_debug(this->name, 0,
+- "deleting %d shards starting from "
+- "block %d of gfid %s",
+- now, first_block, entry->d_name);
+- ret = shard_regulated_shards_deletion(cleanup_frame, this, now,
+- first_block, entry);
+- if (ret)
+- goto err;
+- first_block += now;
+- }
+-
+-delete_marker:
+- loc_wipe(&loc);
+- loc.inode = inode_ref(inode);
+- loc.parent = inode_ref(priv->dot_shard_rm_inode);
+- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed on %s", entry->d_name);
+- ret = -ENOMEM;
+- goto err;
+- }
+- loc.name = strrchr(loc.path, '/');
+- if (loc.name)
+- loc.name++;
+- ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL);
+- if (ret)
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED,
+- "Failed to delete %s "
+- "from /%s",
+- entry->d_name, GF_SHARD_REMOVE_ME_DIR);
+-err:
+- if (xattr_rsp)
+- dict_unref(xattr_rsp);
+- loc_wipe(&loc);
+- return ret;
+-}
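
The shard_count logic above works out how many numbered shards exist beyond the base file (which holds block 0): (size / block_size) - 1, plus one more when a partial tail block exists. Standalone sketch with a worked case:

    #include <stdint.h>

    /* Sketch: size = 200 MiB, block_size = 64 MiB gives 200/64 - 1 = 2,
     * and the partial tail bumps it to 3 (blocks 1..3), consistent
     * with the truncate example earlier in this patch. */
    static int64_t
    shards_beyond_base(uint64_t size, uint64_t block_size)
    {
        int64_t shard_count = (int64_t)(size / block_size) - 1;
        if (shard_count < 0)
            return 0; /* file never grew past its first block */
        if (size % block_size > 0)
            shard_count++;
        return shard_count;
    }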
+-
+-int
+-shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
+- gf_dirent_t *entry, inode_t *inode)
+-{
+- int ret = -1;
+- loc_t loc = {
+- 0,
+- };
+- shard_priv_t *priv = NULL;
+-
+- priv = this->private;
+- loc.inode = inode_ref(priv->dot_shard_rm_inode);
+-
+- ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
+- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
+- if (ret < 0) {
+- if (ret == -EAGAIN) {
+- ret = 0;
+- }
+- goto out;
+- }
+- {
+- ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode);
+- }
+- syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
+- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL);
+-out:
+- loc_wipe(&loc);
+- return ret;
+-}
+-
+-int
+-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data)
+-{
+- SHARD_STACK_DESTROY(frame);
+- return 0;
+-}
+-
+-int
+-shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
+- shard_internal_dir_type_t type)
+-{
+- int ret = 0;
+- char *bname = NULL;
+- loc_t *loc = NULL;
+- shard_priv_t *priv = NULL;
+- uuid_t gfid = {
+- 0,
+- };
+- struct iatt stbuf = {
+- 0,
+- };
+-
+- priv = this->private;
+-
+- switch (type) {
+- case SHARD_INTERNAL_DIR_DOT_SHARD:
+- loc = &local->dot_shard_loc;
+- gf_uuid_copy(gfid, priv->dot_shard_gfid);
+- bname = GF_SHARD_DIR;
+- break;
+- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+- loc = &local->dot_shard_rm_loc;
+- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
+- bname = GF_SHARD_REMOVE_ME_DIR;
+- break;
+- default:
+- break;
+- }
+-
+- loc->inode = inode_find(this->itable, gfid);
+- if (!loc->inode) {
+- ret = shard_init_internal_dir_loc(this, local, type);
+- if (ret)
+- goto err;
+- ret = dict_reset(local->xattr_req);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to reset "
+- "dict");
+- ret = -ENOMEM;
+- goto err;
+- }
+- ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true);
+- ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL,
+- local->xattr_req, NULL);
+- if (ret < 0) {
+- if (ret != -ENOENT)
+- gf_msg(this->name, GF_LOG_ERROR, -ret,
+- SHARD_MSG_SHARDS_DELETION_FAILED,
+- "Lookup on %s failed, exiting", bname);
+- goto err;
+- } else {
+- shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
+- }
+- }
+- ret = 0;
+-err:
+- return ret;
+-}
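
shard_resolve_internal_dir() is a find-or-lookup: consult the inode table first and only fall back to an on-wire lookup keyed by the expected gfid (the "gfid-req" key). Its shape, with hypothetical stand-ins for inode_find(), syncop_lookup() and shard_link_internal_dir_inode():

    #include <stdint.h>

    typedef struct inode inode_t; /* opaque for this sketch */

    /* Hypothetical stand-ins for inode_find() and a gfid-addressed
     * lookup that links the result into the table on success. */
    extern inode_t *itable_find(const uint8_t gfid[16]);
    extern int lookup_and_link_by_gfid(const uint8_t gfid[16], inode_t **out);

    static int
    resolve_internal_dir(const uint8_t gfid[16], inode_t **out)
    {
        *out = itable_find(gfid);
        if (*out)
            return 0;                              /* already resolved */
        return lookup_and_link_by_gfid(gfid, out); /* -ENOENT if absent */
    }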
+-
+-int
+-shard_lookup_marker_entry(xlator_t *this, shard_local_t *local,
+- gf_dirent_t *entry)
+-{
+- int ret = 0;
+- loc_t loc = {
+- 0,
+- };
+-
+- loc.inode = inode_new(this->itable);
+- if (!loc.inode) {
+- ret = -ENOMEM;
+- goto err;
+- }
+- loc.parent = inode_ref(local->fd->inode);
+-
+- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed on %s", entry->d_name);
+- ret = -ENOMEM;
+- goto err;
+- }
+-
+- loc.name = strrchr(loc.path, '/');
+- if (loc.name)
+- loc.name++;
+-
+- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+- if (ret < 0) {
+- goto err;
+- }
+- entry->inode = inode_ref(loc.inode);
+- ret = 0;
+-err:
+- loc_wipe(&loc);
+- return ret;
+-}
+-
+-int
+-shard_delete_shards(void *opaque)
+-{
+- int ret = 0;
+- off_t offset = 0;
+- loc_t loc = {
+- 0,
+- };
+- inode_t *link_inode = NULL;
+- xlator_t *this = NULL;
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+- gf_dirent_t entries;
+- gf_dirent_t *entry = NULL;
+- call_frame_t *cleanup_frame = NULL;
+- gf_boolean_t done = _gf_false;
+-
+- this = THIS;
+- priv = this->private;
+- INIT_LIST_HEAD(&entries.list);
+-
+- cleanup_frame = opaque;
+-
+- local = mem_get0(this->local_pool);
+- if (!local) {
+- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+- "Failed to create local to "
+- "delete shards");
+- ret = -ENOMEM;
+- goto err;
+- }
+- cleanup_frame->local = local;
+- local->fop = GF_FOP_UNLINK;
+-
+- local->xattr_req = dict_new();
+- if (!local->xattr_req) {
+- ret = -ENOMEM;
+- goto err;
+- }
+- local->deletion_rate = priv->deletion_rate;
+-
+- ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
+- if (ret == -ENOENT) {
+- gf_msg_debug(this->name, 0,
+- ".shard absent. Nothing to"
+- " delete. Exiting");
+- ret = 0;
+- goto err;
+- } else if (ret < 0) {
+- goto err;
+- }
+-
+- ret = shard_resolve_internal_dir(this, local,
+- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+- if (ret == -ENOENT) {
+- gf_msg_debug(this->name, 0,
+- ".remove_me absent. "
+- "Nothing to delete. Exiting");
+- ret = 0;
+- goto err;
+- } else if (ret < 0) {
+- goto err;
+- }
+-
+- local->fd = fd_anonymous(local->dot_shard_rm_loc.inode);
+- if (!local->fd) {
+- ret = -ENOMEM;
+- goto err;
+- }
+-
+- for (;;) {
+- offset = 0;
+- LOCK(&priv->lock);
+- {
+- if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
+- priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
+- } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
+- priv->bg_del_state = SHARD_BG_DELETION_NONE;
+- done = _gf_true;
+- }
+- }
+- UNLOCK(&priv->lock);
+- if (done)
+- break;
+- while (
+- (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
+- &entries, local->xattr_req, NULL))) {
+- if (ret > 0)
+- ret = 0;
+- list_for_each_entry(entry, &entries.list, list)
+- {
+- offset = entry->d_off;
+-
+- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+- continue;
+-
+- if (!entry->inode) {
+- ret = shard_lookup_marker_entry(this, local, entry);
+- if (ret < 0)
+- continue;
+- }
+- link_inode = inode_link(entry->inode, local->fd->inode,
+- entry->d_name, &entry->d_stat);
+-
+- gf_msg_debug(this->name, 0,
+- "Initiating deletion of "
+- "shards of gfid %s",
+- entry->d_name);
+- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
+- link_inode);
+- inode_unlink(link_inode, local->fd->inode, entry->d_name);
+- inode_unref(link_inode);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, -ret,
+- SHARD_MSG_SHARDS_DELETION_FAILED,
+- "Failed to clean up shards of gfid %s",
+- entry->d_name);
+- continue;
+- }
+- gf_msg(this->name, GF_LOG_INFO, 0,
+- SHARD_MSG_SHARD_DELETION_COMPLETED,
+- "Deleted "
+- "shards of gfid=%s from backend",
+- entry->d_name);
+- }
+- gf_dirent_free(&entries);
+- if (ret)
+- break;
+- }
+- }
+- ret = 0;
+- loc_wipe(&loc);
+- return ret;
+-
+-err:
+- LOCK(&priv->lock);
+- {
+- priv->bg_del_state = SHARD_BG_DELETION_NONE;
+- }
+- UNLOCK(&priv->lock);
+- loc_wipe(&loc);
+- return ret;
+-}
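
shard_delete_shards() walks .remove_me with paginated readdirp calls, resuming each batch from the d_off of the last processed entry and looping until the background-deletion state machine falls back to NONE. The scan shape, stripped of gluster types (read_batch() and process_entry() are hypothetical stand-ins for syncop_readdirp() and shard_delete_shards_of_entry()):

    struct dent {
        long d_off;
        char d_name[256];
    };

    /* Hypothetical stand-ins for syncop_readdirp() and the per-marker
     * deletion performed above. */
    extern int read_batch(long offset, struct dent *out, int max);
    extern void process_entry(const struct dent *e);

    static void
    scan_remove_me(void)
    {
        struct dent batch[128];
        long offset = 0;
        int i, n;

        while ((n = read_batch(offset, batch, 128)) > 0) {
            for (i = 0; i < n; i++) {
                offset = batch[i].d_off; /* resume point for next batch */
                process_entry(&batch[i]);
            }
        }
    }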
+-
+-int
+-shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-{
+- if (op_ret)
+- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+- "Unlock failed. Please check brick logs for "
+- "more details");
+- SHARD_STACK_DESTROY(frame);
+- return 0;
+-}
+-
+-int
+-shard_unlock_inodelk(call_frame_t *frame, xlator_t *this)
+-{
+- loc_t *loc = NULL;
+- call_frame_t *lk_frame = NULL;
+- shard_local_t *local = NULL;
+- shard_local_t *lk_local = NULL;
+- shard_inodelk_t *lock = NULL;
+-
+- local = frame->local;
+- lk_frame = local->inodelk_frame;
+- lk_local = lk_frame->local;
+- local->inodelk_frame = NULL;
+- loc = &local->int_inodelk.loc;
+- lock = &lk_local->int_inodelk;
+- lock->flock.l_type = F_UNLCK;
+-
+- STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK,
+- &lock->flock, NULL);
+- local->int_inodelk.acquired_lock = _gf_false;
+- return 0;
+-}
+-
+-int
+-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, struct iatt *buf,
+- struct iatt *preoldparent, struct iatt *postoldparent,
+- struct iatt *prenewparent, struct iatt *postnewparent,
+- dict_t *xdata);
+-int
+-shard_rename_src_base_file(call_frame_t *frame, xlator_t *this)
+-{
+- int ret = 0;
+- loc_t *dst_loc = NULL;
+- loc_t tmp_loc = {
+- 0,
+- };
+- shard_local_t *local = frame->local;
+-
+- if (local->dst_block_size) {
+- tmp_loc.parent = inode_ref(local->loc2.parent);
+- ret = inode_path(tmp_loc.parent, local->loc2.name,
+- (char **)&tmp_loc.path);
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed"
+- " on pargfid=%s bname=%s",
+- uuid_utoa(tmp_loc.parent->gfid), local->loc2.name);
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- }
+-
+- tmp_loc.name = strrchr(tmp_loc.path, '/');
+- if (tmp_loc.name)
+- tmp_loc.name++;
+- dst_loc = &tmp_loc;
+- } else {
+- dst_loc = &local->loc2;
+- }
+-
+- /* To-Do: Request open-fd count on dst base file */
+- STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc,
+- local->xattr_req);
+- loc_wipe(&tmp_loc);
+- return 0;
+-err:
+- loc_wipe(&tmp_loc);
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+-}
+-
+-int
+-shard_unlink_base_file(call_frame_t *frame, xlator_t *this);
+-
+-int
+-shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie,
+- xlator_t *this, int32_t op_ret,
+- int32_t op_errno, dict_t *dict,
+- dict_t *xdata)
+-{
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+-
+- priv = this->private;
+- local = frame->local;
+- if (op_ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+- "Xattrop on marker file failed "
+- "while performing %s; entry gfid=%s",
+- gf_fop_string(local->fop), local->newloc.name);
+- goto err;
+- }
+-
+- inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode,
+- local->newloc.name);
+-
+- if (local->fop == GF_FOP_UNLINK)
+- shard_unlink_base_file(frame, this);
+- else if (local->fop == GF_FOP_RENAME)
+- shard_rename_src_base_file(frame, this);
+- return 0;
+-err:
+- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
+- return 0;
+-}
+-
+-int
+-shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this)
+-{
+- int op_errno = ENOMEM;
+- uint64_t bs = 0;
+- dict_t *xdata = NULL;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+- xdata = dict_new();
+- if (!xdata)
+- goto err;
+-
+- if (local->fop == GF_FOP_UNLINK)
+- bs = local->block_size;
+- else if (local->fop == GF_FOP_RENAME)
+- bs = local->dst_block_size;
+- SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc,
+- local->prebuf.ia_size, 0, err);
+- STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop,
+- &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL);
+- dict_unref(xdata);
+- return 0;
+-err:
+- if (xdata)
+- dict_unref(xdata);
+- shard_common_failure_unwind(local->fop, frame, -1, op_errno);
+- return 0;
+-}
+-
+-int
+-shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, dict_t *xdata,
+- struct iatt *postparent)
+-{
+- inode_t *linked_inode = NULL;
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+- priv = this->private;
+-
+- if (op_ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+- "Lookup on marker file failed "
+- "while performing %s; entry gfid=%s",
+- gf_fop_string(local->fop), local->newloc.name);
+- goto err;
+- }
+-
+- linked_inode = inode_link(inode, priv->dot_shard_rm_inode,
+- local->newloc.name, buf);
+- inode_unref(local->newloc.inode);
+- local->newloc.inode = linked_inode;
+- shard_set_size_attrs_on_marker_file(frame, this);
+- return 0;
+-err:
+- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
+- return 0;
+-}
+-
+-int
+-shard_lookup_marker_file(call_frame_t *frame, xlator_t *this)
+-{
+- int op_errno = ENOMEM;
+- dict_t *xattr_req = NULL;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- xattr_req = shard_create_gfid_dict(local->xattr_req);
+- if (!xattr_req)
+- goto err;
+-
+- STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req);
+- dict_unref(xattr_req);
+- return 0;
+-err:
+- shard_common_failure_unwind(local->fop, frame, -1, op_errno);
+- return 0;
+-}
+-
+-int
+-shard_create_marker_file_under_remove_me_cbk(
+- call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+- int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent,
+- struct iatt *postparent, dict_t *xdata)
+-{
+- inode_t *linked_inode = NULL;
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+- priv = this->private;
+-
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
+- if (op_ret < 0) {
+- if ((op_errno != EEXIST) && (op_errno != ENODATA)) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+- "Marker file creation "
+- "failed while performing %s; entry gfid=%s",
+- gf_fop_string(local->fop), local->newloc.name);
+- goto err;
+- } else {
+- shard_lookup_marker_file(frame, this);
+- return 0;
+- }
+- }
+-
+- linked_inode = inode_link(inode, priv->dot_shard_rm_inode,
+- local->newloc.name, buf);
+- inode_unref(local->newloc.inode);
+- local->newloc.inode = linked_inode;
+-
+- if (local->fop == GF_FOP_UNLINK)
+- shard_unlink_base_file(frame, this);
+- else if (local->fop == GF_FOP_RENAME)
+- shard_rename_src_base_file(frame, this);
+- return 0;
+-err:
+- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+- return 0;
+-}
+-
+-int
+-shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this,
+- loc_t *loc)
+-{
+- int ret = 0;
+- int op_errno = ENOMEM;
+- uint64_t bs = 0;
+- char g1[64] = {
+- 0,
+- };
+- char g2[64] = {
+- 0,
+- };
+- dict_t *xattr_req = NULL;
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+-
+- priv = this->private;
+- local = frame->local;
+-
+- SHARD_SET_ROOT_FS_ID(frame, local);
+-
+- xattr_req = shard_create_gfid_dict(local->xattr_req);
+- if (!xattr_req)
+- goto err;
+-
+- local->newloc.inode = inode_new(this->itable);
+- local->newloc.parent = inode_ref(priv->dot_shard_rm_inode);
+- ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid),
+- (char **)&local->newloc.path);
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed on "
+- "pargfid=%s bname=%s",
+- uuid_utoa_r(priv->dot_shard_rm_gfid, g1),
+- uuid_utoa_r(loc->inode->gfid, g2));
+- goto err;
+- }
+- local->newloc.name = strrchr(local->newloc.path, '/');
+- if (local->newloc.name)
+- local->newloc.name++;
+-
+- if (local->fop == GF_FOP_UNLINK)
+- bs = local->block_size;
+- else if (local->fop == GF_FOP_RENAME)
+- bs = local->dst_block_size;
+-
+- SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
+- local->prebuf.ia_size, 0, err);
+-
+- STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+- &local->newloc, 0, 0, 0644, xattr_req);
+- dict_unref(xattr_req);
+- return 0;
+-
+-err:
+- if (xattr_req)
+- dict_unref(xattr_req);
+- shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno,
+- NULL, NULL, NULL, NULL, NULL);
+- return 0;
+-}
+-
+-int
+-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+-
+-int
+-shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno,
+- struct iatt *preparent, struct iatt *postparent,
+- dict_t *xdata)
+-{
+- int ret = 0;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- } else {
+- local->preoldparent = *preparent;
+- local->postoldparent = *postparent;
+- if (xdata)
+- local->xattr_rsp = dict_ref(xdata);
+- if (local->cleanup_required)
+- shard_start_background_deletion(this);
+- }
+-
+- if (local->entrylk_frame) {
+- ret = shard_unlock_entrylk(frame, this);
+- if (ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = -ret;
+- }
+- }
+-
+- ret = shard_unlock_inodelk(frame, this);
+- if (ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = -ret;
+- }
+-
+- shard_unlink_cbk(frame, this);
+- return 0;
+-}
+-
+-int
+-shard_unlink_base_file(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = frame->local;
+-
+- /* To-Do: Request open-fd count on base file */
+- STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
+- local->xattr_req);
+- return 0;
+-}
+-
+-int
+-shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-{
+- if (op_ret)
+- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+- "Unlock failed. Please check brick logs for "
+- "more details");
+- SHARD_STACK_DESTROY(frame);
+- return 0;
+-}
+-
+-int
+-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this)
+-{
+- loc_t *loc = NULL;
+- call_frame_t *lk_frame = NULL;
+- shard_local_t *local = NULL;
+- shard_local_t *lk_local = NULL;
+- shard_entrylk_t *lock = NULL;
+-
+- local = frame->local;
+- lk_frame = local->entrylk_frame;
+- lk_local = lk_frame->local;
+- local->entrylk_frame = NULL;
+- lock = &lk_local->int_entrylk;
+- loc = &lock->loc;
+-
+- STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->entrylk, this->name, loc,
+- lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK,
+- NULL);
+- local->int_entrylk.acquired_lock = _gf_false;
+- return 0;
+-}
+-
+-int
+-shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- switch (local->fop) {
+- case GF_FOP_UNLINK:
+- case GF_FOP_RENAME:
+- shard_create_marker_file_under_remove_me(frame, this,
+- &local->int_inodelk.loc);
+- break;
+- default:
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+- "post-entrylk handler not defined. This case should not"
+- " be hit");
+- break;
+- }
+- return 0;
+-}
+-
+-int
+-shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-{
+- call_frame_t *main_frame = NULL;
+- shard_local_t *local = NULL;
+- shard_local_t *main_local = NULL;
+-
+- local = frame->local;
+- main_frame = local->main_frame;
+- main_local = main_frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(main_local->fop, main_frame, op_ret,
+- op_errno);
+- return 0;
+- }
+- main_local->int_entrylk.acquired_lock = _gf_true;
+- shard_post_entrylk_fop_handler(main_frame, this);
+- return 0;
+-}
+-
+-int
+-shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+- uuid_t gfid)
+-{
+- char gfid_str[GF_UUID_BUF_SIZE] = {
+- 0,
+- };
+- shard_local_t *local = NULL;
+- shard_local_t *entrylk_local = NULL;
+- shard_entrylk_t *int_entrylk = NULL;
+- call_frame_t *entrylk_frame = NULL;
+-
+- local = frame->local;
+- entrylk_frame = create_frame(this, this->ctx->pool);
+- if (!entrylk_frame) {
+- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+- "Failed to create new frame "
+- "to lock marker file");
+- goto err;
+- }
+-
+- entrylk_local = mem_get0(this->local_pool);
+- if (!entrylk_local) {
+- STACK_DESTROY(entrylk_frame->root);
+- goto err;
+- }
+-
+- entrylk_frame->local = entrylk_local;
+- entrylk_local->main_frame = frame;
+- int_entrylk = &entrylk_local->int_entrylk;
+-
+- int_entrylk->loc.inode = inode_ref(inode);
+- set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root);
+- local->entrylk_frame = entrylk_frame;
+- gf_uuid_unparse(gfid, gfid_str);
+- int_entrylk->basename = gf_strdup(gfid_str);
+-
+- STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc,
+- int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+- return 0;
+-err:
+- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+- return 0;
+-}
+-
+-int
+-shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
+-
+- priv = this->private;
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+- return 0;
+- }
+-
+- if (local->prebuf.ia_nlink > 1) {
+- gf_msg_debug(this->name, 0,
+- "link count on %s > 1:%d, "
+- "performing rename()/unlink()",
+- local->int_inodelk.loc.path, local->prebuf.ia_nlink);
+- if (local->fop == GF_FOP_RENAME)
+- shard_rename_src_base_file(frame, this);
+- else if (local->fop == GF_FOP_UNLINK)
+- shard_unlink_base_file(frame, this);
+- } else {
+- gf_msg_debug(this->name, 0,
+- "link count on %s = 1, creating "
+- "file under .remove_me",
+- local->int_inodelk.loc.path);
+- local->cleanup_required = _gf_true;
+- shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode,
+- local->prebuf.ia_gfid);
+- }
+- return 0;
+-}
+-
+-int
+-shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- switch (local->fop) {
+- case GF_FOP_UNLINK:
+- case GF_FOP_RENAME:
+- shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
+- shard_post_lookup_base_shard_rm_handler);
+- break;
+- default:
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+- "post-inodelk handler not defined. This case should not"
+- " be hit");
+- break;
+- }
+- return 0;
+-}
+-
+-int
+-shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-{
+- call_frame_t *main_frame = NULL;
+- shard_local_t *local = NULL;
+- shard_local_t *main_local = NULL;
+-
+- local = frame->local;
+- main_frame = local->main_frame;
+- main_local = main_frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(main_local->fop, main_frame, op_ret,
+- op_errno);
+- return 0;
+- }
+- main_local->int_inodelk.acquired_lock = _gf_true;
+- shard_post_inodelk_fop_handler(main_frame, this);
+- return 0;
+-}
+-
+-int
+-shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc)
+-{
+- call_frame_t *lk_frame = NULL;
+- shard_local_t *local = NULL;
+- shard_local_t *lk_local = NULL;
+- shard_inodelk_t *int_inodelk = NULL;
+-
+- local = frame->local;
+- lk_frame = create_frame(this, this->ctx->pool);
+- if (!lk_frame) {
+- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+- "Failed to create new frame "
+- "to lock base shard");
+- goto err;
+- }
+- lk_local = mem_get0(this->local_pool);
+- if (!lk_local) {
+- STACK_DESTROY(lk_frame->root);
+- goto err;
+- }
+-
+- lk_frame->local = lk_local;
+- lk_local->main_frame = frame;
+- int_inodelk = &lk_local->int_inodelk;
+-
+- int_inodelk->flock.l_len = 0;
+- int_inodelk->flock.l_start = 0;
+- int_inodelk->domain = this->name;
+- int_inodelk->flock.l_type = F_WRLCK;
+- loc_copy(&local->int_inodelk.loc, loc);
+- set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root);
+- local->inodelk_frame = lk_frame;
+-
+- STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain,
+- &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL);
+- return 0;
+-err:
+- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+- return 0;
+-}
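
shard_acquire_inodelk() takes a blocking full-file write lock: l_start = 0 with l_len = 0 covers the whole file by POSIX convention, and F_SETLKW waits rather than erroring. Restated in plain POSIX terms as a sketch:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    /* Sketch: the lock shape wound above. l_len == 0 means "to EOF",
     * so start 0 + len 0 spans the entire file. */
    static struct flock
    whole_file_wrlock(void)
    {
        struct flock fl;
        memset(&fl, 0, sizeof(fl));
        fl.l_type = F_WRLCK;
        fl.l_whence = SEEK_SET;
        fl.l_start = 0;
        fl.l_len = 0; /* 0 => extends to end of file */
        return fl;
    }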
+-
+-int
+-shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this)
+-{
+- loc_t *loc = NULL;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+- return 0;
+- }
+- if (local->fop == GF_FOP_UNLINK)
+- loc = &local->loc;
+- else if (local->fop == GF_FOP_RENAME)
+- loc = &local->loc2;
+- shard_acquire_inodelk(frame, this, loc);
+- return 0;
+-}
+-
+-int
+-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+- shard_post_resolve_fop_handler_t handler,
+- shard_internal_dir_type_t type);
+-int
+-shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+- return 0;
+- }
+- shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler,
+- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+- return 0;
+-}
+-
+-void
+-shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this)
+-{
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+-
+- priv = this->private;
+- local = frame->local;
+-
+- local->dot_shard_rm_loc.inode = inode_find(this->itable,
+- priv->dot_shard_rm_gfid);
+- if (!local->dot_shard_rm_loc.inode) {
+- local->dot_shard_loc.inode = inode_find(this->itable,
+- priv->dot_shard_gfid);
+- if (!local->dot_shard_loc.inode) {
+- shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler,
+- SHARD_INTERNAL_DIR_DOT_SHARD);
+- } else {
+- local->post_res_handler = shard_pre_mkdir_rm_handler;
+- shard_refresh_internal_dir(frame, this,
+- SHARD_INTERNAL_DIR_DOT_SHARD);
+- }
+- } else {
+- local->post_res_handler = shard_post_mkdir_rm_handler;
+- shard_refresh_internal_dir(frame, this,
+- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+- }
+-}
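
shard_begin_rm_resolution() encodes a strict ordering: .remove_me is only resolved or created after .shard is known; whichever directory is already in the inode table is refreshed, and the rest are created on demand. The branch structure, condensed with hypothetical shorthands for inode_find()/shard_refresh_internal_dir()/shard_mkdir_internal_dir():

    enum sdir { DOT_SHARD, REMOVE_ME };

    /* Hypothetical shorthands for the calls made above. */
    extern int itable_has(enum sdir d);
    extern void refresh(enum sdir d, void (*handler)(void));
    extern void mkdir_internal(enum sdir d, void (*handler)(void));
    extern void pre_mkdir_rm_handler(void);
    extern void post_mkdir_rm_handler(void);

    static void
    begin_rm_resolution(void)
    {
        if (itable_has(REMOVE_ME))
            refresh(REMOVE_ME, post_mkdir_rm_handler); /* already known */
        else if (itable_has(DOT_SHARD))
            refresh(DOT_SHARD, pre_mkdir_rm_handler); /* .shard known only */
        else
            mkdir_internal(DOT_SHARD, pre_mkdir_rm_handler); /* neither */
    }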
+-
+-int
+-shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+- dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+- if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size from inode ctx of %s",
+- uuid_utoa(loc->inode->gfid));
+- goto err;
+- }
+-
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+- return 0;
+- }
+-
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
+-
+- frame->local = local;
+-
+- loc_copy(&local->loc, loc);
+- local->xflag = xflag;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- local->block_size = block_size;
+- local->resolver_base_inode = loc->inode;
+- local->fop = GF_FOP_UNLINK;
+- if (!this->itable)
+- this->itable = (local->loc.inode)->table;
+-
+- local->resolve_not = _gf_true;
+- shard_begin_rm_resolution(frame, this);
+- return 0;
+-err:
+- shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM);
+- return 0;
+-}
+-
+-int
+-shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_rename_cbk(frame, this);
+- return 0;
+-}
+-
+-int
+-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, struct iatt *buf,
+- struct iatt *preoldparent, struct iatt *postoldparent,
+- struct iatt *prenewparent, struct iatt *postnewparent,
+- dict_t *xdata)
+-{
+- int ret = 0;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto err;
+- }
+- /* Set ctx->refresh to TRUE to force a lookup on disk when
+- * shard_lookup_base_file() is called next, so that the hard link
+- * count in ctx is refreshed. Note that this applies only when the
+- * rename dst already exists and is sharded.
+- */
+- if ((local->dst_block_size) && (!local->cleanup_required))
+- shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
+-
+- local->prebuf = *buf;
+- local->preoldparent = *preoldparent;
+- local->postoldparent = *postoldparent;
+- local->prenewparent = *prenewparent;
+- local->postnewparent = *postnewparent;
+- if (xdata)
+- local->xattr_rsp = dict_ref(xdata);
+-
+- if (local->dst_block_size) {
+- if (local->entrylk_frame) {
+- ret = shard_unlock_entrylk(frame, this);
+- if (ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = -ret;
+- }
+- }
+-
+- ret = shard_unlock_inodelk(frame, this);
+- if (ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = -ret;
+- goto err;
+- }
+- if (local->cleanup_required)
+- shard_start_background_deletion(this);
+- }
+-
+- /* Now the base file of src, if sharded, is looked up to gather ia_size
+- * and ia_blocks.*/
+- if (local->block_size) {
+- local->tmp_loc.inode = inode_new(this->itable);
+- gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
+- shard_lookup_base_file(frame, this, &local->tmp_loc,
+- shard_post_rename_lookup_handler);
+- } else {
+- shard_rename_cbk(frame, this);
+- }
+- return 0;
+-err:
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+-}
+-
+-int
+-shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
+-
+- /* Save dst base file attributes into postbuf so the information is not
+- * lost when it is overwritten after lookup on base file of src in
+- * shard_lookup_base_file_cbk().
+- */
+- local->postbuf = local->prebuf;
+- shard_rename_src_base_file(frame, this);
+- return 0;
+-}
+-
+-int
+-shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+- dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- uint64_t dst_block_size = 0;
+- shard_local_t *local = NULL;
+-
+- if (IA_ISDIR(oldloc->inode->ia_type)) {
+- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+- return 0;
+- }
+-
+- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
+- if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size from inode ctx of %s",
+- uuid_utoa(oldloc->inode->gfid));
+- goto err;
+- }
+-
+- if (newloc->inode)
+- ret = shard_inode_ctx_get_block_size(newloc->inode, this,
+- &dst_block_size);
+-
+- /* The following stack_wind covers the case where:
+- * a. the src file is not sharded and dst doesn't exist, OR
+- * b. the src and dst both exist but are not sharded.
+- */
+- if (((!block_size) && (!dst_block_size)) ||
+- frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+- return 0;
+- }
+-
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
+-
+- frame->local = local;
+- loc_copy(&local->loc, oldloc);
+- loc_copy(&local->loc2, newloc);
+- local->resolver_base_inode = newloc->inode;
+- local->fop = GF_FOP_RENAME;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
+-
+- local->block_size = block_size;
+- local->dst_block_size = dst_block_size;
+- if (!this->itable)
+- this->itable = (local->loc.inode)->table;
+- local->resolve_not = _gf_true;
+-
+- /* The following if-block covers the case where the dst file exists
+- * and is sharded.
+- */
+- if (local->dst_block_size) {
+- shard_begin_rm_resolution(frame, this);
+- } else {
+- /* The following block covers the case where the dst either doesn't
+- * exist or is NOT sharded but the src is sharded. In this case, shard
+- * xlator would go ahead and rename src to dst. Once done, it would also
+- * lookup the base shard of src to get the ia_size and ia_blocks xattr
+- * values.
+- */
+- shard_rename_src_base_file(frame, this);
+- }
+- return 0;
+-
+-err:
+- shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM);
+- return 0;
+-}
+-
+-int
+-shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+- struct iatt *stbuf, struct iatt *preparent,
+- struct iatt *postparent, dict_t *xdata)
+-{
+- int ret = -1;
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (op_ret == -1)
+- goto unwind;
+-
+- ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size,
+- SHARD_ALL_MASK);
+- if (ret)
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
+- "Failed to set inode "
+- "ctx for %s",
+- uuid_utoa(inode->gfid));
+-
+-unwind:
+- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+- preparent, postparent, xdata);
+- return 0;
+-}
+-
+-int
+-shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+-{
+- shard_priv_t *priv = NULL;
+- shard_local_t *local = NULL;
+-
+- priv = this->private;
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
+-
+- frame->local = local;
+- local->block_size = priv->block_size;
+-
+- if (!__is_gsyncd_on_shard_dir(frame, loc)) {
+- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
+- }
+-
+- STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+- xdata);
+- return 0;
+-err:
+- shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM);
+- return 0;
+-}
+-
+-int
+-shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+-{
+- /* To-Do: Handle open with O_TRUNC under locks */
+- SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+- return 0;
+-}
+-
+-int
+-shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+- fd_t *fd, dict_t *xdata)
+-{
+- STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+- return 0;
+-}
+-
+-int
+-shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, struct iovec *vector,
+- int32_t count, struct iatt *stbuf, struct iobref *iobref,
+- dict_t *xdata)
+-{
+- int i = 0;
+- int call_count = 0;
+- void *address = NULL;
+- uint64_t block_num = 0;
+- off_t off = 0;
+- struct iovec vec = {
+- 0,
+- };
+- shard_local_t *local = NULL;
+- fd_t *anon_fd = cookie;
+- shard_inode_ctx_t *ctx = NULL;
+-
+- local = frame->local;
+-
+- /* If shard has already seen a failure here before, there is no point
+- * in aggregating subsequent reads, so just go to out.
+- */
+- if (local->op_ret < 0)
+- goto out;
+-
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto out;
+- }
+-
+- if (local->op_ret >= 0)
+- local->op_ret += op_ret;
+-
+- shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+- block_num = ctx->block_num;
+-
+- if (block_num == local->first_block) {
+- address = local->iobuf->ptr;
+- } else {
+- /* else
+- * address to start writing to = beginning of buffer +
+- * number of bytes until end of first block +
+- * + block_size times number of blocks
+- * between the current block and the first
+- */
+- address = (char *)local->iobuf->ptr +
+- (local->block_size - (local->offset % local->block_size)) +
+- ((block_num - local->first_block - 1) * local->block_size);
+- }
+-
+- for (i = 0; i < count; i++) {
+- address = (char *)address + off;
+- memcpy(address, vector[i].iov_base, vector[i].iov_len);
+- off += vector[i].iov_len;
+- }
+-
+-out:
+- if (anon_fd)
+- fd_unref(anon_fd);
+- call_count = shard_call_count_return(frame);
+- if (call_count == 0) {
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+- local->op_errno);
+- } else {
+- if (xdata)
+- local->xattr_rsp = dict_ref(xdata);
+- vec.iov_base = local->iobuf->ptr;
+- vec.iov_len = local->total_size;
+- local->op_ret = local->total_size;
+- SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno,
+- &vec, 1, &local->prebuf, local->iobref,
+- local->xattr_rsp);
+- return 0;
+- }
+- }
+-
+- return 0;
+-}
+-
+-int
+-shard_readv_do(call_frame_t *frame, xlator_t *this)
+-{
+- int i = 0;
+- int call_count = 0;
+- int last_block = 0;
+- int cur_block = 0;
+- off_t orig_offset = 0;
+- off_t shard_offset = 0;
+- size_t read_size = 0;
+- size_t remaining_size = 0;
+- fd_t *fd = NULL;
+- fd_t *anon_fd = NULL;
+- shard_local_t *local = NULL;
+- gf_boolean_t wind_failed = _gf_false;
+-
+- local = frame->local;
+- fd = local->fd;
+-
+- orig_offset = local->offset;
+- cur_block = local->first_block;
+- last_block = local->last_block;
+- remaining_size = local->total_size;
+- local->call_count = call_count = local->num_blocks;
+-
+- SHARD_SET_ROOT_FS_ID(frame, local);
+-
+- if (fd->flags & O_DIRECT)
+- local->flags = O_DIRECT;
+-
+- while (cur_block <= last_block) {
+- if (wind_failed) {
+- shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL,
+- 0, NULL, NULL, NULL);
+- goto next;
+- }
+-
+- shard_offset = orig_offset % local->block_size;
+- read_size = local->block_size - shard_offset;
+- if (read_size > remaining_size)
+- read_size = remaining_size;
+-
+- remaining_size -= read_size;
+-
+- if (cur_block == 0) {
+- anon_fd = fd_ref(fd);
+- } else {
+- anon_fd = fd_anonymous(local->inode_list[i]);
+- if (!anon_fd) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- wind_failed = _gf_true;
+- shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1,
+- ENOMEM, NULL, 0, NULL, NULL, NULL);
+- goto next;
+- }
+- }
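++/* Post-lookup handler for the base file during unlink/rename: with
++ * more than one hard link the fop proceeds in place; with the last
++ * link the file is queued for cleanup under .shard/.remove_me, for
++ * which an entrylk on its gfid is acquired first.
++ */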
++int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
+
+- STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->readv, anon_fd, read_size,
+- shard_offset, local->flags, local->xattr_req);
++ priv = this->private;
++ local = frame->local;
+
+- orig_offset += read_size;
+- next:
+- cur_block++;
+- i++;
+- call_count--;
+- }
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+ return 0;
++ }
++
++ if (local->prebuf.ia_nlink > 1) {
++ gf_msg_debug(this->name, 0, "link count on %s > 1:%d, "
++ "performing rename()/unlink()",
++ local->int_inodelk.loc.path, local->prebuf.ia_nlink);
++ if (local->fop == GF_FOP_RENAME)
++ shard_rename_src_base_file(frame, this);
++ else if (local->fop == GF_FOP_UNLINK)
++ shard_unlink_base_file(frame, this);
++ } else {
++ gf_msg_debug(this->name, 0, "link count on %s = 1, creating "
++ "file under .remove_me",
++ local->int_inodelk.loc.path);
++ local->cleanup_required = _gf_true;
++ shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode,
++ local->prebuf.ia_gfid);
++ }
++ return 0;
++}
++
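++/* Resumes the fop once the inodelk on the base file is held: the base
++ * file is looked up afresh so its link count is current before the
++ * unlink/rename decision is made.
++ */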
++int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ switch (local->fop) {
++ case GF_FOP_UNLINK:
++ case GF_FOP_RENAME:
++ shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
++ shard_post_lookup_base_shard_rm_handler);
++ break;
++ default:
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++ "post-inodelk handler not defined. This case should not"
++ " be hit");
++ break;
++ }
++ return 0;
++}
++
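++/* Callback for the inodelk wound from shard_acquire_inodelk(); on
++ * success, records that the lock is held and resumes the fop on the
++ * main frame.
++ */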
++int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++ call_frame_t *main_frame = NULL;
++ shard_local_t *local = NULL;
++ shard_local_t *main_local = NULL;
++
++ local = frame->local;
++ main_frame = local->main_frame;
++ main_local = main_frame->local;
++
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno);
++ return 0;
++ }
++ main_local->int_inodelk.acquired_lock = _gf_true;
++ shard_post_inodelk_fop_handler(main_frame, this);
++ return 0;
++}
++
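++/* Acquires a whole-file write inodelk on the given loc from a
++ * separate frame, so the lock outlives this call and is released
++ * only after the unlink/rename completes.
++ */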
++int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) {
++ call_frame_t *lk_frame = NULL;
++ shard_local_t *local = NULL;
++ shard_local_t *lk_local = NULL;
++ shard_inodelk_t *int_inodelk = NULL;
++
++ local = frame->local;
++ lk_frame = create_frame(this, this->ctx->pool);
++ if (!lk_frame) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++ "Failed to create new frame "
++ "to lock base shard");
++ goto err;
++ }
++ lk_local = mem_get0(this->local_pool);
++ if (!lk_local) {
++ STACK_DESTROY(lk_frame->root);
++ goto err;
++ }
++
++ lk_frame->local = lk_local;
++ lk_local->main_frame = frame;
++ int_inodelk = &lk_local->int_inodelk;
++
++ int_inodelk->flock.l_len = 0;
++ int_inodelk->flock.l_start = 0;
++ int_inodelk->domain = this->name;
++ int_inodelk->flock.l_type = F_WRLCK;
++ loc_copy(&local->int_inodelk.loc, loc);
++ set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root);
++ local->inodelk_frame = lk_frame;
++
++ STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain,
++ &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL);
++ return 0;
++err:
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, struct iatt *preparent,
+- struct iatt *postparent, dict_t *xdata)
+-{
+- int shard_block_num = (long)cookie;
+- int call_count = 0;
+- shard_local_t *local = NULL;
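++/* Runs once .shard/.remove_me is in place (created or refreshed):
++ * picks the loc being removed (src for unlink, dst for rename) and
++ * takes the inodelk on it.
++ */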
++int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) {
++ loc_t *loc = NULL;
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- if (op_ret < 0) {
+- if (op_errno == EEXIST) {
+- LOCK(&frame->lock);
+- {
+- local->eexist_count++;
+- }
+- UNLOCK(&frame->lock);
+- } else {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- }
+- gf_msg_debug(this->name, 0,
+- "mknod of shard %d "
+- "failed: %s",
+- shard_block_num, strerror(op_errno));
+- goto done;
+- }
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
++ return 0;
++ }
++ if (local->fop == GF_FOP_UNLINK)
++ loc = &local->loc;
++ else if (local->fop == GF_FOP_RENAME)
++ loc = &local->loc2;
++ shard_acquire_inodelk(frame, this, loc);
++ return 0;
++}
+
+- shard_link_block_inode(local, shard_block_num, inode, buf);
++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
++ shard_post_resolve_fop_handler_t handler,
++ shard_internal_dir_type_t type);
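++/* Runs once .shard is resolved; proceeds to create
++ * .shard/.remove_me before the removal sequence continues.
++ */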
++int shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+-done:
+- call_count = shard_call_count_return(frame);
+- if (call_count == 0) {
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
+- local->create_count = 0;
+- local->post_mknod_handler(frame, this);
+- }
++ local = frame->local;
+
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+ return 0;
++ }
++ shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler,
++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++ return 0;
+ }
+
+-int
+-shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+- shard_post_mknod_fop_handler_t post_mknod_handler)
+-{
+- int i = 0;
+- int shard_idx_iter = 0;
+- int last_block = 0;
+- int ret = 0;
+- int call_count = 0;
+- char path[PATH_MAX] = {
+- 0,
+- };
+- mode_t mode = 0;
+- char *bname = NULL;
+- shard_priv_t *priv = NULL;
+- shard_inode_ctx_t ctx_tmp = {
+- 0,
+- };
+- shard_local_t *local = NULL;
+- gf_boolean_t wind_failed = _gf_false;
+- fd_t *fd = NULL;
+- loc_t loc = {
+- 0,
+- };
+- dict_t *xattr_req = NULL;
+-
+- local = frame->local;
+- priv = this->private;
+- fd = local->fd;
+- shard_idx_iter = local->first_block;
+- last_block = local->last_block;
+- call_count = local->call_count = local->create_count;
+- local->post_mknod_handler = post_mknod_handler;
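++/* Entry point of the unlink/rename resolution sequence: ensures both
++ * .shard and .shard/.remove_me are resolved in the inode table,
++ * creating or refreshing them as needed.
++ */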
++void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) {
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
+
+- SHARD_SET_ROOT_FS_ID(frame, local);
++ priv = this->private;
++ local = frame->local;
+
+- ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get inode "
+- "ctx for %s",
+- uuid_utoa(fd->inode->gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- }
+- mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
++ local->dot_shard_rm_loc.inode =
++ inode_find(this->itable, priv->dot_shard_rm_gfid);
++ if (!local->dot_shard_rm_loc.inode) {
++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++ if (!local->dot_shard_loc.inode) {
++ shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler,
++ SHARD_INTERNAL_DIR_DOT_SHARD);
++ } else {
++ local->post_res_handler = shard_pre_mkdir_rm_handler;
++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++ }
++ } else {
++ local->post_res_handler = shard_post_mkdir_rm_handler;
++ shard_refresh_internal_dir(frame, this,
++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++ }
++}
++
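++/* unlink fop: non-sharded files and gsyncd traffic are wound through
++ * unmodified; sharded files first go through the .remove_me
++ * resolution so their shards can be deleted in the background.
++ */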
++int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
++ dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
++
++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++ if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size from inode ctx of %s",
++ uuid_utoa(loc->inode->gfid));
++ goto err;
++ }
++
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
++ return 0;
++ }
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
++
++ frame->local = local;
++
++ loc_copy(&local->loc, loc);
++ local->xflag = xflag;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ local->block_size = block_size;
++ local->resolver_base_inode = loc->inode;
++ local->fop = GF_FOP_UNLINK;
++ if (!this->itable)
++ this->itable = (local->loc.inode)->table;
++
++ local->resolve_not = _gf_true;
++ shard_begin_rm_resolution(frame, this);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM);
++ return 0;
++}
+
+- while (shard_idx_iter <= last_block) {
+- if (local->inode_list[i]) {
+- shard_idx_iter++;
+- i++;
+- continue;
+- }
++int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) {
++ shard_rename_cbk(frame, this);
++ return 0;
++}
+
+- if (wind_failed) {
+- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+- goto next;
+- }
++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iatt *buf,
++ struct iatt *preoldparent, struct iatt *postoldparent,
++ struct iatt *prenewparent, struct iatt *postnewparent,
++ dict_t *xdata) {
++ int ret = 0;
++ shard_local_t *local = NULL;
+
+- shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
+- sizeof(path));
+-
+- xattr_req = shard_create_gfid_dict(local->xattr_req);
+- if (!xattr_req) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- wind_failed = _gf_true;
+- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+- goto next;
+- }
++ local = frame->local;
+
+- bname = strrchr(path, '/') + 1;
+- loc.inode = inode_new(this->itable);
+- loc.parent = inode_ref(priv->dot_shard_inode);
+- ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+- if (ret < 0 || !(loc.inode)) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed"
+- "on %s, base file gfid = %s",
+- bname, uuid_utoa(fd->inode->gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- wind_failed = _gf_true;
+- loc_wipe(&loc);
+- dict_unref(xattr_req);
+- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+- goto next;
+- }
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto err;
++ }
++ /* Set ctx->refresh to TRUE to force a lookup on disk when
++ * shard_lookup_base_file() is called next to refresh the hard link
++ * count in ctx. Note that this is applicable only to the case where
++ * the rename dst is already existent and sharded.
++ */
++ if ((local->dst_block_size) && (!local->cleanup_required))
++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
++
++ local->prebuf = *buf;
++ local->preoldparent = *preoldparent;
++ local->postoldparent = *postoldparent;
++ local->prenewparent = *prenewparent;
++ local->postnewparent = *postnewparent;
++ if (xdata)
++ local->xattr_rsp = dict_ref(xdata);
+
+- loc.name = strrchr(loc.path, '/');
+- if (loc.name)
+- loc.name++;
+-
+- STACK_WIND_COOKIE(frame, shard_common_mknod_cbk,
+- (void *)(long)shard_idx_iter, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->mknod, &loc, mode,
+- ctx_tmp.stat.ia_rdev, 0, xattr_req);
+- loc_wipe(&loc);
+- dict_unref(xattr_req);
+-
+- next:
+- shard_idx_iter++;
+- i++;
+- if (!--call_count)
+- break;
++ if (local->dst_block_size) {
++ if (local->entrylk_frame) {
++ ret = shard_unlock_entrylk(frame, this);
++ if (ret < 0) {
++ local->op_ret = -1;
++ local->op_errno = -ret;
++ }
+ }
+
+- return 0;
++ ret = shard_unlock_inodelk(frame, this);
++ if (ret < 0) {
++ local->op_ret = -1;
++ local->op_errno = -ret;
++ goto err;
++ }
++ if (local->cleanup_required)
++ shard_start_background_deletion(this);
++ }
++
++ /* Now the base file of src, if sharded, is looked up to gather ia_size
++ * and ia_blocks.*/
++ if (local->block_size) {
++ local->tmp_loc.inode = inode_new(this->itable);
++ gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
++ shard_lookup_base_file(frame, this, &local->tmp_loc,
++ shard_post_rename_lookup_handler);
++ } else {
++ shard_rename_cbk(frame, this);
++ }
++ return 0;
+ err:
+- /*
+- * This block is for handling failure in shard_inode_ctx_get_all().
+- * Failures in the while-loop are handled within the loop.
+- */
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
+- post_mknod_handler(frame, this);
+- return 0;
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
+ }
+
+-int
+-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this);
+-
+-int
+-shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
++int shard_post_lookup_dst_base_file_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- if (local->create_count) {
+- shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler);
+- } else {
+- shard_readv_do(frame, this);
+- }
++ local = frame->local;
+
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
+ return 0;
++ }
++
++ /* Save dst base file attributes into postbuf so the information is not
++ * lost when it is overwritten after lookup on base file of src in
++ * shard_lookup_base_file_cbk().
++ */
++ local->postbuf = local->prebuf;
++ shard_rename_src_base_file(frame, this);
++ return 0;
++}
++
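++/* rename fop: directories and non-sharded src/dst pairs are wound
++ * through unmodified. A sharded dst must go through the .remove_me
++ * resolution before being overwritten; otherwise the rename is wound
++ * directly and the src base shard is looked up afterwards for its
++ * ia_size and ia_blocks.
++ */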
++int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
++ loc_t *newloc, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ uint64_t dst_block_size = 0;
++ shard_local_t *local = NULL;
++
++ if (IA_ISDIR(oldloc->inode->ia_type)) {
++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
++ return 0;
++ }
++
++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
++ if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size from inode ctx of %s",
++ uuid_utoa(oldloc->inode->gfid));
++ goto err;
++ }
++
++ if (newloc->inode)
++ ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size);
++
++ /* The following stack_wind covers the case where:
++ * a. the src file is not sharded and dst doesn't exist, OR
++ * b. the src and dst both exist but are not sharded.
++ */
++ if (((!block_size) && (!dst_block_size)) ||
++ frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
++ return 0;
++ }
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
++
++ frame->local = local;
++ loc_copy(&local->loc, oldloc);
++ loc_copy(&local->loc2, newloc);
++ local->resolver_base_inode = newloc->inode;
++ local->fop = GF_FOP_RENAME;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++
++ local->block_size = block_size;
++ local->dst_block_size = dst_block_size;
++ if (!this->itable)
++ this->itable = (local->loc.inode)->table;
++ local->resolve_not = _gf_true;
++
++ /* The following if-block covers the case where the dst file exists
++ * and is sharded.
++ */
++ if (local->dst_block_size) {
++ shard_begin_rm_resolution(frame, this);
++ } else {
++ /* The following block covers the case where the dst either doesn't
++ * exist or is NOT sharded but the src is sharded. In this case, shard
++ * xlator would go ahead and rename src to dst. Once done, it would also
++ * lookup the base shard of src to get the ia_size and ia_blocks xattr
++ * values.
++ */
++ shard_rename_src_base_file(frame, this);
++ }
++ return 0;
++
++err:
++ shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
++ struct iatt *stbuf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata) {
++ int ret = -1;
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
++ if (op_ret == -1)
++ goto unwind;
+
+- if (!local->eexist_count) {
+- shard_readv_do(frame, this);
+- } else {
+- local->call_count = local->eexist_count;
+- shard_common_lookup_shards(frame, this, local->loc.inode,
+- shard_post_lookup_shards_readv_handler);
+- }
+- return 0;
++ ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size,
++ SHARD_ALL_MASK);
++ if (ret)
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
++ "Failed to set inode "
++ "ctx for %s",
++ uuid_utoa(inode->gfid));
++
++unwind:
++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
++ preparent, postparent, xdata);
++ return 0;
+ }
+
+-int
+-shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
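++/* create fop: unless gsyncd is creating directly under .shard, the
++ * shard block size is recorded for the new file via
++ * SHARD_INODE_CREATE_INIT() before winding down.
++ */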
++int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) {
++ shard_priv_t *priv = NULL;
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ priv = this->private;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- if (local->op_ret < 0) {
+- if (local->op_errno != ENOENT) {
+- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- } else {
+- struct iovec vec = {
+- 0,
+- };
+-
+- vec.iov_base = local->iobuf->ptr;
+- vec.iov_len = local->total_size;
+- local->op_ret = local->total_size;
+- SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1,
+- &local->prebuf, local->iobref, NULL);
+- return 0;
+- }
+- }
++ frame->local = local;
++ local->block_size = priv->block_size;
+
+- if (local->call_count) {
+- shard_common_lookup_shards(frame, this, local->resolver_base_inode,
+- shard_post_lookup_shards_readv_handler);
+- } else {
+- shard_readv_do(frame, this);
+- }
++ if (!__is_gsyncd_on_shard_dir(frame, loc)) {
++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
++ }
+
+- return 0;
+-}
++ STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
++ xdata);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM);
++ return 0;
++}
++
++int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) {
++ /* To-Do: Handle open with O_TRUNC under locks */
++ SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
++ return 0;
++}
++
++int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++ fd_t *fd, dict_t *xdata) {
++ STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
++ return 0;
++}
++
++int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, struct iovec *vector,
++ int32_t count, struct iatt *stbuf, struct iobref *iobref,
++ dict_t *xdata) {
++ int i = 0;
++ int call_count = 0;
++ void *address = NULL;
++ uint64_t block_num = 0;
++ off_t off = 0;
++ struct iovec vec = {
++ 0,
++ };
++ shard_local_t *local = NULL;
++ fd_t *anon_fd = cookie;
++ shard_inode_ctx_t *ctx = NULL;
++
++ local = frame->local;
++
++ /* If shard has already seen a failure here before, there is no point
++ * in aggregating subsequent reads, so just go to out.
++ */
++ if (local->op_ret < 0)
++ goto out;
++
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto out;
++ }
++
++ if (local->op_ret >= 0)
++ local->op_ret += op_ret;
+
+-int
+-shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this)
+-{
+- int ret = 0;
+- struct iobuf *iobuf = NULL;
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
++ shard_inode_ctx_get(anon_fd->inode, this, &ctx);
++ block_num = ctx->block_num;
++
++ if (block_num == local->first_block) {
++ address = local->iobuf->ptr;
++ } else {
++    /* address to start writing to = beginning of buffer +
++     *                    number of bytes until end of first block +
++     *                    block_size times number of blocks
++     *                    between the current block and the first
++     */
++ address = (char *)local->iobuf->ptr +
++ (local->block_size - (local->offset % local->block_size)) +
++ ((block_num - local->first_block - 1) * local->block_size);
++ }
+
+- priv = this->private;
+- local = frame->local;
++ for (i = 0; i < count; i++) {
++ address = (char *)address + off;
++ memcpy(address, vector[i].iov_base, vector[i].iov_len);
++ off += vector[i].iov_len;
++ }
+
++out:
++ if (anon_fd)
++ fd_unref(anon_fd);
++ call_count = shard_call_count_return(frame);
++ if (call_count == 0) {
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
+ if (local->op_ret < 0) {
+- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+- local->op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++ local->op_errno);
++ } else {
++ if (xdata)
++ local->xattr_rsp = dict_ref(xdata);
++ vec.iov_base = local->iobuf->ptr;
++ vec.iov_len = local->total_size;
++ local->op_ret = local->total_size;
++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1,
++ &local->prebuf, local->iobref, local->xattr_rsp);
++ return 0;
++ }
++ }
++
++ return 0;
++}
++
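++/* Winds one readv per participating shard: block 0 is read through
++ * the original fd, every other block through an anonymous fd on its
++ * resolved inode; shard_readv_do_cbk() stitches the replies into
++ * local->iobuf.
++ */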
++int shard_readv_do(call_frame_t *frame, xlator_t *this) {
++ int i = 0;
++ int call_count = 0;
++ int last_block = 0;
++ int cur_block = 0;
++ off_t orig_offset = 0;
++ off_t shard_offset = 0;
++ size_t read_size = 0;
++ size_t remaining_size = 0;
++ fd_t *fd = NULL;
++ fd_t *anon_fd = NULL;
++ shard_local_t *local = NULL;
++ gf_boolean_t wind_failed = _gf_false;
++
++ local = frame->local;
++ fd = local->fd;
++
++ orig_offset = local->offset;
++ cur_block = local->first_block;
++ last_block = local->last_block;
++ remaining_size = local->total_size;
++ local->call_count = call_count = local->num_blocks;
++
++ SHARD_SET_ROOT_FS_ID(frame, local);
++
++ if (fd->flags & O_DIRECT)
++ local->flags = O_DIRECT;
++
++ while (cur_block <= last_block) {
++ if (wind_failed) {
++ shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0,
++ NULL, NULL, NULL);
++ goto next;
++ }
++
++ shard_offset = orig_offset % local->block_size;
++ read_size = local->block_size - shard_offset;
++ if (read_size > remaining_size)
++ read_size = remaining_size;
++
++ remaining_size -= read_size;
++
++ if (cur_block == 0) {
++ anon_fd = fd_ref(fd);
++ } else {
++ anon_fd = fd_anonymous(local->inode_list[i]);
++ if (!anon_fd) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ wind_failed = _gf_true;
++ shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL,
++ 0, NULL, NULL, NULL);
++ goto next;
++ }
+ }
+
+- if (local->offset >= local->prebuf.ia_size) {
+- /* If the read is being performed past the end of the file,
+- * unwind the FOP with 0 bytes read as status.
+- */
+- struct iovec vec = {
+- 0,
+- };
+-
+- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size);
+- if (!iobuf)
+- goto err;
+-
+- vec.iov_base = iobuf->ptr;
+- vec.iov_len = 0;
+- local->iobref = iobref_new();
+- iobref_add(local->iobref, iobuf);
+- iobuf_unref(iobuf);
+-
+- SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf,
+- local->iobref, NULL);
+- return 0;
+- }
++ STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readv, anon_fd, read_size,
++ shard_offset, local->flags, local->xattr_req);
++
++ orig_offset += read_size;
++ next:
++ cur_block++;
++ i++;
++ call_count--;
++ }
++ return 0;
++}
+
+- local->first_block = get_lowest_block(local->offset, local->block_size);
++int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, inode_t *inode,
++ struct iatt *buf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata) {
++ int shard_block_num = (long)cookie;
++ int call_count = 0;
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ if (op_ret < 0) {
++ if (op_errno == EEXIST) {
++ LOCK(&frame->lock);
++ { local->eexist_count++; }
++ UNLOCK(&frame->lock);
++ } else {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ }
++ gf_msg_debug(this->name, 0, "mknod of shard %d "
++ "failed: %s",
++ shard_block_num, strerror(op_errno));
++ goto done;
++ }
+
+- local->total_size = local->req_size;
++ shard_link_block_inode(local, shard_block_num, inode, buf);
+
+- local->last_block = get_highest_block(local->offset, local->total_size,
+- local->block_size);
++done:
++ call_count = shard_call_count_return(frame);
++ if (call_count == 0) {
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++ local->create_count = 0;
++ local->post_mknod_handler(frame, this);
++ }
++
++ return 0;
++}
++
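++/* Creates the still-missing shards via mknod under .shard, skipping
++ * blocks whose inodes are already resolved; EEXIST from a racing
++ * client is tolerated and counted so those shards are looked up
++ * instead.
++ */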
++int shard_common_resume_mknod(
++ call_frame_t *frame, xlator_t *this,
++ shard_post_mknod_fop_handler_t post_mknod_handler) {
++ int i = 0;
++ int shard_idx_iter = 0;
++ int last_block = 0;
++ int ret = 0;
++ int call_count = 0;
++ char path[PATH_MAX] = {
++ 0,
++ };
++ mode_t mode = 0;
++ char *bname = NULL;
++ shard_priv_t *priv = NULL;
++ shard_inode_ctx_t ctx_tmp = {
++ 0,
++ };
++ shard_local_t *local = NULL;
++ gf_boolean_t wind_failed = _gf_false;
++ fd_t *fd = NULL;
++ loc_t loc = {
++ 0,
++ };
++ dict_t *xattr_req = NULL;
++
++ local = frame->local;
++ priv = this->private;
++ fd = local->fd;
++ shard_idx_iter = local->first_block;
++ last_block = local->last_block;
++ call_count = local->call_count = local->create_count;
++ local->post_mknod_handler = post_mknod_handler;
++
++ SHARD_SET_ROOT_FS_ID(frame, local);
++
++ ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get inode "
++ "ctx for %s",
++ uuid_utoa(fd->inode->gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
+
+- local->num_blocks = local->last_block - local->first_block + 1;
+- local->resolver_base_inode = local->loc.inode;
++ while (shard_idx_iter <= last_block) {
++ if (local->inode_list[i]) {
++ shard_idx_iter++;
++ i++;
++ continue;
++ }
+
+- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+- gf_shard_mt_inode_list);
+- if (!local->inode_list)
+- goto err;
++ if (wind_failed) {
++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
++ ENOMEM, NULL, NULL, NULL, NULL, NULL);
++ goto next;
++ }
+
+- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size);
+- if (!iobuf)
+- goto err;
++ shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
++ sizeof(path));
+
+- local->iobref = iobref_new();
+- if (!local->iobref) {
+- iobuf_unref(iobuf);
+- goto err;
++ xattr_req = shard_create_gfid_dict(local->xattr_req);
++ if (!xattr_req) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ wind_failed = _gf_true;
++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
++ ENOMEM, NULL, NULL, NULL, NULL, NULL);
++ goto next;
+ }
+
+- if (iobref_add(local->iobref, iobuf) != 0) {
+- iobuf_unref(iobuf);
+- goto err;
++ bname = strrchr(path, '/') + 1;
++ loc.inode = inode_new(this->itable);
++ loc.parent = inode_ref(priv->dot_shard_inode);
++ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++ if (ret < 0 || !(loc.inode)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++             "Inode path failed on %s, base file gfid = %s",
++ bname, uuid_utoa(fd->inode->gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ wind_failed = _gf_true;
++ loc_wipe(&loc);
++ dict_unref(xattr_req);
++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
++ ENOMEM, NULL, NULL, NULL, NULL, NULL);
++ goto next;
+ }
+
+- memset(iobuf->ptr, 0, local->total_size);
+- iobuf_unref(iobuf);
+- local->iobuf = iobuf;
++ loc.name = strrchr(loc.path, '/');
++ if (loc.name)
++ loc.name++;
+
+- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+- if (!local->dot_shard_loc.inode) {
+- ret = shard_init_internal_dir_loc(this, local,
+- SHARD_INTERNAL_DIR_DOT_SHARD);
+- if (ret)
+- goto err;
+- shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler,
+- SHARD_INTERNAL_DIR_DOT_SHARD);
+- } else {
+- local->post_res_handler = shard_post_resolve_readv_handler;
+- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+- }
+- return 0;
++ STACK_WIND_COOKIE(frame, shard_common_mknod_cbk,
++ (void *)(long)shard_idx_iter, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->mknod, &loc, mode,
++ ctx_tmp.stat.ia_rdev, 0, xattr_req);
++ loc_wipe(&loc);
++ dict_unref(xattr_req);
++
++ next:
++ shard_idx_iter++;
++ i++;
++ if (!--call_count)
++ break;
++ }
++
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
+- return 0;
++ /*
++ * This block is for handling failure in shard_inode_ctx_get_all().
++ * Failures in the while-loop are handled within the loop.
++ */
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++ post_mknod_handler(frame, this);
++ return 0;
+ }
+
+-int
+-shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+- off_t offset, uint32_t flags, dict_t *xdata)
+-{
+- int ret = 0;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this);
+
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size for %s from its inode ctx",
+- uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
++int shard_post_lookup_shards_readv_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- /* block_size = 0 means that the file was created before
+- * sharding was enabled on the volume.
+- */
+- STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+- xdata);
+- return 0;
+- }
++ local = frame->local;
+
+- if (!this->itable)
+- this->itable = fd->inode->table;
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++ if (local->create_count) {
++ shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler);
++ } else {
++ shard_readv_do(frame, this);
++ }
+
+- frame->local = local;
++ return 0;
++}
+
+- ret = syncbarrier_init(&local->barrier);
+- if (ret)
+- goto err;
+- local->fd = fd_ref(fd);
+- local->block_size = block_size;
+- local->offset = offset;
+- local->req_size = size;
+- local->flags = flags;
+- local->fop = GF_FOP_READ;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- local->loc.inode = inode_ref(fd->inode);
+- gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++ local = frame->local;
+
+- shard_lookup_base_file(frame, this, &local->loc,
+- shard_post_lookup_readv_handler);
+- return 0;
+-err:
+- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++ local->op_errno);
+ return 0;
++ }
++
++ if (!local->eexist_count) {
++ shard_readv_do(frame, this);
++ } else {
++ local->call_count = local->eexist_count;
++ shard_common_lookup_shards(frame, this, local->loc.inode,
++ shard_post_lookup_shards_readv_handler);
++ }
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_post_update_size_handler(call_frame_t *frame,
+- xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
++ if (local->op_ret < 0) {
++ if (local->op_errno != ENOENT) {
++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++ local->op_errno);
++ return 0;
+ } else {
+- shard_common_inode_write_success_unwind(local->fop, frame,
+- local->written_size);
++ struct iovec vec = {
++ 0,
++ };
++
++ vec.iov_base = local->iobuf->ptr;
++ vec.iov_len = local->total_size;
++ local->op_ret = local->total_size;
++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1,
++ &local->prebuf, local->iobref, NULL);
++ return 0;
+ }
+- return 0;
+-}
++ }
+
+-static gf_boolean_t
+-shard_is_appending_write(shard_local_t *local)
+-{
+- if (local->fop != GF_FOP_WRITE)
+- return _gf_false;
+- if (local->flags & O_APPEND)
+- return _gf_true;
+- if (local->fd->flags & O_APPEND)
+- return _gf_true;
+- return _gf_false;
++ if (local->call_count) {
++ shard_common_lookup_shards(frame, this, local->resolver_base_inode,
++ shard_post_lookup_shards_readv_handler);
++ } else {
++ shard_readv_do(frame, this);
++ }
++
++ return 0;
+ }
+
+-int
+-__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
+- xlator_t *this)
+-{
+- int ret = -1;
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx = NULL;
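++/* Post-lookup step for readv: a read past EOF unwinds immediately
++ * with 0 bytes; otherwise the block range and a zero-filled result
++ * buffer are prepared and .shard is resolved before the per-shard
++ * reads are issued.
++ */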
++int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) {
++ int ret = 0;
++ struct iobuf *iobuf = NULL;
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
++
++ priv = this->private;
++ local = frame->local;
+
+- ret = __inode_ctx_get(inode, this, &ctx_uint);
+- if (ret < 0)
+- return ret;
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
+
+- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++ if (local->offset >= local->prebuf.ia_size) {
++ /* If the read is being performed past the end of the file,
++ * unwind the FOP with 0 bytes read as status.
++ */
++ struct iovec vec = {
++ 0,
++ };
+
+- if (shard_is_appending_write(local)) {
+- local->delta_size = local->total_size;
+- } else if (local->offset + local->total_size > ctx->stat.ia_size) {
+- local->delta_size = (local->offset + local->total_size) -
+- ctx->stat.ia_size;
+- } else {
+- local->delta_size = 0;
+- }
+- ctx->stat.ia_size += (local->delta_size);
+- local->postbuf = ctx->stat;
++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size);
++ if (!iobuf)
++ goto err;
++
++ vec.iov_base = iobuf->ptr;
++ vec.iov_len = 0;
++ local->iobref = iobref_new();
++ iobref_add(local->iobref, iobuf);
++ iobuf_unref(iobuf);
+
++ SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf,
++ local->iobref, NULL);
+ return 0;
+-}
++ }
+
+-int
+-shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
+- xlator_t *this)
+-{
+- int ret = -1;
++ local->first_block = get_lowest_block(local->offset, local->block_size);
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_get_delta_size_from_inode_ctx(local, inode, this);
+- }
+- UNLOCK(&inode->lock);
++ local->total_size = local->req_size;
+
+- return ret;
+-}
++ local->last_block =
++ get_highest_block(local->offset, local->total_size, local->block_size);
+
+-int
+-shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
+- xlator_t *this, int32_t op_ret,
+- int32_t op_errno, struct iatt *pre,
+- struct iatt *post, dict_t *xdata)
+-{
+- int call_count = 0;
+- fd_t *anon_fd = cookie;
+- shard_local_t *local = NULL;
+- glusterfs_fop_t fop = 0;
++ local->num_blocks = local->last_block - local->first_block + 1;
++ local->resolver_base_inode = local->loc.inode;
+
+- local = frame->local;
+- fop = local->fop;
++ local->inode_list =
++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
++ if (!local->inode_list)
++ goto err;
+
+- LOCK(&frame->lock);
+- {
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- } else {
+- local->written_size += op_ret;
+- GF_ATOMIC_ADD(local->delta_blocks,
+- post->ia_blocks - pre->ia_blocks);
+- local->delta_size += (post->ia_size - pre->ia_size);
+- shard_inode_ctx_set(local->fd->inode, this, post, 0,
+- SHARD_MASK_TIMES);
+- if (local->fd->inode != anon_fd->inode)
+- shard_inode_ctx_add_to_fsync_list(local->fd->inode, this,
+- anon_fd->inode);
+- }
+- }
+- UNLOCK(&frame->lock);
++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size);
++ if (!iobuf)
++ goto err;
+
+- if (anon_fd)
+- fd_unref(anon_fd);
++ local->iobref = iobref_new();
++ if (!local->iobref) {
++ iobuf_unref(iobuf);
++ goto err;
++ }
+
+- call_count = shard_call_count_return(frame);
+- if (call_count == 0) {
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(fop, frame, local->op_ret,
+- local->op_errno);
+- } else {
+- shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this);
+- local->hole_size = 0;
+- if (xdata)
+- local->xattr_rsp = dict_ref(xdata);
+- shard_update_file_size(
+- frame, this, local->fd, NULL,
+- shard_common_inode_write_post_update_size_handler);
+- }
+- }
++ if (iobref_add(local->iobref, iobuf) != 0) {
++ iobuf_unref(iobuf);
++ goto err;
++ }
+
+- return 0;
++ memset(iobuf->ptr, 0, local->total_size);
++ iobuf_unref(iobuf);
++ local->iobuf = iobuf;
++
++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++ if (!local->dot_shard_loc.inode) {
++ ret =
++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
++ if (ret)
++ goto err;
++ shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler,
++ SHARD_INTERNAL_DIR_DOT_SHARD);
++ } else {
++ local->post_res_handler = shard_post_resolve_readv_handler;
++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++ }
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++ return 0;
++}
++
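++/* readv fop: files predating sharding (block size 0) and gsyncd reads
++ * are wound through unmodified; otherwise the base file is looked up
++ * first so its current size is known.
++ */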
++int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t offset, uint32_t flags, dict_t *xdata) {
++ int ret = 0;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
++
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size for %s from its inode ctx",
++ uuid_utoa(fd->inode->gfid));
++ goto err;
++ }
++
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ /* block_size = 0 means that the file was created before
++ * sharding was enabled on the volume.
++ */
++ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
++ return 0;
++ }
++
++ if (!this->itable)
++ this->itable = fd->inode->table;
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
++
++ frame->local = local;
++
++ ret = syncbarrier_init(&local->barrier);
++ if (ret)
++ goto err;
++ local->fd = fd_ref(fd);
++ local->block_size = block_size;
++ local->offset = offset;
++ local->req_size = size;
++ local->flags = flags;
++ local->fop = GF_FOP_READ;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++
++ local->loc.inode = inode_ref(fd->inode);
++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_readv_handler);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- struct iovec *vec, int count, off_t shard_offset,
+- size_t size)
+-{
+- shard_local_t *local = NULL;
++int shard_common_inode_write_post_update_size_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- switch (local->fop) {
+- case GF_FOP_WRITE:
+- STACK_WIND_COOKIE(
+- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset,
+- local->flags, local->iobref, local->xattr_req);
+- break;
+- case GF_FOP_FALLOCATE:
+- STACK_WIND_COOKIE(
+- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fallocate, fd, local->flags,
+- shard_offset, size, local->xattr_req);
+- break;
+- case GF_FOP_ZEROFILL:
+- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+- FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->zerofill, fd,
+- shard_offset, size, local->xattr_req);
+- break;
+- case GF_FOP_DISCARD:
+- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+- FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->discard, fd,
+- shard_offset, size, local->xattr_req);
+- break;
+- default:
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+- "Invalid fop id = %d", local->fop);
+- break;
+- }
+- return 0;
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ } else {
++ shard_common_inode_write_success_unwind(local->fop, frame,
++ local->written_size);
++ }
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_do(call_frame_t *frame, xlator_t *this)
+-{
+- int i = 0;
+- int count = 0;
+- int call_count = 0;
+- int last_block = 0;
+- uint32_t cur_block = 0;
+- fd_t *fd = NULL;
+- fd_t *anon_fd = NULL;
+- shard_local_t *local = NULL;
+- struct iovec *vec = NULL;
+- gf_boolean_t wind_failed = _gf_false;
+- gf_boolean_t odirect = _gf_false;
+- off_t orig_offset = 0;
+- off_t shard_offset = 0;
+- off_t vec_offset = 0;
+- size_t remaining_size = 0;
+- size_t shard_write_size = 0;
+-
+- local = frame->local;
+- fd = local->fd;
+-
+- orig_offset = local->offset;
+- remaining_size = local->total_size;
+- cur_block = local->first_block;
+- local->call_count = call_count = local->num_blocks;
+- last_block = local->last_block;
+-
+- SHARD_SET_ROOT_FS_ID(frame, local);
+-
+- if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC
+- " into "
+- "dict: %s",
+- uuid_utoa(fd->inode->gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- local->call_count = 1;
+- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- return 0;
+- }
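++/* A write is appending if either the fop or the fd carries O_APPEND;
++ * appending writes always grow the file by the full write size.
++ */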
++static gf_boolean_t shard_is_appending_write(shard_local_t *local) {
++ if (local->fop != GF_FOP_WRITE)
++ return _gf_false;
++ if (local->flags & O_APPEND)
++ return _gf_true;
++ if (local->fd->flags & O_APPEND)
++ return _gf_true;
++ return _gf_false;
++}
+
+- if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
+- odirect = _gf_true;
++int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
++ xlator_t *this) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
+
+- while (cur_block <= last_block) {
+- if (wind_failed) {
+- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- goto next;
+- }
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret < 0)
++ return ret;
+
+- shard_offset = orig_offset % local->block_size;
+- shard_write_size = local->block_size - shard_offset;
+- if (shard_write_size > remaining_size)
+- shard_write_size = remaining_size;
+-
+- remaining_size -= shard_write_size;
+-
+- if (local->fop == GF_FOP_WRITE) {
+- count = iov_subset(local->vector, local->count, vec_offset,
+- vec_offset + shard_write_size, NULL);
+-
+- vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
+- if (!vec) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- wind_failed = _gf_true;
+- GF_FREE(vec);
+- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this,
+- -1, ENOMEM, NULL, NULL, NULL);
+- goto next;
+- }
+- count = iov_subset(local->vector, local->count, vec_offset,
+- vec_offset + shard_write_size, vec);
+- }
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- if (cur_block == 0) {
+- anon_fd = fd_ref(fd);
+- } else {
+- anon_fd = fd_anonymous(local->inode_list[i]);
+- if (!anon_fd) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- wind_failed = _gf_true;
+- GF_FREE(vec);
+- shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd,
+- this, -1, ENOMEM, NULL, NULL,
+- NULL);
+- goto next;
+- }
+-
+- if (local->fop == GF_FOP_WRITE) {
+- if (odirect)
+- local->flags = O_DIRECT;
+- else
+- local->flags = GF_ANON_FD_FLAGS;
+- }
+- }
++ if (shard_is_appending_write(local)) {
++ local->delta_size = local->total_size;
++ } else if (local->offset + local->total_size > ctx->stat.ia_size) {
++ local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size;
++ } else {
++ local->delta_size = 0;
++ }
++ ctx->stat.ia_size += (local->delta_size);
++ local->postbuf = ctx->stat;
+
+- shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
+- shard_offset, shard_write_size);
+- if (vec)
+- vec_offset += shard_write_size;
+- orig_offset += shard_write_size;
+- GF_FREE(vec);
+- vec = NULL;
+- next:
+- cur_block++;
+- i++;
+- call_count--;
+- }
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
+- xlator_t *this);
++int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
++ xlator_t *this) {
++ int ret = -1;
++
++ LOCK(&inode->lock);
++ { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); }
++ UNLOCK(&inode->lock);
+
+-int
+-shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+- xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++ return ret;
++}
+
+- local = frame->local;
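++/* Aggregates the per-shard write replies under frame->lock,
++ * accumulating written size and block-count deltas; once the last
++ * reply arrives, the size xattr on the base file is updated before
++ * unwinding.
++ */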
++int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, struct iatt *pre,
++ struct iatt *post, dict_t *xdata) {
++ int call_count = 0;
++ fd_t *anon_fd = cookie;
++ shard_local_t *local = NULL;
++ glusterfs_fop_t fop = 0;
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
++ local = frame->local;
++ fop = local->fop;
+
+- if (local->create_count) {
+- shard_common_resume_mknod(frame, this,
+- shard_common_inode_write_post_mknod_handler);
++ LOCK(&frame->lock);
++ {
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
+ } else {
+- shard_common_inode_write_do(frame, this);
++ local->written_size += op_ret;
++ GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks);
++ local->delta_size += (post->ia_size - pre->ia_size);
++ shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES);
++ if (local->fd->inode != anon_fd->inode)
++ shard_inode_ctx_add_to_fsync_list(local->fd->inode, this,
++ anon_fd->inode);
++ }
++ }
++ UNLOCK(&frame->lock);
++
++ if (anon_fd)
++ fd_unref(anon_fd);
++
++ call_count = shard_call_count_return(frame);
++ if (call_count == 0) {
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno);
++ } else {
++ shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this);
++ local->hole_size = 0;
++ if (xdata)
++ local->xattr_rsp = dict_ref(xdata);
++ shard_update_file_size(frame, this, local->fd, NULL,
++ shard_common_inode_write_post_update_size_handler);
+ }
++ }
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ struct iovec *vec, int count,
++ off_t shard_offset, size_t size) {
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
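++  /* Wind the sub-fop for this shard to the next xlator down. */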
++ switch (local->fop) {
++ case GF_FOP_WRITE:
++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd,
++ vec, count, shard_offset, local->flags, local->iobref,
++ local->xattr_req);
++ break;
++ case GF_FOP_FALLOCATE:
++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd,
++ local->flags, shard_offset, size, local->xattr_req);
++ break;
++ case GF_FOP_ZEROFILL:
++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd,
++ shard_offset, size, local->xattr_req);
++ break;
++ case GF_FOP_DISCARD:
++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd,
++ shard_offset, size, local->xattr_req);
++ break;
++ default:
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++ "Invalid fop id = %d", local->fop);
++ break;
++ }
++ return 0;
++}
++
++int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) {
++ int i = 0;
++ int count = 0;
++ int call_count = 0;
++ int last_block = 0;
++ uint32_t cur_block = 0;
++ fd_t *fd = NULL;
++ fd_t *anon_fd = NULL;
++ shard_local_t *local = NULL;
++ struct iovec *vec = NULL;
++ gf_boolean_t wind_failed = _gf_false;
++ gf_boolean_t odirect = _gf_false;
++ off_t orig_offset = 0;
++ off_t shard_offset = 0;
++ off_t vec_offset = 0;
++ size_t remaining_size = 0;
++ size_t shard_write_size = 0;
++
++ local = frame->local;
++ fd = local->fd;
++
++ orig_offset = local->offset;
++ remaining_size = local->total_size;
++ cur_block = local->first_block;
++ local->call_count = call_count = local->num_blocks;
++ last_block = local->last_block;
++
++ SHARD_SET_ROOT_FS_ID(frame, local);
++
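++  /* Tag the request dict; if this fails, fail the whole fop through the
++   * regular callback path so cleanup stays in one place. */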
++ if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into "
++ "dict: %s",
++ uuid_utoa(fd->inode->gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ local->call_count = 1;
++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ return 0;
++ }
+
+- if (!local->eexist_count) {
+- shard_common_inode_write_do(frame, this);
+- } else {
+- local->call_count = local->eexist_count;
+- shard_common_lookup_shards(
+- frame, this, local->loc.inode,
+- shard_common_inode_write_post_lookup_shards_handler);
++ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
++ odirect = _gf_true;
++
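++  /* Fan out one sub-fop per shard in [first_block, last_block]. For
++   * writes, the matching slice of the caller's iovec is carved out. */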
++ while (cur_block <= last_block) {
++ if (wind_failed) {
++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ goto next;
+ }
+
+- return 0;
+-}
++ shard_offset = orig_offset % local->block_size;
++ shard_write_size = local->block_size - shard_offset;
++ if (shard_write_size > remaining_size)
++ shard_write_size = remaining_size;
+
+-int
+-shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
+- xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++ remaining_size -= shard_write_size;
+
+- local = frame->local;
++ if (local->fop == GF_FOP_WRITE) {
++ count = iov_subset(local->vector, local->count, vec_offset,
++ vec_offset + shard_write_size, NULL);
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
++ vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
++ if (!vec) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ wind_failed = _gf_true;
++ GF_FREE(vec);
++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
++ ENOMEM, NULL, NULL, NULL);
++ goto next;
++ }
++ count = iov_subset(local->vector, local->count, vec_offset,
++ vec_offset + shard_write_size, vec);
+ }
+
+- if (local->call_count) {
+- shard_common_lookup_shards(
+- frame, this, local->resolver_base_inode,
+- shard_common_inode_write_post_lookup_shards_handler);
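++    /* Block 0 is the base file itself; every other shard is reached
++     * through an anonymous fd on its inode. */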
++ if (cur_block == 0) {
++ anon_fd = fd_ref(fd);
+ } else {
+- shard_common_inode_write_do(frame, this);
+- }
++ anon_fd = fd_anonymous(local->inode_list[i]);
++ if (!anon_fd) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ wind_failed = _gf_true;
++ GF_FREE(vec);
++ shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1,
++ ENOMEM, NULL, NULL, NULL);
++ goto next;
++ }
+
+- return 0;
++ if (local->fop == GF_FOP_WRITE) {
++ if (odirect)
++ local->flags = O_DIRECT;
++ else
++ local->flags = GF_ANON_FD_FLAGS;
++ }
++ }
++
++ shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
++ shard_offset, shard_write_size);
++ if (vec)
++ vec_offset += shard_write_size;
++ orig_offset += shard_write_size;
++ GF_FREE(vec);
++ vec = NULL;
++ next:
++ cur_block++;
++ i++;
++ call_count--;
++ }
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
+- xlator_t *this)
+-{
+- shard_local_t *local = frame->local;
+- shard_priv_t *priv = this->private;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
+-
+- local->postbuf = local->prebuf;
+-
+- /*Adjust offset to EOF so that correct shard is chosen for append*/
+- if (shard_is_appending_write(local))
+- local->offset = local->prebuf.ia_size;
+-
+- local->first_block = get_lowest_block(local->offset, local->block_size);
+- local->last_block = get_highest_block(local->offset, local->total_size,
+- local->block_size);
+- local->num_blocks = local->last_block - local->first_block + 1;
+- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+- gf_shard_mt_inode_list);
+- if (!local->inode_list) {
+- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+- return 0;
+- }
++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
++ xlator_t *this);
+
+- gf_msg_trace(this->name, 0,
+- "%s: gfid=%s first_block=%" PRIu32
+- " "
+- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64
+- " total_size=%zu flags=%" PRId32 "",
+- gf_fop_list[local->fop],
+- uuid_utoa(local->resolver_base_inode->gfid),
+- local->first_block, local->last_block, local->num_blocks,
+- local->offset, local->total_size, local->flags);
++int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++ local = frame->local;
+
+- if (!local->dot_shard_loc.inode) {
+- /*change handler*/
+- shard_mkdir_internal_dir(frame, this,
+- shard_common_inode_write_post_resolve_handler,
+- SHARD_INTERNAL_DIR_DOT_SHARD);
+- } else {
+- /*change handler*/
+- local->post_res_handler = shard_common_inode_write_post_resolve_handler;
+- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+- }
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
+ return 0;
+-}
+-
+-int
+-shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, struct iatt *preparent,
+- struct iatt *postparent, dict_t *xdata)
+-{
+- inode_t *link_inode = NULL;
+- shard_local_t *local = NULL;
+- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++ }
+
+- local = frame->local;
++ if (local->create_count) {
++ shard_common_resume_mknod(frame, this,
++ shard_common_inode_write_post_mknod_handler);
++ } else {
++ shard_common_inode_write_do(frame, this);
++ }
+
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
++ return 0;
++}
+
+- if (op_ret == -1) {
+- if (op_errno != EEXIST) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto unwind;
+- } else {
+- gf_msg_debug(this->name, 0,
+- "mkdir on %s failed "
+- "with EEXIST. Attempting lookup now",
+- shard_internal_dir_string(type));
+- shard_lookup_internal_dir(frame, this, local->post_res_handler,
+- type);
+- return 0;
+- }
+- }
++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+- if (link_inode != inode) {
+- shard_refresh_internal_dir(frame, this, type);
+- } else {
+- shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+- shard_common_resolve_shards(frame, this, local->post_res_handler);
+- }
+- return 0;
+-unwind:
+- shard_common_resolve_shards(frame, this, local->post_res_handler);
+- return 0;
+-}
++ local = frame->local;
+
+-int
+-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+- shard_post_resolve_fop_handler_t handler,
+- shard_internal_dir_type_t type)
+-{
+- int ret = -1;
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
+- dict_t *xattr_req = NULL;
+- uuid_t *gfid = NULL;
+- loc_t *loc = NULL;
+- gf_boolean_t free_gfid = _gf_true;
+-
+- local = frame->local;
+- priv = this->private;
+-
+- local->post_res_handler = handler;
+- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+- if (!gfid)
+- goto err;
+-
+- switch (type) {
+- case SHARD_INTERNAL_DIR_DOT_SHARD:
+- gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+- loc = &local->dot_shard_loc;
+- break;
+- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+- loc = &local->dot_shard_rm_loc;
+- break;
+- default:
+- bzero(*gfid, sizeof(uuid_t));
+- break;
+- }
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
+
+- xattr_req = dict_new();
+- if (!xattr_req)
+- goto err;
++ if (!local->eexist_count) {
++ shard_common_inode_write_do(frame, this);
++ } else {
++ local->call_count = local->eexist_count;
++ shard_common_lookup_shards(
++ frame, this, local->loc.inode,
++ shard_common_inode_write_post_lookup_shards_handler);
++ }
+
+- ret = shard_init_internal_dir_loc(this, local, type);
+- if (ret)
+- goto err;
++ return 0;
++}
+
+- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set gfid-req for %s",
+- shard_internal_dir_string(type));
+- goto err;
+- } else {
+- free_gfid = _gf_false;
+- }
++int shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- SHARD_SET_ROOT_FS_ID(frame, local);
++ local = frame->local;
+
+- STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
+- 0755, 0, xattr_req);
+- dict_unref(xattr_req);
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
+ return 0;
++ }
+
+-err:
+- if (xattr_req)
+- dict_unref(xattr_req);
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- if (free_gfid)
+- GF_FREE(gfid);
+- handler(frame, this);
+- return 0;
+-}
++ if (local->call_count) {
++ shard_common_lookup_shards(
++ frame, this, local->resolver_base_inode,
++ shard_common_inode_write_post_lookup_shards_handler);
++ } else {
++ shard_common_inode_write_do(frame, this);
++ }
+
+-int
+-shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-{
+- /* To-Do: Wind flush on all shards of the file */
+- SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+-{
+- STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->flush, fd, xdata);
++int shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = frame->local;
++ shard_priv_t *priv = this->private;
++
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
+ return 0;
+-}
++ }
+
+-int
+-__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+- xlator_t *this)
+-{
+- int ret = -1;
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx = NULL;
++ local->postbuf = local->prebuf;
+
+- ret = __inode_ctx_get(inode, this, &ctx_uint);
+- if (ret < 0)
+- return ret;
++  /* Adjust offset to EOF so that correct shard is chosen for append */
++ if (shard_is_appending_write(local))
++ local->offset = local->prebuf.ia_size;
+
+- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
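++  /* Translate the byte range into first/last shard block numbers. */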
++ local->first_block = get_lowest_block(local->offset, local->block_size);
++ local->last_block =
++ get_highest_block(local->offset, local->total_size, local->block_size);
++ local->num_blocks = local->last_block - local->first_block + 1;
++ local->inode_list =
++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
++ if (!local->inode_list) {
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
++ }
+
+- local->postbuf.ia_ctime = ctx->stat.ia_ctime;
+- local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
+- local->postbuf.ia_atime = ctx->stat.ia_atime;
+- local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
+- local->postbuf.ia_mtime = ctx->stat.ia_mtime;
+- local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
++ gf_msg_trace(
++ this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " "
++ "last_block=%" PRIu32 " num_blocks=%" PRIu32
++ " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "",
++ gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid),
++ local->first_block, local->last_block, local->num_blocks, local->offset,
++ local->total_size, local->flags);
+
+- return 0;
+-}
++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+
+-int
+-shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+- xlator_t *this)
+-{
+- int ret = 0;
++ if (!local->dot_shard_loc.inode) {
++ /*change handler*/
++ shard_mkdir_internal_dir(frame, this,
++ shard_common_inode_write_post_resolve_handler,
++ SHARD_INTERNAL_DIR_DOT_SHARD);
++ } else {
++ /*change handler*/
++ local->post_res_handler = shard_common_inode_write_post_resolve_handler;
++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++ }
++ return 0;
++}
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_get_timestamps_from_inode_ctx(local, inode, this);
+- }
+- UNLOCK(&inode->lock);
++int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode,
++ struct iatt *buf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata) {
++ inode_t *link_inode = NULL;
++ shard_local_t *local = NULL;
++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++
++ local = frame->local;
++
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++
++ if (op_ret == -1) {
++ if (op_errno != EEXIST) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto unwind;
++ } else {
++ gf_msg_debug(this->name, 0, "mkdir on %s failed "
++ "with EEXIST. Attempting lookup now",
++ shard_internal_dir_string(type));
++ shard_lookup_internal_dir(frame, this, local->post_res_handler, type);
++ return 0;
++ }
++ }
++
++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++ if (link_inode != inode) {
++ shard_refresh_internal_dir(frame, this, type);
++ } else {
++ shard_inode_ctx_mark_dir_refreshed(link_inode, this);
++ shard_common_resolve_shards(frame, this, local->post_res_handler);
++ }
++ return 0;
++unwind:
++ shard_common_resolve_shards(frame, this, local->post_res_handler);
++ return 0;
++}
++
++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
++ shard_post_resolve_fop_handler_t handler,
++ shard_internal_dir_type_t type) {
++ int ret = -1;
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
++ dict_t *xattr_req = NULL;
++ uuid_t *gfid = NULL;
++ loc_t *loc = NULL;
++ gf_boolean_t free_gfid = _gf_true;
++
++ local = frame->local;
++ priv = this->private;
++
++ local->post_res_handler = handler;
++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
++ if (!gfid)
++ goto err;
++
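++  /* Pick the well-known gfid and loc of the internal dir to create. */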
++ switch (type) {
++ case SHARD_INTERNAL_DIR_DOT_SHARD:
++ gf_uuid_copy(*gfid, priv->dot_shard_gfid);
++ loc = &local->dot_shard_loc;
++ break;
++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
++ loc = &local->dot_shard_rm_loc;
++ break;
++ default:
++ bzero(*gfid, sizeof(uuid_t));
++ break;
++ }
++
++ xattr_req = dict_new();
++ if (!xattr_req)
++ goto err;
++
++ ret = shard_init_internal_dir_loc(this, local, type);
++ if (ret)
++ goto err;
++
++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set gfid-req for %s", shard_internal_dir_string(type));
++ goto err;
++ } else {
++ free_gfid = _gf_false;
++ }
++
++ SHARD_SET_ROOT_FS_ID(frame, local);
++
++ STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
++ 0755, 0, xattr_req);
++ dict_unref(xattr_req);
++ return 0;
+
+- return ret;
++err:
++ if (xattr_req)
++ dict_unref(xattr_req);
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ if (free_gfid)
++ GF_FREE(gfid);
++ handler(frame, this);
++ return 0;
+ }
+
+-int
+-shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+- struct iatt *postbuf, dict_t *xdata)
+-{
+- int call_count = 0;
+- uint64_t fsync_count = 0;
+- fd_t *anon_fd = cookie;
+- shard_local_t *local = NULL;
+- shard_inode_ctx_t *ctx = NULL;
+- shard_inode_ctx_t *base_ictx = NULL;
+- inode_t *base_inode = NULL;
+- gf_boolean_t unref_shard_inode = _gf_false;
++int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++ /* To-Do: Wind flush on all shards of the file */
++ SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
++ return 0;
++}
+
+- local = frame->local;
+- base_inode = local->fd->inode;
++int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) {
++ STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->flush, fd, xdata);
++ return 0;
++}
+
+- if (local->op_ret < 0)
+- goto out;
++int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
++ xlator_t *this) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
+
+- LOCK(&frame->lock);
+- {
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- UNLOCK(&frame->lock);
+- goto out;
+- }
+- shard_inode_ctx_set(local->fd->inode, this, postbuf, 0,
+- SHARD_MASK_TIMES);
+- }
+- UNLOCK(&frame->lock);
+- fd_ctx_get(anon_fd, this, &fsync_count);
+-out:
+- if (anon_fd && (base_inode != anon_fd->inode)) {
+- LOCK(&base_inode->lock);
+- LOCK(&anon_fd->inode->lock);
+- {
+- __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+- __shard_inode_ctx_get(base_inode, this, &base_ictx);
+- if (op_ret == 0)
+- ctx->fsync_needed -= fsync_count;
+- GF_ASSERT(ctx->fsync_needed >= 0);
+- if (ctx->fsync_needed != 0) {
+- list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
+- base_ictx->fsync_count++;
+- } else {
+- unref_shard_inode = _gf_true;
+- }
+- }
+- UNLOCK(&anon_fd->inode->lock);
+- UNLOCK(&base_inode->lock);
+- }
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret < 0)
++ return ret;
+
+- if (unref_shard_inode)
+- inode_unref(anon_fd->inode);
+- if (anon_fd)
+- fd_unref(anon_fd);
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- call_count = shard_call_count_return(frame);
+- if (call_count != 0)
+- return 0;
++ local->postbuf.ia_ctime = ctx->stat.ia_ctime;
++ local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
++ local->postbuf.ia_atime = ctx->stat.ia_atime;
++ local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
++ local->postbuf.ia_mtime = ctx->stat.ia_mtime;
++ local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+- local->op_errno);
+- } else {
+- shard_get_timestamps_from_inode_ctx(local, base_inode, this);
+- SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->postbuf, local->xattr_rsp);
+- }
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this)
+-{
+- int ret = 0;
+- int call_count = 0;
+- int fsync_count = 0;
+- fd_t *anon_fd = NULL;
+- inode_t *base_inode = NULL;
+- shard_local_t *local = NULL;
+- shard_inode_ctx_t *ctx = NULL;
+- shard_inode_ctx_t *iter = NULL;
+- struct list_head copy = {
+- 0,
+- };
+- shard_inode_ctx_t *tmp = NULL;
++int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
++ xlator_t *this) {
++ int ret = 0;
+
+- local = frame->local;
+- base_inode = local->fd->inode;
+- local->postbuf = local->prebuf;
+- INIT_LIST_HEAD(&copy);
++ LOCK(&inode->lock);
++ { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); }
++ UNLOCK(&inode->lock);
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
++ return ret;
++}
+
++int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno,
++ struct iatt *prebuf, struct iatt *postbuf,
++ dict_t *xdata) {
++ int call_count = 0;
++ uint64_t fsync_count = 0;
++ fd_t *anon_fd = cookie;
++ shard_local_t *local = NULL;
++ shard_inode_ctx_t *ctx = NULL;
++ shard_inode_ctx_t *base_ictx = NULL;
++ inode_t *base_inode = NULL;
++ gf_boolean_t unref_shard_inode = _gf_false;
++
++ local = frame->local;
++ base_inode = local->fd->inode;
++
++ if (local->op_ret < 0)
++ goto out;
++
++ LOCK(&frame->lock);
++ {
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ UNLOCK(&frame->lock);
++ goto out;
++ }
++ shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES);
++ }
++ UNLOCK(&frame->lock);
++ fd_ctx_get(anon_fd, this, &fsync_count);
++out:
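++  /* If the shard still has fsyncs pending, park it back on the base
++   * inode's to_fsync_list; otherwise drop the list's inode ref. */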
++ if (anon_fd && (base_inode != anon_fd->inode)) {
+ LOCK(&base_inode->lock);
++ LOCK(&anon_fd->inode->lock);
+ {
+- __shard_inode_ctx_get(base_inode, this, &ctx);
+- list_splice_init(&ctx->to_fsync_list, &copy);
+- call_count = ctx->fsync_count;
+- ctx->fsync_count = 0;
+- }
++ __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
++ __shard_inode_ctx_get(base_inode, this, &base_ictx);
++ if (op_ret == 0)
++ ctx->fsync_needed -= fsync_count;
++ GF_ASSERT(ctx->fsync_needed >= 0);
++ if (ctx->fsync_needed != 0) {
++ list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
++ base_ictx->fsync_count++;
++ } else {
++ unref_shard_inode = _gf_true;
++ }
++ }
++ UNLOCK(&anon_fd->inode->lock);
+ UNLOCK(&base_inode->lock);
++ }
++
++ if (unref_shard_inode)
++ inode_unref(anon_fd->inode);
++ if (anon_fd)
++ fd_unref(anon_fd);
++
++ call_count = shard_call_count_return(frame);
++ if (call_count != 0)
++ return 0;
+
+- local->call_count = ++call_count;
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
++ local->op_errno);
++ } else {
++ shard_get_timestamps_from_inode_ctx(local, base_inode, this);
++ SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->postbuf, local->xattr_rsp);
++ }
++ return 0;
++}
++
++int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) {
++ int ret = 0;
++ int call_count = 0;
++ int fsync_count = 0;
++ fd_t *anon_fd = NULL;
++ inode_t *base_inode = NULL;
++ shard_local_t *local = NULL;
++ shard_inode_ctx_t *ctx = NULL;
++ shard_inode_ctx_t *iter = NULL;
++ struct list_head copy = {
++ 0,
++ };
++ shard_inode_ctx_t *tmp = NULL;
++
++ local = frame->local;
++ base_inode = local->fd->inode;
++ local->postbuf = local->prebuf;
++ INIT_LIST_HEAD(&copy);
++
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
++
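++  /* Drain the base inode's to_fsync_list under its lock; each entry
++   * gets its own fsync below, plus one on the base shard itself. */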
++ LOCK(&base_inode->lock);
++ {
++ __shard_inode_ctx_get(base_inode, this, &ctx);
++ list_splice_init(&ctx->to_fsync_list, &copy);
++ call_count = ctx->fsync_count;
++ ctx->fsync_count = 0;
++ }
++ UNLOCK(&base_inode->lock);
++
++ local->call_count = ++call_count;
++
++ /* Send fsync() on the base shard first */
++ anon_fd = fd_ref(local->fd);
++ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
++ local->xattr_req);
++ call_count--;
++ anon_fd = NULL;
++
++ list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list) {
++ list_del_init(&iter->to_fsync_list);
++ fsync_count = 0;
++ shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
++ GF_ASSERT(fsync_count > 0);
++ anon_fd = fd_anonymous(iter->inode);
++ if (!anon_fd) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++ "Failed to create "
++ "anon fd to fsync shard");
++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ continue;
++ }
+
+- /* Send fsync() on the base shard first */
+- anon_fd = fd_ref(local->fd);
++ ret = fd_ctx_set(anon_fd, this, fsync_count);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
++ "Failed to set fd "
++ "ctx for shard inode gfid=%s",
++ uuid_utoa(iter->inode->gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ continue;
++ }
+ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
+ local->xattr_req);
+ call_count--;
+- anon_fd = NULL;
+-
+- list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list)
+- {
+- list_del_init(&iter->to_fsync_list);
+- fsync_count = 0;
+- shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
+- GF_ASSERT(fsync_count > 0);
+- anon_fd = fd_anonymous(iter->inode);
+- if (!anon_fd) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+- SHARD_MSG_MEMALLOC_FAILED,
+- "Failed to create "
+- "anon fd to fsync shard");
+- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- continue;
+- }
+-
+- ret = fd_ctx_set(anon_fd, this, fsync_count);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
+- "Failed to set fd "
+- "ctx for shard inode gfid=%s",
+- uuid_utoa(iter->inode->gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- continue;
+- }
+- STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+- anon_fd, local->datasync, local->xattr_req);
+- call_count--;
+- }
++ }
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+- dict_t *xdata)
+-{
+- int ret = 0;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
++int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
++ dict_t *xdata) {
++ int ret = 0;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size for %s from its inode ctx",
+- uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size for %s from its inode ctx",
++ uuid_utoa(fd->inode->gfid));
++ goto err;
++ }
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+- return 0;
+- }
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
++ return 0;
++ }
+
+- if (!this->itable)
+- this->itable = fd->inode->table;
++ if (!this->itable)
++ this->itable = fd->inode->table;
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- frame->local = local;
++ frame->local = local;
+
+- local->fd = fd_ref(fd);
+- local->fop = GF_FOP_FSYNC;
+- local->datasync = datasync;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
++ local->fd = fd_ref(fd);
++ local->fop = GF_FOP_FSYNC;
++ local->datasync = datasync;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
+
+- local->loc.inode = inode_ref(fd->inode);
+- gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++ local->loc.inode = inode_ref(fd->inode);
++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+
+- shard_lookup_base_file(frame, this, &local->loc,
+- shard_post_lookup_fsync_handler);
+- return 0;
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_fsync_handler);
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
+- xlator_t *this, int32_t op_ret,
+- int32_t op_errno, gf_dirent_t *orig_entries,
+- dict_t *xdata)
+-{
+- gf_dirent_t *entry = NULL;
+- gf_dirent_t *tmp = NULL;
+- shard_local_t *local = NULL;
++int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno,
++ gf_dirent_t *orig_entries, dict_t *xdata) {
++ gf_dirent_t *entry = NULL;
++ gf_dirent_t *tmp = NULL;
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- if (op_ret < 0)
+- goto unwind;
++ if (op_ret < 0)
++ goto unwind;
+
+- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
+- {
+- list_del_init(&entry->list);
+- list_add_tail(&entry->list, &local->entries_head.list);
++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) {
++ list_del_init(&entry->list);
++ list_add_tail(&entry->list, &local->entries_head.list);
+
+- if (!entry->dict)
+- continue;
++ if (!entry->dict)
++ continue;
+
+- if (IA_ISDIR(entry->d_stat.ia_type))
+- continue;
++ if (IA_ISDIR(entry->d_stat.ia_type))
++ continue;
+
+- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
+- shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+- if (!entry->inode)
+- continue;
++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
++ if (!entry->inode)
++ continue;
+
+- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+- }
+- local->op_ret += op_ret;
++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
++ }
++ local->op_ret += op_ret;
+
+ unwind:
+- if (local->fop == GF_FOP_READDIR)
+- SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
+- &local->entries_head, xdata);
+- else
+- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
+- &local->entries_head, xdata);
+- return 0;
++ if (local->fop == GF_FOP_READDIR)
++ SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
++ &local->entries_head, xdata);
++ else
++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head,
++ xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
+- dict_t *xdata)
+-{
+- fd_t *fd = NULL;
+- gf_dirent_t *entry = NULL;
+- gf_dirent_t *tmp = NULL;
+- shard_local_t *local = NULL;
+- gf_boolean_t last_entry = _gf_false;
++int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno,
++ gf_dirent_t *orig_entries, dict_t *xdata) {
++ fd_t *fd = NULL;
++ gf_dirent_t *entry = NULL;
++ gf_dirent_t *tmp = NULL;
++ shard_local_t *local = NULL;
++ gf_boolean_t last_entry = _gf_false;
+
+- local = frame->local;
+- fd = local->fd;
++ local = frame->local;
++ fd = local->fd;
+
+- if (op_ret < 0)
+- goto unwind;
++ if (op_ret < 0)
++ goto unwind;
+
+- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
+- {
+- if (last_entry)
+- last_entry = _gf_false;
+-
+- if (__is_root_gfid(fd->inode->gfid) &&
+- !(strcmp(entry->d_name, GF_SHARD_DIR))) {
+- local->offset = entry->d_off;
+- op_ret--;
+- last_entry = _gf_true;
+- continue;
+- }
++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) {
++ if (last_entry)
++ last_entry = _gf_false;
+
+- list_del_init(&entry->list);
+- list_add_tail(&entry->list, &local->entries_head.list);
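++    /* Hide the internal /.shard directory from listings of the root. */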
++ if (__is_root_gfid(fd->inode->gfid) &&
++ !(strcmp(entry->d_name, GF_SHARD_DIR))) {
++ local->offset = entry->d_off;
++ op_ret--;
++ last_entry = _gf_true;
++ continue;
++ }
+
+- if (!entry->dict)
+- continue;
++ list_del_init(&entry->list);
++ list_add_tail(&entry->list, &local->entries_head.list);
+
+- if (IA_ISDIR(entry->d_stat.ia_type))
+- continue;
++ if (!entry->dict)
++ continue;
+
+- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
+- frame->root->pid != GF_CLIENT_PID_GSYNCD)
+- shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
++ if (IA_ISDIR(entry->d_stat.ia_type))
++ continue;
+
+- if (!entry->inode)
+- continue;
++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
++ frame->root->pid != GF_CLIENT_PID_GSYNCD)
++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+
+- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+- }
++ if (!entry->inode)
++ continue;
+
+- local->op_ret = op_ret;
++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
++ }
+
+- if (last_entry) {
+- if (local->fop == GF_FOP_READDIR)
+- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+- local->fd, local->readdir_size, local->offset,
+- local->xattr_req);
+- else
+- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+- local->fd, local->readdir_size, local->offset,
+- local->xattr_req);
+- return 0;
+- }
++ local->op_ret = op_ret;
+
+-unwind:
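++  /* The previous batch ended exactly at /.shard: wind one more
++   * readdir(p) past it so the caller does not see a premature EOF. */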
++ if (last_entry) {
+ if (local->fop == GF_FOP_READDIR)
+- SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno,
+- &local->entries_head, xdata);
++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdir, local->fd,
++ local->readdir_size, local->offset, local->xattr_req);
+ else
+- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
+- &local->entries_head, xdata);
++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdirp, local->fd,
++ local->readdir_size, local->offset, local->xattr_req);
+ return 0;
+-}
++ }
+
+-int
+-shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+- off_t offset, int whichop, dict_t *xdata)
+-{
+- int ret = 0;
+- shard_local_t *local = NULL;
+-
+- local = mem_get0(this->local_pool);
+- if (!local) {
+- goto err;
++unwind:
++ if (local->fop == GF_FOP_READDIR)
++ SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head,
++ xdata);
++ else
++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head,
++ xdata);
++ return 0;
++}
++
++int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t offset, int whichop, dict_t *xdata) {
++ int ret = 0;
++ shard_local_t *local = NULL;
++
++ local = mem_get0(this->local_pool);
++ if (!local) {
++ goto err;
++ }
++
++ frame->local = local;
++
++ local->fd = fd_ref(fd);
++ local->fop = whichop;
++ local->readdir_size = size;
++ INIT_LIST_HEAD(&local->entries_head.list);
++ local->list_inited = _gf_true;
++
++ if (whichop == GF_FOP_READDIR) {
++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
++ } else {
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++ local, err);
++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
++ if (ret) {
++ gf_log(this->name, GF_LOG_WARNING,
++ "Failed to set "
++ "dict value: key:%s, directory gfid=%s",
++ GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
++ goto err;
+ }
+
+- frame->local = local;
+-
+- local->fd = fd_ref(fd);
+- local->fop = whichop;
+- local->readdir_size = size;
+- INIT_LIST_HEAD(&local->entries_head.list);
+- local->list_inited = _gf_true;
+-
+- if (whichop == GF_FOP_READDIR) {
+- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+- } else {
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+- local, err);
+- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+- if (ret) {
+- gf_log(this->name, GF_LOG_WARNING,
+- "Failed to set "
+- "dict value: key:%s, directory gfid=%s",
+- GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
+-
+- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
+- local->xattr_req);
+- }
++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
++ local->xattr_req);
++ }
+
+- return 0;
++ return 0;
+
+ err:
+- STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+- return 0;
++ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
++ return 0;
+ }
+
+-int32_t
+-shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+- off_t offset, dict_t *xdata)
+-{
+- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+- return 0;
++int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ size_t size, off_t offset, dict_t *xdata) {
++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+- off_t offset, dict_t *xdata)
+-{
+- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
+- return 0;
++int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ size_t size, off_t offset, dict_t *xdata) {
++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- const char *name, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ const char *name, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+- }
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
++ }
+
+- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+- return 0;
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- const char *name, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ const char *name, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+- }
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
++ }
+
+- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+- return 0;
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *dict,
+- dict_t *xdata)
+-{
+- if (op_ret < 0)
+- goto unwind;
++int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *dict,
++ dict_t *xdata) {
++ if (op_ret < 0)
++ goto unwind;
+
+- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
++ }
+
+ unwind:
+- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+- return 0;
++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+- dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ const char *name, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+- (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
+- op_errno = ENODATA;
+- goto out;
+- }
++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
++ (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
++ op_errno = ENODATA;
++ goto out;
++ }
+
+- STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+- return 0;
++ STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *dict,
+- dict_t *xdata)
+-{
+- if (op_ret < 0)
+- goto unwind;
++int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *dict,
++ dict_t *xdata) {
++ if (op_ret < 0)
++ goto unwind;
+
+- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
++ }
+
+ unwind:
+- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+- return 0;
++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- const char *name, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ const char *name, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+- (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
+- op_errno = ENODATA;
+- goto out;
+- }
++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
++ (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
++ op_errno = ENODATA;
++ goto out;
++ }
+
+- STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+- return 0;
++ STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+- int32_t flags, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ dict_t *dict, int32_t flags, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+- }
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+- return 0;
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr,
++ fd, dict, flags, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+- int32_t flags, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ dict_t *dict, int32_t flags, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+- }
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+- loc, dict, flags, xdata);
+- return 0;
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
++ loc, dict, flags, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int
+-shard_post_setattr_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->fop == GF_FOP_SETATTR) {
+- if (local->op_ret >= 0)
+- shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
+- SHARD_LOOKUP_MASK);
+- SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->postbuf, local->xattr_rsp);
+- } else if (local->fop == GF_FOP_FSETATTR) {
+- if (local->op_ret >= 0)
+- shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
+- SHARD_LOOKUP_MASK);
+- SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->postbuf, local->xattr_rsp);
+- }
+-
+- return 0;
+-}
++int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+-int
+-shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+- struct iatt *postbuf, dict_t *xdata)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto unwind;
+- }
++ local = frame->local;
+
+- local->prebuf = *prebuf;
+- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+- local->op_ret = -1;
+- local->op_errno = EINVAL;
+- goto unwind;
+- }
+- if (xdata)
+- local->xattr_rsp = dict_ref(xdata);
+- local->postbuf = *postbuf;
+- local->postbuf.ia_size = local->prebuf.ia_size;
+- local->postbuf.ia_blocks = local->prebuf.ia_blocks;
++ if (local->fop == GF_FOP_SETATTR) {
++ if (local->op_ret >= 0)
++ shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
++ SHARD_LOOKUP_MASK);
++ SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->postbuf, local->xattr_rsp);
++ } else if (local->fop == GF_FOP_FSETATTR) {
++ if (local->op_ret >= 0)
++ shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
++ SHARD_LOOKUP_MASK);
++ SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->postbuf, local->xattr_rsp);
++ }
+
+-unwind:
+- local->handler(frame, this);
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+- return 0;
+- }
+-
+- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block size from inode ctx of %s",
+- uuid_utoa(loc->inode->gfid));
+- goto err;
+- }
+-
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+- return 0;
+- }
+-
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno,
++ struct iatt *prebuf, struct iatt *postbuf,
++ dict_t *xdata) {
++ shard_local_t *local = NULL;
+
+- frame->local = local;
++ local = frame->local;
+
+- local->handler = shard_post_setattr_handler;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
+- local->fop = GF_FOP_SETATTR;
+- loc_copy(&local->loc, loc);
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto unwind;
++ }
+
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+- local, err);
++ local->prebuf = *prebuf;
++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++ local->op_ret = -1;
++ local->op_errno = EINVAL;
++ goto unwind;
++ }
++ if (xdata)
++ local->xattr_rsp = dict_ref(xdata);
++ local->postbuf = *postbuf;
++ local->postbuf.ia_size = local->prebuf.ia_size;
++ local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+
+- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+- local->xattr_req);
+- return 0;
+-err:
+- shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
+- return 0;
++unwind:
++ local->handler(frame, this);
++ return 0;
+ }
+
+-int
+-shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+- return 0;
+- }
++int shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ struct iatt *stbuf, int32_t valid, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block size from inode ctx of %s",
+- uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++ return 0;
++ }
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+- return 0;
+- }
++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block size from inode ctx of %s",
++ uuid_utoa(loc->inode->gfid));
++ goto err;
++ }
+
+- if (!this->itable)
+- this->itable = fd->inode->table;
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++ return 0;
++ }
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- frame->local = local;
++ frame->local = local;
+
+- local->handler = shard_post_setattr_handler;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
+- local->fop = GF_FOP_FSETATTR;
+- local->fd = fd_ref(fd);
++ local->handler = shard_post_setattr_handler;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++ local->fop = GF_FOP_SETATTR;
++ loc_copy(&local->loc, loc);
+
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+- local, err);
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
++ local, err);
+
+- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
+- local->xattr_req);
+- return 0;
++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
++ local->xattr_req);
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
+- glusterfs_fop_t fop, fd_t *fd,
+- struct iovec *vector, int32_t count,
+- off_t offset, uint32_t flags, size_t len,
+- struct iobref *iobref, dict_t *xdata)
+-{
+- int ret = 0;
+- int i = 0;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
++int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ struct iatt *stbuf, int32_t valid, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size for %s from its inode ctx",
+- uuid_utoa(fd->inode->gfid));
+- goto out;
+- }
++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++ return 0;
++ }
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- /* block_size = 0 means that the file was created before
+- * sharding was enabled on the volume.
+- */
+- switch (fop) {
+- case GF_FOP_WRITE:
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->writev, fd, vector,
+- count, offset, flags, iobref, xdata);
+- break;
+- case GF_FOP_FALLOCATE:
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fallocate, fd, flags,
+- offset, len, xdata);
+- break;
+- case GF_FOP_ZEROFILL:
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->zerofill, fd, offset,
+- len, xdata);
+- break;
+- case GF_FOP_DISCARD:
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->discard, fd, offset,
+- len, xdata);
+- break;
+- default:
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+- "Invalid fop id = %d", fop);
+- break;
+- }
+- return 0;
+- }
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block size from inode ctx of %s",
++ uuid_utoa(fd->inode->gfid));
++ goto err;
++ }
+
+- if (!this->itable)
+- this->itable = fd->inode->table;
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++ return 0;
++ }
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto out;
++ if (!this->itable)
++ this->itable = fd->inode->table;
+
+- frame->local = local;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- ret = syncbarrier_init(&local->barrier);
+- if (ret)
+- goto out;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto out;
+-
+- if (vector) {
+- local->vector = iov_dup(vector, count);
+- if (!local->vector)
+- goto out;
+- for (i = 0; i < count; i++)
+- local->total_size += vector[i].iov_len;
+- local->count = count;
+- } else {
+- local->total_size = len;
+- }
++ frame->local = local;
+
+- local->fop = fop;
+- local->offset = offset;
+- local->flags = flags;
+- if (iobref)
+- local->iobref = iobref_ref(iobref);
+- local->fd = fd_ref(fd);
+- local->block_size = block_size;
+- local->resolver_base_inode = local->fd->inode;
+- GF_ATOMIC_INIT(local->delta_blocks, 0);
++ local->handler = shard_post_setattr_handler;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++ local->fop = GF_FOP_FSETATTR;
++ local->fd = fd_ref(fd);
+
+- local->loc.inode = inode_ref(fd->inode);
+- gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++ local, err);
+
+- shard_lookup_base_file(frame, this, &local->loc,
+- shard_common_inode_write_post_lookup_handler);
+- return 0;
++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
++ local->xattr_req);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
++ return 0;
++}
++
++int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
++ glusterfs_fop_t fop, fd_t *fd,
++ struct iovec *vector, int32_t count,
++ off_t offset, uint32_t flags, size_t len,
++ struct iobref *iobref, dict_t *xdata) {
++ int ret = 0;
++ int i = 0;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
++
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size for %s from its inode ctx",
++ uuid_utoa(fd->inode->gfid));
++ goto out;
++ }
++
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ /* block_size = 0 means that the file was created before
++ * sharding was enabled on the volume.
++ */
++ switch (fop) {
++ case GF_FOP_WRITE:
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
++ fd, vector, count, offset, flags, iobref, xdata);
++ break;
++ case GF_FOP_FALLOCATE:
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fallocate, fd, flags, offset,
++ len, xdata);
++ break;
++ case GF_FOP_ZEROFILL:
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
++ xdata);
++ break;
++ case GF_FOP_DISCARD:
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
++ break;
++ default:
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++ "Invalid fop id = %d", fop);
++ break;
++ }
++ return 0;
++ }
++
++ if (!this->itable)
++ this->itable = fd->inode->table;
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto out;
++
++ frame->local = local;
++
++ ret = syncbarrier_init(&local->barrier);
++ if (ret)
++ goto out;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto out;
++
++ if (vector) {
++ local->vector = iov_dup(vector, count);
++ if (!local->vector)
++ goto out;
++ for (i = 0; i < count; i++)
++ local->total_size += vector[i].iov_len;
++ local->count = count;
++ } else {
++ local->total_size = len;
++ }
++
++ local->fop = fop;
++ local->offset = offset;
++ local->flags = flags;
++ if (iobref)
++ local->iobref = iobref_ref(iobref);
++ local->fd = fd_ref(fd);
++ local->block_size = block_size;
++ local->resolver_base_inode = local->fd->inode;
++ GF_ATOMIC_INIT(local->delta_blocks, 0);
++
++ local->loc.inode = inode_ref(fd->inode);
++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_common_inode_write_post_lookup_handler);
++ return 0;
+ out:
+- shard_common_failure_unwind(fop, frame, -1, ENOMEM);
+- return 0;
++ shard_common_failure_unwind(fop, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+- struct iobref *iobref, dict_t *xdata)
+-{
+- shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count,
+- offset, flags, 0, iobref, xdata);
+- return 0;
++int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ struct iovec *vector, int32_t count, off_t offset,
++ uint32_t flags, struct iobref *iobref, dict_t *xdata) {
++ shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count,
++ offset, flags, 0, iobref, xdata);
++ return 0;
+ }
+
+-int
+-shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+-{
+- if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) &&
+- (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
+- goto out;
++int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ int32_t keep_size, off_t offset, size_t len,
++ dict_t *xdata) {
++ if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) &&
++ (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
++ goto out;
+
+- shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0,
+- offset, keep_size, len, NULL, xdata);
+- return 0;
++ shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0,
++ offset, keep_size, len, NULL, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP);
++ return 0;
+ }
+
+-int
+-shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+- off_t len, dict_t *xdata)
+-{
+- shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0,
+- offset, 0, len, NULL, xdata);
+- return 0;
++int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++ off_t len, dict_t *xdata) {
++ shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0,
++ offset, 0, len, NULL, xdata);
++ return 0;
+ }
+
+-int
+-shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+- size_t len, dict_t *xdata)
+-{
+- shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0,
+- offset, 0, len, NULL, xdata);
+- return 0;
++int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++ size_t len, dict_t *xdata) {
++ shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0,
++ offset, 0, len, NULL, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+- gf_seek_what_t what, dict_t *xdata)
+-{
+- /* TBD */
+- gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED,
+- "seek called on %s.", uuid_utoa(fd->inode->gfid));
+- shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP);
+- return 0;
++int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++ gf_seek_what_t what, dict_t *xdata) {
++ /* TBD */
++ gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED,
++ "seek called on %s.", uuid_utoa(fd->inode->gfid));
++ shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP);
++ return 0;
+ }
+
+-int32_t
+-mem_acct_init(xlator_t *this)
+-{
+- int ret = -1;
+-
+- if (!this)
+- return ret;
++int32_t mem_acct_init(xlator_t *this) {
++ int ret = -1;
+
+- ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1);
++ if (!this)
++ return ret;
+
+- if (ret != 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED,
+- "Memory accounting init"
+- "failed");
+- return ret;
+- }
++ ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1);
+
++ if (ret != 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED,
++ "Memory accounting init"
++ "failed");
+ return ret;
++ }
++
++ return ret;
+ }
+
+-int
+-init(xlator_t *this)
+-{
+- int ret = -1;
+- shard_priv_t *priv = NULL;
++int init(xlator_t *this) {
++ int ret = -1;
++ shard_priv_t *priv = NULL;
+
+- if (!this) {
+- gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS,
+- "this is NULL. init() failed");
+- return -1;
+- }
++ if (!this) {
++ gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS,
++ "this is NULL. init() failed");
++ return -1;
++ }
+
+- if (!this->parents) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+- "Dangling volume. Check volfile");
+- goto out;
+- }
++ if (!this->parents) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
++ "Dangling volume. Check volfile");
++ goto out;
++ }
+
+- if (!this->children || this->children->next) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+- "shard not configured with exactly one sub-volume. "
+- "Check volfile");
+- goto out;
+- }
++ if (!this->children || this->children->next) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
++ "shard not configured with exactly one sub-volume. "
++ "Check volfile");
++ goto out;
++ }
+
+- priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t);
+- if (!priv)
+- goto out;
++ priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t);
++ if (!priv)
++ goto out;
+
+- GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out);
++ GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out);
+
+- GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out);
++ GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out);
+
+- GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out);
++ GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out);
+
+- this->local_pool = mem_pool_new(shard_local_t, 128);
+- if (!this->local_pool) {
+- ret = -1;
+- goto out;
+- }
+- gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid);
+- gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid);
++ this->local_pool = mem_pool_new(shard_local_t, 128);
++ if (!this->local_pool) {
++ ret = -1;
++ goto out;
++ }
++ gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid);
++ gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid);
+
+- this->private = priv;
+- LOCK_INIT(&priv->lock);
+- INIT_LIST_HEAD(&priv->ilist_head);
+- ret = 0;
++ this->private = priv;
++ LOCK_INIT(&priv->lock);
++ INIT_LIST_HEAD(&priv->ilist_head);
++ ret = 0;
+ out:
+- if (ret) {
+- GF_FREE(priv);
+- mem_pool_destroy(this->local_pool);
+- }
++ if (ret) {
++ GF_FREE(priv);
++ mem_pool_destroy(this->local_pool);
++ }
+
+- return ret;
++ return ret;
+ }
+
+-void
+-fini(xlator_t *this)
+-{
+- shard_priv_t *priv = NULL;
++void fini(xlator_t *this) {
++ shard_priv_t *priv = NULL;
+
+- GF_VALIDATE_OR_GOTO("shard", this, out);
++ GF_VALIDATE_OR_GOTO("shard", this, out);
+
+- mem_pool_destroy(this->local_pool);
+- this->local_pool = NULL;
++ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
+
+- priv = this->private;
+- if (!priv)
+- goto out;
++ priv = this->private;
++ if (!priv)
++ goto out;
+
+- this->private = NULL;
+- LOCK_DESTROY(&priv->lock);
+- GF_FREE(priv);
++ this->private = NULL;
++ LOCK_DESTROY(&priv->lock);
++ GF_FREE(priv);
+
+ out:
+- return;
++ return;
+ }
+
+-int
+-reconfigure(xlator_t *this, dict_t *options)
+-{
+- int ret = -1;
+- shard_priv_t *priv = NULL;
++int reconfigure(xlator_t *this, dict_t *options) {
++ int ret = -1;
++ shard_priv_t *priv = NULL;
+
+- priv = this->private;
++ priv = this->private;
+
+- GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out);
++ GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out);
+
+- GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options,
+- uint32, out);
+- ret = 0;
++ GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32,
++ out);
++ ret = 0;
+
+ out:
+- return ret;
++ return ret;
+ }
+
+-int
+-shard_forget(xlator_t *this, inode_t *inode)
+-{
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx = NULL;
+- shard_priv_t *priv = NULL;
++int shard_forget(xlator_t *this, inode_t *inode) {
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
++ shard_priv_t *priv = NULL;
+
+- priv = this->private;
+- if (!priv)
+- return 0;
++ priv = this->private;
++ if (!priv)
++ return 0;
+
+- inode_ctx_del(inode, this, &ctx_uint);
+- if (!ctx_uint)
+- return 0;
++ inode_ctx_del(inode, this, &ctx_uint);
++ if (!ctx_uint)
++ return 0;
+
+- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- /* When LRU limit reaches inode will be forcefully removed from the
+- * table, inode needs to be removed from LRU of shard as well.
+- */
+- if (!list_empty(&ctx->ilist)) {
+- LOCK(&priv->lock);
+- {
+- list_del_init(&ctx->ilist);
+- priv->inode_count--;
+- }
+- UNLOCK(&priv->lock);
++ /* When LRU limit reaches inode will be forcefully removed from the
++ * table, inode needs to be removed from LRU of shard as well.
++ */
++ if (!list_empty(&ctx->ilist)) {
++ LOCK(&priv->lock);
++ {
++ list_del_init(&ctx->ilist);
++ priv->inode_count--;
+ }
+- GF_FREE(ctx);
++ UNLOCK(&priv->lock);
++ }
++ GF_FREE(ctx);
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_release(xlator_t *this, fd_t *fd)
+-{
+- /* TBD */
+- return 0;
++int shard_release(xlator_t *this, fd_t *fd) {
++ /* TBD */
++ return 0;
+ }
+
+-int
+-shard_priv_dump(xlator_t *this)
+-{
+- shard_priv_t *priv = NULL;
+- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+- 0,
+- };
+- char *str = NULL;
++int shard_priv_dump(xlator_t *this) {
++ shard_priv_t *priv = NULL;
++ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
++ 0,
++ };
++ char *str = NULL;
+
+- priv = this->private;
++ priv = this->private;
+
+- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+- gf_proc_dump_add_section("%s", key_prefix);
+- str = gf_uint64_2human_readable(priv->block_size);
+- gf_proc_dump_write("shard-block-size", "%s", str);
+- gf_proc_dump_write("inode-count", "%d", priv->inode_count);
+- gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head);
+- gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit);
++ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
++ gf_proc_dump_add_section("%s", key_prefix);
++ str = gf_uint64_2human_readable(priv->block_size);
++ gf_proc_dump_write("shard-block-size", "%s", str);
++ gf_proc_dump_write("inode-count", "%d", priv->inode_count);
++ gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head);
++ gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit);
+
+- GF_FREE(str);
++ GF_FREE(str);
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_releasedir(xlator_t *this, fd_t *fd)
+-{
+- return 0;
+-}
++int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; }
+
+ struct xlator_fops fops = {
+ .lookup = shard_lookup,
+--
+1.8.3.1
+
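The reformatted shard functions above all share one error-handling skeleton: allocate a zeroed `local` from the translator's mem-pool, attach it to the frame, take references on the fd (or loc) and the xattr dict, and funnel every failure to a single err: label that unwinds with ENOMEM. A minimal sketch of that skeleton follows, built only from calls visible in the hunk itself; it is illustrative and not compilable outside the glusterfs tree, and my_fsetattr/my_post_handler are hypothetical names:

    /* Sketch of the per-fop setup pattern used by shard_fsetattr() above;
     * my_fsetattr and my_post_handler are hypothetical stand-ins. */
    int
    my_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
                struct iatt *stbuf, int32_t valid, dict_t *xdata)
    {
        shard_local_t *local = mem_get0(this->local_pool); /* zeroed local */

        if (!local)
            goto err;
        frame->local = local;               /* released with the frame */

        local->handler = my_post_handler;
        local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
        if (!local->xattr_req)
            goto err;
        local->fop = GF_FOP_FSETATTR;
        local->fd = fd_ref(fd);             /* hold the fd across the wind */

        STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
                   FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
                   local->xattr_req);
        return 0;
    err:
        /* single unwind path: every failure above is reported as ENOMEM */
        shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
        return 0;
    }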
diff --git a/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch b/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch
new file mode 100644
index 0000000..df971b8
--- /dev/null
+++ b/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch
@@ -0,0 +1,162 @@
+From 562283ad34021bbf4fc540127ee7072d5152d34d Mon Sep 17 00:00:00 2001
+From: Yuval Turgeman <yturgema@redhat.com>
+Date: Wed, 24 Jul 2019 16:42:22 +0300
+Subject: [PATCH 336/336] spec: check and return exit code in rpm scripts
+
+lua's error() call expects a value as its second argument, and this is
+taken from the `val` variable, while `ok` is a boolean. This causes
+the rpm scripts to fail on:
+
+bad argument #2 to 'error' (number expected, got boolean)
+
+Label: DOWNSTREAM ONLY
+BUG: 1768786
+Change-Id: I9c6b1f62ebf15dbc93196d018bc1fd628b36fc33
+>Signed-off-by: Yuval Turgeman <yturgema@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/186405
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 55 +++++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 33 insertions(+), 22 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 91180db..1b975b2 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1572,8 +1572,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+
+
+@@ -1606,8 +1607,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+
+
+@@ -1640,8 +1642,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+
+
+@@ -1674,8 +1677,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+
+
+@@ -1707,8 +1711,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+
+
+@@ -1740,8 +1745,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+
+
+@@ -1775,8 +1781,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+ %endif
+
+@@ -1810,8 +1817,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+
+
+@@ -1845,8 +1853,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+ %endif
+
+@@ -1881,8 +1890,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+ %endif
+
+@@ -1916,8 +1926,9 @@ fi
+ ]]
+
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+- error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++ error("Detected running glusterfs processes", rc)
+ end
+
+ %posttrans server
+--
+1.8.3.1
+
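The `rc = val or ok` line above papers over a Lua API change: in Lua 5.1 os.execute() returns the raw status number (so `ok` holds it and `val` is nil), while in Lua 5.2+ it returns a boolean, a string ("exit" or "signal") and the numeric code in `val`, and error() needs a number as its second argument. The same decode-the-status discipline applies when shelling out from C, where system()'s raw return value must be unpacked with the wait-status macros; a small standalone sketch, not part of the spec file:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>

    /* Illustrative only: decode system()'s wait status with the POSIX
     * macros instead of treating the raw value as the exit code. */
    static int
    run_and_get_exit_code(const char *cmd)
    {
        int status = system(cmd);

        if (status == -1)
            return -1;                      /* could not run the shell */
        if (WIFEXITED(status))
            return WEXITSTATUS(status);     /* the child's real exit code */
        if (WIFSIGNALED(status))
            return 128 + WTERMSIG(status);  /* shell-style signal encoding */
        return -1;
    }

    int
    main(void)
    {
        /* 'true' exits 0, 'false' exits 1; demo commands only */
        printf("true  -> %d\n", run_and_get_exit_code("true"));
        printf("false -> %d\n", run_and_get_exit_code("false"));
        return 0;
    }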
diff --git a/0337-fuse-Set-limit-on-invalidate-queue-size.patch b/0337-fuse-Set-limit-on-invalidate-queue-size.patch
new file mode 100644
index 0000000..b18ef4f
--- /dev/null
+++ b/0337-fuse-Set-limit-on-invalidate-queue-size.patch
@@ -0,0 +1,455 @@
+From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Fri, 9 Aug 2019 14:34:22 +0530
+Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size
+
+If the glusterfs fuse client process is unable to
+process the invalidate requests quickly enough, the
+number of such requests grows large enough
+to use a significant amount of memory.
+We are now introducing another option to set an upper
+limit on these to prevent runaway memory usage.
+
+> Upstream https://review.gluster.org/23187
+> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788
+> Fixes: bz#1732717
+> Signed-off-by: N Balachandran <nbalacha@redhat.com>
+
+BUG: 1763208
+Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/187529
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ doc/mount.glusterfs.8 | 5 +++
+ glusterfsd/src/glusterfsd.c | 21 ++++++++++
+ glusterfsd/src/glusterfsd.h | 3 +-
+ libglusterfs/src/glusterfs/glusterfs.h | 1 +
+ libglusterfs/src/glusterfs/inode.h | 1 +
+ libglusterfs/src/inode.c | 31 +++++++++++----
+ xlators/mount/fuse/src/fuse-bridge.c | 60 ++++++++++++++++++++++-------
+ xlators/mount/fuse/src/fuse-bridge.h | 3 +-
+ xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++++
+ 9 files changed, 108 insertions(+), 24 deletions(-)
+
+diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
+index 286631b..b35b362 100644
+--- a/doc/mount.glusterfs.8
++++ b/doc/mount.glusterfs.8
+@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None]
+ Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
+ .TP
+ .TP
++\fBinvalidate-limit=\fRN
++Suspend fuse invalidations implied by 'lru-limit' if number of outstanding
++invalidations reaches N
++.TP
++.TP
+ \fBbackground-qlen=\fRN
+ Set fuse module's background queue length to N [default: 64]
+ .TP
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 5b5e996..0856471 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = {
+ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
+ "Set fuse module's limit for number of inodes kept in LRU list to N "
+ "[default: 131072]"},
++ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
++ "Suspend inode invalidations implied by 'lru-limit' if the number of "
++ "outstanding invalidations reaches N"},
+ {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
+ "Set fuse module's background queue length to N "
+ "[default: 64]"},
+@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
+ }
+ }
+
++ if (cmd_args->invalidate_limit >= 0) {
++ ret = dict_set_int32(options, "invalidate-limit",
++ cmd_args->invalidate_limit);
++ if (ret < 0) {
++ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
++ "invalidate-limit");
++ goto err;
++ }
++ }
++
+ if (cmd_args->background_qlen) {
+ ret = dict_set_int32(options, "background-qlen",
+ cmd_args->background_qlen);
+@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state)
+ argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
+ break;
+
++ case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
++ if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
++ break;
++
++ argp_failure(state, -1, 0, "unknown invalidate limit option %s",
++ arg);
++ break;
++
+ case ARGP_FUSE_BACKGROUND_QLEN_KEY:
+ if (!gf_string2int(arg, &cmd_args->background_qlen))
+ break;
+diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
+index fa55789..ee655f0 100644
+--- a/glusterfsd/src/glusterfsd.h
++++ b/glusterfsd/src/glusterfsd.h
+@@ -111,7 +111,8 @@ enum argp_option_keys {
+ ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
+ ARGP_FUSE_LRU_LIMIT_KEY = 190,
+ ARGP_FUSE_AUTO_INVAL_KEY = 191,
+- ARGP_BRICK_MUX_KEY = 192
++ ARGP_BRICK_MUX_KEY = 192,
++ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
+ };
+
+ struct _gfd_vol_top_priv {
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 79c93ae..3b594c0 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -541,6 +541,7 @@ struct _cmd_args {
+ int client_pid_set;
+ unsigned uid_map_root;
+ int32_t lru_limit;
++ int32_t invalidate_limit;
+ int background_qlen;
+ int congestion_threshold;
+ char *fuse_mountopts;
+diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
+index 52efdd8..4421c47 100644
+--- a/libglusterfs/src/glusterfs/inode.h
++++ b/libglusterfs/src/glusterfs/inode.h
+@@ -107,6 +107,7 @@ struct _inode {
+ struct list_head list; /* active/lru/purge */
+
+ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
++ bool in_invalidate_list; /* Set if inode is in table invalidate list */
+ bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
+ };
+
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 96ddea5..5331e93 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear)
+
+ this = THIS;
+
+- if (clear && inode->invalidate_sent) {
+- inode->invalidate_sent = false;
++ if (clear && inode->in_invalidate_list) {
++ inode->in_invalidate_list = false;
+ inode->table->invalidate_size--;
+ __inode_activate(inode);
+ }
+@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear)
+ inode->_ctx[index].ref--;
+ }
+
+- if (!inode->ref && !inode->invalidate_sent) {
++ if (!inode->ref && !inode->in_invalidate_list) {
+ inode->table->active_size--;
+
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate)
+ return inode;
+
+ if (!inode->ref) {
+- if (inode->invalidate_sent) {
+- inode->invalidate_sent = false;
++ if (inode->in_invalidate_list) {
++ inode->in_invalidate_list = false;
+ inode->table->invalidate_size--;
+ } else {
+ inode->table->lru_size--;
+ }
+ if (is_invalidate) {
+- inode->invalidate_sent = true;
++ inode->in_invalidate_list = true;
+ inode->table->invalidate_size++;
+ list_move_tail(&inode->list, &inode->table->invalidate);
+ } else {
+@@ -1609,6 +1609,7 @@ static int
+ inode_table_prune(inode_table_t *table)
+ {
+ int ret = 0;
++ int ret1 = 0;
+ struct list_head purge = {
+ 0,
+ };
+@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table)
+ /* check for valid inode with 'nlookup' */
+ nlookup = GF_ATOMIC_GET(entry->nlookup);
+ if (nlookup) {
++ if (entry->invalidate_sent) {
++ list_move_tail(&entry->list, &table->lru);
++ continue;
++ }
+ __inode_ref(entry, true);
+ tmp = entry;
+ break;
+@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table)
+ if (tmp) {
+ xlator_t *old_THIS = THIS;
+ THIS = table->invalidator_xl;
+- table->invalidator_fn(table->invalidator_xl, tmp);
++ ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
+ THIS = old_THIS;
+- inode_unref(tmp);
++ pthread_mutex_lock(&table->lock);
++ {
++ if (!ret1) {
++ tmp->invalidate_sent = true;
++ __inode_unref(tmp, false);
++ } else {
++ /* Move this back to the lru list*/
++ __inode_unref(tmp, true);
++ }
++ }
++ pthread_mutex_unlock(&table->lock);
+ }
+
+ /* Just so that if purge list is handled too, then clear it off */
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 1c946a2..8b2e7f0 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log;
+ void
+ fini(xlator_t *this_xl);
+
+-static void
++static int32_t
+ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
+
+ /*
+@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
+ #define send_fuse_obj(this, finh, obj) \
+ send_fuse_data(this, finh, obj, sizeof(*(obj)))
+
+-static void
++static int32_t
+ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
+ {
+ #if FUSE_KERNEL_MINOR_VERSION >= 11
+@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
+
+ priv = this->private;
+ if (!priv->reverse_fuse_thread_started)
+- return;
++ return -1;
++
++ if (priv->invalidate_limit &&
++ (priv->invalidate_count >= priv->invalidate_limit)) {
++ return -1;
++ }
+
+ inode = (inode_t *)(unsigned long)fuse_ino;
+ if (inode == NULL)
+- return;
++ return -1;
+
+ list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
+ {
+ node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
+ if (node == NULL)
+- break;
++ return -1;
+
+ INIT_LIST_HEAD(&node->next);
+
+@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
+ pthread_mutex_lock(&priv->invalidate_mutex);
+ {
+ list_add_tail(&node->next, &priv->invalidate_list);
++ priv->invalidate_count++;
+ pthread_cond_signal(&priv->invalidate_cond);
+ }
+ pthread_mutex_unlock(&priv->invalidate_mutex);
+ }
+
+ #endif
+- return;
++ return 0;
+ }
+
+ /*
+ * Send an inval inode notification to fuse. This causes an invalidation of the
+ * entire page cache mapping on the inode.
+ */
+-static void
++static int32_t
+ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+ {
+ #if FUSE_KERNEL_MINOR_VERSION >= 11
+@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+ priv = this->private;
+
+ if (!priv->reverse_fuse_thread_started)
+- return;
++ return -1;
++
++ if (priv->invalidate_limit &&
++ (priv->invalidate_count >= priv->invalidate_limit)) {
++ return -1;
++ }
+
+ inode = (inode_t *)(unsigned long)fuse_ino;
+ if (inode == NULL)
+- return;
++ return -1;
+
+ node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
+ if (node == NULL)
+- return;
++ return -1;
+
+ INIT_LIST_HEAD(&node->next);
+
+@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+ pthread_mutex_lock(&priv->invalidate_mutex);
+ {
+ list_add_tail(&node->next, &priv->invalidate_list);
++ priv->invalidate_count++;
+ pthread_cond_signal(&priv->invalidate_cond);
+ }
+ pthread_mutex_unlock(&priv->invalidate_mutex);
+@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+ "fuse_invalidate_inode not implemented on this system");
+ #endif
+- return;
++ return 0;
+ }
+
+ #if FUSE_KERNEL_MINOR_VERSION >= 11
+@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+ static int32_t
+ fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
+ {
+- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
+- return 0;
++ int32_t ret = 0;
++ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
++ return ret;
+ }
+ #endif
+
+@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
+ gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
+ finh->nodeid);
+ #if FUSE_KERNEL_MINOR_VERSION >= 11
+- fuse_invalidate_entry(this, finh->nodeid);
++ ret = fuse_invalidate_entry(this, finh->nodeid);
++ if (ret)
++ op_errno = EBUSY;
+ #endif
+ goto done;
+ }
+@@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data)
+ fuse_invalidate_node_t, next);
+
+ list_del_init(&node->next);
++ priv->invalidate_count--;
+ }
+ pthread_mutex_unlock(&priv->invalidate_mutex);
+
+@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data)
+ list_del_init(&node->next);
+ GF_FREE(node);
+ }
++ priv->invalidate_count = 0;
+ }
+ pthread_mutex_unlock(&priv->invalidate_mutex);
+
+@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this)
+ (int)private->timed_response_fuse_thread_started);
+ gf_proc_dump_write("reverse_thread_started", "%d",
+ (int)private->reverse_fuse_thread_started);
++ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
++ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
++ private->invalidate_count);
+ gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
+
+ return 0;
+@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl)
+
+ GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
+
++ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
++ cleanup_exit);
++
+ GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
+
+ GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
+@@ -6955,6 +6978,15 @@ struct volume_options options[] = {
+ "reaching this limit (0 means 'unlimited')",
+ },
+ {
++ .key = {"invalidate-limit"},
++ .type = GF_OPTION_TYPE_INT,
++ .default_value = "0",
++ .min = 0,
++ .description = "suspend invalidations as of 'lru-limit' if the number "
++ "of outstanding invalidations reaches this limit "
++ "(0 means 'unlimited')",
++ },
++ {
+ .key = {"auto-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
+index 697bd88..2311582 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.h
++++ b/xlators/mount/fuse/src/fuse-bridge.h
+@@ -139,7 +139,7 @@ struct fuse_private {
+ pthread_cond_t invalidate_cond;
+ pthread_mutex_t invalidate_mutex;
+ gf_boolean_t reverse_fuse_thread_started;
+-
++ uint64_t invalidate_count;
+ /* For communicating with separate mount thread. */
+ int status_pipe[2];
+
+@@ -191,6 +191,7 @@ struct fuse_private {
+
+ /* LRU Limit, if not set, default is 128k for now */
+ uint32_t lru_limit;
++ uint32_t invalidate_limit;
+ };
+ typedef struct fuse_private fuse_private_t;
+
+diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
+index cbde42d..61d7422 100755
+--- a/xlators/mount/fuse/utils/mount.glusterfs.in
++++ b/xlators/mount/fuse/utils/mount.glusterfs.in
+@@ -257,6 +257,10 @@ start_glusterfs ()
+ cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
+ fi
+
++ if [ -n "$invalidate_limit" ]; then
++ cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit");
++ fi
++
+ if [ -n "$bg_qlen" ]; then
+ cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
+ fi
+@@ -505,6 +509,9 @@ with_options()
+ "lru-limit")
+ lru_limit=$value
+ ;;
++ "invalidate-limit")
++ invalidate_limit=$value
++ ;;
+ "background-qlen")
+ bg_qlen=$value
+ ;;
+--
+1.8.3.1
+
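The invalidate-limit changes above turn the invalidation path into a bounded producer/consumer queue: producers bail out with -1 once invalidate_count reaches the configured limit (letting inode_table_prune() move the inode back to the lru list), and the notify thread decrements the counter under the same mutex that guards the list. A minimal standalone sketch of that back-pressure pattern with pthreads, using hypothetical names rather than the fuse-bridge code:

    #include <pthread.h>
    #include <stddef.h>

    /* Hypothetical bounded notification queue: enqueue refuses new work
     * once 'count' hits 'limit' (0 means unlimited) instead of blocking,
     * so the producer can fall back, much as fuse_invalidate_entry()
     * returns -1 when the queue is full. */
    struct node {
        struct node *next;
    };

    static struct node *head, *tail;
    static unsigned int count, limit = 1024;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    int
    enqueue(struct node *n)
    {
        int ret = -1;

        pthread_mutex_lock(&lock);
        if (limit == 0 || count < limit) {
            n->next = NULL;
            if (tail)
                tail->next = n;
            else
                head = n;
            tail = n;
            count++;                   /* queue length, guarded by lock */
            pthread_cond_signal(&cond);
            ret = 0;
        }
        pthread_mutex_unlock(&lock);
        return ret;                    /* -1: caller must back off */
    }

    struct node *
    dequeue(void)
    {
        struct node *n;

        pthread_mutex_lock(&lock);
        while (head == NULL)
            pthread_cond_wait(&cond, &lock);
        n = head;
        head = n->next;
        if (head == NULL)
            tail = NULL;
        count--;                       /* frees a slot for producers */
        pthread_mutex_unlock(&lock);
        return n;
    }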
diff --git a/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch b/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch
new file mode 100644
index 0000000..b108bd0
--- /dev/null
+++ b/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch
@@ -0,0 +1,83 @@
+From 6d2e12a53ef0bcbeea274c47537a0c707a3f7b1e Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Fri, 20 Sep 2019 13:30:42 +0530
+Subject: [PATCH 338/344] glusterfs/fuse: Reduce the default lru-limit value
+
+The current lru-limit value still uses memory for
+up to 128K inodes.
+Reduce the default value of lru-limit to 64K.
+
+> Upstream https://review.gluster.org/23461
+> Change-Id: Ica2dd4f8f5fde45cb5180d8f02c3d86114ac52b3
+> Fixes: bz#1753880
+> Signed-off-by: N Balachandran <nbalacha@redhat.com>
+> Signed-off-by: Csaba Henk <csaba@redhat.com>
+
+BUG: 1763208
+Change-Id: I04ab39b5278e702aacdceebfa5b63702b9f9703b
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/187535
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ doc/mount.glusterfs.8 | 2 +-
+ glusterfsd/src/glusterfsd.c | 2 +-
+ xlators/mount/fuse/src/fuse-bridge.c | 2 +-
+ xlators/mount/fuse/src/fuse-bridge.h | 2 +-
+ 4 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
+index b35b362..87a5669 100644
+--- a/doc/mount.glusterfs.8
++++ b/doc/mount.glusterfs.8
+@@ -123,7 +123,7 @@ Provide list of backup volfile servers in the following format [default: None]
+ .TP
+ .TP
+ \fBlru-limit=\fRN
+-Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
++Set fuse module's limit for number of inodes kept in LRU list to N [default: 65536]
+ .TP
+ .TP
+ \fBinvalidate-limit=\fRN
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 0856471..974fb88 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -211,7 +211,7 @@ static struct argp_option gf_options[] = {
+ "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"},
+ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
+ "Set fuse module's limit for number of inodes kept in LRU list to N "
+- "[default: 131072]"},
++ "[default: 65536]"},
+ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
+ "Suspend inode invalidations implied by 'lru-limit' if the number of "
+ "outstanding invalidations reaches N"},
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 8b2e7f0..ebe5c28 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -6972,7 +6972,7 @@ struct volume_options options[] = {
+ {
+ .key = {"lru-limit"},
+ .type = GF_OPTION_TYPE_INT,
+- .default_value = "131072",
++ .default_value = "65536",
+ .min = 0,
+ .description = "makes glusterfs invalidate kernel inodes after "
+ "reaching this limit (0 means 'unlimited')",
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
+index 2311582..cf4479c 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.h
++++ b/xlators/mount/fuse/src/fuse-bridge.h
+@@ -189,7 +189,7 @@ struct fuse_private {
+ gf_boolean_t flush_handle_interrupt;
+ gf_boolean_t fuse_auto_inval;
+
+- /* LRU Limit, if not set, default is 128k for now */
++ /* LRU Limit, if not set, default is 64k for now */
+ uint32_t lru_limit;
+ uint32_t invalidate_limit;
+ };
+--
+1.8.3.1
+
diff --git a/0339-geo-rep-fix-integer-config-validation.patch b/0339-geo-rep-fix-integer-config-validation.patch
new file mode 100644
index 0000000..45f3ede
--- /dev/null
+++ b/0339-geo-rep-fix-integer-config-validation.patch
@@ -0,0 +1,93 @@
+From 8b5b3b247a00515d3188453c27b0ba749e93d325 Mon Sep 17 00:00:00 2001
+From: Aravinda VK <avishwan@redhat.com>
+Date: Tue, 26 Mar 2019 13:20:13 +0530
+Subject: [PATCH 339/344] geo-rep: fix integer config validation
+
+ssh-port validation is mentioned as `validation=int` in the template
+`gsyncd.conf`, but this was not handled during geo-rep config set.
+
+upstream patch:
+ https://review.gluster.org/#/c/glusterfs/+/22418/
+Backport of:
+
+ >Fixes: bz#1692666
+ >Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6
+ >Signed-off-by: Aravinda VK <avishwan@redhat.com>
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1782162
+Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/187533
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/gsyncdconfig.py | 23 ++++++++++++++++++-----
+ tests/00-geo-rep/georep-basic-dr-rsync.t | 3 +++
+ 2 files changed, 21 insertions(+), 5 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
+index f823311..8848071 100644
+--- a/geo-replication/syncdaemon/gsyncdconfig.py
++++ b/geo-replication/syncdaemon/gsyncdconfig.py
+@@ -329,6 +329,9 @@ class Gconf(object):
+ if item["validation"] == "unixtime":
+ return validate_unixtime(value)
+
++ if item["validation"] == "int":
++ return validate_int(value)
++
+ return False
+
+ def _is_config_changed(self):
+@@ -381,6 +384,14 @@ def config_upgrade(config_file, ret):
+ config.write(configfile)
+
+
++def validate_int(value):
++ try:
++ _ = int(value)
++ return True
++ except ValueError:
++ return False
++
++
+ def validate_unixtime(value):
+ try:
+ y = datetime.fromtimestamp(int(value)).strftime("%Y")
+@@ -393,11 +404,13 @@ def validate_unixtime(value):
+
+
+ def validate_minmax(value, minval, maxval):
+- value = int(value)
+- minval = int(minval)
+- maxval = int(maxval)
+-
+- return value >= minval and value <= maxval
++ try:
++ value = int(value)
++ minval = int(minval)
++ maxval = int(maxval)
++ return value >= minval and value <= maxval
++ except ValueError:
++ return False
+
+
+ def validate_choice(value, allowed_values):
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
+index b432635..b6fbf18 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
+@@ -71,6 +71,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created"
+ #Config gluster-command-dir
+ TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
++#Config Set ssh-port to validate int validation
++TEST $GEOREP_CLI $master $slave config ssh-port 22
++
+ #Config gluster-command-dir
+ TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+--
+1.8.3.1
+
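validate_int() above is an all-or-nothing parse: the whole string must convert to an integer, otherwise the config value is rejected, and validate_minmax() now guards its own conversions the same way. The usual C counterpart is strtol() with an endptr and errno check, since atoi() silently returns 0 on garbage; a small illustrative sketch:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdlib.h>

    /* Illustrative strict integer validation: the whole string must
     * parse and the result must fit in a long, otherwise reject. */
    bool
    validate_int(const char *value, long *out)
    {
        char *end = NULL;
        long v;

        if (!value || !*value)
            return false;

        errno = 0;
        v = strtol(value, &end, 10);
        if (errno == ERANGE || *end != '\0')
            return false;          /* overflow or trailing garbage */

        if (out)
            *out = v;
        return true;
    }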
diff --git a/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch b/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch
new file mode 100644
index 0000000..54b2706
--- /dev/null
+++ b/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch
@@ -0,0 +1,46 @@
+From 0c996d6c40c625f8a0ee6be2c220c89aaf70c840 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Tue, 10 Dec 2019 08:35:23 +0530
+Subject: [PATCH 340/344] rpc: event_slot_alloc converted infinite loop after
+ reach slot_used to 1024
+
+Problem: Commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2 missed one condition
+         needed to come out of the loop, so once slot_used reaches 1024 the
+         loop becomes infinite
+
+Solution: Correct the code path to avoid the infinite loop
+
+> Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db
+> Fixes: bz#1781440
+> Credits: Xavi Hernandez <xhernandez@redhat.com>
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit 8030f9c0f092170ceb50cedf59b9c330022825b7)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23843/)
+
+Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db
+BUG: 1781444
+Credits: Xavi Hernandez <xhernandez@redhat.com>
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/187460
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ libglusterfs/src/event-epoll.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
+index 65f5efd..5afb2f2 100644
+--- a/libglusterfs/src/event-epoll.c
++++ b/libglusterfs/src/event-epoll.c
+@@ -92,7 +92,7 @@ retry:
+ while (i < EVENT_EPOLL_TABLES) {
+ switch (event_pool->slots_used[i]) {
+ case EVENT_EPOLL_SLOTS:
+- continue;
++ break;
+ case 0:
+ if (!event_pool->ereg[i]) {
+ table = __event_newtable(event_pool, i);
+--
+1.8.3.1
+
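The one-word fix above deserves a note: inside a switch statement, break exits only the switch, while continue belongs to the enclosing loop. The original continue therefore jumped back to the top of the while without ever reaching the index increment, so as soon as one table reported EVENT_EPOLL_SLOTS slots in use the scan spun forever. A stripped-down, runnable reproduction of the control-flow pitfall, with hypothetical constants rather than the event-epoll source:

    #include <stdio.h>

    #define TABLES 4
    #define SLOTS_FULL 1024

    int
    main(void)
    {
        int slots_used[TABLES] = {SLOTS_FULL, 3, 0, SLOTS_FULL};
        int i = 0;

        while (i < TABLES) {
            switch (slots_used[i]) {
                case SLOTS_FULL:
                    /* A 'continue' here would restart the while loop
                     * before the i++ below ever runs: an infinite loop
                     * as soon as one table is full. 'break' only leaves
                     * the switch, so the increment is reached. */
                    break;
                case 0:
                    printf("table %d is empty\n", i);
                    break;
                default:
                    printf("table %d has %d slots used\n", i, slots_used[i]);
                    break;
            }
            i++;  /* skipped by 'continue', reached after 'break' */
        }
        return 0;
    }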
diff --git a/0341-socket-fix-error-handling.patch b/0341-socket-fix-error-handling.patch
new file mode 100644
index 0000000..0eb68d1
--- /dev/null
+++ b/0341-socket-fix-error-handling.patch
@@ -0,0 +1,742 @@
+From 2c99b7db00a6238fd43053dd672c8ce519d8fd27 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Wed, 11 Dec 2019 18:21:14 +0100
+Subject: [PATCH 341/344] socket: fix error handling
+
+When __socket_proto_state_machine() detected a problem in the size of
+the request or it couldn't allocate an iobuf of the requested size, it
+returned -ENOMEM (-12). However the caller was expecting only -1 in
+case of error. For this reason the error passes undetected initially,
+adding back the socket to the epoll object. On further processing,
+however, the error is finally detected and the connection terminated.
+Meanwhile, another thread could receive a poll_in event from the same
+connection, which could cause races with the connection destruction.
+When this happened, the process crashed.
+
+To fix this, all error detection conditions have been hardened to be
+more strict on what is valid and what not. Also, we don't return
+-ENOMEM anymore. We always return -1 in case of error.
+
+An additional change has been done to prevent destruction of the
+transport object while it may still be needed.
+
+Upstream patch:
+> Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23861
+> Fixes: bz#1782495
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d
+BUG: 1779696
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/187689
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
+---
+ rpc/rpc-transport/socket/src/socket.c | 173 ++++++++++++++++++----------------
+ 1 file changed, 90 insertions(+), 83 deletions(-)
+
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index bf2fa71..f54ca83 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -173,7 +173,7 @@ ssl_setup_connection_params(rpc_transport_t *this);
+ \
+ ret = __socket_readv(this, in->pending_vector, 1, &in->pending_vector, \
+ &in->pending_count, &bytes_read); \
+- if (ret == -1) \
++ if (ret < 0) \
+ break; \
+ __socket_proto_update_priv_after_read(priv, ret, bytes_read); \
+ }
+@@ -739,7 +739,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
+ ret = sys_writev(sock, opvector, IOV_MIN(opcount));
+ }
+
+- if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
++ if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) {
+ /* done for now */
+ break;
+ } else if (ret > 0)
+@@ -754,7 +754,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
+ errno = ENODATA;
+ ret = -1;
+ }
+- if (ret == -1 && errno == EAGAIN) {
++ if ((ret < 0) && (errno == EAGAIN)) {
+ /* done for now */
+ break;
+ } else if (ret > 0)
+@@ -770,7 +770,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
+ errno = ENOTCONN;
+ break;
+ }
+- if (ret == -1) {
++ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+
+@@ -907,7 +907,7 @@ __socket_disconnect(rpc_transport_t *this)
+ gf_log(this->name, GF_LOG_TRACE, "disconnecting %p, sock=%d", this,
+ priv->sock);
+
+- if (priv->sock != -1) {
++ if (priv->sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_TRACE,
+ "tearing down socket connection");
+ ret = __socket_teardown_connection(this);
+@@ -942,7 +942,7 @@ __socket_server_bind(rpc_transport_t *this)
+
+ ret = setsockopt(priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setsockopt() for SO_REUSEADDR failed (%s)", strerror(errno));
+ }
+@@ -955,7 +955,7 @@ __socket_server_bind(rpc_transport_t *this)
+ if (reuse_check_sock >= 0) {
+ ret = connect(reuse_check_sock, SA(&unix_addr),
+ this->myinfo.sockaddr_len);
+- if ((ret == -1) && (ECONNREFUSED == errno)) {
++ if ((ret != 0) && (ECONNREFUSED == errno)) {
+ sys_unlink(((struct sockaddr_un *)&unix_addr)->sun_path);
+ }
+ gf_log(this->name, GF_LOG_INFO,
+@@ -967,7 +967,7 @@ __socket_server_bind(rpc_transport_t *this)
+ ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len);
+
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
+ this->myinfo.identifier, strerror(errno));
+ if (errno == EADDRINUSE) {
+@@ -976,7 +976,7 @@ __socket_server_bind(rpc_transport_t *this)
+ }
+ if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+ if (getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+- &this->myinfo.sockaddr_len) == -1) {
++ &this->myinfo.sockaddr_len) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on (%d) failed (%s)", priv->sock,
+ strerror(errno));
+@@ -1004,7 +1004,7 @@ __socket_nonblock(int fd)
+
+ flags = fcntl(fd, F_GETFL);
+
+- if (flags != -1)
++ if (flags >= 0)
+ ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+
+ return ret;
+@@ -1034,7 +1034,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ #endif
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep alive option on socket %d", fd);
+ goto err;
+@@ -1051,7 +1051,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepaliveintvl,
+ sizeof(keepaliveintvl));
+ #endif
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep alive interval on socket %d", fd);
+ goto err;
+@@ -1062,7 +1062,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepaliveidle,
+ sizeof(keepaliveidle));
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep idle %d on socket %d, %s", keepaliveidle, fd,
+ strerror(errno));
+@@ -1070,7 +1070,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ }
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepaliveintvl,
+ sizeof(keepaliveintvl));
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep interval %d on socket %d, %s",
+ keepaliveintvl, fd, strerror(errno));
+@@ -1082,7 +1082,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ goto done;
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_ms,
+ sizeof(timeout_ms));
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set "
+ "TCP_USER_TIMEOUT %d on socket %d, %s",
+@@ -1093,7 +1093,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ #if defined(TCP_KEEPCNT)
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepalivecnt,
+ sizeof(keepalivecnt));
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set "
+ "TCP_KEEPCNT %d on socket %d, %s",
+@@ -1366,7 +1366,7 @@ socket_event_poll_err(rpc_transport_t *this, int gen, int idx)
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+- if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock != -1)) {
++ if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock >= 0)) {
+ __socket_ioq_flush(this);
+ __socket_reset(this);
+ socket_closed = _gf_true;
+@@ -1405,7 +1405,7 @@ socket_event_poll_out(rpc_transport_t *this)
+ if (priv->connected == 1) {
+ ret = __socket_ioq_churn(this);
+
+- if (ret == -1) {
++ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "__socket_ioq_churn returned -1; "
+ "disconnecting socket");
+@@ -1463,7 +1463,7 @@ __socket_read_simple_msg(rpc_transport_t *this)
+ &bytes_read);
+ }
+
+- if (ret == -1) {
++ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "reading from socket failed. Error (%s), "
+ "peer (%s)",
+@@ -1661,8 +1661,8 @@ __socket_read_vectored_request(rpc_transport_t *this,
+
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+
+- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+- RPC_LASTFRAG(in->fraghdr))) {
++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++ RPC_LASTFRAG(in->fraghdr))) {
+ request->vector_state = SP_STATE_VECTORED_REQUEST_INIT;
+ in->payload_vector.iov_len = ((unsigned long)frag->fragcurrent -
+ (unsigned long)
+@@ -1739,8 +1739,8 @@ __socket_read_request(rpc_transport_t *this)
+
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+
+- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+- (RPC_LASTFRAG(in->fraghdr)))) {
++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++ (RPC_LASTFRAG(in->fraghdr)))) {
+ request->header_state = SP_STATE_REQUEST_HEADER_INIT;
+ }
+
+@@ -1870,8 +1870,8 @@ __socket_read_accepted_successful_reply(rpc_transport_t *this)
+ /* now read the entire remaining msg into new iobuf */
+ ret = __socket_read_simple_msg(this);
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+- RPC_LASTFRAG(in->fraghdr))) {
++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++ RPC_LASTFRAG(in->fraghdr))) {
+ frag->call_body.reply.accepted_success_state =
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
+ }
+@@ -2003,8 +2003,8 @@ __socket_read_accepted_successful_reply_v2(rpc_transport_t *this)
+ /* now read the entire remaining msg into new iobuf */
+ ret = __socket_read_simple_msg(this);
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+- RPC_LASTFRAG(in->fraghdr))) {
++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++ RPC_LASTFRAG(in->fraghdr))) {
+ frag->call_body.reply.accepted_success_state =
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
+ }
+@@ -2103,8 +2103,8 @@ __socket_read_accepted_reply(rpc_transport_t *this)
+
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+
+- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+- (RPC_LASTFRAG(in->fraghdr)))) {
++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++ (RPC_LASTFRAG(in->fraghdr)))) {
+ frag->call_body.reply
+ .accepted_state = SP_STATE_ACCEPTED_REPLY_INIT;
+ }
+@@ -2169,8 +2169,8 @@ __socket_read_vectored_reply(rpc_transport_t *this)
+
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+
+- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+- (RPC_LASTFRAG(in->fraghdr)))) {
++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++ (RPC_LASTFRAG(in->fraghdr)))) {
+ frag->call_body.reply
+ .status_state = SP_STATE_VECTORED_REPLY_STATUS_INIT;
+ in->payload_vector.iov_len = (unsigned long)frag->fragcurrent -
+@@ -2237,7 +2237,7 @@ __socket_read_reply(rpc_transport_t *this)
+ /* Transition back to externally visible state. */
+ frag->state = SP_STATE_READ_MSGTYPE;
+
+- if (ret == -1) {
++ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "notify for event MAP_XID failed for %s",
+ this->peerinfo.identifier);
+@@ -2315,8 +2315,8 @@ __socket_read_frag(rpc_transport_t *this)
+
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+
+- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+- (RPC_LASTFRAG(in->fraghdr)))) {
++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++ (RPC_LASTFRAG(in->fraghdr)))) {
+ /* frag->state = SP_STATE_NADA; */
+ frag->state = SP_STATE_RPCFRAG_INIT;
+ }
+@@ -2400,7 +2400,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
+ ret = __socket_readv(this, in->pending_vector, 1,
+ &in->pending_vector, &in->pending_count,
+ NULL);
+- if (ret == -1)
++ if (ret < 0)
+ goto out;
+
+ if (ret > 0) {
+@@ -2422,7 +2422,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
+ in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr);
+
+ if (in->total_bytes_read >= GF_UNIT_GB) {
+- ret = -ENOMEM;
++ ret = -1;
+ goto out;
+ }
+
+@@ -2430,7 +2430,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
+ this->ctx->iobuf_pool,
+ (in->total_bytes_read + sizeof(in->fraghdr)));
+ if (!iobuf) {
+- ret = -ENOMEM;
++ ret = -1;
+ goto out;
+ }
+
+@@ -2457,7 +2457,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
+ case SP_STATE_READING_FRAG:
+ ret = __socket_read_frag(this);
+
+- if ((ret == -1) ||
++ if ((ret < 0) ||
+ (frag->bytes_read != RPC_FRAGSIZE(in->fraghdr))) {
+ goto out;
+ }
+@@ -2575,7 +2575,7 @@ socket_event_poll_in(rpc_transport_t *this, gf_boolean_t notify_handled)
+ pthread_mutex_unlock(&priv->notify.lock);
+ }
+
+- if (notify_handled && (ret != -1))
++ if (notify_handled && (ret >= 0))
+ event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen);
+
+ if (pollin) {
+@@ -2618,10 +2618,10 @@ socket_connect_finish(rpc_transport_t *this)
+
+ ret = __socket_connect_finish(priv->sock);
+
+- if (ret == -1 && errno == EINPROGRESS)
++ if ((ret < 0) && (errno == EINPROGRESS))
+ ret = 1;
+
+- if (ret == -1 && errno != EINPROGRESS) {
++ if ((ret < 0) && (errno != EINPROGRESS)) {
+ if (!priv->connect_finish_log) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "connection to %s failed (%s); "
+@@ -2640,7 +2640,7 @@ socket_connect_finish(rpc_transport_t *this)
+
+ ret = getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len);
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on (%d) failed (%s) - "
+ "disconnecting socket",
+@@ -2924,6 +2924,13 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ return;
+ }
+
++ /* At this point we are sure no other thread is using the transport because
++ * we cannot receive more events until we call gf_event_handled(). However
++ * this function may call gf_event_handled() in some cases. When this is
++ * done, the transport may be destroyed at any moment if another thread
++ * handled an error event. To prevent that we take a reference here. */
++ rpc_transport_ref(this);
++
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO("socket", this->xl, out);
+@@ -2960,7 +2967,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ if (ret > 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) returning to wait on socket", priv->sock);
+- return;
++ goto out;
+ }
+ } else {
+ char *sock_type = (priv->is_server ? "Server" : "Client");
+@@ -3015,7 +3022,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ }
+
+ out:
+- return;
++ rpc_transport_unref(this);
+ }
+
+ static void
+@@ -3074,7 +3081,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+
+ event_handled(ctx->event_pool, fd, idx, gen);
+
+- if (new_sock == -1) {
++ if (new_sock < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "accept on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto out;
+@@ -3082,7 +3089,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+
+ if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) {
+ ret = __socket_nodelay(new_sock);
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "setsockopt() failed for "
+ "NODELAY (%s)",
+@@ -3094,7 +3101,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ ret = __socket_keepalive(new_sock, new_sockaddr.ss_family,
+ priv->keepaliveintvl, priv->keepaliveidle,
+ priv->keepalivecnt, priv->timeout);
+- if (ret == -1)
++ if (ret != 0)
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set keep-alive: %s", strerror(errno));
+ }
+@@ -3110,7 +3117,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ }
+
+ ret = pthread_mutex_init(&new_trans->lock, NULL);
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "pthread_mutex_init() failed: %s; closing newly accepted "
+ "socket %d",
+@@ -3130,7 +3137,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+
+ ret = getsockname(new_sock, SA(&new_trans->myinfo.sockaddr),
+ &new_trans->myinfo.sockaddr_len);
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on socket %d "
+ "failed (errno:%s); closing newly accepted socket",
+@@ -3237,7 +3244,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ */
+ ret = rpc_transport_notify(this, RPC_TRANSPORT_ACCEPT, new_trans);
+
+- if (ret != -1) {
++ if (ret >= 0) {
+ new_priv->idx = event_register(
+ ctx->event_pool, new_sock, socket_event_handler, new_trans,
+ 1, 0, new_trans->notify_poller_death);
+@@ -3275,7 +3282,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ rpc_transport_unref(new_trans);
+ }
+
+- if (ret == -1) {
++ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "closing newly accepted socket");
+ sys_close(new_sock);
+ /* this unref is to actually cause the destruction of
+@@ -3406,7 +3413,7 @@ socket_connect(rpc_transport_t *this, int port)
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+- if (priv->sock != -1) {
++ if (priv->sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_TRACE,
+ "connect () called on transport "
+ "already connected");
+@@ -3420,7 +3427,7 @@ socket_connect(rpc_transport_t *this, int port)
+
+ ret = socket_client_get_remote_sockaddr(this, &sock_union.sa,
+ &sockaddr_len, &sa_family);
+- if (ret == -1) {
++ if (ret < 0) {
+ /* logged inside client_get_remote_sockaddr */
+ goto unlock;
+ }
+@@ -3439,7 +3446,7 @@ socket_connect(rpc_transport_t *this, int port)
+ this->peerinfo.sockaddr_len = sockaddr_len;
+
+ priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
+- if (priv->sock == -1) {
++ if (priv->sock < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
+ strerror(errno));
+ ret = -1;
+@@ -3451,7 +3458,7 @@ socket_connect(rpc_transport_t *this, int port)
+ */
+ if (priv->windowsize != 0) {
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
+- sizeof(priv->windowsize)) < 0) {
++ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting receive window "
+ "size failed: %d: %d: %s",
+@@ -3459,7 +3466,7 @@ socket_connect(rpc_transport_t *this, int port)
+ }
+
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
+- sizeof(priv->windowsize)) < 0) {
++ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting send window size "
+ "failed: %d: %d: %s",
+@@ -3484,7 +3491,7 @@ socket_connect(rpc_transport_t *this, int port)
+ if (priv->nodelay && (sa_family != AF_UNIX)) {
+ ret = __socket_nodelay(priv->sock);
+
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "NODELAY on %d failed (%s)",
+ priv->sock, strerror(errno));
+ }
+@@ -3494,7 +3501,7 @@ socket_connect(rpc_transport_t *this, int port)
+ ret = __socket_keepalive(priv->sock, sa_family,
+ priv->keepaliveintvl, priv->keepaliveidle,
+ priv->keepalivecnt, priv->timeout);
+- if (ret == -1)
++ if (ret != 0)
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set keep-alive: %s",
+ strerror(errno));
+ }
+@@ -3516,7 +3523,7 @@ socket_connect(rpc_transport_t *this, int port)
+
+ ret = client_bind(this, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len, priv->sock);
+- if (ret == -1) {
++ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "client bind failed: %s",
+ strerror(errno));
+ goto handler;
+@@ -3525,7 +3532,7 @@ socket_connect(rpc_transport_t *this, int port)
+ /* make socket non-blocking for all types of sockets */
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "NBIO on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto handler;
+@@ -3552,7 +3559,7 @@ socket_connect(rpc_transport_t *this, int port)
+
+ connect_attempted = _gf_true;
+
+- if (ret == -1 && errno == ENOENT && ign_enoent) {
++ if ((ret != 0) && (errno == ENOENT) && ign_enoent) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Ignore failed connection attempt on %s, (%s) ",
+ this->peerinfo.identifier, strerror(errno));
+@@ -3570,7 +3577,7 @@ socket_connect(rpc_transport_t *this, int port)
+ goto handler;
+ }
+
+- if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
++ if ((ret != 0) && (errno != EINPROGRESS) && (errno != ENOENT)) {
+ /* For unix path based sockets, the socket path is
+ * cryptic (md5sum of path) and may not be useful for
+ * the user in debugging so log it in DEBUG
+@@ -3634,8 +3641,8 @@ socket_connect(rpc_transport_t *this, int port)
+ pthread_mutex_unlock(&priv->out_lock);
+
+ err:
+- /* if sock != -1, then cleanup is done from the event handler */
+- if (ret == -1 && sock == -1) {
++ /* if sock >= 0, then cleanup is done from the event handler */
++ if ((ret < 0) && (sock < 0)) {
+ /* Cleaup requires to send notification to upper layer which
+ intern holds the big_lock. There can be dead-lock situation
+ if big_lock is already held by the current thread.
+@@ -3689,20 +3696,20 @@ socket_listen(rpc_transport_t *this)
+ }
+ pthread_mutex_unlock(&priv->out_lock);
+
+- if (sock != -1) {
++ if (sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_DEBUG, "already listening");
+ return ret;
+ }
+
+ ret = socket_server_get_local_sockaddr(this, SA(&sockaddr), &sockaddr_len,
+ &sa_family);
+- if (ret == -1) {
++ if (ret < 0) {
+ return ret;
+ }
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+- if (priv->sock != -1) {
++ if (priv->sock >= 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "already listening");
+ goto unlock;
+ }
+@@ -3712,7 +3719,7 @@ socket_listen(rpc_transport_t *this)
+
+ priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
+
+- if (priv->sock == -1) {
++ if (priv->sock < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
+ strerror(errno));
+ goto unlock;
+@@ -3723,7 +3730,7 @@ socket_listen(rpc_transport_t *this)
+ */
+ if (priv->windowsize != 0) {
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
+- sizeof(priv->windowsize)) < 0) {
++ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting receive window size "
+ "failed: %d: %d: %s",
+@@ -3731,7 +3738,7 @@ socket_listen(rpc_transport_t *this)
+ }
+
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
+- sizeof(priv->windowsize)) < 0) {
++ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting send window size failed:"
+ " %d: %d: %s",
+@@ -3741,7 +3748,7 @@ socket_listen(rpc_transport_t *this)
+
+ if (priv->nodelay && (sa_family != AF_UNIX)) {
+ ret = __socket_nodelay(priv->sock);
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setsockopt() failed for NODELAY (%s)", strerror(errno));
+ }
+@@ -3750,7 +3757,7 @@ socket_listen(rpc_transport_t *this)
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "NBIO on socket %d failed "
+ "(errno:%s); closing socket",
+@@ -3763,7 +3770,7 @@ socket_listen(rpc_transport_t *this)
+
+ ret = __socket_server_bind(this);
+
+- if ((ret == -EADDRINUSE) || (ret == -1)) {
++ if (ret < 0) {
+ /* logged inside __socket_server_bind() */
+ gf_log(this->name, GF_LOG_ERROR,
+ "__socket_server_bind failed;"
+@@ -3779,7 +3786,7 @@ socket_listen(rpc_transport_t *this)
+
+ ret = listen(priv->sock, priv->backlog);
+
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "could not set socket %d to listen mode (errno:%s); "
+ "closing socket",
+@@ -4025,7 +4032,7 @@ reconfigure(rpc_transport_t *this, dict_t *options)
+ priv = this->private;
+
+ if (dict_get_str(options, "transport.socket.keepalive", &optstr) == 0) {
+- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.keepalive' takes only "
+ "boolean options, not taking any action");
+@@ -4094,7 +4101,7 @@ reconfigure(rpc_transport_t *this, dict_t *options)
+ if (dict_get(options, "non-blocking-io")) {
+ optstr = data_to_str(dict_get(options, "non-blocking-io"));
+
+- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'non-blocking-io' takes only boolean options,"
+ " not taking any action");
+@@ -4109,7 +4116,7 @@ reconfigure(rpc_transport_t *this, dict_t *options)
+
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+- if (ret == -1) {
++ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING, "NBIO on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto out;
+@@ -4508,7 +4515,7 @@ socket_init(rpc_transport_t *this)
+ if (dict_get(this->options, "non-blocking-io")) {
+ optstr = data_to_str(dict_get(this->options, "non-blocking-io"));
+
+- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'non-blocking-io' takes only boolean options,"
+ " not taking any action");
+@@ -4528,7 +4535,7 @@ socket_init(rpc_transport_t *this)
+ optstr = data_to_str(
+ dict_get(this->options, "transport.socket.nodelay"));
+
+- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.nodelay' takes only "
+ "boolean options, not taking any action");
+@@ -4559,7 +4566,7 @@ socket_init(rpc_transport_t *this)
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ if (dict_get_str(this->options, "transport.socket.keepalive", &optstr) ==
+ 0) {
+- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.keepalive' takes only "
+ "boolean options, not taking any action");
+@@ -4609,7 +4616,7 @@ socket_init(rpc_transport_t *this)
+ if (dict_get(this->options, "transport.socket.read-fail-log")) {
+ optstr = data_to_str(
+ dict_get(this->options, "transport.socket.read-fail-log"));
+- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "'transport.socket.read-fail-log' takes only "
+ "boolean options; logging socket read fails");
+@@ -4646,7 +4653,7 @@ fini(rpc_transport_t *this)
+
+ priv = this->private;
+ if (priv) {
+- if (priv->sock != -1) {
++ if (priv->sock >= 0) {
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ __socket_ioq_flush(this);
+@@ -4683,7 +4690,7 @@ init(rpc_transport_t *this)
+
+ ret = socket_init(this);
+
+- if (ret == -1) {
++ if (ret < 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "socket_init() failed");
+ }
+
+--
+1.8.3.1
+
diff --git a/0342-Revert-hooks-remove-selinux-hooks.patch b/0342-Revert-hooks-remove-selinux-hooks.patch
new file mode 100644
index 0000000..028a227
--- /dev/null
+++ b/0342-Revert-hooks-remove-selinux-hooks.patch
@@ -0,0 +1,120 @@
+From eb37a3b57415d2d4206ecdd2db10530366a0d1b1 Mon Sep 17 00:00:00 2001
+From: Anoop C S <anoopcs@redhat.com>
+Date: Fri, 13 Dec 2019 15:20:27 +0530
+Subject: [PATCH 342/344] Revert "hooks: remove selinux hooks"
+
+This reverts commit 421743b7cfa6a249544f6abb4cca5a612bd20ea1.
+
+Note: We are not bringing back features.selinux but just the hooks for
+ setting SELinux context on bricks
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Iccc10428361cac59b294e1d7aa1ba8187c20029e
+BUG: 1686800
+Signed-off-by: Anoop C S <anoopcs@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/187691
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Niels de Vos <ndevos@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac | 4 ++++
+ extras/hook-scripts/Makefile.am | 2 +-
+ extras/hook-scripts/create/Makefile.am | 1 +
+ extras/hook-scripts/create/post/Makefile.am | 6 ++++++
+ extras/hook-scripts/delete/Makefile.am | 1 +
+ extras/hook-scripts/delete/pre/Makefile.am | 6 ++++++
+ glusterfs.spec.in | 2 ++
+ 7 files changed, 21 insertions(+), 1 deletion(-)
+ create mode 100644 extras/hook-scripts/create/Makefile.am
+ create mode 100644 extras/hook-scripts/create/post/Makefile.am
+ create mode 100644 extras/hook-scripts/delete/Makefile.am
+ create mode 100644 extras/hook-scripts/delete/pre/Makefile.am
+
+diff --git a/configure.ac b/configure.ac
+index 327733e..98ee311 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -221,6 +221,10 @@ AC_CONFIG_FILES([Makefile
+ extras/hook-scripts/add-brick/Makefile
+ extras/hook-scripts/add-brick/pre/Makefile
+ extras/hook-scripts/add-brick/post/Makefile
++ extras/hook-scripts/create/Makefile
++ extras/hook-scripts/create/post/Makefile
++ extras/hook-scripts/delete/Makefile
++ extras/hook-scripts/delete/pre/Makefile
+ extras/hook-scripts/start/Makefile
+ extras/hook-scripts/start/post/Makefile
+ extras/hook-scripts/set/Makefile
+diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am
+index 771b37e..26059d7 100644
+--- a/extras/hook-scripts/Makefile.am
++++ b/extras/hook-scripts/Makefile.am
+@@ -1,5 +1,5 @@
+ EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh
+-SUBDIRS = add-brick set start stop reset
++SUBDIRS = add-brick create delete set start stop reset
+
+ scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/
+ if USE_GEOREP
+diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am
+new file mode 100644
+index 0000000..b083a91
+--- /dev/null
++++ b/extras/hook-scripts/create/Makefile.am
+@@ -0,0 +1 @@
++SUBDIRS = post
+diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am
+new file mode 100644
+index 0000000..919801a
+--- /dev/null
++++ b/extras/hook-scripts/create/post/Makefile.am
+@@ -0,0 +1,6 @@
++EXTRA_DIST = S10selinux-label-brick.sh
++
++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/
++if WITH_SERVER
++scripts_SCRIPTS = S10selinux-label-brick.sh
++endif
+diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am
+new file mode 100644
+index 0000000..c98a05d
+--- /dev/null
++++ b/extras/hook-scripts/delete/Makefile.am
+@@ -0,0 +1 @@
++SUBDIRS = pre
+diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am
+new file mode 100644
+index 0000000..93a6b85
+--- /dev/null
++++ b/extras/hook-scripts/delete/pre/Makefile.am
+@@ -0,0 +1,6 @@
++EXTRA_DIST = S10selinux-del-fcontext.sh
++
++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/
++if WITH_SERVER
++scripts_SCRIPTS = S10selinux-del-fcontext.sh
++endif
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 1b975b2..012989a 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1453,6 +1453,7 @@ exit 0
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+@@ -1461,6 +1462,7 @@ exit 0
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+ %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+--
+1.8.3.1
+
diff --git a/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch b/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch
new file mode 100644
index 0000000..77d2f64
--- /dev/null
+++ b/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch
@@ -0,0 +1,155 @@
+From 8a8c508b529f7609fc5caa10bc79ba817f5d274a Mon Sep 17 00:00:00 2001
+From: Milan Zink <mzink@redhat.com>
+Date: Mon, 5 Feb 2018 15:04:37 +0100
+Subject: [PATCH 343/344] extras/hooks: syntactical errors in SELinux hooks,
+ script logic improved
+
+Backport of https://review.gluster.org/c/glusterfs/+/19502
+
+Change-Id: Ia5fa1df81bbaec3a84653d136a331c76b457f42c
+BUG: 1686800
+Signed-off-by: Anoop C S <anoopcs@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/187692
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Niels de Vos <ndevos@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../create/post/S10selinux-label-brick.sh | 13 +++--
+ .../delete/pre/S10selinux-del-fcontext.sh | 60 +++++++++++++---------
+ tests/bugs/glusterfs-server/bug-877992.t | 4 +-
+ 3 files changed, 46 insertions(+), 31 deletions(-)
+
+diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
+index de242d2..f9b4b1a 100755
+--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh
++++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
+@@ -34,18 +34,21 @@ parse_args () {
+
+ set_brick_labels()
+ {
+- volname=${1}
++ volname="${1}"
+
+ # grab the path for each local brick
+- brickpath="/var/lib/glusterd/vols/${volname}/bricks/*"
+- brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u)
++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
++ brickdirs=$(
++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
++ cut -d= -f 2 | \
++ sort -u
++ )
+
+ for b in ${brickdirs}; do
+ # Add a file context for each brick path and associate with the
+ # glusterd_brick_t SELinux type.
+- pattern="${b}\(/.*\)?"
++ pattern="${b}(/.*)?"
+ semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}"
+-
+ # Set the labels on the new brick path.
+ restorecon -R "${b}"
+ done
+diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
+index 6eba66f..e7f4e8f 100755
+--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
++++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
+@@ -15,45 +15,55 @@ OPTSPEC="volname:"
+ VOL=
+
+ function parse_args () {
+- ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
+- eval set -- "$ARGS"
+-
+- while true; do
+- case $1 in
+- --volname)
+- shift
+- VOL=$1
+- ;;
+- *)
+- shift
+- break
+- ;;
+- esac
++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
++ eval set -- "${ARGS}"
++
++ while true; do
++ case ${1} in
++ --volname)
++ shift
++ VOL=${1}
++ ;;
++ *)
+ shift
+- done
++ break
++ ;;
++ esac
++ shift
++ done
+ }
+
+ function delete_brick_fcontext()
+ {
+- volname=$1
++ volname="${1}"
++
++ # grab the path for each local brick
++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
++ brickdirs=$(
++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
++ cut -d= -f 2 | \
++ sort -u
++ )
++
++ for b in ${brickdirs}
++ do
++ # remove the file context associated with the brick path
++ pattern="${b}(/.*)?"
++ semanage fcontext --delete "${pattern}"
+
+- # grab the path for each local brick
+- brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2)
++ # remove the labels on brick path.
++ restorecon -R "${b}"
++ done
+
+- for b in $brickdirs
+- do
+- # remove the file context associated with the brick path
+- semanage fcontext --delete $b\(/.*\)?
+- done
+ }
+
+ SELINUX_STATE=$(which getenforce && getenforce)
+ [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+ parse_args "$@"
+-[ -z "$VOL" ] && exit 1
++[ -z "${VOL}" ] && exit 1
+
+-delete_brick_fcontext $VOL
++delete_brick_fcontext "${VOL}"
+
+ # failure to delete the fcontext is not fatal
+ exit 0
+diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t
+index aeb73ed..300000b 100755
+--- a/tests/bugs/glusterfs-server/bug-877992.t
++++ b/tests/bugs/glusterfs-server/bug-877992.t
+@@ -46,7 +46,9 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+ EXPECT "$V0" volinfo_field $V0 'Volume Name';
+ EXPECT 'Created' volinfo_field $V0 'Status';
+ EXPECT 'createPre' cat /tmp/pre.out;
+-EXPECT 'createPost' cat /tmp/post.out;
++# Spost.sh comes after S10selinux-label-brick.sh under create post hook script
++# list. So consider the delay in setting SELinux context on bricks
++EXPECT_WITHIN 5 'createPost' cat /tmp/post.out;
+ hooks_cleanup 'create'
+
+
+--
+1.8.3.1
+
diff --git a/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch b/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch
new file mode 100644
index 0000000..341aeae
--- /dev/null
+++ b/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch
@@ -0,0 +1,412 @@
+From 02a93265fe4e78e7fc3fa8c6caa773cbe02f50b6 Mon Sep 17 00:00:00 2001
+From: Anoop C S <anoopcs@redhat.com>
+Date: Fri, 20 Dec 2019 16:01:59 +0530
+Subject: [PATCH 344/344] Revert all fixes to include SELinux hook scripts
+
+Following are the reverts included with this change:
+
+Revert "extras/hooks: syntactical errors in SELinux hooks, scipt logic improved"
+Revert "Revert "hooks: remove selinux hooks""
+Revert "tests: subdir-mount.t is failing for brick_mux regrssion"
+Revert "extras/hooks: Install and package newly added post add-brick hook script"
+Revert "extras/hooks: Add SELinux label on new bricks during add-brick"
+
+Label: DOWNSTREAM ONLY
+
+See bug for more details.
+
+Change-Id: I5c9b9e0e6446568ce16af17257fa39338198a827
+BUG: 1686800
+Signed-off-by: Anoop C S <anoopcs@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/188169
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac | 4 -
+ extras/hook-scripts/Makefile.am | 2 +-
+ extras/hook-scripts/add-brick/post/Makefile.am | 4 +-
+ .../add-brick/post/S10selinux-label-brick.sh | 100 ---------------------
+ extras/hook-scripts/create/Makefile.am | 1 -
+ extras/hook-scripts/create/post/Makefile.am | 6 --
+ .../create/post/S10selinux-label-brick.sh | 13 ++-
+ extras/hook-scripts/delete/Makefile.am | 1 -
+ extras/hook-scripts/delete/pre/Makefile.am | 6 --
+ .../delete/pre/S10selinux-del-fcontext.sh | 60 ++++++-------
+ glusterfs.spec.in | 3 -
+ tests/bugs/glusterfs-server/bug-877992.t | 4 +-
+ tests/features/subdir-mount.t | 11 +--
+ 13 files changed, 37 insertions(+), 178 deletions(-)
+ delete mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
+ delete mode 100644 extras/hook-scripts/create/Makefile.am
+ delete mode 100644 extras/hook-scripts/create/post/Makefile.am
+ delete mode 100644 extras/hook-scripts/delete/Makefile.am
+ delete mode 100644 extras/hook-scripts/delete/pre/Makefile.am
+
+diff --git a/configure.ac b/configure.ac
+index 98ee311..327733e 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -221,10 +221,6 @@ AC_CONFIG_FILES([Makefile
+ extras/hook-scripts/add-brick/Makefile
+ extras/hook-scripts/add-brick/pre/Makefile
+ extras/hook-scripts/add-brick/post/Makefile
+- extras/hook-scripts/create/Makefile
+- extras/hook-scripts/create/post/Makefile
+- extras/hook-scripts/delete/Makefile
+- extras/hook-scripts/delete/pre/Makefile
+ extras/hook-scripts/start/Makefile
+ extras/hook-scripts/start/post/Makefile
+ extras/hook-scripts/set/Makefile
+diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am
+index 26059d7..771b37e 100644
+--- a/extras/hook-scripts/Makefile.am
++++ b/extras/hook-scripts/Makefile.am
+@@ -1,5 +1,5 @@
+ EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh
+-SUBDIRS = add-brick create delete set start stop reset
++SUBDIRS = add-brick set start stop reset
+
+ scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/
+ if USE_GEOREP
+diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am
+index 9b236df..bfc0c1c 100644
+--- a/extras/hook-scripts/add-brick/post/Makefile.am
++++ b/extras/hook-scripts/add-brick/post/Makefile.am
+@@ -1,6 +1,6 @@
+-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh
+
+ hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/
+ if WITH_SERVER
+-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh
+ endif
+diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
+deleted file mode 100755
+index 4a17c99..0000000
+--- a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
++++ /dev/null
+@@ -1,100 +0,0 @@
+-#!/bin/bash
+-#
+-# Install to hooks/<HOOKS_VER>/add-brick/post
+-#
+-# Add an SELinux file context for each brick using the glusterd_brick_t type.
+-# This ensures that the brick is relabeled correctly on an SELinux restart or
+-# restore. Subsequently, run a restore on the brick path to set the selinux
+-# labels.
+-#
+-###
+-
+-PROGNAME="Sselinux"
+-OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+-VOL=
+-
+-parse_args () {
+- ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+- eval set -- "${ARGS}"
+-
+- while true; do
+- case ${1} in
+- --volname)
+- shift
+- VOL=${1}
+- ;;
+- --gd-workdir)
+- shift
+- GLUSTERD_WORKDIR=$1
+- ;;
+- --version)
+- shift
+- ;;
+- --volume-op)
+- shift
+- ;;
+- *)
+- shift
+- break
+- ;;
+- esac
+- shift
+- done
+-}
+-
+-set_brick_labels()
+-{
+- local volname="${1}"
+- local fctx
+- local list=()
+-
+- fctx="$(semanage fcontext --list -C)"
+-
+- # wait for new brick path to be updated under
+- # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/
+- sleep 5
+-
+- # grab the path for each local brick
+- brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/"
+- brickdirs=$(
+- find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+- cut -d= -f 2 | \
+- sort -u
+- )
+-
+- # create a list of bricks for which custom SELinux
+- # label doesn't exist
+- for b in ${brickdirs}; do
+- pattern="${b}(/.*)?"
+- echo "${fctx}" | grep "^${pattern}\s" >/dev/null
+- if [[ $? -ne 0 ]]; then
+- list+=("${pattern}")
+- fi
+- done
+-
+- # Add a file context for each brick path in the list and associate with the
+- # glusterd_brick_t SELinux type.
+- for p in ${list[@]}
+- do
+- semanage fcontext --add -t glusterd_brick_t -r s0 "${p}"
+- done
+-
+- # Set the labels for which SELinux label was added above
+- for b in ${brickdirs}
+- do
+- echo "${list[@]}" | grep "${b}" >/dev/null
+- if [[ $? -eq 0 ]]; then
+- restorecon -R "${b}"
+- fi
+- done
+-}
+-
+-SELINUX_STATE=$(which getenforce && getenforce)
+-[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+-
+-parse_args "$@"
+-[ -z "${VOL}" ] && exit 1
+-
+-set_brick_labels "${VOL}"
+-
+-exit 0
+diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am
+deleted file mode 100644
+index b083a91..0000000
+--- a/extras/hook-scripts/create/Makefile.am
++++ /dev/null
+@@ -1 +0,0 @@
+-SUBDIRS = post
+diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am
+deleted file mode 100644
+index 919801a..0000000
+--- a/extras/hook-scripts/create/post/Makefile.am
++++ /dev/null
+@@ -1,6 +0,0 @@
+-EXTRA_DIST = S10selinux-label-brick.sh
+-
+-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/
+-if WITH_SERVER
+-scripts_SCRIPTS = S10selinux-label-brick.sh
+-endif
+diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
+index f9b4b1a..de242d2 100755
+--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh
++++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
+@@ -34,21 +34,18 @@ parse_args () {
+
+ set_brick_labels()
+ {
+- volname="${1}"
++ volname=${1}
+
+ # grab the path for each local brick
+- brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+- brickdirs=$(
+- find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+- cut -d= -f 2 | \
+- sort -u
+- )
++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/*"
++ brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u)
+
+ for b in ${brickdirs}; do
+ # Add a file context for each brick path and associate with the
+ # glusterd_brick_t SELinux type.
+- pattern="${b}(/.*)?"
++ pattern="${b}\(/.*\)?"
+ semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}"
++
+ # Set the labels on the new brick path.
+ restorecon -R "${b}"
+ done
+diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am
+deleted file mode 100644
+index c98a05d..0000000
+--- a/extras/hook-scripts/delete/Makefile.am
++++ /dev/null
+@@ -1 +0,0 @@
+-SUBDIRS = pre
+diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am
+deleted file mode 100644
+index 93a6b85..0000000
+--- a/extras/hook-scripts/delete/pre/Makefile.am
++++ /dev/null
+@@ -1,6 +0,0 @@
+-EXTRA_DIST = S10selinux-del-fcontext.sh
+-
+-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/
+-if WITH_SERVER
+-scripts_SCRIPTS = S10selinux-del-fcontext.sh
+-endif
+diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
+index e7f4e8f..6eba66f 100755
+--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
++++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
+@@ -15,55 +15,45 @@ OPTSPEC="volname:"
+ VOL=
+
+ function parse_args () {
+- ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+- eval set -- "${ARGS}"
+-
+- while true; do
+- case ${1} in
+- --volname)
+- shift
+- VOL=${1}
+- ;;
+- *)
++ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
++ eval set -- "$ARGS"
++
++ while true; do
++ case $1 in
++ --volname)
++ shift
++ VOL=$1
++ ;;
++ *)
++ shift
++ break
++ ;;
++ esac
+ shift
+- break
+- ;;
+- esac
+- shift
+- done
++ done
+ }
+
+ function delete_brick_fcontext()
+ {
+- volname="${1}"
+-
+- # grab the path for each local brick
+- brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+- brickdirs=$(
+- find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+- cut -d= -f 2 | \
+- sort -u
+- )
+-
+- for b in ${brickdirs}
+- do
+- # remove the file context associated with the brick path
+- pattern="${b}(/.*)?"
+- semanage fcontext --delete "${pattern}"
++ volname=$1
+
+- # remove the labels on brick path.
+- restorecon -R "${b}"
+- done
++ # grab the path for each local brick
++ brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2)
+
++ for b in $brickdirs
++ do
++ # remove the file context associated with the brick path
++ semanage fcontext --delete $b\(/.*\)?
++ done
+ }
+
+ SELINUX_STATE=$(which getenforce && getenforce)
+ [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+ parse_args "$@"
+-[ -z "${VOL}" ] && exit 1
++[ -z "$VOL" ] && exit 1
+
+-delete_brick_fcontext "${VOL}"
++delete_brick_fcontext $VOL
+
+ # failure to delete the fcontext is not fatal
+ exit 0
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 012989a..671ee27 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1447,13 +1447,11 @@ exit 0
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh
+- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
+- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+@@ -1462,7 +1460,6 @@ exit 0
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+ %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
+- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t
+index 300000b..aeb73ed 100755
+--- a/tests/bugs/glusterfs-server/bug-877992.t
++++ b/tests/bugs/glusterfs-server/bug-877992.t
+@@ -46,9 +46,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+ EXPECT "$V0" volinfo_field $V0 'Volume Name';
+ EXPECT 'Created' volinfo_field $V0 'Status';
+ EXPECT 'createPre' cat /tmp/pre.out;
+-# Spost.sh comes after S10selinux-label-brick.sh under create post hook script
+-# list. So consider the delay in setting SELinux context on bricks
+-EXPECT_WITHIN 5 'createPost' cat /tmp/post.out;
++EXPECT 'createPost' cat /tmp/post.out;
+ hooks_cleanup 'create'
+
+
+diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t
+index a02bd6b..8401946 100644
+--- a/tests/features/subdir-mount.t
++++ b/tests/features/subdir-mount.t
+@@ -85,17 +85,12 @@ TEST $CLI volume start $V0
+ TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2
+ TEST stat $M2
+
+-initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+ # mount shouldn't fail even after add-brick
+ TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6};
+
+-# Wait to execute create-subdir-mounts.sh script by glusterd
+-newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+-while [ $newcnt -eq $initcnt ]
+-do
+- newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+- sleep 1
+-done
++# Give time for client process to get notified and use the new
++# volfile after add-brick
++sleep 1
+
+ # Existing mount should still be active
+ mount_inode=$(stat --format "%i" "$M2")
+--
+1.8.3.1
+
diff --git a/0345-read-ahead-io-cache-turn-off-by-default.patch b/0345-read-ahead-io-cache-turn-off-by-default.patch
new file mode 100644
index 0000000..48b0cc8
--- /dev/null
+++ b/0345-read-ahead-io-cache-turn-off-by-default.patch
@@ -0,0 +1,82 @@
+From d45c64e17e1eb8003ac1086cbd3abea32414c7f9 Mon Sep 17 00:00:00 2001
+From: Raghavendra Gowdappa <rgowdapp@redhat.com>
+Date: Tue, 12 Feb 2019 18:33:44 +0530
+Subject: [PATCH 345/346] read-ahead/io-cache: turn off by default
+
+We've found perf xlators io-cache and read-ahead not adding any
+performance improvement. At best read-ahead is redundant due to kernel
+read-ahead and at worst io-cache is degrading the performance for
+workloads that don't involve re-read. Given that the VFS already has
+both these functionalities, this patch turns these two
+translators off by default for native fuse mounts.
+
+For non-native fuse mounts like gfapi (NFS-ganesha/samba) we can keep
+these xlators on via custom profiles.
+
+>Change-Id: Ie7535788909d4c741844473696f001274dc0bb60
+>Signed-off-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
+>fixes: bz#1676479
+Upstream fix link: https://review.gluster.org/#/c/glusterfs/+/22203/
+
+BUG: 1788656
+Change-Id: Ie7535788909d4c741844473696f001274dc0bb60
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/188967
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ tests/basic/ec/self-heal.t | 2 ++
+ tests/basic/glusterd/volfile_server_switch.t | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 ++--
+ 3 files changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t
+index d217559..6329bb6 100644
+--- a/tests/basic/ec/self-heal.t
++++ b/tests/basic/ec/self-heal.t
+@@ -131,6 +131,8 @@ TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+ TEST $CLI volume set $V0 client-log-level DEBUG
+ #Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp'
+ TEST $CLI volume set $V0 performance.write-behind off
++#md-cache can return stale stat due to default timeout being 1 sec
++TEST $CLI volume set $V0 performance.stat-prefetch off
+ EXPECT "Created" volinfo_field $V0 'Status'
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t
+index 3090609..e11cfed 100644
+--- a/tests/basic/glusterd/volfile_server_switch.t
++++ b/tests/basic/glusterd/volfile_server_switch.t
+@@ -34,7 +34,7 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --vol
+
+ TEST kill_glusterd 1
+
+-TEST $CLI_2 volume set $V0 performance.io-cache off
++TEST $CLI_2 volume set $V0 performance.write-behind off
+
+ # make sure by this time directory will be created
+ # TODO: suggest ideal time to wait
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 16601a2..9001b88 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2235,7 +2235,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "performance.read-ahead",
+ .voltype = "performance/read-ahead",
+ .option = "!perf",
+- .value = "on",
++ .value = "off",
+ .op_version = 1,
+ .description = "enable/disable read-ahead translator in the volume.",
+ .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT},
+@@ -2249,7 +2249,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ {.key = "performance.io-cache",
+ .voltype = "performance/io-cache",
+ .option = "!perf",
+- .value = "on",
++ .value = "off",
+ .op_version = 1,
+ .description = "enable/disable io-cache translator in the volume.",
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
+--
+1.8.3.1
+
diff --git a/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch b/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch
new file mode 100644
index 0000000..9fca79e
--- /dev/null
+++ b/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch
@@ -0,0 +1,223 @@
+From e2af9793014ad67859aa73088765a52307cbe466 Mon Sep 17 00:00:00 2001
+From: Csaba Henk <csaba@redhat.com>
+Date: Tue, 7 Jan 2020 19:43:05 +0100
+Subject: [PATCH 346/346] fuse: degrade logging of write failure to fuse device
+
+Problem:
+
+FUSE uses failures of communicating with /dev/fuse with various
+errnos to indicate in-kernel conditions to userspace. Some of these
+shouldn't be handled as an application error. Also, the standard
+POSIX errno descriptions should not be shown, as they are misleading
+in this context.
+
+Solution:
+
+When writing to the fuse device, the caller of the respective
+convenience routine can mask those errnos which don't qualify to
+be an error for the application in that context, so that those
+are reported at DEBUG level instead.
+
+The possible non-standard errnos are reported with their
+POSIX name instead of their description to avoid confusion.
+(E.g. for ENOENT we don't log "no such file or directory",
+we log the literal "ENOENT".)
+
+Upstream on https://review.gluster.org/23974
+> Change-Id: I510158843e4b1d482bdc496c2e97b1860dc1ba93
+> updates: bz#1193929
+> Signed-off-by: Csaba Henk <csaba@redhat.com>
+
+BUG: 1763208
+Change-Id: Ib1676bb334ed153ce74ae1c0413fc0e58fb388c7
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/189056
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mount/fuse/src/fuse-bridge.c | 78 +++++++++++++++++++++++++++++++++---
+ xlators/mount/fuse/src/fuse-bridge.h | 9 ++++-
+ 2 files changed, 80 insertions(+), 7 deletions(-)
+
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index ebe5c28..6e99053 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -198,7 +198,7 @@ fusedump_setup_meta(struct iovec *iovs, char *dir,
+
+ static int
+ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
+- ssize_t res)
++ ssize_t res, errnomask_t errnomask)
+ {
+ char w = 'W';
+ struct iovec diov[4] = {
+@@ -216,8 +216,59 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
+ struct fuse_out_header *fouh = NULL;
+
+ if (res == -1) {
+- gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR,
+- "writing to fuse device failed: %s", strerror(errno));
++ const char *errdesc = NULL;
++ gf_loglevel_t loglevel = GF_LOG_ERROR;
++
++ /* If caller masked the errno, then it
++ * does not indicate an error at the application
++ * level, so we degrade the log severity to DEBUG.
++ */
++ if (errnomask && errno < ERRNOMASK_MAX &&
++ GET_ERRNO_MASK(errnomask, errno))
++ loglevel = GF_LOG_DEBUG;
++
++ switch (errno) {
++ /* The listed errnos are FUSE status indicators,
++ * not legit values according to POSIX (see write(3p)),
++ * so resolving them according to the standard
++ * POSIX interpretation would be misleading.
++ */
++ case ENOENT:
++ errdesc = "ENOENT";
++ break;
++ case ENOTDIR:
++ errdesc = "ENOTDIR";
++ break;
++ case ENODEV:
++ errdesc = "ENODEV";
++ break;
++ case EPERM:
++ errdesc = "EPERM";
++ break;
++ case ENOMEM:
++ errdesc = "ENOMEM";
++ break;
++ case ENOTCONN:
++ errdesc = "ENOTCONN";
++ break;
++ case ECONNREFUSED:
++ errdesc = "ECONNREFUSED";
++ break;
++ case EOVERFLOW:
++ errdesc = "EOVERFLOW";
++ break;
++ case EBUSY:
++ errdesc = "EBUSY";
++ break;
++ case ENOTEMPTY:
++ errdesc = "ENOTEMPTY";
++ break;
++ default:
++ errdesc = strerror(errno);
++ }
++
++ gf_log_callingfn("glusterfs-fuse", loglevel,
++ "writing to fuse device failed: %s", errdesc);
+ return errno;
+ }
+
+@@ -282,7 +333,7 @@ send_fuse_iov(xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
+ gf_log("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s", res,
+ fouh->len, res == -1 ? strerror(errno) : "");
+
+- return check_and_dump_fuse_W(priv, iov_out, count, res);
++ return check_and_dump_fuse_W(priv, iov_out, count, res, NULL);
+ }
+
+ static int
+@@ -353,6 +404,15 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
+ fouh->unique = 0;
+ fouh->error = FUSE_NOTIFY_INVAL_ENTRY;
+
++ if (ENOENT < ERRNOMASK_MAX)
++ MASK_ERRNO(node->errnomask, ENOENT);
++ if (ENOTDIR < ERRNOMASK_MAX)
++ MASK_ERRNO(node->errnomask, ENOTDIR);
++ if (EBUSY < ERRNOMASK_MAX)
++ MASK_ERRNO(node->errnomask, EBUSY);
++ if (ENOTEMPTY < ERRNOMASK_MAX)
++ MASK_ERRNO(node->errnomask, ENOTEMPTY);
++
+ if (dentry->name) {
+ nlen = strlen(dentry->name);
+ fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1;
+@@ -437,6 +497,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+ fniio->off = 0;
+ fniio->len = -1;
+
++ if (ENOENT < ERRNOMASK_MAX)
++ MASK_ERRNO(node->errnomask, ENOENT);
++
+ fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino,
+ uuid_utoa(inode->gfid));
+ gf_log("glusterfs-fuse", GF_LOG_TRACE,
+@@ -482,6 +545,7 @@ fuse_timed_message_new(void)
+ /* should be NULL if not set */
+ dmsg->fuse_message_body = NULL;
+ INIT_LIST_HEAD(&dmsg->next);
++ memset(dmsg->errnomask, 0, sizeof(dmsg->errnomask));
+
+ return dmsg;
+ }
+@@ -680,6 +744,8 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg,
+ dmsg->fuse_out_header.unique = finh->unique;
+ dmsg->fuse_out_header.len = sizeof(dmsg->fuse_out_header);
+ dmsg->fuse_out_header.error = -EAGAIN;
++ if (ENOENT < ERRNOMASK_MAX)
++ MASK_ERRNO(dmsg->errnomask, ENOENT);
+ timespec_now(&dmsg->scheduled_ts);
+ timespec_adjust_delta(&dmsg->scheduled_ts,
+ (struct timespec){0, 10000000});
+@@ -4848,7 +4914,7 @@ notify_kernel_loop(void *data)
+ iov_out.iov_base = node->inval_buf;
+ iov_out.iov_len = len;
+ rv = sys_writev(priv->fd, &iov_out, 1);
+- check_and_dump_fuse_W(priv, &iov_out, 1, rv);
++ check_and_dump_fuse_W(priv, &iov_out, 1, rv, node->errnomask);
+
+ GF_FREE(node);
+
+@@ -4940,7 +5006,7 @@ timed_response_loop(void *data)
+ iovs[1] = (struct iovec){dmsg->fuse_message_body,
+ len - sizeof(struct fuse_out_header)};
+ rv = sys_writev(priv->fd, iovs, 2);
+- check_and_dump_fuse_W(priv, iovs, 2, rv);
++ check_and_dump_fuse_W(priv, iovs, 2, rv, dmsg->errnomask);
+
+ fuse_timed_message_free(dmsg);
+
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
+index cf4479c..d2d462c 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.h
++++ b/xlators/mount/fuse/src/fuse-bridge.h
+@@ -195,14 +195,20 @@ struct fuse_private {
+ };
+ typedef struct fuse_private fuse_private_t;
+
++typedef uint64_t errnomask_t[2];
++#define MASK_ERRNO(mask, n) ((mask)[(n) >> 6] |= ((uint64_t)1 << ((n)&63)))
++#define GET_ERRNO_MASK(mask, n) ((mask)[(n) >> 6] & ((uint64_t)1 << ((n)&63)))
++#define ERRNOMASK_MAX (64 * (sizeof(errnomask_t) / sizeof(uint64_t)))
++
+ #define INVAL_BUF_SIZE \
+ (sizeof(struct fuse_out_header) + \
+ max(sizeof(struct fuse_notify_inval_inode_out), \
+ sizeof(struct fuse_notify_inval_entry_out) + NAME_MAX + 1))
+
+ struct fuse_invalidate_node {
+- char inval_buf[INVAL_BUF_SIZE];
++ errnomask_t errnomask;
+ struct list_head next;
++ char inval_buf[INVAL_BUF_SIZE];
+ };
+ typedef struct fuse_invalidate_node fuse_invalidate_node_t;
+
+@@ -210,6 +216,7 @@ struct fuse_timed_message {
+ struct fuse_out_header fuse_out_header;
+ void *fuse_message_body;
+ struct timespec scheduled_ts;
++ errnomask_t errnomask;
+ struct list_head next;
+ };
+ typedef struct fuse_timed_message fuse_timed_message_t;
+--
+1.8.3.1
+
diff --git a/0347-tools-glusterfind-handle-offline-bricks.patch b/0347-tools-glusterfind-handle-offline-bricks.patch
new file mode 100644
index 0000000..ff5251d
--- /dev/null
+++ b/0347-tools-glusterfind-handle-offline-bricks.patch
@@ -0,0 +1,236 @@
+From 87e6ea2cd63898c5d243b0f0c719f4f6347fb829 Mon Sep 17 00:00:00 2001
+From: Milind Changire <mchangir@redhat.com>
+Date: Thu, 5 Jan 2017 19:53:19 +0530
+Subject: [PATCH 347/349] tools/glusterfind: handle offline bricks
+
+Problem:
+glusterfind is unable to copy the remote output file to the local node when a
+remove-brick is in progress on the remote node. After copying remote
+files, in the --full output listing path, a "sort -u" command is run on
+the collected files. However, "sort" exits with an error code if it
+finds any file missing.
+
+Solution:
+Maintain a map of (pid, output file) when the node commands are started
+and remove the mapping for the pid for which the command returns an
+error. Use the list of files present in the map for the "sort" command.
+
+Backport of:
+> Patch: https://review.gluster.org/16332
+> Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b
+> fixes: bz#1410439
+> Signed-off-by: Milind Changire <mchangir@redhat.com>
+> Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+BUG: 1789447
+Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/189214
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
+---
+ tools/glusterfind/src/gfind_py2py3.py | 25 ++++++++++++++
+ tools/glusterfind/src/main.py | 61 +++++++++++++++++++++--------------
+ 2 files changed, 61 insertions(+), 25 deletions(-)
+
+diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py
+index 1d41ec5..87324fb 100644
+--- a/tools/glusterfind/src/gfind_py2py3.py
++++ b/tools/glusterfind/src/gfind_py2py3.py
+@@ -40,6 +40,19 @@ if sys.version_info >= (3,):
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile.encode())
+
++ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
++ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
++ field_separator,
++ p_rep,
++ field_separator,
++ row_2_rep))
++
++ def gfind_write(f, row, field_separator, p_rep):
++ f.write(u"{0}{1}{2}\n".format(row,
++ field_separator,
++ p_rep))
++
++
+ else:
+
+ # Raw conversion of bytearray to string
+@@ -61,3 +74,15 @@ else:
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile)
++
++ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
++ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
++ field_separator,
++ p_rep,
++ field_separator,
++ row_2_rep).encode())
++
++ def gfind_write(f, row, field_separator, p_rep):
++ f.write(u"{0}{1}{2}\n".format(row,
++ field_separator,
++ p_rep).encode())
+diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
+index cc5a86f..fefe4a3 100644
+--- a/tools/glusterfind/src/main.py
++++ b/tools/glusterfind/src/main.py
+@@ -16,6 +16,7 @@ from multiprocessing import Process
+ import os
+ import xml.etree.cElementTree as etree
+ from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action
++from gfind_py2py3 import gfind_write_row, gfind_write
+ import logging
+ import shutil
+ import tempfile
+@@ -35,9 +36,9 @@ GlusterFS Incremental API
+ ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+
+ logger = logging.getLogger()
+-node_outfiles = []
+ vol_statusStr = ""
+ gtmpfilename = None
++g_pid_nodefile_map = {}
+
+
+ class StoreAbsPath(Action):
+@@ -111,7 +112,7 @@ def node_cmd(host, host_uuid, task, cmd, args, opts):
+
+
+ def run_cmd_nodes(task, args, **kwargs):
+- global node_outfiles
++ global g_pid_nodefile_map
+ nodes = get_nodes(args.volume)
+ pool = []
+ for num, node in enumerate(nodes):
+@@ -142,7 +143,6 @@ def run_cmd_nodes(task, args, **kwargs):
+ if tag == "":
+ tag = '""' if not is_host_local(host_uuid) else ""
+
+- node_outfiles.append(node_outfile)
+ # remote file will be copied into this directory
+ mkdirp(os.path.dirname(node_outfile),
+ exit_on_err=True, logger=logger)
+@@ -180,7 +180,6 @@ def run_cmd_nodes(task, args, **kwargs):
+ if tag == "":
+ tag = '""' if not is_host_local(host_uuid) else ""
+
+- node_outfiles.append(node_outfile)
+ # remote file will be copied into this directory
+ mkdirp(os.path.dirname(node_outfile),
+ exit_on_err=True, logger=logger)
+@@ -264,6 +263,7 @@ def run_cmd_nodes(task, args, **kwargs):
+ args=(host, host_uuid, task, cmd, args, opts))
+ p.start()
+ pool.append(p)
++ g_pid_nodefile_map[p.pid] = node_outfile
+
+ for num, p in enumerate(pool):
+ p.join()
+@@ -271,8 +271,11 @@ def run_cmd_nodes(task, args, **kwargs):
+ logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
+ if task in ["create", "delete"]:
+ fail("Command %s failed in %s" % (task, nodes[num][1]))
+- elif task == "pre" and args.disable_partial:
+- sys.exit(1)
++ elif task == "pre" or task == "query":
++ if args.disable_partial:
++ sys.exit(1)
++ else:
++ del g_pid_nodefile_map[p.pid]
+
+
+ @cache_output
+@@ -512,16 +515,10 @@ def write_output(outfile, outfilemerger, field_separator):
+ continue
+
+ if row_2_rep and row_2_rep != "":
+- f.write(u"{0}{1}{2}{3}{4}\n".format(row[0],
+- field_separator,
+- p_rep,
+- field_separator,
+- row_2_rep).encode())
+- else:
+- f.write(u"{0}{1}{2}\n".format(row[0],
+- field_separator,
+- p_rep).encode())
++ gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep)
+
++ else:
++ gfind_write(f, row[0], field_separator, p_rep)
+
+ def mode_create(session_dir, args):
+ logger.debug("Init is called - Session: %s, Volume: %s"
+@@ -571,6 +568,7 @@ def mode_create(session_dir, args):
+
+ def mode_query(session_dir, args):
+ global gtmpfilename
++ global g_pid_nodefile_map
+
+ # Verify volume status
+ cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
+@@ -634,14 +632,20 @@ def mode_query(session_dir, args):
+
+ # Merger
+ if args.full:
+- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
+- execute(cmd,
+- exit_msg="Failed to merge output files "
+- "collected from nodes", logger=logger)
++ if len(g_pid_nodefile_map) > 0:
++ cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \
++ ["-o", args.outfile]
++ execute(cmd,
++ exit_msg="Failed to merge output files "
++ "collected from nodes", logger=logger)
++ else:
++ fail("Failed to collect any output files from peers. "
++ "Looks like all bricks are offline.", logger=logger)
+ else:
+ # Read each Changelogs db and generate finaldb
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
++ outfilemerger = OutputMerger(args.outfile + ".db",
++ g_pid_nodefile_map.values())
+ write_output(args.outfile, outfilemerger, args.field_separator)
+
+ try:
+@@ -656,6 +660,7 @@ def mode_query(session_dir, args):
+
+ def mode_pre(session_dir, args):
+ global gtmpfilename
++ global g_pid_nodefile_map
+
+ """
+ Read from Session file and write to session.pre file
+@@ -696,14 +701,20 @@ def mode_pre(session_dir, args):
+
+ # Merger
+ if args.full:
+- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
+- execute(cmd,
+- exit_msg="Failed to merge output files "
+- "collected from nodes", logger=logger)
++ if len(g_pid_nodefile_map) > 0:
++ cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \
++ ["-o", args.outfile]
++ execute(cmd,
++ exit_msg="Failed to merge output files "
++ "collected from nodes", logger=logger)
++ else:
++ fail("Failed to collect any output files from peers. "
++ "Looks like all bricks are offline.", logger=logger)
+ else:
+ # Read each Changelogs db and generate finaldb
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
++ outfilemerger = OutputMerger(args.outfile + ".db",
++ g_pid_nodefile_map.values())
+ write_output(args.outfile, outfilemerger, args.field_separator)
+
+ try:
+--
+1.8.3.1
+
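The bookkeeping change above — replacing the flat node_outfiles list with a PID-to-output-file map so that files from failed nodes can be dropped before merging — follows a pattern that can be sketched in isolation. This is a minimal illustration with a hypothetical run_node and node names, not the glusterfind sources:

from multiprocessing import Process

g_pid_nodefile_map = {}

def run_node(node, outfile):
    # hypothetical per-node work ("pre"/"query" in glusterfind)
    with open(outfile, "w") as f:
        f.write("changes from %s\n" % node)

if __name__ == "__main__":
    pool = []
    for node in ["node1", "node2"]:
        outfile = "/tmp/gfind-%s.out" % node
        p = Process(target=run_node, args=(node, outfile))
        p.start()
        pool.append(p)
        g_pid_nodefile_map[p.pid] = outfile

    for p in pool:
        p.join()
        if p.exitcode != 0:
            # drop the failed node's file so the merge step skips it
            del g_pid_nodefile_map[p.pid]

    if not g_pid_nodefile_map:
        raise SystemExit("no output files collected from any node")

This is why the merge step above can fail cleanly with "all bricks are offline" instead of feeding nonexistent files to sort.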
diff --git a/0348-glusterfind-Fix-py2-py3-issues.patch b/0348-glusterfind-Fix-py2-py3-issues.patch
new file mode 100644
index 0000000..e1f89f9
--- /dev/null
+++ b/0348-glusterfind-Fix-py2-py3-issues.patch
@@ -0,0 +1,113 @@
+From 1ca8a545833e0a6e674984245338b8675ddc58bc Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Fri, 10 Jan 2020 16:48:14 +0530
+Subject: [PATCH 348/349] glusterfind: Fix py2/py3 issues
+
+1. In py2, dictionary values() returns a list, but in py3 it does not.
+   So explicitly convert it into a list.
+2. The xattr module returns values in bytes. So explicitly convert
+   them to str to work with both py2 and py3.
+
+Backport of:
+ > Patch: https://review.gluster.org/23993
+ > fixes: bz#1789439
+ > Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+BUG: 1789447
+Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/189215
+Reviewed-by: Shwetha Acharya <sacharya@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Hari Gowtham Gopal <hgowtham@redhat.com>
+Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
+---
+ tools/glusterfind/src/changelog.py | 14 +++++++++-----
+ tools/glusterfind/src/main.py | 8 ++++----
+ 2 files changed, 13 insertions(+), 9 deletions(-)
+
+diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
+index d8f97e0..d972fb5 100644
+--- a/tools/glusterfind/src/changelog.py
++++ b/tools/glusterfind/src/changelog.py
+@@ -14,6 +14,7 @@ import sys
+ import time
+ import xattr
+ import logging
++from gfind_py2py3 import bytearray_to_str
+ from argparse import ArgumentParser, RawDescriptionHelpFormatter
+ import hashlib
+ try:
+@@ -105,9 +106,10 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
+ changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+- if x.startswith("trusted.pgfid."):
++ x_str = bytearray_to_str(x)
++ if x_str.startswith("trusted.pgfid."):
+ # PGFID in pgfid table
+- changelog_data.pgfid_add(x.split(".")[-1])
++ changelog_data.pgfid_add(x_str.split(".")[-1])
+ except (IOError, OSError):
+ # All OS Errors ignored, since failures will be logged
+ # in End. All GFIDs present in gfidpath table
+@@ -122,10 +124,12 @@ def enum_hard_links_using_gfid2path(brick, gfid, args):
+ try:
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+- if x.startswith("trusted.gfid2path."):
++ x_str = bytearray_to_str(x)
++ if x_str.startswith("trusted.gfid2path."):
+ # get the value for the xattr i.e. <PGFID>/<BN>
+- v = xattr.getxattr(p, x)
+- pgfid, bn = v.split(os.sep)
++ v = xattr.getxattr(p, x_str)
++ v_str = bytearray_to_str(v)
++ pgfid, bn = v_str.split(os.sep)
+ try:
+ path = symlink_gfid_to_path(brick, pgfid)
+ fullpath = os.path.join(path, bn)
+diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
+index fefe4a3..dfc9d07 100644
+--- a/tools/glusterfind/src/main.py
++++ b/tools/glusterfind/src/main.py
+@@ -633,7 +633,7 @@ def mode_query(session_dir, args):
+ # Merger
+ if args.full:
+ if len(g_pid_nodefile_map) > 0:
+- cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \
++ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+@@ -645,7 +645,7 @@ def mode_query(session_dir, args):
+ # Read each Changelogs db and generate finaldb
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+- g_pid_nodefile_map.values())
++ list(g_pid_nodefile_map.values()))
+ write_output(args.outfile, outfilemerger, args.field_separator)
+
+ try:
+@@ -702,7 +702,7 @@ def mode_pre(session_dir, args):
+ # Merger
+ if args.full:
+ if len(g_pid_nodefile_map) > 0:
+- cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \
++ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+@@ -714,7 +714,7 @@ def mode_pre(session_dir, args):
+ # Read each Changelogs db and generate finaldb
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+- g_pid_nodefile_map.values())
++ list(g_pid_nodefile_map.values()))
+ write_output(args.outfile, outfilemerger, args.field_separator)
+
+ try:
+--
+1.8.3.1
+
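Both pitfalls fixed above are easy to reproduce in plain Python. A small standalone sketch follows; the bytearray_to_str body here is an assumption (a utf-8 decode) and may differ from the real helper in gfind_py2py3:

# 1. dict.values() is a list on py2 but a view on py3, so it must
#    be wrapped in list() before concatenating with another list.
outfiles = {1234: "/tmp/a.out", 5678: "/tmp/b.out"}
cmd = ["sort", "-u"] + list(outfiles.values()) + ["-o", "merged.out"]

# 2. xattr-style APIs return bytes on py3; decode before using
#    str methods such as startswith()/split().
def bytearray_to_str(value):
    if isinstance(value, (bytes, bytearray)):
        return bytes(value).decode("utf-8")
    return value

x = b"trusted.pgfid.00000000-0000-0000-0000-000000000001"
x_str = bytearray_to_str(x)
assert x_str.startswith("trusted.pgfid.")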
diff --git a/0349-glusterfind-python3-compatibility.patch b/0349-glusterfind-python3-compatibility.patch
new file mode 100644
index 0000000..7f1c274
--- /dev/null
+++ b/0349-glusterfind-python3-compatibility.patch
@@ -0,0 +1,56 @@
+From 1354a492cbc758f9801568153380ca896fab7765 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Fri, 10 Jan 2020 14:28:35 +0000
+Subject: [PATCH 349/349] glusterfind: python3 compatibility
+
+Problem:
+While deleting a gluster volume, the hook script 'S57glusterfind-delete-post.py'
+fails to execute and an error message can be observed in the glusterd log.
+
+Traceback:
+ File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 69, in <module>
+ main()
+ File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 39, in main
+ glusterfind_dir = os.path.join(get_glusterd_workdir(), "glusterfind")
+ File "/usr/lib64/python3.7/posixpath.py", line 94, in join
+ genericpath._check_arg_types('join', a, *p)
+ File "/usr/lib64/python3.7/genericpath.py", line 155, in _check_arg_types
+ raise TypeError("Can't mix strings and bytes in path components") from None
+TypeError: Can't mix strings and bytes in path components
+
+Solution:
+
+Added the 'universal_newlines' flag to Popen to support backward compatibility.
+
+Backport of:
+ > Patch: https://review.gluster.org/23994
+ > Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f
+ > Fixes: bz#1789478
+ > Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f
+BUG: 1789447
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/189216
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ tools/glusterfind/S57glusterfind-delete-post.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py
+index 5b5142d..5beece2 100755
+--- a/tools/glusterfind/S57glusterfind-delete-post.py
++++ b/tools/glusterfind/S57glusterfind-delete-post.py
+@@ -18,7 +18,7 @@ def handle_rm_error(func, path, exc_info):
+
+ def get_glusterd_workdir():
+ p = Popen(["gluster", "system::", "getwd"],
+- stdout=PIPE, stderr=PIPE)
++ stdout=PIPE, stderr=PIPE, universal_newlines=True)
+
+ out, _ = p.communicate()
+
+--
+1.8.3.1
+
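The one-line fix above works because universal_newlines=True makes communicate() return text rather than bytes on py3 (and changes nothing for py2's str type), so the result can be joined with other path strings. A standalone sketch, assuming the gluster CLI is installed:

import os
from subprocess import PIPE, Popen

# With universal_newlines=True, 'out' is str on both py2 and py3,
# so os.path.join() no longer raises
# "Can't mix strings and bytes in path components".
p = Popen(["gluster", "system::", "getwd"],
          stdout=PIPE, stderr=PIPE, universal_newlines=True)
out, _ = p.communicate()
glusterfind_dir = os.path.join(out.strip(), "glusterfind")
print(glusterfind_dir)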
diff --git a/0350-tools-glusterfind-Remove-an-extra-argument.patch b/0350-tools-glusterfind-Remove-an-extra-argument.patch
new file mode 100644
index 0000000..08f70a7
--- /dev/null
+++ b/0350-tools-glusterfind-Remove-an-extra-argument.patch
@@ -0,0 +1,37 @@
+From 6c06ac0571fb6bf0734b173cc3a75badc7554601 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Tue, 14 Jan 2020 10:51:06 +0530
+Subject: [PATCH 350/350] tools/glusterfind: Remove an extra argument
+
+Backport of:
+> Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24011/
+> fixes: bz#1790748
+> Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1
+> Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1
+BUG: 1789447
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/189363
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tools/glusterfind/src/main.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
+index dfc9d07..5ca1fec 100644
+--- a/tools/glusterfind/src/main.py
++++ b/tools/glusterfind/src/main.py
+@@ -515,7 +515,7 @@ def write_output(outfile, outfilemerger, field_separator):
+ continue
+
+ if row_2_rep and row_2_rep != "":
+- gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep)
++ gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep)
+
+ else:
+ gfind_write(f, row[0], field_separator, p_rep)
+--
+1.8.3.1
+
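The call being corrected above passed field_separator twice, which raises a TypeError at runtime. Inferring the wrapper's five-parameter signature from the fixed call, the failure mode looks like this (a sketch, not the shipped gfind_py2py3 module):

def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
    # the wrapper inserts the separator itself, so the caller only
    # passes it once
    f.write(u"{0}{1}{2}{3}{4}\n".format(row, field_separator, p_rep,
                                        field_separator,
                                        row_2_rep).encode())

with open("/tmp/out.db", "wb") as f:
    gfind_write_row(f, "NEW", "\x00", "dir1/file1", "")
    # The old call with the extra separator argument would fail:
    #   gfind_write_row(f, "NEW", "\x00", "dir1/file1", "\x00", "")
    #   TypeError: gfind_write_row() takes 5 positional arguments
    #              but 6 were given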
diff --git a/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch b/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch
new file mode 100644
index 0000000..51dc3bb
--- /dev/null
+++ b/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch
@@ -0,0 +1,131 @@
+From f38f0988eb6c0d72677abceba5ebeb51ea8d44ad Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Tue, 21 Jan 2020 21:09:56 +0530
+Subject: [PATCH 351/351] server: Mount fails after reboot 1/3 gluster nodes
+
+Problem: When one server node (1x3) comes up after a reboot, the
+client is unmounted. The client is unmounted because it receives an
+AUTH_FAILED event and calls fini for the graph. The client gets
+AUTH_FAILED because the brick is not attached to the graph at that
+moment.
+
+Solution: To avoid unmounting the client graph, throw an ENOENT error
+ from the server when the brick is not attached to the server at
+ the time clients authenticate.
+
+> Credits: Xavi Hernandez <xhernandez@redhat.com>
+> Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e
+> Fixes: bz#1793852
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry picked from commit e4f776308d5ee7ffeb07de0fd9e1edae6944030d)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24053/)
+
+Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e
+BUG: 1793035
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/190042
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/protocol/bug-1433815-auth-allow.t | 1 +
+ xlators/protocol/client/src/client-handshake.c | 3 +-
+ xlators/protocol/server/src/server-handshake.c | 41 +++++++++++++++++---------
+ 3 files changed, 29 insertions(+), 16 deletions(-)
+
+diff --git a/tests/bugs/protocol/bug-1433815-auth-allow.t b/tests/bugs/protocol/bug-1433815-auth-allow.t
+index fa22ad8..a78c0eb 100644
+--- a/tests/bugs/protocol/bug-1433815-auth-allow.t
++++ b/tests/bugs/protocol/bug-1433815-auth-allow.t
+@@ -17,6 +17,7 @@ TEST $CLI volume create $V0 $H0:$B0/$V0
+ # Set auth.allow so it *doesn't* include ourselves.
+ TEST $CLI volume set $V0 auth.allow 1.2.3.4
+ TEST $CLI volume start $V0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+ # "System getspec" will include the username and password if the request comes
+ # from a server (which we are). Unfortunately, this will cause authentication
+diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
+index c43756a..0002361 100644
+--- a/xlators/protocol/client/src/client-handshake.c
++++ b/xlators/protocol/client/src/client-handshake.c
+@@ -1031,8 +1031,7 @@ client_setvolume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ "SETVOLUME on remote-host failed: %s", remote_error);
+
+ errno = op_errno;
+- if (remote_error &&
+- (strcmp("Authentication failed", remote_error) == 0)) {
++ if (remote_error && (op_errno == EACCES)) {
+ auth_fail = _gf_true;
+ op_ret = 0;
+ }
+diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
+index 382f241..1d1177d 100644
+--- a/xlators/protocol/server/src/server-handshake.c
++++ b/xlators/protocol/server/src/server-handshake.c
+@@ -250,6 +250,7 @@ server_setvolume(rpcsvc_request_t *req)
+ char *subdir_mount = NULL;
+ char *client_name = NULL;
+ gf_boolean_t cleanup_starting = _gf_false;
++ gf_boolean_t xlator_in_graph = _gf_true;
+
+ params = dict_new();
+ reply = dict_new();
+@@ -311,8 +312,10 @@ server_setvolume(rpcsvc_request_t *req)
+ LOCK(&ctx->volfile_lock);
+ {
+ xl = get_xlator_by_name(this, name);
+- if (!xl)
++ if (!xl) {
++ xlator_in_graph = _gf_false;
+ xl = this;
++ }
+ }
+ UNLOCK(&ctx->volfile_lock);
+ if (xl == NULL) {
+@@ -568,20 +571,30 @@ server_setvolume(rpcsvc_request_t *req)
+ "failed to set error "
+ "msg");
+ } else {
+- gf_event(EVENT_CLIENT_AUTH_REJECT,
+- "client_uid=%s;"
+- "client_identifier=%s;server_identifier=%s;"
+- "brick_path=%s",
+- client->client_uid, req->trans->peerinfo.identifier,
+- req->trans->myinfo.identifier, name);
+- gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR,
+- "Cannot authenticate client"
+- " from %s %s",
+- client->client_uid, (clnt_version) ? clnt_version : "old");
+-
+ op_ret = -1;
+- op_errno = EACCES;
+- ret = dict_set_str(reply, "ERROR", "Authentication failed");
++ if (!xlator_in_graph) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOENT, PS_MSG_AUTHENTICATE_ERROR,
++ "Cannot authenticate client"
++ " from %s %s because brick is not attached in graph",
++ client->client_uid, (clnt_version) ? clnt_version : "old");
++
++ op_errno = ENOENT;
++ ret = dict_set_str(reply, "ERROR", "Brick not found");
++ } else {
++ gf_event(EVENT_CLIENT_AUTH_REJECT,
++ "client_uid=%s;"
++ "client_identifier=%s;server_identifier=%s;"
++ "brick_path=%s",
++ client->client_uid, req->trans->peerinfo.identifier,
++ req->trans->myinfo.identifier, name);
++ gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR,
++ "Cannot authenticate client"
++ " from %s %s",
++ client->client_uid, (clnt_version) ? clnt_version : "old");
++
++ op_errno = EACCES;
++ ret = dict_set_str(reply, "ERROR", "Authentication failed");
++ }
+ if (ret < 0)
+ gf_msg_debug(this->name, 0,
+ "failed to set error "
+--
+1.8.3.1
+
diff --git a/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch b/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch
new file mode 100644
index 0000000..1d9a389
--- /dev/null
+++ b/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch
@@ -0,0 +1,38 @@
+From 8074906ace5fbd71b5d24cc3da5571ebdebed859 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Thu, 2 Jan 2020 11:27:47 +0000
+Subject: [PATCH 352/353] spec: fixed missing dependencies for
+ glusterfs-cloudsync-plugins
+
+RPMDiff raises a warning, subpackage glusterfs-cloudsync-plugins
+on x86_64 consumes library libglusterfs.so.0()(64bit) from
+subpackage glusterfs-libs but does not have an explicit package
+version requirement, which is fixed using this patch.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1775564
+
+Change-Id: I05ea46ac2c92090f01c07dfbd6e0d66498f1c586
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/188619
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 671ee27..e95e539 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -374,6 +374,7 @@ This package provides the GlusterFS CLI application and its man page
+ %package cloudsync-plugins
+ Summary: Cloudsync Plugins
+ BuildRequires: libcurl-devel
++Requires: glusterfs-libs = %{version}-%{release}
+
+ %description cloudsync-plugins
+ GlusterFS is a distributed file-system capable of scaling to several
+--
+1.8.3.1
+
diff --git a/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch b/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch
new file mode 100644
index 0000000..e436373
--- /dev/null
+++ b/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch
@@ -0,0 +1,47 @@
+From 37e2d76579abf38031d1cd9769da798fa04b183a Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 22 Jan 2020 14:14:33 -0500
+Subject: [PATCH 353/353] build: glusterfs-ganesha pkg requires
+ python3-policycoreutils on rhel8
+
+glusterfs-ganesha pkg requires policycoreutils-python-utils on rhel8,
+not policycoreutils-python
+
+also requires nfs-ganesha-selinux on rhel-8 (optional on rhel-7)
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ia97b4dabdc098fb76e3f60e8b48ea4191e677136
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+BUG: 1794153
+Reviewed-on: https://code.engineering.redhat.com/gerrit/190130
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index e95e539..7c8a751 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -462,6 +462,7 @@ Summary: NFS-Ganesha configuration
+ Group: Applications/File
+
+ Requires: %{name}-server%{?_isa} = %{version}-%{release}
++Requires: nfs-ganesha-selinux >= 2.7.3
+ Requires: nfs-ganesha-gluster >= 2.7.3
+ Requires: pcs, dbus
+ %if ( 0%{?rhel} && 0%{?rhel} == 6 )
+@@ -475,7 +476,7 @@ Requires: net-tools
+ %endif
+
+ %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+-%if ( 0%{?rhel} )
++%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+ Requires: selinux-policy >= 3.13.1-160
+ Requires(post): policycoreutils-python
+ Requires(postun): policycoreutils-python
+--
+1.8.3.1
+
diff --git a/0354-core-fix-memory-pool-management-races.patch b/0354-core-fix-memory-pool-management-races.patch
new file mode 100644
index 0000000..a7cdfc0
--- /dev/null
+++ b/0354-core-fix-memory-pool-management-races.patch
@@ -0,0 +1,466 @@
+From 75a9d946d252ce70460144615ca17dbdf2e80fab Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 7 Feb 2020 10:19:57 +0100
+Subject: [PATCH 354/355] core: fix memory pool management races
+
+Objects allocated from a per-thread memory pool keep a reference to it
+to be able to return the object to the pool when not used anymore. The
+object holding this reference can have a long life cycle that could
+survive a glfs_fini() call.
+
+This means that it's unsafe to destroy memory pools from glfs_fini().
+
+Another side effect of destroying memory pools from glfs_fini() is that
+the TLS variable that points to one of those pools cannot be reset for
+all alive threads. This means that any attempt to allocate memory from
+those threads will access already free'd memory, which is very
+dangerous.
+
+To fix these issues, mem_pools_fini() doesn't destroy pool lists
+anymore. Only at process termination the pools are destroyed.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24099
+> Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965
+> Fixes: bz#1801684
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965
+BUG: 1800703
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/192262
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/globals.c | 13 ++-
+ libglusterfs/src/glusterfs/globals.h | 3 +
+ libglusterfs/src/glusterfs/mem-pool.h | 28 ++---
+ libglusterfs/src/mem-pool.c | 201 ++++++++++++++++++----------------
+ libglusterfs/src/syncop.c | 7 ++
+ 5 files changed, 146 insertions(+), 106 deletions(-)
+
+diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
+index 02098e6..e433ee8 100644
+--- a/libglusterfs/src/globals.c
++++ b/libglusterfs/src/globals.c
+@@ -319,7 +319,18 @@ glusterfs_cleanup(void *ptr)
+ GF_FREE(thread_syncopctx.groups);
+ }
+
+- mem_pool_thread_destructor();
++ mem_pool_thread_destructor(NULL);
++}
++
++void
++gf_thread_needs_cleanup(void)
++{
++ /* The value stored in free_key TLS is not really used for anything, but
++ * pthread implementation doesn't call the TLS destruction function unless
++ * it's != NULL. This function must be called whenever something is
++ * allocated for this thread so that glusterfs_cleanup() will be called
++ * and resources can be released. */
++ (void)pthread_setspecific(free_key, (void *)1);
+ }
+
+ static void
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index e218285..31717ed 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -181,6 +181,9 @@ glusterfs_leaseid_exist(void);
+ int
+ glusterfs_globals_init(glusterfs_ctx_t *ctx);
+
++void
++gf_thread_needs_cleanup(void);
++
+ struct tvec_base *
+ glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx);
+ void
+diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
+index be0a26d..97bf76c 100644
+--- a/libglusterfs/src/glusterfs/mem-pool.h
++++ b/libglusterfs/src/glusterfs/mem-pool.h
+@@ -245,24 +245,26 @@ typedef struct per_thread_pool {
+ } per_thread_pool_t;
+
+ typedef struct per_thread_pool_list {
+- /*
+- * These first two members are protected by the global pool lock. When
+- * a thread first tries to use any pool, we create one of these. We
+- * link it into the global list using thr_list so the pool-sweeper
+- * thread can find it, and use pthread_setspecific so this thread can
+- * find it. When the per-thread destructor runs, we "poison" the pool
+- * list to prevent further allocations. This also signals to the
+- * pool-sweeper thread that the list should be detached and freed after
+- * the next time it's swept.
+- */
++ /* thr_list is used to place the TLS pool_list into the active global list
++ * (pool_threads) or the inactive global list (pool_free_threads). It's
++ * protected by the global pool_lock. */
+ struct list_head thr_list;
+- unsigned int poison;
++
++ /* This lock is used to update poison and the hot/cold lists of members
++ * of 'pools' array. */
++ pthread_spinlock_t lock;
++
++ /* This field is used to mark a pool_list as not being owned by any thread.
++ * This means that the sweeper thread won't be cleaning objects stored in
++ * its pools. mem_put() uses it to decide if the object being released is
++ * placed into its original pool_list or directly destroyed. */
++ bool poison;
++
+ /*
+ * There's really more than one pool, but the actual number is hidden
+ * in the implementation code so we just make it a single-element array
+ * here.
+ */
+- pthread_spinlock_t lock;
+ per_thread_pool_t pools[1];
+ } per_thread_pool_list_t;
+
+@@ -307,7 +309,7 @@ void
+ mem_pool_destroy(struct mem_pool *pool);
+
+ void
+-mem_pool_thread_destructor(void);
++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
+
+ void
+ gf_mem_acct_enable_set(void *ctx);
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index d88041d..2b41c01 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL;
+ #define POOL_SWEEP_SECS 30
+
+ typedef struct {
+- struct list_head death_row;
+ pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
+ unsigned int n_cold_lists;
+ } sweep_state_t;
+@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
+ static unsigned int init_count = 0;
+ static pthread_t sweeper_tid;
+
+-gf_boolean_t
++static bool
+ collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
+ {
+ unsigned int i;
+ per_thread_pool_t *pt_pool;
+- gf_boolean_t poisoned;
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+- poisoned = pool_list->poison != 0;
+- if (!poisoned) {
+- for (i = 0; i < NPOOLS; ++i) {
+- pt_pool = &pool_list->pools[i];
+- if (pt_pool->cold_list) {
+- if (state->n_cold_lists >= N_COLD_LISTS) {
+- break;
+- }
+- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
++ for (i = 0; i < NPOOLS; ++i) {
++ pt_pool = &pool_list->pools[i];
++ if (pt_pool->cold_list) {
++ if (state->n_cold_lists >= N_COLD_LISTS) {
++ (void)pthread_spin_unlock(&pool_list->lock);
++ return true;
+ }
+- pt_pool->cold_list = pt_pool->hot_list;
+- pt_pool->hot_list = NULL;
++ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
+ }
++ pt_pool->cold_list = pt_pool->hot_list;
++ pt_pool->hot_list = NULL;
+ }
+
+ (void)pthread_spin_unlock(&pool_list->lock);
+
+- return poisoned;
++ return false;
+ }
+
+-void
++static void
+ free_obj_list(pooled_obj_hdr_t *victim)
+ {
+ pooled_obj_hdr_t *next;
+@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim)
+ }
+ }
+
+-void *
++static void *
+ pool_sweeper(void *arg)
+ {
+ sweep_state_t state;
+ per_thread_pool_list_t *pool_list;
+- per_thread_pool_list_t *next_pl;
+- per_thread_pool_t *pt_pool;
+- unsigned int i;
+- gf_boolean_t poisoned;
++ uint32_t i;
++ bool pending;
+
+ /*
+ * This is all a bit inelegant, but the point is to avoid doing
+ * expensive things (like freeing thousands of objects) while holding a
+- * global lock. Thus, we split each iteration into three passes, with
++ * global lock. Thus, we split each iteration into two passes, with
+ * only the first and fastest holding the lock.
+ */
+
++ pending = true;
++
+ for (;;) {
+- sleep(POOL_SWEEP_SECS);
++ /* If we know there's pending work to do (or it's the first run), we
++ * do collect garbage more often. */
++ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
++
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+- INIT_LIST_HEAD(&state.death_row);
+ state.n_cold_lists = 0;
++ pending = false;
+
+ /* First pass: collect stuff that needs our attention. */
+ (void)pthread_mutex_lock(&pool_lock);
+- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list)
++ list_for_each_entry(pool_list, &pool_threads, thr_list)
+ {
+- (void)pthread_mutex_unlock(&pool_lock);
+- poisoned = collect_garbage(&state, pool_list);
+- (void)pthread_mutex_lock(&pool_lock);
+-
+- if (poisoned) {
+- list_move(&pool_list->thr_list, &state.death_row);
++ if (collect_garbage(&state, pool_list)) {
++ pending = true;
+ }
+ }
+ (void)pthread_mutex_unlock(&pool_lock);
+
+- /* Second pass: free dead pools. */
+- (void)pthread_mutex_lock(&pool_free_lock);
+- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list)
+- {
+- for (i = 0; i < NPOOLS; ++i) {
+- pt_pool = &pool_list->pools[i];
+- free_obj_list(pt_pool->cold_list);
+- free_obj_list(pt_pool->hot_list);
+- pt_pool->hot_list = pt_pool->cold_list = NULL;
+- }
+- list_del(&pool_list->thr_list);
+- list_add(&pool_list->thr_list, &pool_free_threads);
+- }
+- (void)pthread_mutex_unlock(&pool_free_lock);
+-
+- /* Third pass: free cold objects from live pools. */
++ /* Second pass: free cold objects from live pools. */
+ for (i = 0; i < state.n_cold_lists; ++i) {
+ free_obj_list(state.cold_lists[i]);
+ }
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ }
++
++ return NULL;
+ }
+
+ void
+-mem_pool_thread_destructor(void)
++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
+ {
+- per_thread_pool_list_t *pool_list = thread_pool_list;
+-
+- /* The pool-sweeper thread will take it from here.
+- *
+- * We can change 'poison' here without taking locks because the change
+- * itself doesn't interact with other parts of the code and a simple write
+- * is already atomic from the point of view of the processor.
+- *
+- * This change can modify what mem_put() does, but both possibilities are
+- * fine until the sweeper thread kicks in. The real synchronization must be
+- * between mem_put() and the sweeper thread. */
++ per_thread_pool_t *pt_pool;
++ uint32_t i;
++
++ if (pool_list == NULL) {
++ pool_list = thread_pool_list;
++ }
++
++ /* The current thread is terminating. None of the allocated objects will
++ * be used again. We can directly destroy them here instead of delaying
++ * it until the next sweeper loop. */
+ if (pool_list != NULL) {
+- pool_list->poison = 1;
++ /* Remove pool_list from the global list to avoid that sweeper
++ * could touch it. */
++ pthread_mutex_lock(&pool_lock);
++ list_del(&pool_list->thr_list);
++ pthread_mutex_unlock(&pool_lock);
++
++ /* We need to protect hot/cold changes from potential mem_put() calls
++ * that reference this pool_list. Once poison is set to true, we are
++ * sure that no one else will touch hot/cold lists. The only possible
++ * race is when at the same moment a mem_put() is adding a new item
++ * to the hot list. We protect from that by taking pool_list->lock.
++ * After that we don't need the lock to destroy the hot/cold lists. */
++ pthread_spin_lock(&pool_list->lock);
++ pool_list->poison = true;
++ pthread_spin_unlock(&pool_list->lock);
++
++ for (i = 0; i < NPOOLS; i++) {
++ pt_pool = &pool_list->pools[i];
++
++ free_obj_list(pt_pool->hot_list);
++ pt_pool->hot_list = NULL;
++
++ free_obj_list(pt_pool->cold_list);
++ pt_pool->cold_list = NULL;
++ }
++
++ pthread_mutex_lock(&pool_free_lock);
++ list_add(&pool_list->thr_list, &pool_free_threads);
++ pthread_mutex_unlock(&pool_free_lock);
++
+ thread_pool_list = NULL;
+ }
+ }
+@@ -528,6 +538,30 @@ mem_pools_preinit(void)
+ init_done = GF_MEMPOOL_INIT_EARLY;
+ }
+
++static __attribute__((destructor)) void
++mem_pools_postfini(void)
++{
++ per_thread_pool_list_t *pool_list, *next;
++
++ /* This is part of a process shutdown (or dlclose()) which means that
++ * most probably all threads should be stopped. However this is not the
++ * case for gluster and there are even legitimate situations in which we
++ * could have some threads alive. What is sure is that none of those
++ * threads should be using anything from this library, so destroying
++ * everything here should be fine and safe. */
++
++ list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list)
++ {
++ mem_pool_thread_destructor(pool_list);
++ }
++
++ list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list)
++ {
++ list_del(&pool_list->thr_list);
++ FREE(pool_list);
++ }
++}
++
+ /* Call mem_pools_init() once threading has been configured completely. This
+ * prevent the pool_sweeper thread from getting killed once the main() thread
+ * exits during deamonizing. */
+@@ -560,10 +594,6 @@ mem_pools_fini(void)
+ */
+ break;
+ case 1: {
+- per_thread_pool_list_t *pool_list;
+- per_thread_pool_list_t *next_pl;
+- unsigned int i;
+-
+ /* if mem_pools_init() was not called, sweeper_tid will be invalid
+ * and the functions will error out. That is not critical. In all
+ * other cases, the sweeper_tid will be valid and the thread gets
+@@ -571,32 +601,11 @@ mem_pools_fini(void)
+ (void)pthread_cancel(sweeper_tid);
+ (void)pthread_join(sweeper_tid, NULL);
+
+- /* At this point all threads should have already terminated, so
+- * it should be safe to destroy all pending per_thread_pool_list_t
+- * structures that are stored for each thread. */
+- mem_pool_thread_destructor();
+-
+- /* free all objects from all pools */
+- list_for_each_entry_safe(pool_list, next_pl, &pool_threads,
+- thr_list)
+- {
+- for (i = 0; i < NPOOLS; ++i) {
+- free_obj_list(pool_list->pools[i].hot_list);
+- free_obj_list(pool_list->pools[i].cold_list);
+- pool_list->pools[i].hot_list = NULL;
+- pool_list->pools[i].cold_list = NULL;
+- }
+-
+- list_del(&pool_list->thr_list);
+- FREE(pool_list);
+- }
+-
+- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads,
+- thr_list)
+- {
+- list_del(&pool_list->thr_list);
+- FREE(pool_list);
+- }
++ /* There could be threads still running in some cases, so we can't
++ * destroy pool_lists in use. We can also not destroy unused
++ * pool_lists because some allocated objects may still be pointing
++ * to them. */
++ mem_pool_thread_destructor(NULL);
+
+ init_done = GF_MEMPOOL_INIT_DESTROY;
+ /* Fall through. */
+@@ -617,7 +626,7 @@ mem_pools_fini(void)
+ {
+ }
+ void
+-mem_pool_thread_destructor(void)
++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
+ {
+ }
+
+@@ -738,13 +747,21 @@ mem_get_pool_list(void)
+ }
+ }
+
++ /* There's no need to take pool_list->lock, because this is already an
++ * atomic operation and we don't need to synchronize it with any change
++ * in hot/cold lists. */
++ pool_list->poison = false;
++
+ (void)pthread_mutex_lock(&pool_lock);
+- pool_list->poison = 0;
+ list_add(&pool_list->thr_list, &pool_threads);
+ (void)pthread_mutex_unlock(&pool_lock);
+
+ thread_pool_list = pool_list;
+
++ /* Ensure that all memory objects associated to the new pool_list are
++ * destroyed when the thread terminates. */
++ gf_thread_needs_cleanup();
++
+ return pool_list;
+ }
+
+diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
+index 2eb7b49..0de53c6 100644
+--- a/libglusterfs/src/syncop.c
++++ b/libglusterfs/src/syncop.c
+@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups)
+
+ /* set/reset the ngrps, this is where reset of groups is handled */
+ opctx->ngrps = count;
++
++ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) {
++ /* This is the first time we are storing groups into the TLS structure
++ * so we mark the current thread so that it will be properly cleaned
++ * up when the thread terminates. */
++ gf_thread_needs_cleanup();
++ }
+ opctx->valid |= SYNCOPCTX_GROUPS;
+
+ out:
+--
+1.8.3.1
+
diff --git a/0355-core-Prevent-crash-on-process-termination.patch b/0355-core-Prevent-crash-on-process-termination.patch
new file mode 100644
index 0000000..fca3f2c
--- /dev/null
+++ b/0355-core-Prevent-crash-on-process-termination.patch
@@ -0,0 +1,74 @@
+From 10f1730073b9fb02d2ed7f7de855afd6df0e5202 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Wed, 19 Feb 2020 12:24:15 +0100
+Subject: [PATCH 355/355] core: Prevent crash on process termination
+
+A previous patch (ce61da816a) has fixed a use-after-free issue,
+but it doesn't work well when the final cleanup is done at process
+termination because gluster doesn't stop other threads before
+calling exit().
+
+For this reason, the final cleanup is removed to avoid the crash,
+at least until the termination sequence properly stops all gluster
+threads before exiting the program.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24138
+> Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b
+> Fixes: bz#1801684
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b
+BUG: 1800703
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/192344
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/mem-pool.c | 30 +++++++++++-------------------
+ 1 file changed, 11 insertions(+), 19 deletions(-)
+
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index 2b41c01..73503e0 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -541,25 +541,17 @@ mem_pools_preinit(void)
+ static __attribute__((destructor)) void
+ mem_pools_postfini(void)
+ {
+- per_thread_pool_list_t *pool_list, *next;
+-
+- /* This is part of a process shutdown (or dlclose()) which means that
+- * most probably all threads should be stopped. However this is not the
+- * case for gluster and there are even legitimate situations in which we
+- * could have some threads alive. What is sure is that none of those
+- * threads should be using anything from this library, so destroying
+- * everything here should be fine and safe. */
+-
+- list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list)
+- {
+- mem_pool_thread_destructor(pool_list);
+- }
+-
+- list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list)
+- {
+- list_del(&pool_list->thr_list);
+- FREE(pool_list);
+- }
++ /* TODO: This function should destroy all per thread memory pools that
++ * are still alive, but this is not possible right now because glibc
++ * starts calling destructors as soon as exit() is called, and
++ * gluster doesn't ensure that all threads have been stopped before
++ * calling exit(). Existing threads would crash when they try to use
++ * memory or they terminate if we destroy things here.
++ *
++ * When we properly terminate all threads, we can add the needed
++ * code here. Till then we need to leave the memory allocated. Most
++ * probably this function will be executed on process termination,
++ * so the memory will be released anyway by the system. */
+ }
+
+ /* Call mem_pools_init() once threading has been configured completely. This
+--
+1.8.3.1
+
diff --git a/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch b/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch
new file mode 100644
index 0000000..f2b6835
--- /dev/null
+++ b/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch
@@ -0,0 +1,26 @@
+From 4099fb424482ede2fb6346c76c58523113f415df Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Thu, 12 Mar 2020 01:02:41 -0400
+Subject: [PATCH 356/357] Update rfc.sh to rhgs-3.5.1-rhel-8
+
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ rfc.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/rfc.sh b/rfc.sh
+index 918fb11..a408e45 100755
+--- a/rfc.sh
++++ b/rfc.sh
+@@ -18,7 +18,7 @@ done
+ shift $((OPTIND-1))
+
+
+-branch="rhgs-3.5.1";
++branch="rhgs-3.5.1-rhel-8";
+
+ set_hooks_commit_msg()
+ {
+--
+1.8.3.1
+
diff --git a/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch b/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch
new file mode 100644
index 0000000..a67b89c
--- /dev/null
+++ b/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch
@@ -0,0 +1,268 @@
+From 2d5e678f8331d4d99ee4dff6e166cbf01c83ab36 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 12 Feb 2020 12:47:57 -0500
+Subject: [PATCH 357/357] ganesha-ha: updates for pcs-0.10.x (i.e. in Fedora-29
+ and RHEL-8)
+
+pcs-0.10 has introduced changes to the options of pcs commands
+
+pcs-0.10.x is in Fedora-29 and later and RHEL-8.
+
+Also some minor cleanup. Namely use bash built-in [[...]] in a few
+more places instead of test(1), i.e. [...], and use correct "==" for
+comparison.
+
+master: https://review.gluster.org/24115
+
+Change-Id: I3fb2fcd71406964c77fdc4f18580ca133f365fd6
+BUG: 1802727
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/194467
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 84 ++++++++++++++++++++++++------------
+ 1 file changed, 56 insertions(+), 28 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 32af1ca..0b0050a 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -28,7 +28,12 @@ HA_VOL_MNT="/var/run/gluster/shared_storage"
+ HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
+ SERVICE_MAN="DISTRO_NOT_FOUND"
+
+-RHEL6_PCS_CNAME_OPTION="--name"
++# rhel, fedora id, version
++ID=""
++VERSION_ID=""
++
++PCS9OR10_PCS_CNAME_OPTION=""
++PCS9OR10_PCS_CLONE_OPTION="clone"
+ SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
+
+ # UNBLOCK RA uses shared_storage which may become unavailable
+@@ -101,9 +106,9 @@ determine_service_manager () {
+ then
+ SERVICE_MAN="/sbin/service"
+ fi
+- if [ "${SERVICE_MAN}" == "DISTRO_NOT_FOUND" ]
++ if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]]
+ then
+- echo "Service manager not recognized, exiting"
++ logger "Service manager not recognized, exiting"
+ exit 1
+ fi
+ }
+@@ -114,7 +119,7 @@ manage_service ()
+ local new_node=${2}
+ local option=
+
+- if [ "${action}" == "start" ]; then
++ if [[ "${action}" == "start" ]]; then
+ option="yes"
+ else
+ option="no"
+@@ -122,7 +127,7 @@ manage_service ()
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+ ${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option"
+
+- if [ "${SERVICE_MAN}" == "/bin/systemctl" ]
++ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]
+ then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+ ${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha"
+@@ -140,7 +145,7 @@ check_cluster_exists()
+
+ if [ -e /var/run/corosync.pid ]; then
+ cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3)
+- if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then
++ if [[ "${cluster_name}X" == "${name}X" ]]; then
+ logger "$name already exists, exiting"
+ exit 0
+ fi
+@@ -155,7 +160,7 @@ determine_servers()
+ local tmp_ifs=${IFS}
+ local ha_servers=""
+
+- if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then
++ if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then
+ ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
+ IFS=$' '
+ for server in ${ha_servers} ; do
+@@ -193,15 +198,21 @@ setup_cluster()
+
+ logger "setting up cluster ${name} with the following ${servers}"
+
+- pcs cluster auth ${servers}
+- # pcs cluster setup --name ${name} ${servers}
+- pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers}
++ # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
++ pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
+ if [ $? -ne 0 ]; then
+- logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} failed"
++ logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out"
+ #set up failed stop all ganesha process and clean up symlinks in cluster
+ stop_ganesha_all "${servers}"
+ exit 1;
+ fi
++
++ # pcs cluster auth ${servers}
++ pcs cluster auth
++ if [ $? -ne 0 ]; then
++ logger "pcs cluster auth failed"
++ fi
++
+ pcs cluster start --all
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster start failed"
+@@ -217,7 +228,7 @@ setup_cluster()
+ done
+
+ unclean=$(pcs status | grep -u "UNCLEAN")
+- while [[ "${unclean}X" = "UNCLEANX" ]]; do
++ while [[ "${unclean}X" == "UNCLEANX" ]]; do
+ sleep 1
+ unclean=$(pcs status | grep -u "UNCLEAN")
+ done
+@@ -244,7 +255,7 @@ setup_finalize_ha()
+ local stopped=""
+
+ stopped=$(pcs status | grep -u "Stopped")
+- while [[ "${stopped}X" = "StoppedX" ]]; do
++ while [[ "${stopped}X" == "StoppedX" ]]; do
+ sleep 1
+ stopped=$(pcs status | grep -u "Stopped")
+ done
+@@ -265,7 +276,7 @@ refresh_config ()
+ if [ -e ${SECRET_PEM} ]; then
+ while [[ ${3} ]]; do
+ current_host=`echo ${3} | cut -d "." -f 1`
+- if [ ${short_host} != ${current_host} ]; then
++ if [[ ${short_host} != ${current_host} ]]; then
+ output=$(ssh -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \
+ "dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+@@ -398,7 +409,7 @@ wrap_create_virt_ip_constraints()
+ # the result is "node2 node3 node4"; for node2, "node3 node4 node1"
+ # and so on.
+ while [[ ${1} ]]; do
+- if [ "${1}" = "${primary}" ]; then
++ if [[ ${1} == ${primary} ]]; then
+ shift
+ while [[ ${1} ]]; do
+ tail=${tail}" "${1}
+@@ -429,15 +440,15 @@ setup_create_resources()
+ local cibfile=$(mktemp -u)
+
+ # fixup /var/lib/nfs
+- logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone"
+- pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
++ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}"
++ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}
+ if [ $? -ne 0 ]; then
+- logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
++ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+- pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone
++ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION}
+ if [ $? -ne 0 ]; then
+- logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed"
++ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace
+@@ -445,9 +456,9 @@ setup_create_resources()
+ # ganesha-active crm_attribute
+ sleep 5
+
+- pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true
++ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true
+ if [ $? -ne 0 ]; then
+- logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed"
++ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
+@@ -616,7 +627,7 @@ addnode_recreate_resources()
+ --after ${add_node}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+- ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
+@@ -780,7 +791,7 @@ setup_state_volume()
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+ fi
+ for server in ${HA_SERVERS} ; do
+- if [ ${server} != ${dirname} ]; then
++ if [[ ${server} != ${dirname} ]]; then
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
+ fi
+@@ -794,7 +805,7 @@ setup_state_volume()
+ enable_pacemaker()
+ {
+ while [[ ${1} ]]; do
+- if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then
++ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+ ${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker"
+ else
+@@ -892,7 +903,7 @@ delnode_state_volume()
+ rm -rf ${mnt}/nfs-ganesha/${dirname}
+
+ for server in ${HA_SERVERS} ; do
+- if [[ "${server}" != "${dirname}" ]]; then
++ if [[ ${server} != ${dirname} ]]; then
+ rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
+ rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
+ fi
+@@ -963,7 +974,7 @@ status()
+
+ create_ganesha_conf_file()
+ {
+- if [ $1 == "yes" ];
++ if [[ "$1" == "yes" ]];
+ then
+ if [ -e $GANESHA_CONF ];
+ then
+@@ -1012,6 +1023,13 @@ main()
+ semanage boolean -m gluster_use_execmem --on
+ fi
+
++ local osid=""
++
++ osid=$(grep ^ID= /etc/os-release)
++ eval $(echo ${osid} | grep -F ID=)
++ osid=$(grep ^VERSION_ID= /etc/os-release)
++ eval $(echo ${osid} | grep -F VERSION_ID=)
++
+ HA_CONFDIR=${1%/}; shift
+ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+ local node=""
+@@ -1032,7 +1050,17 @@ main()
+
+ determine_servers "setup"
+
+- if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then
++ # Fedora 29+ and rhel/centos 8 has PCS-0.10.x
++ # default is pcs-0.10.x options but check for
++ # rhel/centos 7 (pcs-0.9.x) and adjust accordingly
++ if [[ ${ID} =~ {rhel,centos} ]]; then
++ if [[ ${VERSION_ID} == 7.* ]]; then
++ PCS9OR10_PCS_CNAME_OPTION="--name"
++ PCS9OR10_PCS_CLONE_OPTION="--clone"
++ fi
++ fi
++
++ if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then
+
+ determine_service_manager
+
+--
+1.8.3.1
+
diff --git a/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch b/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch
new file mode 100644
index 0000000..d7138a6
--- /dev/null
+++ b/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch
@@ -0,0 +1,51 @@
+From 0d8c6d78130d22c475010bcce8055073b19de82a Mon Sep 17 00:00:00 2001
+From: Xie Changlong <xiechanglong@cmss.chinamobile.com>
+Date: Fri, 17 May 2019 18:33:11 +0800
+Subject: [PATCH 358/362] inode: fix wrong loop count in __inode_ctx_free
+
+Avoid serious memory leak
+
+Backport of :
+>fixes: bz#1711240
+>Upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22738/
+>Change-Id: Ic61a8fdd0e941e136c98376a87b5a77fa8c22316
+>Signed-off-by: Xie Changlong <xiechanglong@cmss.chinamobile.com>
+
+BUG: 1781543
+Change-Id: I601ebb6cd6744a61c64edd3d21d3b9a0edf1e95b
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/195611
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/inode.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 5331e93..9dbb25b 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -402,14 +402,15 @@ __inode_ctx_free(inode_t *inode)
+ goto noctx;
+ }
+
+- for (index = 0; index < inode->table->xl->graph->xl_count; index++) {
++ for (index = 0; index < inode->table->ctxcount; index++) {
+ if (inode->_ctx[index].value1 || inode->_ctx[index].value2) {
+ xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
+- old_THIS = THIS;
+- THIS = xl;
+- if (!xl->call_cleanup && xl->cbks->forget)
++ if (xl && !xl->call_cleanup && xl->cbks->forget) {
++ old_THIS = THIS;
++ THIS = xl;
+ xl->cbks->forget(xl, inode);
+- THIS = old_THIS;
++ THIS = old_THIS;
++ }
+ }
+ }
+
+--
+1.8.3.1
+
diff --git a/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch b/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch
new file mode 100644
index 0000000..bd730bc
--- /dev/null
+++ b/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch
@@ -0,0 +1,41 @@
+From c0efaa98d777e4520028bf55482846b3ef5fca3a Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Wed, 1 Apr 2020 12:14:31 +0530
+Subject: [PATCH 359/362] dht: gf_defrag_process_dir is called even if
+ gf_defrag_fix_layout has failed
+
+Currently, even though gf_defrag_fix_layout fails with ENOENT or ESTALE, a
+subsequent call is made to gf_defrag_process_dir, leading to rebalance failure.
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/24225
+
+> fixes: #1102
+> Change-Id: Ib0c309fd78e89a000fed3feb4bbe2c5b48e61478
+> Signed-off-by: Susant Palai <spalai@redhat.com>
+
+BUG: 1812789
+Change-Id: Ib0c309fd78e89a000fed3feb4bbe2c5b48e61478
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/196249
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 559f046..f4c62b8 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -3939,6 +3939,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ defrag->total_failures++;
+ }
+ ret = 0;
++ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Setxattr failed for %s", loc->path);
+--
+1.8.3.1
+
diff --git a/0360-rpc-Make-ssl-log-more-useful.patch b/0360-rpc-Make-ssl-log-more-useful.patch
new file mode 100644
index 0000000..05e903d
--- /dev/null
+++ b/0360-rpc-Make-ssl-log-more-useful.patch
@@ -0,0 +1,117 @@
+From 2b859d1a5499a215c8c37472d4fc7d7e4d70dac6 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Tue, 31 Mar 2020 16:45:35 +0530
+Subject: [PATCH 360/362] rpc: Make ssl log more useful
+
+Currently, ssl_setup_connection_params throws 4 messages for every
+rpc connection, which irritates a user while reading the logs. The
+same info can be printed in a single log with peerinfo to make it
+more useful. ssl_setup_connection_params tries to load dh_param even
+when the user has not configured it, and if a dh_param file is not
+available it throws a failure message. To avoid the message, load
+dh_param only when the user has configured it.
+
+> Change-Id: I9ddb57f86a3fa3e519180cb5d88828e59fe0e487
+> Fixes: #1141
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> Cherry pick from commit 80dd8cceab3b860bf1bc2945c8e2d8d0b3913e48
+> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24270/
+
+BUG: 1812824
+Change-Id: I9ddb57f86a3fa3e519180cb5d88828e59fe0e487
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/196371
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ rpc/rpc-transport/socket/src/socket.c | 46 ++++++++++++++++++++---------------
+ 1 file changed, 26 insertions(+), 20 deletions(-)
+
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index f54ca83..65845ea 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -4240,6 +4240,7 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ char *cipher_list = DEFAULT_CIPHER_LIST;
+ char *dh_param = DEFAULT_DH_PARAM;
+ char *ec_curve = DEFAULT_EC_CURVE;
++ gf_boolean_t dh_flag = _gf_false;
+
+ priv = this->private;
+
+@@ -4248,6 +4249,10 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ return 0;
+ }
+
++ if (!priv->ssl_enabled && !priv->mgmt_ssl) {
++ return 0;
++ }
++
+ priv->ssl_own_cert = DEFAULT_CERT_PATH;
+ if (dict_get_str(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+@@ -4294,27 +4299,25 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ priv->crl_path = gf_strdup(optstr);
+ }
+
+- gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG,
+- "SSL support on the I/O path is %s",
+- priv->ssl_enabled ? "ENABLED" : "NOT enabled");
+- gf_log(this->name, priv->mgmt_ssl ? GF_LOG_INFO : GF_LOG_DEBUG,
+- "SSL support for glusterd is %s",
+- priv->mgmt_ssl ? "ENABLED" : "NOT enabled");
+-
+ if (!priv->mgmt_ssl) {
+- if (!dict_get_int32(this->options, SSL_CERT_DEPTH_OPT, &cert_depth)) {
+- gf_log(this->name, GF_LOG_INFO, "using certificate depth %d",
+- cert_depth);
++ if (!dict_get_int32_sizen(this->options, SSL_CERT_DEPTH_OPT,
++ &cert_depth)) {
+ }
+ } else {
+ cert_depth = this->ctx->ssl_cert_depth;
+- gf_log(this->name, GF_LOG_INFO, "using certificate depth %d",
+- cert_depth);
+ }
+- if (!dict_get_str(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) {
++ gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG,
++ "SSL support for MGMT is %s IO path is %s certificate depth is %d "
++ "for peer %s",
++ (priv->mgmt_ssl ? "ENABLED" : "NOT enabled"),
++ (priv->ssl_enabled ? "ENABLED" : "NOT enabled"), cert_depth,
++ this->peerinfo.identifier);
++
++ if (!dict_get_str_sizen(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) {
+ gf_log(this->name, GF_LOG_INFO, "using cipher list %s", cipher_list);
+ }
+- if (!dict_get_str(this->options, SSL_DH_PARAM_OPT, &dh_param)) {
++ if (!dict_get_str_sizen(this->options, SSL_DH_PARAM_OPT, &dh_param)) {
++ dh_flag = _gf_true;
+ gf_log(this->name, GF_LOG_INFO, "using DH parameters %s", dh_param);
+ }
+ if (!dict_get_str(this->options, SSL_EC_CURVE_OPT, &ec_curve)) {
+@@ -4349,12 +4352,15 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ #ifdef SSL_OP_NO_COMPRESSION
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_COMPRESSION);
+ #endif
+-
+- if ((bio = BIO_new_file(dh_param, "r")) == NULL) {
+- gf_log(this->name, GF_LOG_INFO,
+- "failed to open %s, "
+- "DH ciphers are disabled",
+- dh_param);
++ /* Upload file to bio wrapper only if dh param is configured
++ */
++ if (dh_flag) {
++ if ((bio = BIO_new_file(dh_param, "r")) == NULL) {
++ gf_log(this->name, GF_LOG_ERROR,
++ "failed to open %s, "
++ "DH ciphers are disabled",
++ dh_param);
++ }
+ }
+
+ if (bio != NULL) {
+--
+1.8.3.1
+
diff --git a/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch b/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch
new file mode 100644
index 0000000..62b2fe0
--- /dev/null
+++ b/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch
@@ -0,0 +1,122 @@
+From 04b824ebfcf80c648d5855f10bc30fde45fd62eb Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Thu, 26 Mar 2020 10:46:16 +0000
+Subject: [PATCH 361/362] snap_scheduler: python3 compatibility and new test
+ case
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Problem:
+"snap_scheduler.py init" command failing with the below traceback:
+
+[root@dhcp43-104 ~]# snap_scheduler.py init
+Traceback (most recent call last):
+ File "/usr/sbin/snap_scheduler.py", line 941, in <module>
+ sys.exit(main(sys.argv[1:]))
+ File "/usr/sbin/snap_scheduler.py", line 851, in main
+ initLogger()
+ File "/usr/sbin/snap_scheduler.py", line 153, in initLogger
+ logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log")
+ File "/usr/lib64/python3.6/posixpath.py", line 94, in join
+ genericpath._check_arg_types('join', a, *p)
+ File "/usr/lib64/python3.6/genericpath.py", line 151, in _check_arg_types
+ raise TypeError("Can't mix strings and bytes in path components") from None
+TypeError: Can't mix strings and bytes in path components
+
+Solution:
+
+Added the 'universal_newlines' flag to Popen to support backward compatibility.
+
+Added a basic test for snapshot scheduler.
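+
+As a minimal illustration (hypothetical snippet, not from the patch):
+with universal_newlines=True the pipe is decoded to str, so os.path.join()
+no longer mixes bytes and str on python3:
+
+    import os
+    import subprocess
+
+    process = subprocess.Popen(["gluster", "--print-logdir"],
+                               stdout=subprocess.PIPE,
+                               universal_newlines=True)
+    logdir = process.stdout.read().strip()  # str on both python2 and python3
+    logfile = os.path.join(logdir, "snap_scheduler.log")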
+
+Backport Of:
+   
+        >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24257/
+        >Change-Id: I78e8fabd866fd96638747ecd21d292f5ca074a4e
+        >Fixes: #1134
+        >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+   
+BUG: 1817369
+Change-Id: I78e8fabd866fd96638747ecd21d292f5ca074a4e
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/196482
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/snap_scheduler/snap_scheduler.py | 2 +-
+ tests/basic/volume-snap-scheduler.t | 49 +++++++++++++++++++++++++++++++++
+ 2 files changed, 50 insertions(+), 1 deletion(-)
+ create mode 100644 tests/basic/volume-snap-scheduler.t
+
+diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py
+index a66c5e3..5a29d41 100755
+--- a/extras/snap_scheduler/snap_scheduler.py
++++ b/extras/snap_scheduler/snap_scheduler.py
+@@ -149,7 +149,7 @@ def initLogger():
+ sh.setFormatter(formatter)
+
+ process = subprocess.Popen(["gluster", "--print-logdir"],
+- stdout=subprocess.PIPE)
++ stdout=subprocess.PIPE, universal_newlines=True)
+ logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log")
+
+ fh = logging.FileHandler(logfile)
+diff --git a/tests/basic/volume-snap-scheduler.t b/tests/basic/volume-snap-scheduler.t
+new file mode 100644
+index 0000000..a638c5c
+--- /dev/null
++++ b/tests/basic/volume-snap-scheduler.t
+@@ -0,0 +1,49 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++
++cleanup;
++
++TEST glusterd;
++TEST pidof glusterd;
++
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
++TEST $CLI volume start $V0
++
++## Create, start and mount meta_volume as
++## snap_scheduler expects shared storage to be enabled.
++## This test is very basic in nature not creating any snapshot
++## and purpose is to validate snap scheduling commands.
++
++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
++TEST $CLI volume start $META_VOL
++TEST mkdir -p $META_MNT
++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
++
++##function to check status
++function check_status_scheduler()
++{
++ local key=$1
++ snap_scheduler.py status | grep -F "$key" | wc -l
++}
++
++##Basic snap_scheduler command test init/enable/disable/list
++
++TEST snap_scheduler.py init
++
++TEST snap_scheduler.py enable
++
++EXPECT 1 check_status_scheduler "Enabled"
++
++TEST snap_scheduler.py disable
++
++EXPECT 1 check_status_scheduler "Disabled"
++
++TEST snap_scheduler.py list
++
++TEST $CLI volume stop $V0;
++
++TEST $CLI volume delete $V0;
++
++cleanup;
+--
+1.8.3.1
+
diff --git a/0362-write-behind-fix-data-corruption.patch b/0362-write-behind-fix-data-corruption.patch
new file mode 100644
index 0000000..aeb7242
--- /dev/null
+++ b/0362-write-behind-fix-data-corruption.patch
@@ -0,0 +1,454 @@
+From 48f6929590157d9a1697e11c02441207afdc1bed Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 27 Mar 2020 23:56:15 +0100
+Subject: [PATCH 362/362] write-behind: fix data corruption
+
+There was a bug in write-behind that allowed a previously completed write
+to overwrite the overlapping region of data from a future write.
+
+Suppose we want to send three writes (W1, W2 and W3). W1 and W2 are
+sequential, and W3 writes at the same offset of W2:
+
+ W2.offset = W3.offset = W1.offset + W1.size
+
+Both W1 and W2 are sent in parallel. W3 is only sent after W2 completes.
+So W3 should *always* overwrite the overlapping part of W2.
+
+Suppose write-behind processes the requests from 2 concurrent threads:
+
+ Thread 1 Thread 2
+
+ <received W1>
+ <received W2>
+ wb_enqueue_tempted(W1)
+ /* W1 is assigned gen X */
+ wb_enqueue_tempted(W2)
+ /* W2 is assigned gen X */
+
+ wb_process_queue()
+ __wb_preprocess_winds()
+ /* W1 and W2 are sequential and all
+ * other requisites are met to merge
+ * both requests. */
+ __wb_collapse_small_writes(W1, W2)
+ __wb_fulfill_request(W2)
+
+ __wb_pick_unwinds() -> W2
+ /* In this case, since the request is
+ * already fulfilled, wb_inode->gen
+ * is not updated. */
+
+ wb_do_unwinds()
+ STACK_UNWIND(W2)
+
+ /* The application has received the
+ * result of W2, so it can send W3. */
+ <received W3>
+
+ wb_enqueue_tempted(W3)
+ /* W3 is assigned gen X */
+
+ wb_process_queue()
+ /* Here we have W1 (which contains
+ * the conflicting W2) and W3 with
+ * same gen, so they are interpreted
+ * as concurrent writes that do not
+ * conflict. */
+ __wb_pick_winds() -> W3
+
+ wb_do_winds()
+ STACK_WIND(W3)
+
+ wb_process_queue()
+ /* Eventually W1 will be
+ * ready to be sent */
+ __wb_pick_winds() -> W1
+ __wb_pick_unwinds() -> W1
+ /* Here wb_inode->gen is
+ * incremented. */
+
+ wb_do_unwinds()
+ STACK_UNWIND(W1)
+
+ wb_do_winds()
+ STACK_WIND(W1)
+
+So, as we can see, W3 is sent before W1, which shouldn't happen.
+
+The problem is that wb_inode->gen is only incremented for requests that
+have not been fulfilled but, after a merge, the request is marked as
+fulfilled even though it has not been sent to the brick. This allows
+future requests to be assigned to the same generation, which could then
+be internally reordered.
+
+Solution:
+
+Increment wb_inode->gen before any unwind, even if it's for a fulfilled
+request.
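+
+In essence (a sketch of the fix; the actual hunk is below):
+
+    wb_inode->gen++;                    /* now done for every unwind ... */
+
+    if (!req->ordering.fulfilled) {
+        list_add_tail(&req->lie, &wb_inode->liability);
+        req->ordering.lied = 1;         /* ... not only for lied requests */
+    }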
+
+Special thanks to Stefan Ring for writing a reproducer that has been
+crucial to identify the issue.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24263
+> Change-Id: Id4ab0f294a09aca9a863ecaeef8856474662ab45
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+> Fixes: #884
+
+Change-Id: Id4ab0f294a09aca9a863ecaeef8856474662ab45
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+BUG: 1819059
+Reviewed-on: https://code.engineering.redhat.com/gerrit/196250
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/write-behind/issue-884.c | 267 +++++++++++++++++++++
+ tests/bugs/write-behind/issue-884.t | 40 +++
+ .../performance/write-behind/src/write-behind.c | 4 +-
+ 3 files changed, 309 insertions(+), 2 deletions(-)
+ create mode 100644 tests/bugs/write-behind/issue-884.c
+ create mode 100755 tests/bugs/write-behind/issue-884.t
+
+diff --git a/tests/bugs/write-behind/issue-884.c b/tests/bugs/write-behind/issue-884.c
+new file mode 100644
+index 0000000..e9c33b3
+--- /dev/null
++++ b/tests/bugs/write-behind/issue-884.c
+@@ -0,0 +1,267 @@
++
++#define _GNU_SOURCE
++
++#include <stdlib.h>
++#include <stdio.h>
++#include <string.h>
++#include <time.h>
++#include <assert.h>
++#include <errno.h>
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <pthread.h>
++
++#include <glusterfs/api/glfs.h>
++
++/* Based on a reproducer by Stefan Ring. It seems to be quite sensitive to
++ * any timing modification, so the code has been maintained as is, only with
++ * minor changes. */
++
++struct glfs *glfs;
++
++pthread_mutex_t the_mutex = PTHREAD_MUTEX_INITIALIZER;
++pthread_cond_t the_cond = PTHREAD_COND_INITIALIZER;
++
++typedef struct _my_aiocb {
++ int64_t size;
++ volatile int64_t seq;
++ int which;
++} my_aiocb;
++
++typedef struct _worker_data {
++ my_aiocb cb;
++ struct iovec iov;
++ int64_t offset;
++} worker_data;
++
++typedef struct {
++ worker_data wdata[2];
++
++ volatile unsigned busy;
++} all_data_t;
++
++all_data_t all_data;
++
++static void
++completion_fnc(struct glfs_fd *fd, ssize_t ret, struct glfs_stat *pre,
++ struct glfs_stat *post, void *arg)
++{
++ void *the_thread;
++ my_aiocb *cb = (my_aiocb *)arg;
++ long seq = cb->seq;
++
++ assert(ret == cb->size);
++
++ pthread_mutex_lock(&the_mutex);
++ pthread_cond_broadcast(&the_cond);
++
++ all_data.busy &= ~(1 << cb->which);
++ cb->seq = -1;
++
++ the_thread = (void *)pthread_self();
++ printf("worker %d is done from thread %p, seq %ld!\n", cb->which,
++ the_thread, seq);
++
++ pthread_mutex_unlock(&the_mutex);
++}
++
++static void
++init_wdata(worker_data *data, int which)
++{
++ data->cb.which = which;
++ data->cb.seq = -1;
++
++ data->iov.iov_base = malloc(1024 * 1024);
++ memset(data->iov.iov_base, 6,
++ 1024 * 1024); /* tail part never overwritten */
++}
++
++static void
++init()
++{
++ all_data.busy = 0;
++
++ init_wdata(&all_data.wdata[0], 0);
++ init_wdata(&all_data.wdata[1], 1);
++}
++
++static void
++do_write(struct glfs_fd *fd, int content, int size, int64_t seq,
++ worker_data *wdata, const char *name)
++{
++ int ret;
++
++ wdata->cb.size = size;
++ wdata->cb.seq = seq;
++
++ if (content >= 0)
++ memset(wdata->iov.iov_base, content, size);
++ wdata->iov.iov_len = size;
++
++ pthread_mutex_lock(&the_mutex);
++ printf("(%d) dispatching write \"%s\", offset %lx, len %x, seq %ld\n",
++ wdata->cb.which, name, (long)wdata->offset, size, (long)seq);
++ pthread_mutex_unlock(&the_mutex);
++ ret = glfs_pwritev_async(fd, &wdata->iov, 1, wdata->offset, 0,
++ completion_fnc, &wdata->cb);
++ assert(ret >= 0);
++}
++
++#define IDLE 0 // both workers must be idle
++#define ANY 1 // use any worker, other one may be busy
++
++int
++get_worker(int waitfor, int64_t excl_seq)
++{
++ int which;
++
++ pthread_mutex_lock(&the_mutex);
++
++ while (waitfor == IDLE && (all_data.busy & 3) != 0 ||
++ waitfor == ANY &&
++ ((all_data.busy & 3) == 3 ||
++ excl_seq >= 0 && (all_data.wdata[0].cb.seq == excl_seq ||
++ all_data.wdata[1].cb.seq == excl_seq)))
++ pthread_cond_wait(&the_cond, &the_mutex);
++
++ if (!(all_data.busy & 1))
++ which = 0;
++ else
++ which = 1;
++
++ all_data.busy |= (1 << which);
++
++ pthread_mutex_unlock(&the_mutex);
++
++ return which;
++}
++
++static int
++doit(struct glfs_fd *fd)
++{
++ int ret;
++ int64_t seq = 0;
++ int64_t offset = 0; // position in file, in blocks
++ int64_t base = 0x1000; // where to place the data, in blocks
++
++ int async_mode = ANY;
++
++ init();
++
++ for (;;) {
++ int which;
++ worker_data *wdata;
++
++ // for growing to the first offset
++ for (;;) {
++ int gap = base + 0x42 - offset;
++ if (!gap)
++ break;
++ if (gap > 80)
++ gap = 80;
++
++ which = get_worker(IDLE, -1);
++ wdata = &all_data.wdata[which];
++
++ wdata->offset = offset << 9;
++ do_write(fd, 0, gap << 9, seq++, wdata, "gap-filling");
++
++ offset += gap;
++ }
++
++ // 8700
++ which = get_worker(IDLE, -1);
++ wdata = &all_data.wdata[which];
++
++ wdata->offset = (base + 0x42) << 9;
++ do_write(fd, 1, 62 << 9, seq++, wdata, "!8700");
++
++ // 8701
++ which = get_worker(IDLE, -1);
++ wdata = &all_data.wdata[which];
++
++ wdata->offset = (base + 0x42) << 9;
++ do_write(fd, 2, 55 << 9, seq++, wdata, "!8701");
++
++ // 8702
++ which = get_worker(async_mode, -1);
++ wdata = &all_data.wdata[which];
++
++ wdata->offset = (base + 0x79) << 9;
++ do_write(fd, 3, 54 << 9, seq++, wdata, "!8702");
++
++ // 8703
++ which = get_worker(async_mode, -1);
++ wdata = &all_data.wdata[which];
++
++ wdata->offset = (base + 0xaf) << 9;
++ do_write(fd, 4, 81 << 9, seq++, wdata, "!8703");
++
++ // 8704
++ // this writes both 5s and 6s
++ // the range of 5s is the one that overwrites 8703
++
++ which = get_worker(async_mode, seq - 1);
++ wdata = &all_data.wdata[which];
++
++ memset(wdata->iov.iov_base, 5, 81 << 9);
++ wdata->offset = (base + 0xaf) << 9;
++ do_write(fd, -1, 1623 << 9, seq++, wdata, "!8704");
++
++ offset = base + 0x706;
++ base += 0x1000;
++ if (base >= 0x100000)
++ break;
++ }
++
++ printf("done!\n");
++ fflush(stdout);
++
++ pthread_mutex_lock(&the_mutex);
++
++ while ((all_data.busy & 3) != 0)
++ pthread_cond_wait(&the_cond, &the_mutex);
++
++ pthread_mutex_unlock(&the_mutex);
++
++ ret = glfs_close(fd);
++ assert(ret >= 0);
++ /*
++ ret = glfs_fini(glfs);
++ assert(ret >= 0);
++ */
++ return 0;
++}
++
++int
++main(int argc, char *argv[])
++{
++ int ret;
++ int open_flags = O_RDWR | O_DIRECT | O_TRUNC;
++ struct glfs_fd *fd;
++
++ glfs = glfs_new(argv[1]);
++ if (!glfs) {
++ printf("glfs_new!\n");
++ goto out;
++ }
++ ret = glfs_set_volfile_server(glfs, "tcp", "localhost", 24007);
++ if (ret < 0) {
++ printf("set_volfile!\n");
++ goto out;
++ }
++ ret = glfs_init(glfs);
++ if (ret) {
++ printf("init!\n");
++ goto out;
++ }
++ fd = glfs_open(glfs, argv[2], open_flags);
++ if (!fd) {
++ printf("open!\n");
++ goto out;
++ }
++ srand(time(NULL));
++ return doit(fd);
++out:
++ return 1;
++}
+diff --git a/tests/bugs/write-behind/issue-884.t b/tests/bugs/write-behind/issue-884.t
+new file mode 100755
+index 0000000..2bcf7d1
+--- /dev/null
++++ b/tests/bugs/write-behind/issue-884.t
+@@ -0,0 +1,40 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++# This test tries to detect a race condition in write-behind. It's based on a
++# reproducer written by Stefan Ring that is able to hit it sometimes. On my
++# system, it happened around 10% of the runs. This means that if this bug
++# appears again, this test will fail once every 10 runs. Most probably this
++# failure will be hidden by the automatic test retry of the testing framework.
++#
++# Please, if this test fails, it needs to be analyzed in detail.
++
++function run() {
++ "${@}" >/dev/null
++}
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 $H0:$B0/$V0
++# This makes it easier to hit the issue
++TEST $CLI volume set $V0 client-log-level TRACE
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
++
++build_tester $(dirname $0)/issue-884.c -lgfapi
++
++TEST touch $M0/testfile
++
++# This program generates a file of 535694336 bytes with a fixed pattern
++TEST run $(dirname $0)/issue-884 $V0 testfile
++
++# This is the md5sum of the expected pattern without corruption
++EXPECT "ad105f9349345a70fc697632cbb5eec8" echo "$(md5sum $B0/$V0/testfile | awk '{ print $1; }')"
++
++cleanup
+diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
+index 70e281a..90a0bcf 100644
+--- a/xlators/performance/write-behind/src/write-behind.c
++++ b/xlators/performance/write-behind/src/write-behind.c
+@@ -1284,14 +1284,14 @@ __wb_pick_unwinds(wb_inode_t *wb_inode, list_head_t *lies)
+
+ wb_inode->window_current += req->orig_size;
+
++ wb_inode->gen++;
++
+ if (!req->ordering.fulfilled) {
+ /* burden increased */
+ list_add_tail(&req->lie, &wb_inode->liability);
+
+ req->ordering.lied = 1;
+
+- wb_inode->gen++;
+-
+ uuid_utoa_r(req->gfid, gfid);
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+--
+1.8.3.1
+
diff --git a/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch b/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
new file mode 100644
index 0000000..e1ea6d0
--- /dev/null
+++ b/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
@@ -0,0 +1,47 @@
+From d7c0dc7107a024d28196a4582bacf28ddcfbeb69 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Tue, 14 Apr 2020 07:59:22 -0400
+Subject: [PATCH 363/367] common-ha: cluster status shows "FAILOVER" when
+ actually HEALTHY
+
+pacemaker devs changed the format of the output of `pcs status`
+
+Expected to find a line in the format:
+
+Online: ....
+
+but now it's
+
+ * Online: ...
+
+And the `grep -E "^Online:"` no longer finds the list of nodes that
+are online.
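+
+Dropping the anchor makes the match tolerant of both formats (illustrative
+sketch; the actual hunk is below):
+
+    # old pcs:  "Online: [ node1 node2 ]"
+    # new pcs:  "  * Online: [ node1 node2 ]"
+    grep -E "Online:" ${scratch}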
+
+ https://review.gluster.org/#/c/glusterfs/+/24333/
+
+Change-Id: If2aa1e7b53c766c625d7b4cc222a83ea2c0bd72d
+BUG: 1823706
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/197367
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 0b0050a..df333a1 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -935,7 +935,7 @@ status()
+ done
+
+ # print the nodes that are expected to be online
+- grep -E "^Online:" ${scratch}
++ grep -E "Online:" ${scratch}
+
+ echo
+
+--
+1.8.3.1
+
diff --git a/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch b/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch
new file mode 100644
index 0000000..2c6ba98
--- /dev/null
+++ b/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch
@@ -0,0 +1,97 @@
+From 5b1bfebacac649e6f5051316e4075309caf93901 Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Tue, 21 Apr 2020 19:13:41 +0300
+Subject: [PATCH 364/367] dht - fixing rebalance failures for files with holes
+
+The rebalance process's handling of files which contain holes caused
+rebalance to fail with "No space left on device" errors.
+This patch modifies the code-flow in such a way that files with holes
+will be rebalanced correctly.
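+
+The sparseness check itself, which this patch moves earlier so that it can
+drive the destination-file creation, is in sketch form:
+
+    /* a file has holes when its apparent size exceeds what its
+     * allocated blocks can hold */
+    if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+        file_has_holes = 1;
+
+    /* dst: fallocate only for non-sparse files, ftruncate otherwise,
+     * so holes are not materialized on the destination */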
+
+backport of https://review.gluster.org/#/c/glusterfs/+/24357/
+>fixes: #1187
+>Change-Id: I89bc3d4ea7f074db7213d759c49307f379543932
+>Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+
+BUG: 1823703
+Change-Id: I89bc3d4ea7f074db7213d759c49307f379543932
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/198579
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index f4c62b8..7d9df02 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -650,7 +650,7 @@ out:
+ static int
+ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+ loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
+- int *fop_errno)
++ int *fop_errno, int file_has_holes)
+ {
+ int ret = -1;
+ int ret2 = -1;
+@@ -819,7 +819,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+
+ /* No need to bother about 0 byte size files */
+ if (stbuf->ia_size > 0) {
+- if (conf->use_fallocate) {
++ if (conf->use_fallocate && !file_has_holes) {
+ ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
+@@ -846,9 +846,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+ goto out;
+ }
+ }
+- }
+-
+- if (!conf->use_fallocate) {
++ } else {
+ ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+@@ -1728,9 +1726,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ goto out;
+ }
+
++ /* Try to preserve 'holes' while migrating data */
++ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
++ file_has_holes = 1;
++
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
+- fop_errno);
++ fop_errno, file_has_holes);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Create dst failed"
+@@ -1774,8 +1776,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ * destination. We need to do update this only post migration
+ * as in case of failure the linkto needs to point to the source
+ * subvol */
+- ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf,
+- &dst_fd, fop_errno);
++ ret = __dht_rebalance_create_dst_file(
++ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Create dst failed"
+@@ -1862,9 +1864,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ ret = 0;
+ goto out;
+ }
+- /* Try to preserve 'holes' while migrating data */
+- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+- file_has_holes = 1;
+
+ ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes,
+--
+1.8.3.1
+
diff --git a/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch b/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch
new file mode 100644
index 0000000..daf8dc6
--- /dev/null
+++ b/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch
@@ -0,0 +1,70 @@
+From 36180d21dc4b16619b75d65d51eaf37df4e0e2d3 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Mon, 20 Apr 2020 12:15:42 +0100
+Subject: [PATCH 365/367] build: geo-rep requires relevant selinux permission
+ for rsync
+
+If selinux is set to enforcing mode, geo-rep goes into a faulty state.
+
+To prevent this from happening, some relevant selinux booleans need to be
+set to 'on' to allow the rsync operation.
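+
+Roughly what the %selinux_set_booleans macro effects at install time
+(manual equivalent, shown only for illustration):
+
+    setsebool -P rsync_full_access=1 rsync_client=1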
+
+Backport of:
+ >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24348.
+ >Change-Id: Ia8ce530d6548c2a545f4c99c600f5aac2bbb3363
+ >Fixes: #1182
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1813917
+Change-Id: Ia8ce530d6548c2a545f4c99c600f5aac2bbb3363
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/198599
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 7c8a751..5ed07e7 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -130,6 +130,12 @@
+ ## All %%global definitions should be placed here and keep them sorted
+ ##
+
+# selinux booleans whose default value needs modification
++# these booleans will be consumed by "%%selinux_set_booleans" macro.
++%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
++%global selinuxbooleans rsync_full_access=1 rsync_client=1
++%endif
++
+ %if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+ %global _with_systemd true
+ %endif
+@@ -515,6 +521,12 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release}
+ Requires: rsync
+ Requires: util-linux
+ Requires: %{name}-libs%{?_isa} = %{version}-%{release}
++# required for setting selinux bools
++%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
++Requires: selinux-policy-targeted
++Requires(post): selinux-policy-targeted
++BuildRequires: selinux-policy-devel
++%endif
+
+ %description geo-replication
+ GlusterFS is a distributed file-system capable of scaling to several
+@@ -941,6 +953,9 @@ exit 0
+
+ %if ( 0%{!?_without_georeplication:1} )
+ %post geo-replication
++%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
++%selinux_set_booleans %{selinuxbooleans}
++%endif
+ if [ $1 -ge 1 ]; then
+ %systemd_postun_with_restart glusterd
+ fi
+--
+1.8.3.1
+
diff --git a/0366-snapshot-fix-python3-issue-in-gcron.patch b/0366-snapshot-fix-python3-issue-in-gcron.patch
new file mode 100644
index 0000000..c704a17
--- /dev/null
+++ b/0366-snapshot-fix-python3-issue-in-gcron.patch
@@ -0,0 +1,55 @@
+From d7b84014cbb19e65dfae6248af47cc23fabc64e5 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Wed, 22 Apr 2020 15:09:16 +0100
+Subject: [PATCH 366/367] snapshot: fix python3 issue in gcron
+
+`$gcron.py test_vol Job`
+Traceback:
+ File "/usr/sbin/gcron.py", line 189, in <module>
+ main()
+ File "/usr/sbin/gcron.py", line 121, in main
+ initLogger(script_name)
+ File "/usr/sbin/gcron.py", line 44, in initLogger
+ logfile = os.path.join(out.strip(), script_name[:-3]+".log")
+ File "/usr/lib64/python3.6/posixpath.py", line 94, in join
+ genericpath._check_arg_types('join', a, *p)
+ File "/usr/lib64/python3.6/genericpath.py", line 151, in _check_arg_types
+ raise TypeError("Can't mix strings and bytes in path components") from None
+TypeError: Can't mix strings and bytes in path components
+
+Solution: Added the 'universal_newlines' flag to Popen.
+
+Backport of:
+
+ >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24364/
+ >Change-Id: I4c7a0e5bce605e4c134f6786c9dd8162b89fc77f
+ >Fixes: #1193
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1825195
+Change-Id: I4c7a0e5bce605e4c134f6786c9dd8162b89fc77f
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/198641
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/snap_scheduler/gcron.py | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py
+index 1127be0..cc16310 100755
+--- a/extras/snap_scheduler/gcron.py
++++ b/extras/snap_scheduler/gcron.py
+@@ -38,7 +38,8 @@ def initLogger(script_name):
+ sh.setFormatter(formatter)
+
+ process = subprocess.Popen(["gluster", "--print-logdir"],
+- stdout=subprocess.PIPE)
++ stdout=subprocess.PIPE,
++ universal_newlines=True)
+ out, err = process.communicate()
+ if process.returncode == 0:
+ logfile = os.path.join(out.strip(), script_name[:-3]+".log")
+--
+1.8.3.1
+
diff --git a/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch b/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch
new file mode 100644
index 0000000..b94f8fc
--- /dev/null
+++ b/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch
@@ -0,0 +1,95 @@
+From aef8e51b9974603d397cc8f5301b24451d012e46 Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Fri, 24 Apr 2020 13:32:51 +0530
+Subject: [PATCH 367/367] dht: Handle setxattr and rm race for directory in
+ rebalance
+
+Problem: Self-heal as part of a directory lookup does not return an error
+if the layout setxattr fails. This is because the actual lookup fop
+must have been successful for the layout heal to proceed. Hence, we could
+not tell in rebalance whether fix-layout failed.
+
+Solution: We can check in the layout structure whether all the
+xlators have returned an error.
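+
+In sketch form, after the layout setxattr the rebalance path now re-checks
+the in-memory layout (names per the diff below):
+
+    ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+    if (ret == 0) {
+        /* every xlator holding an error means the directory is gone */
+        ret = -dht_dir_layout_error_check(this, loc->inode);
+    }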
+
+> fixes: #1200
+> Change-Id: I3e5f2a36c0d934c21476a73a9a5473d8e490cde7
+> Signed-off-by: Susant Palai <spalai@redhat.com>
+(backport of https://review.gluster.org/#/c/glusterfs/+/24375/)
+
+BUG: 1812789
+Change-Id: I897826c4c2e883b3085c9314deff32d649b4588e
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/198726
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 19 +++++++++++++++++++
+ xlators/cluster/dht/src/dht-common.h | 3 +++
+ xlators/cluster/dht/src/dht-rebalance.c | 11 +++++++++++
+ 3 files changed, 33 insertions(+)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index d0b5287..7890e7a 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -11286,3 +11286,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
+ return 0;
+ }
++
++/* The job of this function is to check if all the xlators have updated
++ * error in the layout. */
++int
++dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
++{
++ dht_layout_t *layout = NULL;
++ int i = 0;
++
++ layout = dht_layout_get(this, inode);
++ for (i = 0; i < layout->cnt; i++) {
++ if (layout->list[i].err == 0) {
++ return 0;
++ }
++ }
++
++ /* Returning the first xlator error as all xlators have errors */
++ return layout->list[0].err;
++}
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index ce11f02..4d2aae6 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -1544,4 +1544,7 @@ dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ int32_t
+ dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+
++int
++dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
++
+ #endif /* _DHT_H */
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 7d9df02..33cacfe 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -3928,6 +3928,17 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ }
+
+ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
++
++ /* In case of a race where the directory is deleted just before
++ * layout setxattr, the errors are updated in the layout structure.
++ * We can use this information to make a decision whether the directory
++ * is deleted entirely.
++ */
++ if (ret == 0) {
++ ret = dht_dir_layout_error_check(this, loc->inode);
++ ret = -ret;
++ }
++
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+--
+1.8.3.1
+
diff --git a/0368-Update-rfc.sh-to-rhgs-3.5.2.patch b/0368-Update-rfc.sh-to-rhgs-3.5.2.patch
new file mode 100644
index 0000000..c103891
--- /dev/null
+++ b/0368-Update-rfc.sh-to-rhgs-3.5.2.patch
@@ -0,0 +1,26 @@
+From 00b79c4e2837980f36f7d8387d90cfb7dc8d0d58 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Tue, 5 May 2020 12:41:41 -0400
+Subject: [PATCH 368/375] Update rfc.sh to rhgs-3.5.2
+
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ rfc.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/rfc.sh b/rfc.sh
+index a408e45..37d551f 100755
+--- a/rfc.sh
++++ b/rfc.sh
+@@ -18,7 +18,7 @@ done
+ shift $((OPTIND-1))
+
+
+-branch="rhgs-3.5.1-rhel-8";
++branch="rhgs-3.5.2";
+
+ set_hooks_commit_msg()
+ {
+--
+1.8.3.1
+
diff --git a/0369-cluster-ec-Return-correct-error-code-and-log-message.patch b/0369-cluster-ec-Return-correct-error-code-and-log-message.patch
new file mode 100644
index 0000000..c3c8925
--- /dev/null
+++ b/0369-cluster-ec-Return-correct-error-code-and-log-message.patch
@@ -0,0 +1,53 @@
+From f30fa3938f980f03d08479776037090e7fc11f42 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Tue, 5 May 2020 18:17:49 +0530
+Subject: [PATCH 369/375] cluster/ec: Return correct error code and log message
+
+If a readdir is sent with an FD on which opendir failed, this FD is
+useless and we return it with an error. For now, we return EINVAL
+without logging any message in the log file.
+
+Return a correct error code and also log a message to make it easier
+to debug.
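+
+Sketch of the resulting checks (see the hunk below):
+
+    ctx = ec_fd_get(fop->fd, fop->xl);
+    if (ctx == NULL)
+        fop->error = ENOMEM;   /* fd context could not be obtained */
+    else if (ctx->open == 0)
+        fop->error = EBADFD;   /* opendir never succeeded on this fd */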
+
+>fixes: #1220
+>Change-Id: Iaf035254b9c5aa52fa43ace72d328be622b06169
+>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+(Backport of https://review.gluster.org/#/c/glusterfs/+/24407/)
+
+BUG: 1831403
+Change-Id: Ib5bf30c47b7491abd0ad5ca0ce52ec77945b2e53
+Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200209
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-dir-read.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
+index 8310d4a..9924425 100644
+--- a/xlators/cluster/ec/src/ec-dir-read.c
++++ b/xlators/cluster/ec/src/ec-dir-read.c
+@@ -388,9 +388,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
+ /* Return error if opendir has not been successfully called on
+ * any subvolume. */
+ ctx = ec_fd_get(fop->fd, fop->xl);
+- if ((ctx == NULL) || (ctx->open == 0)) {
+- fop->error = EINVAL;
++ if (ctx == NULL) {
++ fop->error = ENOMEM;
++ } else if (ctx->open == 0) {
++ fop->error = EBADFD;
++ }
+
++ if (fop->error) {
++ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
++ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
++ ec_msg_str(fop));
+ return EC_STATE_REPORT;
+ }
+
+--
+1.8.3.1
+
diff --git a/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch b/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch
new file mode 100644
index 0000000..6648a4e
--- /dev/null
+++ b/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch
@@ -0,0 +1,203 @@
+From 3d230880aed85737365deafe3c9a32c67da2a79e Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Mon, 4 May 2020 19:09:00 +0530
+Subject: [PATCH 370/375] dht: Do opendir selectively in gf_defrag_process_dir
+
+Currently opendir is done from the cluster view. Hence, even if
+one opendir is successful, the opendir operation as a whole is considered
+successful.
+
+But since in gf_defrag_get_entry we fetch entries selectively from
+local_subvols, we need to opendir individually on those local subvols
+and keep track of fds separately. Otherwise it is possible that opendir
+failed on one of the subvols and we wind a readdirp call on the fd to the
+corresponding subvol, which will ultimately result in an EINVAL error.
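+
+Sketch of the per-subvol open (simplified; the full hunk below also handles
+allocation failures and decommission):
+
+    for (i = 0; i < local_subvols_cnt; i++) {
+        dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+        ret = syncop_opendir(conf->local_subvols[i], loc,
+                             dir_dfmeta->lfd[i], NULL, NULL);
+        if (ret) {
+            /* remember the failure; readdirp is skipped for this subvol */
+            fd_unref(dir_dfmeta->lfd[i]);
+            dir_dfmeta->lfd[i] = NULL;
+        }
+    }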
+
+> fixes: #1218
+> Change-Id: I50dd88b9597852a15579f4ee325918979417f570
+> Signed-off-by: Susant Palai <spalai@redhat.com>
+(Backport of https://review.gluster.org/#/c/glusterfs/+/24404/)
+
+BUG: 1831403
+Change-Id: I96e19fdd630279c3ef44f361c1d1fc5c1c429821
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200306
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.h | 2 +
+ xlators/cluster/dht/src/dht-rebalance.c | 74 +++++++++++++++++++++++----------
+ 2 files changed, 54 insertions(+), 22 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index 4d2aae6..8e65111 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -742,6 +742,8 @@ struct dir_dfmeta {
+ struct list_head **head;
+ struct list_head **iterator;
+ int *fetch_entries;
++ /* fds corresponding to local subvols only */
++ fd_t **lfd;
+ };
+
+ typedef struct dht_migrate_info {
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 33cacfe..c692119 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -48,6 +48,8 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
+ if (meta) {
+ for (i = 0; i < local_subvols_cnt; i++) {
+ gf_dirent_free(&meta->equeue[i]);
++ if (meta->lfd && meta->lfd[i])
++ fd_unref(meta->lfd[i]);
+ }
+
+ GF_FREE(meta->equeue);
+@@ -55,6 +57,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
+ GF_FREE(meta->iterator);
+ GF_FREE(meta->offset_var);
+ GF_FREE(meta->fetch_entries);
++ GF_FREE(meta->lfd);
+ GF_FREE(meta);
+ }
+ }
+@@ -3095,7 +3098,7 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+ struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
+ int *should_commit_hash, int *perrno)
+ {
+- int ret = -1;
++ int ret = 0;
+ char is_linkfile = 0;
+ gf_dirent_t *df_entry = NULL;
+ struct dht_container *tmp_container = NULL;
+@@ -3111,6 +3114,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+ }
+
+ if (dir_dfmeta->fetch_entries[i] == 1) {
++ if (!fd) {
++ dir_dfmeta->fetch_entries[i] = 0;
++ dir_dfmeta->offset_var[i].readdir_done = 1;
++ ret = 0;
++ goto out;
++ }
++
+ ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
+ dir_dfmeta->offset_var[i].offset,
+ &(dir_dfmeta->equeue[i]), xattr_req, NULL);
+@@ -3270,7 +3280,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data, int *perrno)
+ {
+ int ret = -1;
+- fd_t *fd = NULL;
+ dht_conf_t *conf = NULL;
+ gf_dirent_t entries;
+ dict_t *xattr_req = NULL;
+@@ -3304,28 +3313,49 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ goto out;
+ }
+
+- fd = fd_create(loc->inode, defrag->pid);
+- if (!fd) {
+- gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
++ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
++ if (!dir_dfmeta) {
++ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
+ ret = -1;
+ goto out;
+ }
+
+- ret = syncop_opendir(this, loc, fd, NULL, NULL);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED,
+- "Migrate data failed: Failed to open dir %s", loc->path);
+- *perrno = -ret;
++ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
++ gf_common_mt_pointer);
++ if (!dir_dfmeta->lfd) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
++ "could not allocate memory for dir_dfmeta");
+ ret = -1;
++ *perrno = ENOMEM;
+ goto out;
+ }
+
+- fd_bind(fd);
+- dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+- if (!dir_dfmeta) {
+- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
+- ret = -1;
+- goto out;
++ for (i = 0; i < local_subvols_cnt; i++) {
++ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
++ if (!dir_dfmeta->lfd[i]) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "failed to create fd");
++ *perrno = ENOMEM;
++ ret = -1;
++ goto out;
++ }
++
++ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
++ NULL, NULL);
++ if (ret) {
++ fd_unref(dir_dfmeta->lfd[i]);
++ dir_dfmeta->lfd[i] = NULL;
++ gf_smsg(this->name, GF_LOG_WARNING, 0, 0,
++ "failed to open dir: %s subvol: %s", loc->path,
++ conf->local_subvols[i]->name);
++
++ if (conf->decommission_in_progress) {
++ *perrno = -ret;
++ ret = -1;
++ goto out;
++ }
++ } else {
++ fd_bind(dir_dfmeta->lfd[i]);
++ }
+ }
+
+ dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
+@@ -3360,6 +3390,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ ret = -1;
+ goto out;
+ }
++
+ ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+@@ -3372,7 +3403,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
+ gf_common_mt_int);
+ if (!dir_dfmeta->fetch_entries) {
+- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL");
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
++ "could not allocate memory for dir_dfmeta->fetch_entries");
+ ret = -1;
+ goto out;
+ }
+@@ -3442,8 +3474,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
+ !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
+ ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
+- defrag, fd, migrate_data, dir_dfmeta,
+- xattr_req, &should_commit_hash, perrno);
++ defrag, dir_dfmeta->lfd[dfc_index],
++ migrate_data, dir_dfmeta, xattr_req,
++ &should_commit_hash, perrno);
+
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+@@ -3497,9 +3530,6 @@ out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+- if (fd)
+- fd_unref(fd);
+-
+ if (ret == 0 && should_commit_hash == 0) {
+ ret = 2;
+ }
+--
+1.8.3.1
+
diff --git a/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch b/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
new file mode 100644
index 0000000..a395da3
--- /dev/null
+++ b/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
@@ -0,0 +1,53 @@
+From 05bd0226716516d37ead173c7d6924225bd474db Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 6 May 2020 07:24:38 -0400
+Subject: [PATCH 371/375] common-ha: cluster status shows "FAILOVER" when
+ actually HEALTHY
+
+pacemaker devs changed the format of the output of `pcs status`
+
+Expected to find a line in the format:
+
+ Online: ....
+
+but now it's
+
+ * Online: ...
+
+And the `grep -E "^Online:"` no longer finds the list of nodes that
+are online.
+
+Also, other lines now have '*' in the first few characters of the line,
+throwing off `grep -x ...`
+
+https://review.gluster.org/#/c/glusterfs/+/24403/
+
+Change-Id: Ia04a89e76914f2a455a755f0a93fa415f60aefd0
+BUG: 1823706
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/199442
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index df333a1..4ecf91b 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -919,8 +919,9 @@ status()
+ local index=1
+ local nodes
+
+- # change tabs to spaces, strip leading spaces
+- pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch}
++ # change tabs to spaces, strip leading spaces, including any
++ # new '*' at the beginning of a line introduced in pcs-0.10.x
++ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch}
+
+ nodes[0]=${1}; shift
+
+--
+1.8.3.1
+
diff --git a/0372-posix-fix-seek-functionality.patch b/0372-posix-fix-seek-functionality.patch
new file mode 100644
index 0000000..7c286c2
--- /dev/null
+++ b/0372-posix-fix-seek-functionality.patch
@@ -0,0 +1,49 @@
+From 955fea10809861aa9b3da85d386c2cc92b319cdb Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Thu, 7 May 2020 18:57:37 +0300
+Subject: [PATCH 372/375] posix - fix seek functionality
+
+A wrong pointer check caused the offset returned by seek to always be
+wrong
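+
+The bug in a nutshell (illustrative caller):
+
+    off_t off = 0;
+    ret = syncop_seek(subvol, fd, offset, GF_SEEK_DATA, NULL, &off);
+    /* before the fix the callee checked 'if (*off)' instead of 'if (off)',
+     * so an output variable initialised to 0 was never filled in */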
+
+backport of https://review.gluster.org/#/c/glusterfs/+/24412/
+>fixes: #1228
+>Change-Id: Iac4c6a163175617ac4f14544fc6b7c6fb4041cd6
+>Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+
+BUG: 1833017
+Change-Id: Iac4c6a163175617ac4f14544fc6b7c6fb4041cd6
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/199761
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/syncop.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
+index 0de53c6..693970f 100644
+--- a/libglusterfs/src/syncop.c
++++ b/libglusterfs/src/syncop.c
+@@ -2881,12 +2881,13 @@ syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what,
+ SYNCOP(subvol, (&args), syncop_seek_cbk, subvol->fops->seek, fd, offset,
+ what, xdata_in);
+
+- if (*off)
+- *off = args.offset;
+-
+- if (args.op_ret == -1)
++ if (args.op_ret < 0) {
+ return -args.op_errno;
+- return args.op_ret;
++ } else {
++ if (off)
++ *off = args.offset;
++ return args.op_ret;
++ }
+ }
+
+ int
+--
+1.8.3.1
+
diff --git a/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch b/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch
new file mode 100644
index 0000000..7abaf0e
--- /dev/null
+++ b/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch
@@ -0,0 +1,51 @@
+From bbf43008e6d21d649536547f500662b940562c3e Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Mon, 11 May 2020 10:02:08 +0100
+Subject: [PATCH 373/375] build: geo-rep sub-pkg requires
+ policycoreutils-python-utils on rhel8
+
+glusterfs-geo-replication sub-package requires policycoreutils-python-utils
+on rhel8 to set relevant selinux boolean to allow rsync.
+
+Backport of:
+ >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24433/
+ >Change-Id: Ia0fdcfdd8c7d18cd194e011f6b365bf5cb70a20a
+ >Fixes: #1236
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1825177
+Change-Id: Ia0fdcfdd8c7d18cd194e011f6b365bf5cb70a20a
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200242
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 5ed07e7..9def416 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -523,6 +523,8 @@ Requires: util-linux
+ Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+ # required for setting selinux bools
+ %if ( 0%{?rhel} && 0%{?rhel} >= 8 )
++Requires(post): policycoreutils-python-utils
++Requires(postun): policycoreutils-python-utils
+ Requires: selinux-policy-targeted
+ Requires(post): selinux-policy-targeted
+ BuildRequires: selinux-policy-devel
+@@ -1978,6 +1980,10 @@ fi
+ %endif
+
+ %changelog
++
++* Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
++- added requires policycoreutils-python-utils on rhel8 for geo-replication
++
+ * Tue Aug 27 2019 Hari Gowtham <hgowtham@redhat.com>
+ - Added scripts to collect machine stats and component stats (#1719171)
+
+--
+1.8.3.1
+
diff --git a/0374-open-behind-fix-missing-fd-reference.patch b/0374-open-behind-fix-missing-fd-reference.patch
new file mode 100644
index 0000000..94a1fb9
--- /dev/null
+++ b/0374-open-behind-fix-missing-fd-reference.patch
@@ -0,0 +1,121 @@
+From 30cbdf8c06145a0c290da42ecc0a7eae928200b7 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Sun, 8 Mar 2020 18:36:45 +0100
+Subject: [PATCH 374/375] open-behind: fix missing fd reference
+
+Open behind was not keeping any reference on fd's pending to be
+opened. This made it possible for a concurrent close and an entry
+fop (unlink, rename, ...) to destroy the fd while it
+was still being used.
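+
+The reference lifecycle after the fix, in sketch form (see the hunks below):
+
+    fd_ref(fd);            /* taken while the background open is pending */
+
+    /* and released on exactly one of two paths: */
+    fd_unref(fd);          /* 1. in ob_wake_cbk(), once the open completes */
+    fd_unref(ob_fd->fd);   /* 2. in ob_fd_free(), if it never ran */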
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24204
+> Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9
+> Fixes: bz#1810934
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9
+BUG: 1830713
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/199714
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/open-behind/src/open-behind.c | 27 ++++++++++++++---------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index 268c717..14ebc12 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -206,8 +206,13 @@ ob_fd_free(ob_fd_t *ob_fd)
+ if (ob_fd->xdata)
+ dict_unref(ob_fd->xdata);
+
+- if (ob_fd->open_frame)
++ if (ob_fd->open_frame) {
++ /* If we still have a frame it means that background open has never
++ * been triggered. We need to release the pending reference. */
++ fd_unref(ob_fd->fd);
++
+ STACK_DESTROY(ob_fd->open_frame->root);
++ }
+
+ GF_FREE(ob_fd);
+ }
+@@ -297,6 +302,7 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ call_resume(stub);
+ }
+
++ /* The background open is completed. We can release the 'fd' reference. */
+ fd_unref(fd);
+
+ STACK_DESTROY(frame->root);
+@@ -331,7 +337,9 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+ }
+
+ if (frame) {
+- frame->local = fd_ref(fd);
++ /* We don't need to take a reference here. We already have a reference
++ * while the open is pending. */
++ frame->local = fd;
+
+ STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd,
+@@ -345,15 +353,12 @@ void
+ ob_inode_wake(xlator_t *this, struct list_head *ob_fds)
+ {
+ ob_fd_t *ob_fd = NULL, *tmp = NULL;
+- fd_t *fd = NULL;
+
+ if (!list_empty(ob_fds)) {
+ list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode)
+ {
+ ob_fd_wake(this, ob_fd->fd, ob_fd);
+- fd = ob_fd->fd;
+ ob_fd_free(ob_fd);
+- fd_unref(fd);
+ }
+ }
+ }
+@@ -365,7 +370,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst)
+ if (!src || !dst)
+ goto out;
+
+- dst->fd = __fd_ref(src->fd);
++ dst->fd = src->fd;
+ dst->loc.inode = inode_ref(src->loc.inode);
+ gf_uuid_copy(dst->loc.gfid, src->loc.gfid);
+ dst->flags = src->flags;
+@@ -509,7 +514,6 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+
+ ob_fd->ob_inode = ob_inode;
+
+- /* don't do fd_ref, it'll cause leaks */
+ ob_fd->fd = fd;
+
+ ob_fd->open_frame = copy_frame(frame);
+@@ -539,15 +543,16 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ }
+ UNLOCK(&fd->inode->lock);
+
+- if (!open_in_progress && !unlinked) {
+- fd_ref(fd);
++ /* We take a reference while the background open is pending or being
++ * processed. If we finally wind the request in the foreground, then
++ * ob_fd_free() will take care of this additional reference. */
++ fd_ref(fd);
+
++ if (!open_in_progress && !unlinked) {
+ STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata);
+
+ if (!conf->lazy_open)
+ ob_fd_wake(this, fd, NULL);
+-
+- fd_unref(fd);
+ } else {
+ ob_fd_free(ob_fd);
+ STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+--
+1.8.3.1
+
diff --git a/0375-features-shard-Send-correct-size-when-reads-are-sent.patch b/0375-features-shard-Send-correct-size-when-reads-are-sent.patch
new file mode 100644
index 0000000..32f9c19
--- /dev/null
+++ b/0375-features-shard-Send-correct-size-when-reads-are-sent.patch
@@ -0,0 +1,75 @@
+From ac5b1b38e705bd0e4c00cc50580a71dfaa4d3b5f Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Wed, 7 Aug 2019 12:12:43 +0530
+Subject: [PATCH 375/375] features/shard: Send correct size when reads are sent
+ beyond file size
+
+Backport of:
+> https://review.gluster.org/c/glusterfs/+/23175
+> Change-Id: I0cebaaf55c09eb1fb77a274268ff564e871b743b
+> fixes bz#1738419
+> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+
+Change-Id: I0cebaaf55c09eb1fb77a274268ff564e871b743b
+BUG: 1802013
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/199570
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/shard/bug-1738419.t | 29 +++++++++++++++++++++++++++++
+ xlators/features/shard/src/shard.c | 2 ++
+ 2 files changed, 31 insertions(+)
+ create mode 100644 tests/bugs/shard/bug-1738419.t
+
+diff --git a/tests/bugs/shard/bug-1738419.t b/tests/bugs/shard/bug-1738419.t
+new file mode 100644
+index 0000000..8d0a31d
+--- /dev/null
++++ b/tests/bugs/shard/bug-1738419.t
+@@ -0,0 +1,29 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 features.shard on
++TEST $CLI volume set $V0 network.remote-dio off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.strict-o-direct on
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++TEST dd if=/dev/zero of=$M0/metadata bs=501 count=1
++
++EXPECT "501" echo $("dd" if=$M0/metadata bs=4096 count=1 of=/dev/null iflag=direct 2>&1 | awk '/bytes/ {print $1}')
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST $CLI volume stop $V0
++TEST $CLI volume delete $V0
++
++cleanup
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index b224abd..9ed597b 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -4433,6 +4433,8 @@ out:
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+ vec.iov_base = local->iobuf->ptr;
++ if (local->offset + local->req_size > local->prebuf.ia_size)
++ local->total_size = local->prebuf.ia_size - local->offset;
+ vec.iov_len = local->total_size;
+ local->op_ret = local->total_size;
+ SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1,
+--
+1.8.3.1
+
diff --git a/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch b/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch
new file mode 100644
index 0000000..b295fc2
--- /dev/null
+++ b/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch
@@ -0,0 +1,70 @@
+From 341d75642ecc4e27bc6fecb56eb98a0ba03d8544 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Mon, 23 Mar 2020 11:47:10 +0530
+Subject: [PATCH 376/379] features/shard: Fix crash during shards cleanup in
+ error cases
+
+Backport of:
+> https://review.gluster.org/c/glusterfs/+/24244
+> Change-Id: I0b49f2b58becd0d8874b3d4b14ff8d92a89d02d5
+> Fixes: #1127
+> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+
+A crash is seen during a reattempt to clean up shards in the background
+upon remount. This happens on every remount, which means a remount
+is no workaround for the crash.
+
+In such a situation, the in-memory base inode object will not be
+existent (new process, non-existent base shard).
+So local->resolver_base_inode will be NULL.
+
+In the event of an error (in this case, of space running out), the
+process would crash at the time of logging the error in the following line -
+
+ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+ "failed to delete shards of %s",
+ uuid_utoa(local->resolver_base_inode->gfid));
+
+Fixed that by using local->base_gfid as the source of gfid when
+local->resolver_base_inode is NULL.
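+
+Sketch of the fallback (the full hunk is below):
+
+    uuid_t gfid = {0};
+
+    if (local->resolver_base_inode)
+        gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+    else
+        gf_uuid_copy(gfid, local->base_gfid);  /* no in-memory base inode */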
+
+Change-Id: I0b49f2b58becd0d8874b3d4b14ff8d92a89d02d5
+BUG: 1836233
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200689
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/shard/src/shard.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 9ed597b..ee38ed2 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -2729,13 +2729,20 @@ int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+ int shard_post_lookup_shards_unlink_handler(call_frame_t *frame,
+ xlator_t *this) {
+ shard_local_t *local = NULL;
++ uuid_t gfid = {
++ 0,
++ };
+
+ local = frame->local;
+
++ if (local->resolver_base_inode)
++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++ else
++ gf_uuid_copy(gfid, local->base_gfid);
++
+ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+- "failed to delete shards of %s",
+- uuid_utoa(local->resolver_base_inode->gfid));
++ "failed to delete shards of %s", uuid_utoa(gfid));
+ return 0;
+ }
+ local->op_ret = 0;
+--
+1.8.3.1
+
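
The fix follows a simple pattern: copy the gfid out of whichever source is valid before it is needed for logging. A hedged sketch with simplified stand-ins for gluster's uuid_t and inode types:

    #include <assert.h>
    #include <string.h>

    typedef unsigned char gfid_t[16];

    struct inode_stub { gfid_t gfid; };
    struct local_stub {
        struct inode_stub *resolver_base_inode; /* may be NULL           */
        gfid_t base_gfid;                       /* always populated      */
    };

    /* Prefer the gfid of the resolved base inode; fall back to the gfid
     * cached in `local` when the inode was never resolved (fresh mount,
     * base shard already gone). */
    static void
    pick_gfid(const struct local_stub *local, gfid_t out)
    {
        if (local->resolver_base_inode)
            memcpy(out, local->resolver_base_inode->gfid, sizeof(gfid_t));
        else
            memcpy(out, local->base_gfid, sizeof(gfid_t)); /* no NULL deref */
    }

    int
    main(void)
    {
        struct local_stub local = { .resolver_base_inode = NULL,
                                    .base_gfid = { 0xab } };
        gfid_t gfid;
        pick_gfid(&local, gfid);  /* safe even without a base inode */
        assert(gfid[0] == 0xab);
        return 0;
    }
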
diff --git a/0377-syncop-improve-scaling-and-implement-more-tools.patch b/0377-syncop-improve-scaling-and-implement-more-tools.patch
new file mode 100644
index 0000000..66cccc3
--- /dev/null
+++ b/0377-syncop-improve-scaling-and-implement-more-tools.patch
@@ -0,0 +1,862 @@
+From 66600fb55522d405a68d7340a5680a2633c4237e Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Thu, 30 Apr 2020 11:19:01 +0200
+Subject: [PATCH 377/379] syncop: improve scaling and implement more tools
+
+The current scaling of the syncop thread pool is not working properly
+and can leave some tasks in the run queue longer than necessary
+when the maximum number of threads has not been reached.
+
+This patch provides a better scaling condition to react faster to
+pending work.
+
+Condition variables and sleep in the context of a synctask have also
+been implemented. Their purpose is to replace regular condition
+variables and sleeps that block synctask threads and prevent other
+tasks from being executed.
+
+The new features have been applied to several places in glusterd.
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/24396/
+
+> Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf
+> Fixes: #1116
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf
+BUG: 1810516
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200409
+Tested-by: Sanju Rakonde <srakonde@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ libglusterfs/src/glusterfs/syncop.h | 52 +++-
+ libglusterfs/src/libglusterfs.sym | 7 +
+ libglusterfs/src/syncop.c | 306 ++++++++++++++++-----
+ xlators/cluster/dht/src/dht-rebalance.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 9 +-
+ xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 +-
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 5 +-
+ xlators/mgmt/glusterd/src/glusterd-syncop.h | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 29 +-
+ xlators/mgmt/glusterd/src/glusterd.c | 2 +
+ xlators/mgmt/glusterd/src/glusterd.h | 2 +
+ 11 files changed, 317 insertions(+), 101 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h
+index e0f1017..3011b4c 100644
+--- a/libglusterfs/src/glusterfs/syncop.h
++++ b/libglusterfs/src/glusterfs/syncop.h
+@@ -15,6 +15,7 @@
+ #include <sys/time.h>
+ #include <pthread.h>
+ #include <ucontext.h>
++#include "glusterfs/timer.h"
+
+ #define SYNCENV_PROC_MAX 16
+ #define SYNCENV_PROC_MIN 2
+@@ -32,6 +33,7 @@
+ struct synctask;
+ struct syncproc;
+ struct syncenv;
++struct synccond;
+
+ typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque);
+
+@@ -55,9 +57,12 @@ struct synctask {
+ call_frame_t *opframe;
+ synctask_cbk_t synccbk;
+ synctask_fn_t syncfn;
+- synctask_state_t state;
++ struct timespec *delta;
++ gf_timer_t *timer;
++ struct synccond *synccond;
+ void *opaque;
+ void *stack;
++ synctask_state_t state;
+ int woken;
+ int slept;
+ int ret;
+@@ -85,19 +90,21 @@ struct syncproc {
+ /* hosts the scheduler thread and framework for executing synctasks */
+ struct syncenv {
+ struct syncproc proc[SYNCENV_PROC_MAX];
+- int procs;
++
++ pthread_mutex_t mutex;
++ pthread_cond_t cond;
+
+ struct list_head runq;
+- int runcount;
+ struct list_head waitq;
+- int waitcount;
++
++ int procs;
++ int procs_idle;
++
++ int runcount;
+
+ int procmin;
+ int procmax;
+
+- pthread_mutex_t mutex;
+- pthread_cond_t cond;
+-
+ size_t stacksize;
+
+ int destroy; /* FLAG to mark syncenv is in destroy mode
+@@ -123,6 +130,13 @@ struct synclock {
+ };
+ typedef struct synclock synclock_t;
+
++struct synccond {
++ pthread_mutex_t pmutex;
++ pthread_cond_t pcond;
++ struct list_head waitq;
++};
++typedef struct synccond synccond_t;
++
+ struct syncbarrier {
+ gf_boolean_t initialized; /*Set on successful initialization*/
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+@@ -219,7 +233,7 @@ struct syncopctx {
+ #define __yield(args) \
+ do { \
+ if (args->task) { \
+- synctask_yield(args->task); \
++ synctask_yield(args->task, NULL); \
+ } else { \
+ pthread_mutex_lock(&args->mutex); \
+ { \
+@@ -307,7 +321,9 @@ synctask_join(struct synctask *task);
+ void
+ synctask_wake(struct synctask *task);
+ void
+-synctask_yield(struct synctask *task);
++synctask_yield(struct synctask *task, struct timespec *delta);
++void
++synctask_sleep(int32_t secs);
+ void
+ synctask_waitfor(struct synctask *task, int count);
+
+@@ -405,6 +421,24 @@ synclock_trylock(synclock_t *lock);
+ int
+ synclock_unlock(synclock_t *lock);
+
++int32_t
++synccond_init(synccond_t *cond);
++
++void
++synccond_destroy(synccond_t *cond);
++
++int
++synccond_wait(synccond_t *cond, synclock_t *lock);
++
++int
++synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta);
++
++void
++synccond_signal(synccond_t *cond);
++
++void
++synccond_broadcast(synccond_t *cond);
++
+ int
+ syncbarrier_init(syncbarrier_t *barrier);
+ int
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index 467a1b7..5a721e0 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -938,6 +938,12 @@ syncbarrier_destroy
+ syncbarrier_init
+ syncbarrier_wait
+ syncbarrier_wake
++synccond_init
++synccond_destroy
++synccond_wait
++synccond_timedwait
++synccond_signal
++synccond_broadcast
+ syncenv_destroy
+ syncenv_new
+ synclock_destroy
+@@ -1015,6 +1021,7 @@ synctask_new
+ synctask_new1
+ synctask_set
+ synctask_setid
++synctask_sleep
+ synctask_wake
+ synctask_yield
+ sys_access
+diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
+index 693970f..71d37b7 100644
+--- a/libglusterfs/src/syncop.c
++++ b/libglusterfs/src/syncop.c
+@@ -154,10 +154,14 @@ out:
+ return ret;
+ }
+
++void *
++syncenv_processor(void *thdata);
++
+ static void
+ __run(struct synctask *task)
+ {
+ struct syncenv *env = NULL;
++ int32_t total, ret, i;
+
+ env = task->env;
+
+@@ -173,7 +177,6 @@ __run(struct synctask *task)
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+- env->waitcount--;
+ break;
+ case SYNCTASK_DONE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
+@@ -187,8 +190,27 @@ __run(struct synctask *task)
+ }
+
+ list_add_tail(&task->all_tasks, &env->runq);
+- env->runcount++;
+ task->state = SYNCTASK_RUN;
++
++ env->runcount++;
++
++ total = env->procs + env->runcount - env->procs_idle;
++ if (total > env->procmax) {
++ total = env->procmax;
++ }
++ if (total > env->procs) {
++ for (i = 0; i < env->procmax; i++) {
++ if (env->proc[i].env == NULL) {
++ env->proc[i].env = env;
++ ret = gf_thread_create(&env->proc[i].processor, NULL,
++ syncenv_processor, &env->proc[i],
++ "sproc%d", i);
++ if ((ret < 0) || (++env->procs >= total)) {
++ break;
++ }
++ }
++ }
++ }
+ }
+
+ static void
+@@ -210,7 +232,6 @@ __wait(struct synctask *task)
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK,
+ "re-waiting already waiting "
+ "task");
+- env->waitcount--;
+ break;
+ case SYNCTASK_DONE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
+@@ -223,12 +244,11 @@ __wait(struct synctask *task)
+ }
+
+ list_add_tail(&task->all_tasks, &env->waitq);
+- env->waitcount++;
+ task->state = SYNCTASK_WAIT;
+ }
+
+ void
+-synctask_yield(struct synctask *task)
++synctask_yield(struct synctask *task, struct timespec *delta)
+ {
+ xlator_t *oldTHIS = THIS;
+
+@@ -237,6 +257,8 @@ synctask_yield(struct synctask *task)
+ task->proc->sched.uc_flags &= ~_UC_TLSBASE;
+ #endif
+
++ task->delta = delta;
++
+ if (task->state != SYNCTASK_DONE) {
+ task->state = SYNCTASK_SUSPEND;
+ }
+@@ -249,6 +271,35 @@ synctask_yield(struct synctask *task)
+ }
+
+ void
++synctask_sleep(int32_t secs)
++{
++ struct timespec delta;
++ struct synctask *task;
++
++ task = synctask_get();
++
++ if (task == NULL) {
++ sleep(secs);
++ } else {
++ delta.tv_sec = secs;
++ delta.tv_nsec = 0;
++
++ synctask_yield(task, &delta);
++ }
++}
++
++static void
++__synctask_wake(struct synctask *task)
++{
++ task->woken = 1;
++
++ if (task->slept)
++ __run(task);
++
++ pthread_cond_broadcast(&task->env->cond);
++}
++
++void
+ synctask_wake(struct synctask *task)
+ {
+ struct syncenv *env = NULL;
+@@ -257,13 +308,18 @@ synctask_wake(struct synctask *task)
+
+ pthread_mutex_lock(&env->mutex);
+ {
+- task->woken = 1;
++ if (task->timer != NULL) {
++ if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) {
++ goto unlock;
++ }
+
+- if (task->slept)
+- __run(task);
++ task->timer = NULL;
++ task->synccond = NULL;
++ }
+
+- pthread_cond_broadcast(&env->cond);
++ __synctask_wake(task);
+ }
++unlock:
+ pthread_mutex_unlock(&env->mutex);
+ }
+
+@@ -282,7 +338,7 @@ synctask_wrap(void)
+
+ task->state = SYNCTASK_DONE;
+
+- synctask_yield(task);
++ synctask_yield(task, NULL);
+ }
+
+ void
+@@ -422,11 +478,6 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
+ }
+
+ synctask_wake(newtask);
+- /*
+- * Make sure someone's there to execute anything we just put on the
+- * run queue.
+- */
+- syncenv_scale(env);
+
+ return newtask;
+ err:
+@@ -520,8 +571,12 @@ syncenv_task(struct syncproc *proc)
+ goto unlock;
+ }
+
++ env->procs_idle++;
++
+ sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME;
+ ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till);
++
++ env->procs_idle--;
+ }
+
+ task = list_entry(env->runq.next, struct synctask, all_tasks);
+@@ -540,6 +595,34 @@ unlock:
+ return task;
+ }
+
++static void
++synctask_timer(void *data)
++{
++ struct synctask *task = data;
++ struct synccond *cond;
++
++ cond = task->synccond;
++ if (cond != NULL) {
++ pthread_mutex_lock(&cond->pmutex);
++
++ list_del_init(&task->waitq);
++ task->synccond = NULL;
++
++ pthread_mutex_unlock(&cond->pmutex);
++
++ task->ret = -ETIMEDOUT;
++ }
++
++ pthread_mutex_lock(&task->env->mutex);
++
++ gf_timer_call_cancel(task->xl->ctx, task->timer);
++ task->timer = NULL;
++
++ __synctask_wake(task);
++
++ pthread_mutex_unlock(&task->env->mutex);
++}
++
+ void
+ synctask_switchto(struct synctask *task)
+ {
+@@ -572,7 +655,14 @@ synctask_switchto(struct synctask *task)
+ } else {
+ task->slept = 1;
+ __wait(task);
++
++ if (task->delta != NULL) {
++ task->timer = gf_timer_call_after(task->xl->ctx, *task->delta,
++ synctask_timer, task);
++ }
+ }
++
++ task->delta = NULL;
+ }
+ pthread_mutex_unlock(&env->mutex);
+ }
+@@ -580,65 +670,18 @@ synctask_switchto(struct synctask *task)
+ void *
+ syncenv_processor(void *thdata)
+ {
+- struct syncenv *env = NULL;
+ struct syncproc *proc = NULL;
+ struct synctask *task = NULL;
+
+ proc = thdata;
+- env = proc->env;
+-
+- for (;;) {
+- task = syncenv_task(proc);
+- if (!task)
+- break;
+
++ while ((task = syncenv_task(proc)) != NULL) {
+ synctask_switchto(task);
+-
+- syncenv_scale(env);
+ }
+
+ return NULL;
+ }
+
+-void
+-syncenv_scale(struct syncenv *env)
+-{
+- int diff = 0;
+- int scale = 0;
+- int i = 0;
+- int ret = 0;
+-
+- pthread_mutex_lock(&env->mutex);
+- {
+- if (env->procs > env->runcount)
+- goto unlock;
+-
+- scale = env->runcount;
+- if (scale > env->procmax)
+- scale = env->procmax;
+- if (scale > env->procs)
+- diff = scale - env->procs;
+- while (diff) {
+- diff--;
+- for (; (i < env->procmax); i++) {
+- if (env->proc[i].processor == 0)
+- break;
+- }
+-
+- env->proc[i].env = env;
+- ret = gf_thread_create(&env->proc[i].processor, NULL,
+- syncenv_processor, &env->proc[i],
+- "sproc%03hx", env->procs & 0x3ff);
+- if (ret)
+- break;
+- env->procs++;
+- i++;
+- }
+- }
+-unlock:
+- pthread_mutex_unlock(&env->mutex);
+-}
+-
+ /* The syncenv threads are cleaned up in this routine.
+ */
+ void
+@@ -715,12 +758,13 @@ syncenv_new(size_t stacksize, int procmin, int procmax)
+ newenv->stacksize = stacksize;
+ newenv->procmin = procmin;
+ newenv->procmax = procmax;
++ newenv->procs_idle = 0;
+
+ for (i = 0; i < newenv->procmin; i++) {
+ newenv->proc[i].env = newenv;
+ ret = gf_thread_create(&newenv->proc[i].processor, NULL,
+ syncenv_processor, &newenv->proc[i], "sproc%d",
+- newenv->procs);
++ i);
+ if (ret)
+ break;
+ newenv->procs++;
+@@ -810,7 +854,7 @@ __synclock_lock(struct synclock *lock)
+ task->woken = 0;
+ list_add_tail(&task->waitq, &lock->waitq);
+ pthread_mutex_unlock(&lock->guard);
+- synctask_yield(task);
++ synctask_yield(task, NULL);
+ /* task is removed from waitq in unlock,
+ * under lock->guard.*/
+ pthread_mutex_lock(&lock->guard);
+@@ -963,6 +1007,136 @@ synclock_unlock(synclock_t *lock)
+ return ret;
+ }
+
++/* Condition variables */
++
++int32_t
++synccond_init(synccond_t *cond)
++{
++ int32_t ret;
++
++ INIT_LIST_HEAD(&cond->waitq);
++
++ ret = pthread_mutex_init(&cond->pmutex, NULL);
++ if (ret != 0) {
++ return -ret;
++ }
++
++ ret = pthread_cond_init(&cond->pcond, NULL);
++ if (ret != 0) {
++ pthread_mutex_destroy(&cond->pmutex);
++ }
++
++ return -ret;
++}
++
++void
++synccond_destroy(synccond_t *cond)
++{
++ pthread_cond_destroy(&cond->pcond);
++ pthread_mutex_destroy(&cond->pmutex);
++}
++
++int
++synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta)
++{
++ struct timespec now;
++ struct synctask *task = NULL;
++ int ret;
++
++ task = synctask_get();
++
++ if (task == NULL) {
++ if (delta != NULL) {
++ timespec_now_realtime(&now);
++ timespec_adjust_delta(&now, *delta);
++ }
++
++ pthread_mutex_lock(&cond->pmutex);
++
++ if (delta == NULL) {
++ ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex);
++ } else {
++ ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now);
++ }
++ } else {
++ pthread_mutex_lock(&cond->pmutex);
++
++ list_add_tail(&task->waitq, &cond->waitq);
++ task->synccond = cond;
++
++ ret = synclock_unlock(lock);
++ if (ret == 0) {
++ pthread_mutex_unlock(&cond->pmutex);
++
++ synctask_yield(task, delta);
++
++ ret = synclock_lock(lock);
++ if (ret == 0) {
++ ret = task->ret;
++ }
++ task->ret = 0;
++
++ return ret;
++ }
++
++ list_del_init(&task->waitq);
++ }
++
++ pthread_mutex_unlock(&cond->pmutex);
++
++ return ret;
++}
++
++int
++synccond_wait(synccond_t *cond, synclock_t *lock)
++{
++ return synccond_timedwait(cond, lock, NULL);
++}
++
++void
++synccond_signal(synccond_t *cond)
++{
++ struct synctask *task;
++
++ pthread_mutex_lock(&cond->pmutex);
++
++ if (!list_empty(&cond->waitq)) {
++ task = list_first_entry(&cond->waitq, struct synctask, waitq);
++ list_del_init(&task->waitq);
++
++ pthread_mutex_unlock(&cond->pmutex);
++
++ synctask_wake(task);
++ } else {
++ pthread_cond_signal(&cond->pcond);
++
++ pthread_mutex_unlock(&cond->pmutex);
++ }
++}
++
++void
++synccond_broadcast(synccond_t *cond)
++{
++ struct list_head list;
++ struct synctask *task;
++
++ INIT_LIST_HEAD(&list);
++
++ pthread_mutex_lock(&cond->pmutex);
++
++ list_splice_init(&cond->waitq, &list);
++ pthread_cond_broadcast(&cond->pcond);
++
++ pthread_mutex_unlock(&cond->pmutex);
++
++ while (!list_empty(&list)) {
++ task = list_first_entry(&list, struct synctask, waitq);
++ list_del_init(&task->waitq);
++
++ synctask_wake(task);
++ }
++}
++
+ /* Barriers */
+
+ int
+@@ -1032,7 +1206,7 @@ __syncbarrier_wait(struct syncbarrier *barrier, int waitfor)
+ /* called within a synctask */
+ list_add_tail(&task->waitq, &barrier->waitq);
+ pthread_mutex_unlock(&barrier->guard);
+- synctask_yield(task);
++ synctask_yield(task, NULL);
+ pthread_mutex_lock(&barrier->guard);
+ } else {
+ /* called by a non-synctask */
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index c692119..957deaa 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -5224,7 +5224,7 @@ gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag)
+ defrag->tier_conf.pause_timer = gf_timer_call_after(
+ this->ctx, delta, gf_defrag_pause_tier_timeout, this);
+
+- synctask_yield(defrag->tier_conf.pause_synctask);
++ synctask_yield(defrag->tier_conf.pause_synctask, NULL);
+
+ if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
+ goto out;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 0d29de2..6475611 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -6076,13 +6076,8 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ static void
+ glusterd_wait_for_blockers(glusterd_conf_t *priv)
+ {
+- uint64_t blockers = GF_ATOMIC_GET(priv->blockers);
+-
+- while (blockers) {
+- synclock_unlock(&priv->big_lock);
+- sleep(1);
+- blockers = GF_ATOMIC_GET(priv->blockers);
+- synclock_lock(&priv->big_lock);
++ while (GF_ATOMIC_GET(priv->blockers)) {
++ synccond_wait(&priv->cond_blockers, &priv->big_lock);
+ }
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+index 36018a0..f55a5fd 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+@@ -112,7 +112,7 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags)
+ goto out;
+
+ synclock_unlock(&conf->big_lock);
+- sleep(1);
++ synctask_sleep(1);
+ synclock_lock(&conf->big_lock);
+ if (gf_is_service_running(proc->pidfile, &pid)) {
+ ret = kill(pid, SIGKILL);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+index d225854..386eed2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+@@ -1961,9 +1961,7 @@ glusterd_update_snaps_synctask(void *opaque)
+ synclock_lock(&conf->big_lock);
+
+ while (conf->restart_bricks) {
+- synclock_unlock(&conf->big_lock);
+- sleep(2);
+- synclock_lock(&conf->big_lock);
++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
+ }
+ conf->restart_bricks = _gf_true;
+
+@@ -2070,6 +2068,7 @@ out:
+ if (dict)
+ dict_unref(dict);
+ conf->restart_bricks = _gf_false;
++ synccond_broadcast(&conf->cond_restart_bricks);
+
+ return ret;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h
+index ce4a940..a265f21 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.h
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h
+@@ -32,7 +32,7 @@
+ ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum, \
+ cbk, (xdrproc_t)xdrproc); \
+ if (!ret) \
+- synctask_yield(stb->task); \
++ synctask_yield(stb->task, NULL); \
+ else \
+ gf_asprintf(&stb->errstr, \
+ "%s failed. Check log file" \
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 812c698..ce9931c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -5068,22 +5068,22 @@ glusterd_import_friend_volumes_synctask(void *opaque)
+ * restarted (refer glusterd_restart_bricks ())
+ */
+ while (conf->restart_bricks) {
+- synclock_unlock(&conf->big_lock);
+- sleep(2);
+- synclock_lock(&conf->big_lock);
++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
+ }
+ conf->restart_bricks = _gf_true;
+
+ while (i <= count) {
+ ret = glusterd_import_friend_volume(peer_data, i);
+ if (ret) {
+- conf->restart_bricks = _gf_false;
+- goto out;
++ break;
+ }
+ i++;
+ }
+- glusterd_svcs_manager(NULL);
++ if (i > count) {
++ glusterd_svcs_manager(NULL);
++ }
+ conf->restart_bricks = _gf_false;
++ synccond_broadcast(&conf->cond_restart_bricks);
+ out:
+ if (peer_data)
+ dict_unref(peer_data);
+@@ -5769,7 +5769,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
+ call_frame_t *frame = v_frame;
+ glusterd_conf_t *conf = frame->this->private;
+
+- GF_ATOMIC_DEC(conf->blockers);
++ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
++ synccond_broadcast(&conf->cond_blockers);
++ }
+
+ STACK_DESTROY(frame->root);
+ return 0;
+@@ -5865,7 +5867,9 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count,
+ }
+ }
+ out:
+- GF_ATOMIC_DEC(conf->blockers);
++ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
++ synccond_broadcast(&conf->cond_blockers);
++ }
+ STACK_DESTROY(frame->root);
+ return 0;
+ }
+@@ -6053,7 +6057,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
+ * TBD: see if there's a better way
+ */
+ synclock_unlock(&conf->big_lock);
+- sleep(1);
++ synctask_sleep(1);
+ synclock_lock(&conf->big_lock);
+ }
+
+@@ -6193,7 +6197,7 @@ find_compat_brick_in_vol(glusterd_conf_t *conf,
+ "brick %s is still"
+ " starting, waiting for 2 seconds ",
+ other_brick->path);
+- sleep(2);
++ synctask_sleep(2);
+ synclock_lock(&conf->big_lock);
+ retries--;
+ }
+@@ -6680,9 +6684,7 @@ glusterd_restart_bricks(void *opaque)
+ * glusterd_compare_friend_data ())
+ */
+ while (conf->restart_bricks) {
+- synclock_unlock(&conf->big_lock);
+- sleep(2);
+- synclock_lock(&conf->big_lock);
++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
+ }
+ conf->restart_bricks = _gf_true;
+
+@@ -6798,6 +6800,7 @@ out:
+ GF_ATOMIC_DEC(conf->blockers);
+ conf->restart_done = _gf_true;
+ conf->restart_bricks = _gf_false;
++ synccond_broadcast(&conf->cond_restart_bricks);
+
+ return_block:
+ return ret;
+diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
+index d360312..a01034a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.c
++++ b/xlators/mgmt/glusterd/src/glusterd.c
+@@ -1845,6 +1845,8 @@ init(xlator_t *this)
+ (void)strncpy(conf->rundir, rundir, sizeof(conf->rundir));
+
+ synclock_init(&conf->big_lock, SYNC_LOCK_RECURSIVE);
++ synccond_init(&conf->cond_restart_bricks);
++ synccond_init(&conf->cond_blockers);
+ pthread_mutex_init(&conf->xprt_lock, NULL);
+ INIT_LIST_HEAD(&conf->xprt_list);
+ pthread_mutex_init(&conf->import_volumes, NULL);
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 2be005c..1c6c3b1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -209,6 +209,8 @@ typedef struct {
+ dict_t *opts;
+ synclock_t big_lock;
+ gf_boolean_t restart_done;
++ synccond_t cond_restart_bricks;
++ synccond_t cond_blockers;
+ rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */
+ uint32_t base_port;
+ uint32_t max_port;
+--
+1.8.3.1
+
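
The glusterd hunks above all replace the same busy-wait shape — synclock_unlock(), sleep(1) or sleep(2), synclock_lock(), re-check — with a wait on a synccond. The equivalent shape in plain POSIX threads, as a sketch (not gluster's synccond API, which additionally yields the synctask instead of blocking the thread):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cond_restart_bricks = PTHREAD_COND_INITIALIZER;
    static bool restart_bricks = false;

    /* Waiters block on the condition variable instead of polling the
     * flag with unlock/sleep/lock. */
    static void
    take_restart_turn(void)
    {
        pthread_mutex_lock(&big_lock);
        while (restart_bricks)                        /* re-check on wakeup */
            pthread_cond_wait(&cond_restart_bricks, &big_lock);
        restart_bricks = true;                        /* it is our turn now */
        pthread_mutex_unlock(&big_lock);
    }

    /* The owner broadcasts when the flag clears, waking all waiters. */
    static void
    finish_restart_turn(void)
    {
        pthread_mutex_lock(&big_lock);
        restart_bricks = false;
        pthread_cond_broadcast(&cond_restart_bricks);
        pthread_mutex_unlock(&big_lock);
    }

    int
    main(void)
    {
        take_restart_turn();
        finish_restart_turn();
        return 0;
    }

The gain is twofold: waiters wake as soon as the flag clears rather than after a fixed sleep, and within a synctask the processor thread is freed to run other tasks while the waiter is parked.
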
diff --git a/0378-Revert-open-behind-fix-missing-fd-reference.patch b/0378-Revert-open-behind-fix-missing-fd-reference.patch
new file mode 100644
index 0000000..e228be2
--- /dev/null
+++ b/0378-Revert-open-behind-fix-missing-fd-reference.patch
@@ -0,0 +1,120 @@
+From d79660ccc65f163e0d9cf91cc13a199bec04d5f1 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez Juan <xhernandez@redhat.com>
+Date: Wed, 20 May 2020 12:55:43 +0000
+Subject: [PATCH 378/379] Revert "open-behind: fix missing fd reference"
+
+This reverts commit 30cbdf8c06145a0c290da42ecc0a7eae928200b7.
+
+The patch is not complete: crash reports have recently appeared
+upstream since it was released. A new patch that should cover all
+corner cases is under review (), but it's a big change and it could be
+risky to backport without enough testing.
+
+Since a workaround exists to avoid the problem (disabling
+open-behind), we revert the patch for now.
+
+Change-Id: I9cfc55623c33758cf5530b18f03c0d795b0f650b
+BUG: 1830713
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200952
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/open-behind/src/open-behind.c | 27 +++++++++--------------
+ 1 file changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index 14ebc12..268c717 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -206,13 +206,8 @@ ob_fd_free(ob_fd_t *ob_fd)
+ if (ob_fd->xdata)
+ dict_unref(ob_fd->xdata);
+
+- if (ob_fd->open_frame) {
+- /* If we sill have a frame it means that background open has never
+- * been triggered. We need to release the pending reference. */
+- fd_unref(ob_fd->fd);
+-
++ if (ob_fd->open_frame)
+ STACK_DESTROY(ob_fd->open_frame->root);
+- }
+
+ GF_FREE(ob_fd);
+ }
+@@ -302,7 +297,6 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ call_resume(stub);
+ }
+
+- /* The background open is completed. We can release the 'fd' reference. */
+ fd_unref(fd);
+
+ STACK_DESTROY(frame->root);
+@@ -337,9 +331,7 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+ }
+
+ if (frame) {
+- /* We don't need to take a reference here. We already have a reference
+- * while the open is pending. */
+- frame->local = fd;
++ frame->local = fd_ref(fd);
+
+ STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd,
+@@ -353,12 +345,15 @@ void
+ ob_inode_wake(xlator_t *this, struct list_head *ob_fds)
+ {
+ ob_fd_t *ob_fd = NULL, *tmp = NULL;
++ fd_t *fd = NULL;
+
+ if (!list_empty(ob_fds)) {
+ list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode)
+ {
+ ob_fd_wake(this, ob_fd->fd, ob_fd);
++ fd = ob_fd->fd;
+ ob_fd_free(ob_fd);
++ fd_unref(fd);
+ }
+ }
+ }
+@@ -370,7 +365,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst)
+ if (!src || !dst)
+ goto out;
+
+- dst->fd = src->fd;
++ dst->fd = __fd_ref(src->fd);
+ dst->loc.inode = inode_ref(src->loc.inode);
+ gf_uuid_copy(dst->loc.gfid, src->loc.gfid);
+ dst->flags = src->flags;
+@@ -514,6 +509,7 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+
+ ob_fd->ob_inode = ob_inode;
+
++ /* don't do fd_ref, it'll cause leaks */
+ ob_fd->fd = fd;
+
+ ob_fd->open_frame = copy_frame(frame);
+@@ -543,16 +539,15 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ }
+ UNLOCK(&fd->inode->lock);
+
+- /* We take a reference while the background open is pending or being
+- * processed. If we finally wind the request in the foreground, then
+- * ob_fd_free() will take care of this additional reference. */
+- fd_ref(fd);
+-
+ if (!open_in_progress && !unlinked) {
++ fd_ref(fd);
++
+ STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata);
+
+ if (!conf->lazy_open)
+ ob_fd_wake(this, fd, NULL);
++
++ fd_unref(fd);
+ } else {
+ ob_fd_free(ob_fd);
+ STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+--
+1.8.3.1
+
diff --git a/0379-glusterd-add-missing-synccond_broadcast.patch b/0379-glusterd-add-missing-synccond_broadcast.patch
new file mode 100644
index 0000000..cd51c6d
--- /dev/null
+++ b/0379-glusterd-add-missing-synccond_broadcast.patch
@@ -0,0 +1,45 @@
+From e06882a7fea9720a2899f7d52d5d3866ff098866 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Thu, 21 May 2020 08:26:11 +0200
+Subject: [PATCH 379/379] glusterd: add missing synccond_broadcast()
+
+After the changes in commit 3da22f8cb08b05562a4c6bd2694f2f19199cff7f,
+there was a place where synccond_broadcast() was missing. It could
+cause a hang if another synctask was waiting on the condition variable.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24476
+> Change-Id: I92bfe4e15c5c3591e4854a64aa9e1566d50dd204
+> Fixes: #1116
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: I92bfe4e15c5c3591e4854a64aa9e1566d50dd204
+BUG: 1810516
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/201057
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index ce9931c..c92cdf3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -6797,9 +6797,11 @@ glusterd_restart_bricks(void *opaque)
+ ret = 0;
+
+ out:
+- GF_ATOMIC_DEC(conf->blockers);
+ conf->restart_done = _gf_true;
+ conf->restart_bricks = _gf_false;
++ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
++ synccond_broadcast(&conf->cond_blockers);
++ }
+ synccond_broadcast(&conf->cond_restart_bricks);
+
+ return_block:
+--
+1.8.3.1
+
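
The callbacks in patches 0377 and 0379 only broadcast when the blockers counter actually reaches zero, so waiters are woken exactly once, by whichever thread releases the last blocker. A sketch of the same pattern in C11 atomics — note that gluster's GF_ATOMIC_DEC returns the new value (compared against 0 in the patch), whereas atomic_fetch_sub returns the previous value, hence the comparison against 1 below:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint_fast64_t blockers = 2;

    /* Placeholder for synccond_broadcast(&conf->cond_blockers). */
    static void
    wake_blocker_waiters(void)
    {
        puts("last blocker released, waking waiters");
    }

    /* Only the releaser of the *last* blocker signals the waiters. */
    static void
    release_blocker(void)
    {
        if (atomic_fetch_sub(&blockers, 1) == 1)
            wake_blocker_waiters();
    }

    int
    main(void)
    {
        release_blocker();  /* 2 -> 1: no wakeup */
        release_blocker();  /* 1 -> 0: wakeup    */
        return 0;
    }

Decrementing and testing in one atomic step is what makes this race-free: two concurrent releasers can never both see zero.
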
diff --git a/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch b/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch
new file mode 100644
index 0000000..05915d9
--- /dev/null
+++ b/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch
@@ -0,0 +1,306 @@
+From 2cf22e54c8424949607c4a20df84887b838b2702 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Fri, 15 May 2020 11:29:36 +0530
+Subject: [PATCH 380/382] features/shard: Aggregate size, block-count in iatt
+ before unwinding setxattr
+
+Backport of:
+> Upstream patch - https://review.gluster.org/c/glusterfs/+/24471
+> Fixes: #1243
+> Change-Id: I4da0eceb4235b91546df79270bcc0af8cd64e9ea
+> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+
+Posix translator returns pre and postbufs in the dict in {F}SETXATTR fops.
+These iatts are further cached at layers like md-cache.
+Shard translator, in its current state, simply returns these values without
+updating the aggregated file size and block-count.
+
+This patch fixes this problem.
+
+Change-Id: I4da0eceb4235b91546df79270bcc0af8cd64e9ea
+BUG: 1823423
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/201135
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ tests/bugs/shard/issue-1243.t | 31 ++++++
+ xlators/features/shard/src/shard.c | 218 +++++++++++++++++++++++++++++++++----
+ 2 files changed, 225 insertions(+), 24 deletions(-)
+ create mode 100644 tests/bugs/shard/issue-1243.t
+
+diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t
+new file mode 100644
+index 0000000..b0c092c
+--- /dev/null
++++ b/tests/bugs/shard/issue-1243.t
+@@ -0,0 +1,31 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
++TEST $CLI volume set $V0 features.shard on
++TEST $CLI volume set $V0 features.shard-block-size 4MB
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.strict-o-direct on
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++TEST $CLI volume set $V0 md-cache-timeout 10
++
++# Write data into a file such that its size crosses shard-block-size
++TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct
++
++# Execute a setxattr on the file.
++TEST setfattr -n trusted.libvirt -v some-value $M0/foo
++
++# Size of the file should be the aggregated size, not the shard-block-size
++EXPECT '8388608' stat -c %s $M0/foo
++
++cleanup
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index ee38ed2..6ae4c41 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -5929,36 +5929,206 @@ out:
+ return 0;
+ }
+
+-int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- dict_t *dict, int32_t flags, dict_t *xdata) {
+- int op_errno = EINVAL;
++int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, dict_t *xdata) {
++ int ret = -1;
++ struct iatt *prebuf = NULL;
++ struct iatt *postbuf = NULL;
++ struct iatt *stbuf = NULL;
++ data_t *data = NULL;
++ shard_local_t *local = NULL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+- }
++ local = frame->local;
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr,
+- fd, dict, flags, xdata);
+- return 0;
+-out:
+- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
+- return 0;
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto err;
++ }
++
++ if (!xdata)
++ goto unwind;
++
++ data = dict_get(xdata, GF_PRESTAT);
++ if (data) {
++ stbuf = data_to_iatt(data, GF_PRESTAT);
++ prebuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
++ if (prebuf == NULL) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ *prebuf = *stbuf;
++ prebuf->ia_size = local->prebuf.ia_size;
++ prebuf->ia_blocks = local->prebuf.ia_blocks;
++ ret = dict_set_iatt(xdata, GF_PRESTAT, prebuf, false);
++ if (ret < 0) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ prebuf = NULL;
++ }
++
++ data = dict_get(xdata, GF_POSTSTAT);
++ if (data) {
++ stbuf = data_to_iatt(data, GF_POSTSTAT);
++ postbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
++ if (postbuf == NULL) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ *postbuf = *stbuf;
++ postbuf->ia_size = local->prebuf.ia_size;
++ postbuf->ia_blocks = local->prebuf.ia_blocks;
++ ret = dict_set_iatt(xdata, GF_POSTSTAT, postbuf, false);
++ if (ret < 0) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ postbuf = NULL;
++ }
++
++unwind:
++ if (local->fd)
++ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
++ xdata);
++ else
++ SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
++ xdata);
++ return 0;
++
++err:
++ GF_FREE(prebuf);
++ GF_FREE(postbuf);
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
+ }
+
+-int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- dict_t *dict, int32_t flags, dict_t *xdata) {
+- int op_errno = EINVAL;
++int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+- }
++ local = frame->local;
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+- loc, dict, flags, xdata);
+- return 0;
+-out:
+- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
+- return 0;
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
++
++ if (local->fd)
++ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetxattr, local->fd,
++ local->xattr_req, local->flags, local->xattr_rsp);
++ else
++ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setxattr, &local->loc,
++ local->xattr_req, local->flags, local->xattr_rsp);
++ return 0;
++}
++
++int32_t shard_common_set_xattr(call_frame_t *frame, xlator_t *this,
++ glusterfs_fop_t fop, loc_t *loc, fd_t *fd,
++ dict_t *dict, int32_t flags, dict_t *xdata) {
++ int ret = -1;
++ int op_errno = ENOMEM;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
++ inode_t *inode = loc ? loc->inode : fd->inode;
++
++ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
++ if (loc)
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
++ xdata);
++ else
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
++ xdata);
++ return 0;
++ }
++
++ /* Sharded or not, if shard's special xattrs are attempted to be set,
++ * fail the fop with EPERM (except if the client is gsyncd.
++ */
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err);
++ }
++
++ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block size from inode ctx of %s",
++ uuid_utoa(inode->gfid));
++ goto err;
++ }
++
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ if (loc)
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
++ xdata);
++ else
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
++ xdata);
++ return 0;
++ }
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
++
++ frame->local = local;
++ local->fop = fop;
++ if (loc) {
++ if (loc_copy(&local->loc, loc) != 0)
++ goto err;
++ }
++
++ if (fd) {
++ local->fd = fd_ref(fd);
++ local->loc.inode = inode_ref(fd->inode);
++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++ }
++ local->flags = flags;
++ /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict
++ * and the xdata dict
++ */
++ if (dict)
++ local->xattr_req = dict_ref(dict);
++ if (xdata)
++ local->xattr_rsp = dict_ref(xdata);
++
++ /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is
++ * on an fd. This comes under a generic class of bugs in shard tracked by
++ * bz #1782428.
++ */
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_set_xattr_handler);
++ return 0;
++err:
++ shard_common_failure_unwind(fop, frame, -1, op_errno);
++ return 0;
++}
++
++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ dict_t *dict, int32_t flags, dict_t *xdata) {
++ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags,
++ xdata);
++ return 0;
++}
++
++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ dict_t *dict, int32_t flags, dict_t *xdata) {
++ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags,
++ xdata);
++ return 0;
+ }
+
+ int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) {
+--
+1.8.3.1
+
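
The essence of the fix is the overwrite step in the callback: the stat returned by posix describes only the base shard, so its size and block count are replaced with the aggregated values cached during the preceding lookup, before the fop unwinds. A simplified sketch with a stand-in iatt type (the real code additionally copies the iatt out of and back into the xdata dict):

    #include <assert.h>
    #include <stdint.h>

    /* Stand-in for gluster's struct iatt; only the fields the patch
     * touches are kept. */
    struct iatt_stub {
        uint64_t ia_size;
        uint64_t ia_blocks;
    };

    /* Replace the base shard's size/blocks with the aggregated values. */
    static void
    aggregate_iatt(struct iatt_stub *buf, const struct iatt_stub *cached)
    {
        buf->ia_size = cached->ia_size;     /* full file size, all shards */
        buf->ia_blocks = cached->ia_blocks; /* full block count           */
    }

    int
    main(void)
    {
        struct iatt_stub posix_post = { .ia_size = 4194304 };  /* one shard  */
        struct iatt_stub cached = { .ia_size = 8388608,        /* whole file */
                                    .ia_blocks = 16384 };
        aggregate_iatt(&posix_post, &cached);
        assert(posix_post.ia_size == 8388608); /* matches the test's EXPECT */
        return 0;
    }
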
diff --git a/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch b/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch
new file mode 100644
index 0000000..aa875a2
--- /dev/null
+++ b/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch
@@ -0,0 +1,48 @@
+From 63ea2aad2474a0ca169342c699cb1689e6c1d83f Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Fri, 22 May 2020 13:49:14 +0530
+Subject: [PATCH 381/382] dht: add null check in gf_defrag_free_dir_dfmeta
+
+Backport of https://review.gluster.org/#/c/glusterfs/+/24479/
+
+BUG: 1812789
+Change-Id: I502ed43051bd60d9e5d2b69d4e4d7b6eea997285
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/201150
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 957deaa..8f31dca 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -47,7 +47,8 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
+
+ if (meta) {
+ for (i = 0; i < local_subvols_cnt; i++) {
+- gf_dirent_free(&meta->equeue[i]);
++ if (meta->equeue)
++ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
+ }
+@@ -3344,9 +3345,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ if (ret) {
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+- gf_smsg(this->name, GF_LOG_WARNING, 0, 0,
+- "failed to open dir: %s subvol: %s", loc->path,
+- conf->local_subvols[i]->name);
++ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
++ "failed to open dir: %s subvol: %s", loc->path,
++ conf->local_subvols[i]->name);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+--
+1.8.3.1
+
diff --git a/0382-features-shard-Aggregate-file-size-block-count-befor.patch b/0382-features-shard-Aggregate-file-size-block-count-befor.patch
new file mode 100644
index 0000000..a6528f5
--- /dev/null
+++ b/0382-features-shard-Aggregate-file-size-block-count-befor.patch
@@ -0,0 +1,422 @@
+From 4097a748cbb7616d78886b35e3360177d570b7a6 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Fri, 22 May 2020 13:25:26 +0530
+Subject: [PATCH 382/382] features/shard: Aggregate file size, block-count
+ before unwinding removexattr
+
+Posix translator returns pre and postbufs in the dict in {F}REMOVEXATTR fops.
+These iatts are further cached at layers like md-cache.
+Shard translator, in its current state, simply returns these values without
+updating the aggregated file size and block-count.
+
+This patch fixes this problem.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24480
+> Change-Id: I4b2dd41ede472c5829af80a67401ec5a6376d872
+> Fixes: #1243
+> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+
+Change-Id: I4b2dd41ede472c5829af80a67401ec5a6376d872
+BUG: 1823423
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/201456
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/shard/issue-1243.t | 12 ++
+ xlators/features/shard/src/shard.c | 293 ++++++++++++++++++++++++++-----------
+ xlators/features/shard/src/shard.h | 1 +
+ 3 files changed, 224 insertions(+), 82 deletions(-)
+
+diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t
+index b0c092c..ba22d2b 100644
+--- a/tests/bugs/shard/issue-1243.t
++++ b/tests/bugs/shard/issue-1243.t
+@@ -1,6 +1,7 @@
+ #!/bin/bash
+
+ . $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
+
+ cleanup;
+
+@@ -22,10 +23,21 @@ TEST $CLI volume set $V0 md-cache-timeout 10
+ # Write data into a file such that its size crosses shard-block-size
+ TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct
+
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
+ # Execute a setxattr on the file.
+ TEST setfattr -n trusted.libvirt -v some-value $M0/foo
+
+ # Size of the file should be the aggregated size, not the shard-block-size
+ EXPECT '8388608' stat -c %s $M0/foo
+
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++# Execute a removexattr on the file.
++TEST setfattr -x trusted.libvirt $M0/foo
++
++# Size of the file should be the aggregated size, not the shard-block-size
++EXPECT '8388608' stat -c %s $M0/foo
+ cleanup
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 6ae4c41..2e2ef5d 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -442,6 +442,9 @@ void shard_local_wipe(shard_local_t *local) {
+ loc_wipe(&local->int_entrylk.loc);
+ loc_wipe(&local->newloc);
+
++ if (local->name)
++ GF_FREE(local->name);
++
+ if (local->int_entrylk.basename)
+ GF_FREE(local->int_entrylk.basename);
+ if (local->fd)
+@@ -5819,46 +5822,216 @@ int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ return 0;
+ }
+
+-int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- const char *name, dict_t *xdata) {
+- int op_errno = EINVAL;
++int32_t
++shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local,
++ char *key)
++{
++ int ret = 0;
++ struct iatt *tmpbuf = NULL;
++ struct iatt *stbuf = NULL;
++ data_t *data = NULL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+- }
++ if (!xdata)
++ return 0;
+
+- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ data = dict_get(xdata, key);
++ if (!data)
++ return 0;
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+- return 0;
+-out:
+- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
+- return 0;
++ tmpbuf = data_to_iatt(data, key);
++ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
++ if (stbuf == NULL) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ *stbuf = *tmpbuf;
++ stbuf->ia_size = local->prebuf.ia_size;
++ stbuf->ia_blocks = local->prebuf.ia_blocks;
++ ret = dict_set_iatt(xdata, key, stbuf, false);
++ if (ret < 0) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ goto err;
++ }
++ return 0;
++
++err:
++ GF_FREE(stbuf);
++ return -1;
+ }
+
+-int32_t shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- const char *name, dict_t *xdata) {
+- int op_errno = EINVAL;
++int32_t
++shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++ int ret = -1;
++ shard_local_t *local = NULL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+- }
++ local = frame->local;
+
+- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto err;
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+- return 0;
+-out:
+- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
+- return 0;
++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
++ if (ret < 0)
++ goto err;
++
++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
++ if (ret < 0)
++ goto err;
++
++ if (local->fd)
++ SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
++ xdata);
++ else
++ SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
++ xdata);
++ return 0;
++
++err:
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++}
++
++int32_t
++shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this)
++{
++ shard_local_t *local = NULL;
++
++ local = frame->local;
++
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
++
++ if (local->fd)
++ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fremovexattr, local->fd,
++ local->name, local->xattr_req);
++ else
++ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->removexattr, &local->loc,
++ local->name, local->xattr_req);
++ return 0;
++}
++
++int32_t
++shard_common_remove_xattr(call_frame_t *frame, xlator_t *this,
++ glusterfs_fop_t fop, loc_t *loc, fd_t *fd,
++ const char *name, dict_t *xdata)
++{
++ int ret = -1;
++ int op_errno = ENOMEM;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
++ inode_t *inode = loc ? loc->inode : fd->inode;
++
++ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
++ if (loc)
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->removexattr, loc, name,
++ xdata);
++ else
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
++ xdata);
++ return 0;
++ }
++
++ /* If shard's special xattrs are attempted to be removed,
++ * fail the fop with EPERM (except if the client is gsyncd).
++ */
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err);
++ }
++
++ /* Repeat the same check for bulk-removexattr */
++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
++ }
++
++ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block size from inode ctx of %s",
++ uuid_utoa(inode->gfid));
++ goto err;
++ }
++
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ if (loc)
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->removexattr, loc, name,
++ xdata);
++ else
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
++ xdata);
++ return 0;
++ }
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
++
++ frame->local = local;
++ local->fop = fop;
++ if (loc) {
++ if (loc_copy(&local->loc, loc) != 0)
++ goto err;
++ }
++
++ if (fd) {
++ local->fd = fd_ref(fd);
++ local->loc.inode = inode_ref(fd->inode);
++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++ }
++
++ if (name) {
++ local->name = gf_strdup(name);
++ if (!local->name)
++ goto err;
++ }
++
++ if (xdata)
++ local->xattr_req = dict_ref(xdata);
++
++ /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is
++ * on an fd. This comes under a generic class of bugs in shard tracked by
++ * bz #1782428.
++ */
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_remove_xattr_handler);
++ return 0;
++err:
++ shard_common_failure_unwind(fop, frame, -1, op_errno);
++ return 0;
++}
++
++int32_t
++shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ const char *name, dict_t *xdata)
++{
++ shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name,
++ xdata);
++ return 0;
++}
++
++int32_t
++shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ const char *name, dict_t *xdata)
++{
++ shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name,
++ xdata);
++ return 0;
+ }
+
+ int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+@@ -5933,10 +6106,6 @@ int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata) {
+ int ret = -1;
+- struct iatt *prebuf = NULL;
+- struct iatt *postbuf = NULL;
+- struct iatt *stbuf = NULL;
+- data_t *data = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+@@ -5947,52 +6116,14 @@ int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie,
+ goto err;
+ }
+
+- if (!xdata)
+- goto unwind;
+-
+- data = dict_get(xdata, GF_PRESTAT);
+- if (data) {
+- stbuf = data_to_iatt(data, GF_PRESTAT);
+- prebuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+- if (prebuf == NULL) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- }
+- *prebuf = *stbuf;
+- prebuf->ia_size = local->prebuf.ia_size;
+- prebuf->ia_blocks = local->prebuf.ia_blocks;
+- ret = dict_set_iatt(xdata, GF_PRESTAT, prebuf, false);
+- if (ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- }
+- prebuf = NULL;
+- }
++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
++ if (ret < 0)
++ goto err;
+
+- data = dict_get(xdata, GF_POSTSTAT);
+- if (data) {
+- stbuf = data_to_iatt(data, GF_POSTSTAT);
+- postbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+- if (postbuf == NULL) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- }
+- *postbuf = *stbuf;
+- postbuf->ia_size = local->prebuf.ia_size;
+- postbuf->ia_blocks = local->prebuf.ia_blocks;
+- ret = dict_set_iatt(xdata, GF_POSTSTAT, postbuf, false);
+- if (ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto err;
+- }
+- postbuf = NULL;
+- }
++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
++ if (ret < 0)
++ goto err;
+
+-unwind:
+ if (local->fd)
+ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+@@ -6002,8 +6133,6 @@ unwind:
+ return 0;
+
+ err:
+- GF_FREE(prebuf);
+- GF_FREE(postbuf);
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
+index 04abd62..1721417 100644
+--- a/xlators/features/shard/src/shard.h
++++ b/xlators/features/shard/src/shard.h
+@@ -318,6 +318,7 @@ typedef struct shard_local {
+ uint32_t deletion_rate;
+ gf_boolean_t cleanup_required;
+ uuid_t base_gfid;
++ char *name;
+ } shard_local_t;
+
+ typedef struct shard_inode_ctx {
+--
+1.8.3.1
+
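
As in patch 0380, the removexattr fix routes both the path-based and fd-based variants through one common function that first decides whether the target is sharded at all. A sketch of that dispatch shape, with illustrative types and stub wind functions:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum target { BY_LOC, BY_FD };

    struct inode_stub {
        bool is_dir_or_symlink;
        uint64_t block_size;  /* 0 => file is not sharded */
    };

    static void
    wind_passthrough(enum target t)
    {
        printf("wind %s fop straight to the child\n",
               t == BY_LOC ? "loc" : "fd");
    }

    static void
    lookup_then_wind(enum target t)
    {
        printf("lookup base file, then wind %s fop\n",
               t == BY_LOC ? "loc" : "fd");
    }

    /* One entry point serves both variants, so the aggregation logic
     * lives in exactly one place. */
    static void
    common_xattr_op(const struct inode_stub *inode, enum target t)
    {
        if (inode->is_dir_or_symlink || inode->block_size == 0) {
            wind_passthrough(t);  /* nothing to aggregate */
            return;
        }
        lookup_then_wind(t);      /* cache aggregated size/blocks first */
    }

    int
    main(void)
    {
        struct inode_stub dir = { true, 0 };
        struct inode_stub sharded = { false, 4 * 1024 * 1024 };
        common_xattr_op(&dir, BY_LOC);
        common_xattr_op(&sharded, BY_FD);
        return 0;
    }

Folding the duplicated prestat/poststat handling into shard_modify_and_set_iatt_in_dict then lets both the setxattr and removexattr callbacks share one aggregation path.
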
diff --git a/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch b/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch
new file mode 100644
index 0000000..3adaa65
--- /dev/null
+++ b/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch
@@ -0,0 +1,38 @@
+From f880df2ce4706dd748a09d3d6db57d49f62a234c Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Thu, 28 May 2020 08:26:47 -0400
+Subject: [PATCH 383/383] common-ha: ganesha-ha.sh bad test for {rhel,centos}
+ for pcs options
+
+The bash [[ ... =~ ... ]] built-in returns _0_ when the regex matches,
+not 1, so the sense of the test is backwards and it never correctly
+detects rhel or centos.
+
+https://review.gluster.org/#/c/glusterfs/+/24502/
+
+Change-Id: Ic9e60aae4ea38aff8f13979080995e60621a68fe
+BUG: 1840794
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/201686
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 4ecf91b..a6814b1 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -1054,7 +1054,7 @@ main()
+ # Fedora 29+ and rhel/centos 8 has PCS-0.10.x
+ # default is pcs-0.10.x options but check for
+ # rhel/centos 7 (pcs-0.9.x) and adjust accordingly
+- if [[ ${ID} =~ {rhel,centos} ]]; then
++ if [[ ! ${ID} =~ {rhel,centos} ]]; then
+ if [[ ${VERSION_ID} == 7.* ]]; then
+ PCS9OR10_PCS_CNAME_OPTION="--name"
+ PCS9OR10_PCS_CLONE_OPTION="--clone"
+--
+1.8.3.1
+
diff --git a/0384-Update-rfc.sh-to-rhgs-3.5.3.patch b/0384-Update-rfc.sh-to-rhgs-3.5.3.patch
new file mode 100644
index 0000000..4db2222
--- /dev/null
+++ b/0384-Update-rfc.sh-to-rhgs-3.5.3.patch
@@ -0,0 +1,26 @@
+From 27dc773af276e33fcca10788fae17d131c8d9bce Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Sun, 31 May 2020 15:46:24 -0400
+Subject: [PATCH 384/449] Update rfc.sh to rhgs-3.5.3
+
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ rfc.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/rfc.sh b/rfc.sh
+index 37d551f..1dca29f 100755
+--- a/rfc.sh
++++ b/rfc.sh
+@@ -18,7 +18,7 @@ done
+ shift $((OPTIND-1))
+
+
+-branch="rhgs-3.5.2";
++branch="rhgs-3.5.3";
+
+ set_hooks_commit_msg()
+ {
+--
+1.8.3.1
+
diff --git a/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch b/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch
new file mode 100644
index 0000000..2b194d3
--- /dev/null
+++ b/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch
@@ -0,0 +1,50 @@
+From 143f85f55ded7a9075408e97d05abd9568d56e7b Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Mon, 25 Nov 2019 16:35:42 +0530
+Subject: [PATCH 385/449] glusterd: start glusterd automatically on abnormal
+ shutdown
+
+If glusterd crashes or goes down abnormally, systemd should
+automatically bring glusterd back up.
+
+With this change, systemd brings glusterd up at most 3 times
+within a period of 1 hour. If that limit is exceeded, glusterd must be
+started manually and the failure count reset using
+systemctl reset-failed.
+
+credits: John Strunk <jstrunk@redhat.com>
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23751/
+> fixes: bz#1776264
+> Change-Id: I312d243652fb13ba028814a2ea615b67e3b10b6a
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1663557
+Change-Id: I312d243652fb13ba028814a2ea615b67e3b10b6a
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202251
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/systemd/glusterd.service.in | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in
+index f604160..b944762 100644
+--- a/extras/systemd/glusterd.service.in
++++ b/extras/systemd/glusterd.service.in
+@@ -15,6 +15,11 @@ ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-leve
+ KillMode=process
+ TimeoutSec=300
+ SuccessExitStatus=15
++Restart=on-abnormal
++RestartSec=60
++StartLimitBurst=3
++StartLimitIntervalSec=3600
++StartLimitInterval=3600
+
+ [Install]
+ WantedBy=multi-user.target
+--
+1.8.3.1
+
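A sketch of the manual recovery flow the commit message describes once
the restart limit is hit (the status output shown is an assumption about
typical systemd behaviour):

    # After StartLimitBurst failed starts within StartLimitIntervalSec,
    # systemd refuses further automatic restarts of the unit.
    systemctl status glusterd          # typically shows start-limit-hit
    systemctl reset-failed glusterd    # clear the failure counter
    systemctl start glusterd           # then start glusterd manually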
diff --git a/0386-glusterd-increase-the-StartLimitBurst.patch b/0386-glusterd-increase-the-StartLimitBurst.patch
new file mode 100644
index 0000000..ff6d0f9
--- /dev/null
+++ b/0386-glusterd-increase-the-StartLimitBurst.patch
@@ -0,0 +1,39 @@
+From 02e7afdfb740db7cfa1a2f0f79933172d172ff27 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Tue, 7 Jan 2020 15:32:13 +0530
+Subject: [PATCH 386/449] glusterd: increase the StartLimitBurst
+
+Based on https://bugzilla.redhat.com/show_bug.cgi?id=1782200#c6
+increasing the limit.
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23970/
+> fixes: bz#1782200
+> Change-Id: Ia885c7bdb2a90f0946c5268da894f6a4da5a69b7
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1663557
+Change-Id: Ia885c7bdb2a90f0946c5268da894f6a4da5a69b7
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202252
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/systemd/glusterd.service.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in
+index b944762..699aea3 100644
+--- a/extras/systemd/glusterd.service.in
++++ b/extras/systemd/glusterd.service.in
+@@ -17,7 +17,7 @@ TimeoutSec=300
+ SuccessExitStatus=15
+ Restart=on-abnormal
+ RestartSec=60
+-StartLimitBurst=3
++StartLimitBurst=6
+ StartLimitIntervalSec=3600
+ StartLimitInterval=3600
+
+--
+1.8.3.1
+
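A quick way to confirm the raised limit after reloading units; the
property names queried here are an assumption based on systemd's usual
naming:

    systemctl daemon-reload
    systemctl show glusterd -p StartLimitBurst -p StartLimitIntervalUSec
    # expected output: StartLimitBurst=6 and StartLimitIntervalUSec=1h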
diff --git a/0387-To-fix-readdir-ahead-memory-leak.patch b/0387-To-fix-readdir-ahead-memory-leak.patch
new file mode 100644
index 0000000..b685215
--- /dev/null
+++ b/0387-To-fix-readdir-ahead-memory-leak.patch
@@ -0,0 +1,47 @@
+From d54f087a2484695ff7ac214d39f2750fddcef2d5 Mon Sep 17 00:00:00 2001
+From: HuangShujun <549702281@qq.com>
+Date: Thu, 5 Dec 2019 10:07:10 +0200
+Subject: [PATCH 387/449] To fix readdir-ahead memory leak
+
+The glusterfs client process leaks memory if several files are created
+under one folder and the folder is then deleted. According to the
+statedump, the ref count of readdir-ahead stays above zero in the
+inode table.
+
+Readdir-ahead gets the parent inode via inode_parent in
+rda_mark_inode_dirty on each rda_writev_cbk; inode_parent increases
+the parent folder's inode ref count, but readdir-ahead never unrefs it.
+
+The fix is to unref the parent inode at the end of rda_mark_inode_dirty.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23815
+> Fixes: bz#1779055
+> Signed-off-by: HuangShujun <549702281@qq.com>
+> Change-Id: Iee68ab1089cbc2fbc4185b93720fb1f66ee89524
+
+BUG: 1781550
+Change-Id: Iee68ab1089cbc2fbc4185b93720fb1f66ee89524
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202312
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/readdir-ahead/src/readdir-ahead.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c
+index 7fd4f8d..933941d 100644
+--- a/xlators/performance/readdir-ahead/src/readdir-ahead.c
++++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c
+@@ -254,6 +254,7 @@ rda_mark_inode_dirty(xlator_t *this, inode_t *inode)
+ }
+ }
+ UNLOCK(&parent->lock);
++ inode_unref(parent);
+ }
+
+ return;
+--
+1.8.3.1
+
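A rough reproducer for the leak, using the statedump facility the commit
message refers to (mount path, volume name, and client PID are
placeholders):

    # create several files under one folder via a FUSE mount, then
    # delete the folder
    mkdir /mnt/testvol/dir1
    for i in $(seq 1 100); do touch /mnt/testvol/dir1/file$i; done
    rm -rf /mnt/testvol/dir1
    # dump the client's in-memory state and inspect the inode table;
    # before the fix, the deleted folder keeps a positive ref count
    gluster volume statedump testvol client localhost:12345
    grep -A3 'readdir-ahead' /var/run/gluster/*.dump.*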
diff --git a/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch b/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch
new file mode 100644
index 0000000..dc23ba8
--- /dev/null
+++ b/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch
@@ -0,0 +1,142 @@
+From fbda9baaf7231e3237277348cc7e873f3113fd14 Mon Sep 17 00:00:00 2001
+From: l17zhou <cynthia.zhou@nokia-sbell.com.cn>
+Date: Mon, 4 Nov 2019 08:45:52 +0200
+Subject: [PATCH 388/449] rpc: Cleanup SSL specific data at the time of freeing
+ rpc object
+
+Problem: At the time of cleaning up the rpc object, SSL-specific
+         data is not freed, so it leaks.
+
+Solution: To avoid the leak, clean up the SSL-specific data when
+          the rpc object is cleaned up.
+
+> Credits: l17zhou <cynthia.zhou@nokia-sbell.com.cn>
+> Fixes: bz#1768407
+> Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0
+> (Cherry pick from commit 54ed71dba174385ab0d8fa415e09262f6250430c)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23650/)
+
+Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0
+BUG: 1786516
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202308
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ rpc/rpc-transport/socket/src/socket.c | 22 ++++++++++++++++++++--
+ tests/features/ssl-authz.t | 23 ++++++++++++++++++++---
+ 2 files changed, 40 insertions(+), 5 deletions(-)
+
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index 65845ea..226b2e2 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -446,6 +446,7 @@ ssl_setup_connection_postfix(rpc_transport_t *this)
+ gf_log(this->name, GF_LOG_DEBUG,
+ "SSL verification succeeded (client: %s) (server: %s)",
+ this->peerinfo.identifier, this->myinfo.identifier);
++ X509_free(peer);
+ return gf_strdup(peer_CN);
+
+ /* Error paths. */
+@@ -1157,7 +1158,15 @@ __socket_reset(rpc_transport_t *this)
+ memset(&priv->incoming, 0, sizeof(priv->incoming));
+
+ event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx);
+-
++ if (priv->use_ssl && priv->ssl_ssl) {
++ SSL_clear(priv->ssl_ssl);
++ SSL_free(priv->ssl_ssl);
++ priv->ssl_ssl = NULL;
++ }
++ if (priv->use_ssl && priv->ssl_ctx) {
++ SSL_CTX_free(priv->ssl_ctx);
++ priv->ssl_ctx = NULL;
++ }
+ priv->sock = -1;
+ priv->idx = -1;
+ priv->connected = -1;
+@@ -3217,7 +3226,6 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ new_priv->sock = new_sock;
+
+ new_priv->ssl_enabled = priv->ssl_enabled;
+- new_priv->ssl_ctx = priv->ssl_ctx;
+ new_priv->connected = 1;
+ new_priv->is_server = _gf_true;
+
+@@ -4672,6 +4680,16 @@ fini(rpc_transport_t *this)
+ pthread_mutex_destroy(&priv->out_lock);
+ pthread_mutex_destroy(&priv->cond_lock);
+ pthread_cond_destroy(&priv->cond);
++ if (priv->use_ssl && priv->ssl_ssl) {
++ SSL_clear(priv->ssl_ssl);
++ SSL_free(priv->ssl_ssl);
++ priv->ssl_ssl = NULL;
++ }
++ if (priv->use_ssl && priv->ssl_ctx) {
++ SSL_CTX_free(priv->ssl_ctx);
++ priv->ssl_ctx = NULL;
++ }
++
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ }
+diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
+index cae010c..132b598 100755
+--- a/tests/features/ssl-authz.t
++++ b/tests/features/ssl-authz.t
+@@ -25,6 +25,7 @@ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume info;
+
++TEST $CLI v set all cluster.brick-multiplex on
+ # Construct a cipher list that excludes CBC because of POODLE.
+ # http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566
+ #
+@@ -45,12 +46,12 @@ TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+
+-TEST $CLI volume create $V0 $H0:$B0/1
++TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force
+ TEST $CLI volume set $V0 server.ssl on
+ TEST $CLI volume set $V0 client.ssl on
+ TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers)
+ TEST $CLI volume start $V0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
+
+ # This mount should SUCCEED because ssl-allow=* by default. This effectively
+ # disables SSL authorization, though authentication and encryption might still
+@@ -59,11 +60,27 @@ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+ TEST ping_file $M0/before
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
++glusterfsd_pid=`pgrep glusterfsd`
++TEST [ $glusterfsd_pid != 0 ]
++start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
++echo "Memory consumption for glusterfsd process"
++for i in $(seq 1 100); do
++ gluster v heal $V0 info >/dev/null
++done
++
++end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
++diff=$((end-start))
++
++# If memory consumption is more than 5M some leak in SSL code path
++
++TEST [ $diff -lt 5000 ]
++
++
+ # Set ssl-allow to a wildcard that includes our identity.
+ TEST $CLI volume stop $V0
+ TEST $CLI volume set $V0 auth.ssl-allow Any*
+ TEST $CLI volume start $V0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
+
+ # This mount should SUCCEED because we match the wildcard.
+ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+--
+1.8.3.1
+
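The new ssl-authz.t check above boils down to sampling the brick
process's resident memory around repeated heal-info calls; in isolation
(volume name and the 5000 KB threshold mirror the test):

    pid=$(pgrep glusterfsd | head -n1)
    start=$(pmap -x $pid | awk '/total/ {print $4}')
    for i in $(seq 1 100); do
        gluster v heal testvol info >/dev/null
    done
    end=$(pmap -x $pid | awk '/total/ {print $4}')
    # growth beyond ~5 MB would suggest a leak in the SSL code path
    [ $((end - start)) -lt 5000 ] && echo "no significant SSL leak"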
diff --git a/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch b/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch
new file mode 100644
index 0000000..7f20fb2
--- /dev/null
+++ b/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch
@@ -0,0 +1,297 @@
+From 50318713486e79d9258cf22e656caff402256dde Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Sun, 20 Oct 2019 22:01:01 +0530
+Subject: [PATCH 389/449] posix: Avoid diskspace error in case of overwriting
+ the data
+
+Problem: Sometimes fops like posix_writev, posix_fallocate and
+         posix_zerofill fail with ENOSPC once the storage.reserve
+         threshold is reached, even when the fop only overwrites data
+
+Solution: Retry the fop in the overwrite case if the disk space
+          check fails
+
+> Credits: kinsu <vpolakis@gmail.com>
+> Change-Id: I987d73bcf47ed1bb27878df40c39751296e95fe8
+> Updates: #745
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit ca3e5905ac02fb9c373ac3de10b44f061d04cd6f)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23572/)
+
+Change-Id: I987d73bcf47ed1bb27878df40c39751296e95fe8
+BUG: 1787331
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202307
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/posix/bug-1651445.t | 1 +
+ xlators/storage/posix/src/posix-entry-ops.c | 1 -
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 141 ++++++++++++++++++++++---
+ 3 files changed, 126 insertions(+), 17 deletions(-)
+
+diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t
+index 5248d47..4d08b69 100644
+--- a/tests/bugs/posix/bug-1651445.t
++++ b/tests/bugs/posix/bug-1651445.t
+@@ -33,6 +33,7 @@ sleep 5
+ # setup_lvm create lvm partition of 150M and 40M are reserve so after
+ # consuming more than 110M next dd should fail
+ TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
++TEST dd if=/dev/urandom of=$M0/a bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc
+
+ rm -rf $M0/*
+
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index 283b305..bea0bbf 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -1634,7 +1634,6 @@ posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+- DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+ MAKE_ENTRY_HANDLE(real_oldpath, par_oldpath, this, oldloc, NULL);
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index a2a518f..bcce06e 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -692,6 +692,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ gf_boolean_t locked = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
+ struct posix_private *priv = NULL;
++ gf_boolean_t check_space_error = _gf_false;
++ struct stat statbuf = {
++ 0,
++ };
+
+ DECLARE_OLD_FS_ID_VAR;
+
+@@ -711,7 +715,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ if (priv->disk_reserve)
+ posix_disk_space_check(this);
+
+- DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out);
++ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock);
++
++overwrite:
++ check_space_error = _gf_true;
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+@@ -735,7 +742,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fallocate (fstat) failed on fd=%p", fd);
+- goto out;
++ goto unlock;
+ }
+
+ if (xdata) {
+@@ -745,7 +752,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ ret = -EIO;
+- goto out;
++ goto unlock;
+ }
+ }
+
+@@ -756,7 +763,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ "fallocate failed on %s offset: %jd, "
+ "len:%zu, flags: %d",
+ uuid_utoa(fd->inode->gfid), offset, len, flags);
+- goto out;
++ goto unlock;
+ }
+
+ ret = posix_fdstat(this, fd->inode, pfd->fd, statpost);
+@@ -764,16 +771,47 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fallocate (fstat) failed on fd=%p", fd);
+- goto out;
++ goto unlock;
+ }
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost);
+
+-out:
++unlock:
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
++
++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) {
++#ifdef FALLOC_FL_KEEP_SIZE
++ if (flags & FALLOC_FL_KEEP_SIZE) {
++ goto overwrite;
++ }
++#endif
++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
++ "pfd is NULL from fd=%p", fd);
++ goto out;
++ }
++
++ if (sys_fstat(pfd->fd, &statbuf) < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED,
++ "%d", pfd->fd);
++ goto out;
++ }
++
++ if (offset + len <= statbuf.st_size) {
++ gf_msg_debug(this->name, 0,
++ "io vector size will not"
++ " change disk size so allow overwrite for"
++ " fd %d",
++ pfd->fd);
++ goto overwrite;
++ }
++ }
++
++out:
+ SET_TO_OLD_FS_ID();
+ if (ret == ENOSPC)
+ ret = -ENOSPC;
+@@ -1083,25 +1121,57 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ dict_t *rsp_xdata = NULL;
++ gf_boolean_t check_space_error = _gf_false;
++ struct posix_fd *pfd = NULL;
++ struct stat statbuf = {
++ 0,
++ };
+
+- VALIDATE_OR_GOTO(frame, out);
+- VALIDATE_OR_GOTO(this, out);
++ VALIDATE_OR_GOTO(frame, unwind);
++ VALIDATE_OR_GOTO(this, unwind);
+
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
++overwrite:
++ check_space_error = _gf_true;
+ ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost,
+ xdata, &rsp_xdata);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+- goto out;
++ goto unwind;
+ }
+
+ STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata);
+ return 0;
+
+ out:
++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) {
++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
++ "pfd is NULL from fd=%p", fd);
++ goto out;
++ }
++
++ if (sys_fstat(pfd->fd, &statbuf) < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED,
++ "%d", pfd->fd);
++ goto out;
++ }
++
++ if (offset + len <= statbuf.st_size) {
++ gf_msg_debug(this->name, 0,
++ "io vector size will not"
++ " change disk size so allow overwrite for"
++ " fd %d",
++ pfd->fd);
++ goto overwrite;
++ }
++ }
++
++unwind:
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL,
+ rsp_xdata);
+ return 0;
+@@ -1857,19 +1927,28 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_boolean_t write_append = _gf_false;
+ gf_boolean_t update_atomic = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
++ gf_boolean_t check_space_error = _gf_false;
++ struct stat statbuf = {
++ 0,
++ };
++ int totlen = 0;
++ int idx = 0;
+
+- VALIDATE_OR_GOTO(frame, out);
+- VALIDATE_OR_GOTO(this, out);
+- VALIDATE_OR_GOTO(fd, out);
+- VALIDATE_OR_GOTO(fd->inode, out);
+- VALIDATE_OR_GOTO(vector, out);
+- VALIDATE_OR_GOTO(this->private, out);
++ VALIDATE_OR_GOTO(frame, unwind);
++ VALIDATE_OR_GOTO(this, unwind);
++ VALIDATE_OR_GOTO(fd, unwind);
++ VALIDATE_OR_GOTO(fd->inode, unwind);
++ VALIDATE_OR_GOTO(vector, unwind);
++ VALIDATE_OR_GOTO(this->private, unwind);
+
+ priv = this->private;
+
+- VALIDATE_OR_GOTO(priv, out);
++ VALIDATE_OR_GOTO(priv, unwind);
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
++overwrite:
++
++ check_space_error = _gf_true;
+ if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
+ "writev received on a block/char file (%s)",
+@@ -2011,6 +2090,36 @@ out:
+ locked = _gf_false;
+ }
+
++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) {
++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
++ "pfd is NULL from fd=%p", fd);
++ goto unwind;
++ }
++
++ if (sys_fstat(pfd->fd, &statbuf) < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED,
++ "%d", pfd->fd);
++ goto unwind;
++ }
++
++ for (idx = 0; idx < count; idx++) {
++            totlen += vector[idx].iov_len;
++ }
++
++ if ((offset + totlen <= statbuf.st_size) &&
++ !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) {
++ gf_msg_debug(this->name, 0,
++ "io vector size will not"
++ " change disk size so allow overwrite for"
++ " fd %d",
++ pfd->fd);
++ goto overwrite;
++ }
++ }
++
++unwind:
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop,
+ rsp_xdata);
+
+--
+1.8.3.1
+
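Following the updated bug-1651445.t above, the behaviour change can be
summarised as: once the storage.reserve limit is reached, writes that
need new blocks still fail with ENOSPC, while an in-place overwrite
within the file's current size is retried and succeeds (mount path is a
placeholder):

    # fails with ENOSPC: requires new blocks beyond the reserve limit
    dd if=/dev/zero of=/mnt/testvol/c bs=5M count=1
    # succeeds after the fix: rewrites bytes inside the existing file
    dd if=/dev/urandom of=/mnt/testvol/a bs=1022 count=1 \
        oflag=seek_bytes,sync seek=102 conv=notrunc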
diff --git a/0390-glusterd-deafult-options-after-volume-reset.patch b/0390-glusterd-deafult-options-after-volume-reset.patch
new file mode 100644
index 0000000..d95ce71
--- /dev/null
+++ b/0390-glusterd-deafult-options-after-volume-reset.patch
@@ -0,0 +1,93 @@
+From 86df0ced1cac0e3c48f6149bb2f5442f8548f89e Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Wed, 25 Dec 2019 21:56:32 +0530
+Subject: [PATCH 390/449] glusterd: default options after volume reset
+
+Problem: the default option transport.address-family disappears
+from volume info output after a volume reset.
+
+Cause: from 3.8.0 onwards the volume option transport.address-family
+has a default value, so any volume which is created will have this
+option set and volume info will show it in its output. But volume
+reset does not handle this option.
+
+Solution: In glusterd_enable_default_options(), we should add this
+option along with the other default options. This function is called
+by glusterd_options_reset() for the volume reset command.
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23921/
+> fixes: bz#1786478
+> Change-Id: I58f7aa24cf01f308c4efe6cae748cc3bc8b99b1d
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1781710
+Change-Id: I58f7aa24cf01f308c4efe6cae748cc3bc8b99b1d
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202258
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/optimized-basic-testcases.t | 5 +++++
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 23 +++++++++++++++++++++++
+ 2 files changed, 28 insertions(+)
+
+diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
+index d700b5e..c7e8c32 100644
+--- a/tests/bugs/glusterd/optimized-basic-testcases.t
++++ b/tests/bugs/glusterd/optimized-basic-testcases.t
+@@ -69,6 +69,11 @@ TEST pidof glusterd;
+ TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+ EXPECT 'Created' volinfo_field $V0 'Status';
+
++#bug-1786478 - default volume option after volume reset
++addr_family=`volinfo_field $V0 'transport.address-family'`
++TEST $CLI volume reset $V0
++EXPECT $addr_family volinfo_field $V0 'transport.address-family'
++
+ #bug-955588 - uuid validation
+
+ uuid=`grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f2 -d=`
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index c92cdf3..6654741 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -13032,6 +13032,11 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option)
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
++#ifdef IPV6_DEFAULT
++ char *addr_family = "inet6";
++#else
++ char *addr_family = "inet";
++#endif
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -13109,6 +13114,24 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option)
+ }
+ }
+ }
++
++ if (conf->op_version >= GD_OP_VERSION_3_9_0) {
++ if (!option || !strcmp("transport.address-family", option)) {
++ if (volinfo->transport_type == GF_TRANSPORT_TCP) {
++ ret = dict_set_dynstr_with_alloc(
++ volinfo->dict, "transport.address-family", addr_family);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno,
++ GD_MSG_DICT_SET_FAILED,
++ "failed to set transport."
++ "address-family on %s",
++ volinfo->volname);
++ goto out;
++ }
++ }
++ }
++ }
++
+ if (conf->op_version >= GD_OP_VERSION_7_0) {
+ ret = dict_set_dynstr_with_alloc(volinfo->dict,
+ "storage.fips-mode-rchecksum", "on");
+--
+1.8.3.1
+
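The new testcase reduces to checking that the default option survives a
reset; roughly (volume name is a placeholder):

    before=$(gluster volume info testvol | grep transport.address-family)
    gluster volume reset testvol
    after=$(gluster volume info testvol | grep transport.address-family)
    # with the fix, both values match (e.g. transport.address-family: inet)
    [ "$before" = "$after" ] && echo "default option preserved"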
diff --git a/0391-glusterd-unlink-the-file-after-killing-the-process.patch b/0391-glusterd-unlink-the-file-after-killing-the-process.patch
new file mode 100644
index 0000000..2a88254
--- /dev/null
+++ b/0391-glusterd-unlink-the-file-after-killing-the-process.patch
@@ -0,0 +1,39 @@
+From d23859d5cbd5823b2587811aa57030436ce9e74c Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Tue, 17 Dec 2019 15:52:30 +0530
+Subject: [PATCH 391/449] glusterd: unlink the file after killing the process
+
+In glusterd_proc_stop(), after killing the pid
+we should remove the pidfile.
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23890/
+> fixes: bz#1784375
+> Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1784211
+Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202257
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+index f55a5fd..a05c90d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+@@ -107,6 +107,8 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags)
+ "service, reason:%s",
+ proc->name, strerror(errno));
+ }
++ } else {
++ (void)glusterd_unlink_file(proc->pidfile);
+ }
+ if (flags != PROC_STOP_FORCE)
+ goto out;
+--
+1.8.3.1
+
diff --git a/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch b/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch
new file mode 100644
index 0000000..e295e4f
--- /dev/null
+++ b/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch
@@ -0,0 +1,187 @@
+From a30a5fdef2e252eba9f44a3c671de8f3aa4f17d7 Mon Sep 17 00:00:00 2001
+From: Vishal Pandey <vpandey@redhat.com>
+Date: Tue, 19 Nov 2019 11:39:22 +0530
+Subject: [PATCH 392/449] glusterd: Brick process fails to come up with
+ brickmux on
+
+Issue:
+1- In a cluster of 3 nodes N1, N2, N3, create 3 volumes vol1,
+vol2, vol3 with 3 bricks (one from each node)
+2- Set cluster.brick-multiplex on
+3- Start all 3 volumes
+4- Check if all bricks on a node are running on same port
+5- Kill N1
+6- Set performance.readdir-ahead for volumes vol1, vol2, vol3
+7- Bring N1 up and check volume status
+8- All bricks processes not running on N1.
+
+Root Cause -
+Since there is a diff in volfile versions on N1 as compared
+to N2 and N3, glusterd_import_friend_volume() is called.
+glusterd_import_friend_volume() copies the new_volinfo and deletes
+old_volinfo and then calls glusterd_start_bricks().
+glusterd_start_bricks() looks for the volfiles and sends an rpc
+request to glusterfs_handle_attach(). Now, since the volinfo
+has been deleted from the priv->volumes list by
+glusterd_delete_stale_volume() before glusterd_start_bricks(),
+while glusterd_create_volfiles_and_notify_services() and
+glusterd_list_add_order() are called only after glusterd_start_bricks(),
+the attach RPC request gets an empty volfile path,
+which causes the brick to crash.
+
+Fix - Call glusterd_list_add_order() and
+glusterd_create_volfiles_and_notify_services() before the
+glusterd_start_bricks() call is made in glusterd_import_friend_volume()
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23724/
+> Change-Id: Idfe0e8710f7eb77ca3ddfa1cabeb45b2987f41aa
+> Fixes: bz#1773856
+> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+BUG: 1683602
+Change-Id: Idfe0e8710f7eb77ca3ddfa1cabeb45b2987f41aa
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202255
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../glusterd/brick-mux-validation-in-cluster.t | 61 +++++++++++++++++++++-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 28 +++++-----
+ 2 files changed, 75 insertions(+), 14 deletions(-)
+
+diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
+index 4e57038..f088dbb 100644
+--- a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
++++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
+@@ -7,6 +7,20 @@ function count_brick_processes {
+ pgrep glusterfsd | wc -l
+ }
+
++function count_brick_pids {
++ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
++ | grep -v "N/A" | sort | uniq | wc -l
++}
++
++function count_N/A_brick_pids {
++ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
++ | grep -- '\-1' | sort | uniq | wc -l
++}
++
++function check_peers {
++ $CLI_2 peer status | grep 'Peer in Cluster (Connected)' | wc -l
++}
++
+ cleanup;
+
+ TEST launch_cluster 3
+@@ -48,4 +62,49 @@ TEST $CLI_1 volume stop $V1
+
+ EXPECT 3 count_brick_processes
+
+-cleanup
++TEST $CLI_1 volume stop $META_VOL
++
++TEST $CLI_1 volume delete $META_VOL
++TEST $CLI_1 volume delete $V0
++TEST $CLI_1 volume delete $V1
++
++#bug-1773856 - Brick process fails to come up with brickmux on
++
++TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 $H3:$B3/${V0}1 force
++TEST $CLI_1 volume start $V0
++
++
++EXPECT 3 count_brick_processes
++
++#create and start a new volume
++TEST $CLI_1 volume create $V1 $H1:$B1/${V1}2 $H2:$B2/${V1}2 $H3:$B3/${V1}2 force
++TEST $CLI_1 volume start $V1
++
++EXPECT 3 count_brick_processes
++
++V2=patchy2
++TEST $CLI_1 volume create $V2 $H1:$B1/${V2}3 $H2:$B2/${V2}3 $H3:$B3/${V2}3 force
++TEST $CLI_1 volume start $V2
++
++EXPECT 3 count_brick_processes
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
++
++TEST kill_node 1
++
++sleep 10
++
++EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
++
++$CLI_2 volume set $V0 performance.readdir-ahead on
++$CLI_2 volume set $V1 performance.readdir-ahead on
++
++TEST $glusterd_1;
++
++sleep 10
++
++EXPECT 4 count_brick_processes
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids
++
++cleanup;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 6654741..1b78812 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -4988,16 +4988,6 @@ glusterd_import_friend_volume(dict_t *peer_data, int count)
+ glusterd_volinfo_unref(old_volinfo);
+ }
+
+- if (glusterd_is_volume_started(new_volinfo)) {
+- (void)glusterd_start_bricks(new_volinfo);
+- if (glusterd_is_snapd_enabled(new_volinfo)) {
+- svc = &(new_volinfo->snapd.svc);
+- if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) {
+- gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+- }
+- }
+- }
+-
+ ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
+@@ -5007,19 +4997,31 @@ glusterd_import_friend_volume(dict_t *peer_data, int count)
+ goto out;
+ }
+
+- ret = glusterd_create_volfiles_and_notify_services(new_volinfo);
++ ret = glusterd_create_volfiles(new_volinfo);
+ if (ret)
+ goto out;
+
++ glusterd_list_add_order(&new_volinfo->vol_list, &priv->volumes,
++ glusterd_compare_volume_name);
++
++ if (glusterd_is_volume_started(new_volinfo)) {
++ (void)glusterd_start_bricks(new_volinfo);
++ if (glusterd_is_snapd_enabled(new_volinfo)) {
++ svc = &(new_volinfo->snapd.svc);
++ if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) {
++ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
++ }
++ }
++ }
++
+ ret = glusterd_import_quota_conf(peer_data, count, new_volinfo, "volume");
+ if (ret) {
+ gf_event(EVENT_IMPORT_QUOTA_CONF_FAILED, "volume=%s",
+ new_volinfo->volname);
+ goto out;
+ }
+- glusterd_list_add_order(&new_volinfo->vol_list, &priv->volumes,
+- glusterd_compare_volume_name);
+
++ ret = glusterd_fetchspec_notify(this);
+ out:
+ gf_msg_debug("glusterd", 0, "Returning with ret: %d", ret);
+ return ret;
+--
+1.8.3.1
+
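A condensed form of the reproducer added to
brick-mux-validation-in-cluster.t, assuming a 3-node cluster with one
brick per node per volume:

    gluster volume set all cluster.brick-multiplex on
    # with multiplexing on, all bricks of a node share one glusterfsd
    pgrep glusterfsd | wc -l                 # expect one per node
    # kill node N1, change volume options from a surviving node,
    # then bring N1's glusterd back up:
    gluster volume set vol1 performance.readdir-ahead on
    systemctl start glusterd                 # on N1
    gluster volume status                    # expect no N/A brick PIDs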
diff --git a/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch b/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch
new file mode 100644
index 0000000..bb93180
--- /dev/null
+++ b/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch
@@ -0,0 +1,129 @@
+From b528c21e6fedc9ac841942828b82e0c808da5efb Mon Sep 17 00:00:00 2001
+From: Sheetal Pamecha <spamecha@redhat.com>
+Date: Thu, 2 Jan 2020 12:05:12 +0530
+Subject: [PATCH 393/449] afr: restore timestamp of files during metadata heal
+
+For files: during metadata heal, we restore timestamps
+only for non-regular (char, block etc.) files.
+Extending this to regular files as well, since the timestamp
+is also updated via the touch command
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23953/
+> fixes: bz#1787274
+> Change-Id: I26fe4fb6dff679422ba4698a7f828bf62ca7ca18
+> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+
+BUG: 1761531
+Change-Id: I26fe4fb6dff679422ba4698a7f828bf62ca7ca18
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202332
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bug-1761531-metadata-heal-restore-time.t | 74 ++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-self-heal-metadata.c | 8 +--
+ 2 files changed, 76 insertions(+), 6 deletions(-)
+ create mode 100644 tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
+
+diff --git a/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
+new file mode 100644
+index 0000000..7e24eae
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
+@@ -0,0 +1,74 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++cleanup
++
++GET_MDATA_PATH=$(dirname $0)/../../utils
++build_tester $GET_MDATA_PATH/get-mdata-xattr.c
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0..2}
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++
++TEST touch $M0/a
++sleep 1
++TEST kill_brick $V0 $H0 $B0/brick0
++TEST touch $M0/a
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++mtime0=$(get_mtime $B0/brick0/a)
++mtime1=$(get_mtime $B0/brick1/a)
++TEST [ $mtime0 -eq $mtime1 ]
++
++ctime0=$(get_ctime $B0/brick0/a)
++ctime1=$(get_ctime $B0/brick1/a)
++TEST [ $ctime0 -eq $ctime1 ]
++
++###############################################################################
++# Repeat the test with ctime feature disabled.
++TEST $CLI volume set $V0 features.ctime off
++
++TEST touch $M0/b
++sleep 1
++TEST kill_brick $V0 $H0 $B0/brick0
++TEST touch $M0/b
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++mtime2=$(get_mtime $B0/brick0/b)
++mtime3=$(get_mtime $B0/brick1/b)
++TEST [ $mtime2 -eq $mtime3 ]
++
++TEST rm $GET_MDATA_PATH/get-mdata-xattr
++
++TEST force_umount $M0
++TEST $CLI volume stop $V0
++TEST $CLI volume delete $V0
++
++cleanup
+diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
+index ecfa791..f4e31b6 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
+@@ -421,12 +421,8 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
+ if (ret)
+ goto unlock;
+
+- /* Restore atime/mtime for files that don't need data heal as
+- * restoring timestamps happens only as a part of data-heal.
+- */
+- if (!IA_ISREG(locked_replies[source].poststat.ia_type))
+- afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
+- locked_replies);
++ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
++ locked_replies);
+
+ ret = afr_selfheal_undo_pending(
+ frame, this, inode, sources, sinks, healed_sinks, undid_pending,
+--
+1.8.3.1
+
diff --git a/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch b/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch
new file mode 100644
index 0000000..96a8f74
--- /dev/null
+++ b/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch
@@ -0,0 +1,38 @@
+From 768a6d9bca86c0a50128b8776c11ef2b6d36388d Mon Sep 17 00:00:00 2001
+From: Vishal Pandey <vpandey@redhat.com>
+Date: Thu, 21 Nov 2019 12:56:34 +0530
+Subject: [PATCH 394/449] man/gluster: Add volume top command to gluster man
+ page
+
+> Upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23735/
+> Change-Id: Ib74607d2b2e5a1a0316221f1176a7dcccea632d4
+> Fixes: bz#1774866
+> Signed-off-by: Vishal Pandey <vpandey@redhat.com>
+
+BUG: 1754391
+Change-Id: Ib74607d2b2e5a1a0316221f1176a7dcccea632d4
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202333
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ doc/gluster.8 | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/doc/gluster.8 b/doc/gluster.8
+index da6472d..88cbf44 100644
+--- a/doc/gluster.8
++++ b/doc/gluster.8
+@@ -113,6 +113,9 @@ Rotate the log file for corresponding volume/brick.
+ \fB\ volume profile <VOLNAME> {start|info [peek|incremental [peek]|cumulative|clear]|stop} [nfs] \fR
+ Profile operations on the volume. Once started, volume profile <volname> info provides cumulative statistics of the FOPs performed.
+ .TP
++\fB\ volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>] \fR
++Generates a profile of a volume representing the performance and bottlenecks/hotspots of each brick.
++.TP
+ \fB\ volume statedump <VOLNAME> [[nfs|quotad] [all|mem|iobuf|callpool|priv|fd|inode|history]... | [client <hostname:process-id>]] \fR
+ Dumps the in memory state of the specified process or the bricks of the volume.
+ .TP
+--
+1.8.3.1
+
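Usage examples matching the syntax documented above (volume name, brick
path, and sizes are placeholders):

    # list the ten most-opened files on the volume
    gluster volume top testvol open list-cnt 10
    # measure read throughput on one brick
    gluster volume top testvol read-perf bs 4096 count 1024 \
        brick server1:/bricks/brick1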
diff --git a/0395-Cli-Removing-old-log-rotate-command.patch b/0395-Cli-Removing-old-log-rotate-command.patch
new file mode 100644
index 0000000..0918777
--- /dev/null
+++ b/0395-Cli-Removing-old-log-rotate-command.patch
@@ -0,0 +1,111 @@
+From 5b3fcc8db86b4dc7af1eb63315ca2ff41c60fdea Mon Sep 17 00:00:00 2001
+From: kshithijiyer <kshithij.ki@gmail.com>
+Date: Sat, 30 Nov 2019 15:25:11 +0530
+Subject: [PATCH 395/449] [Cli] Removing old log rotate command.
+
+The old command for log rotate is still present; removing
+it completely. Also adding a testcase to exercise the
+log rotate command with both the old and the new syntax,
+and fixing testcases which use the old syntax to use the
+new one.
+
+Code to be removed:
+1. In cli-cmd-volume.c from struct cli_cmd volume_cmds[]:
+{"volume log rotate <VOLNAME> [BRICK]", cli_cmd_log_rotate_cbk,
+ "rotate the log file for corresponding volume/brick"
+ " NOTE: This is an old syntax, will be deprecated from next release."},
+
+2. In cli-cmd-volume.c from cli_cmd_log_rotate_cbk():
+ ||(strcmp("rotate", words[2]) == 0)))
+
+3. In cli-cmd-parser.c from cli_cmd_log_rotate_parse()
+if (strcmp("rotate", words[2]) == 0)
+ volname = (char *)words[3];
+else
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23392/
+> fixes: bz#1750387
+> Change-Id: I56e4d295044e8d5fd1fc0d848bc87e135e9e32b4
+> Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
+
+BUG: 1784415
+Change-Id: I56e4d295044e8d5fd1fc0d848bc87e135e9e32b4
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202334
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 2 --
+ cli/src/cli-cmd-volume.c | 7 +------
+ tests/bugs/glusterd/optimized-basic-testcases.t | 3 ++-
+ tests/bugs/glusterfs-server/bug-852147.t | 2 +-
+ 4 files changed, 4 insertions(+), 10 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 4456a7b..ac0a263 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -2592,8 +2592,6 @@ cli_cmd_log_rotate_parse(const char **words, int wordcount, dict_t **options)
+
+ if (strcmp("rotate", words[3]) == 0)
+ volname = (char *)words[2];
+- else if (strcmp("rotate", words[2]) == 0)
+- volname = (char *)words[3];
+ GF_ASSERT(volname);
+
+ ret = dict_set_str(dict, "volname", volname);
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 754d333..f33fc99 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -2349,8 +2349,7 @@ cli_cmd_log_rotate_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ goto out;
+ }
+
+- if (!((strcmp("rotate", words[2]) == 0) ||
+- (strcmp("rotate", words[3]) == 0))) {
++ if (!(strcmp("rotate", words[3]) == 0)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+@@ -3401,10 +3400,6 @@ struct cli_cmd volume_cmds[] = {
+ {"volume log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk,
+ "rotate the log file for corresponding volume/brick"},
+
+- {"volume log rotate <VOLNAME> [BRICK]", cli_cmd_log_rotate_cbk,
+- "rotate the log file for corresponding volume/brick"
+- " NOTE: This is an old syntax, will be deprecated from next release."},
+-
+ {"volume sync <HOSTNAME> [all|<VOLNAME>]", cli_cmd_sync_volume_cbk,
+ "sync the volume information from a peer"},
+
+diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
+index c7e8c32..862f329 100644
+--- a/tests/bugs/glusterd/optimized-basic-testcases.t
++++ b/tests/bugs/glusterd/optimized-basic-testcases.t
+@@ -129,7 +129,8 @@ TEST ! $CLI volume set all $V0 cluster.op-version $OP_VERS_NEW
+
+ #bug-1022055 - validate log rotate command
+
+-TEST $CLI volume log rotate $V0;
++TEST ! $CLI volume log rotate $V0;
++TEST $CLI volume log $V0 rotate;
+
+ #bug-1092841 - validating barrier enable/disable
+
+diff --git a/tests/bugs/glusterfs-server/bug-852147.t b/tests/bugs/glusterfs-server/bug-852147.t
+index c644cfa..75db2a2 100755
+--- a/tests/bugs/glusterfs-server/bug-852147.t
++++ b/tests/bugs/glusterfs-server/bug-852147.t
+@@ -66,7 +66,7 @@ ren_file=$log_file".*"
+ rm -rf $ren_file
+
+ #Initiating log rotate
+-TEST $CLI volume log rotate $V0
++TEST $CLI volume log $V0 rotate
+
+ #Capturing new log file's size
+ new_file_size=`file-size $log_file`
+--
+1.8.3.1
+
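After this change only the newer word order is accepted, as the updated
testcases show (volume name is a placeholder):

    gluster volume log rotate testvol    # old syntax: now rejected
    gluster volume log testvol rotate    # new syntax: rotates the log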
diff --git a/0396-Updating-gluster-manual.patch b/0396-Updating-gluster-manual.patch
new file mode 100644
index 0000000..bb33d10
--- /dev/null
+++ b/0396-Updating-gluster-manual.patch
@@ -0,0 +1,56 @@
+From 728aab1c1cfcf352d4ca1fde0b80044dc24bd9fa Mon Sep 17 00:00:00 2001
+From: Rishubh Jain <risjain@redhat.com>
+Date: Sun, 18 Aug 2019 18:02:57 +0530
+Subject: [PATCH 396/449] Updating gluster manual.
+
+Adding disperse-data to gluster manual under
+volume create command
+
+> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23258/
+> Change-Id: Ic9eb47c9e71a1d7a11af9394c615c8e90f8d1d69
+> Fixes: bz#1668239
+> Signed-off-by: Rishubh Jain <risjain@redhat.com>
+> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+
+BUG: 1667954
+Change-Id: Ic9eb47c9e71a1d7a11af9394c615c8e90f8d1d69
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202342
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ doc/gluster.8 | 2 +-
+ tests/basic/glusterd/disperse-create.t | 4 ++++
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/doc/gluster.8 b/doc/gluster.8
+index 88cbf44..66bdb48 100644
+--- a/doc/gluster.8
++++ b/doc/gluster.8
+@@ -41,7 +41,7 @@ List all volumes in cluster
+ \fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad|tierd]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR
+ Display status of all or specified volume(s)/brick
+ .TP
+-\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [disperse [<COUNT>]] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... \fR
++\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] [disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... <TA-BRICK> \fR
+ Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
+ To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option.
+ .TP
+diff --git a/tests/basic/glusterd/disperse-create.t b/tests/basic/glusterd/disperse-create.t
+index 384c675..db8a621 100644
+--- a/tests/basic/glusterd/disperse-create.t
++++ b/tests/basic/glusterd/disperse-create.t
+@@ -20,6 +20,10 @@ TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/b7 $H0:$B0/b8 $H0:$B
+ EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+ TEST $CLI volume delete $V0
++TEST $CLI volume create $V0 disperse-data 2 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12
++EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
++
++TEST $CLI volume delete $V0
+ TEST $CLI volume create $V0 redundancy 1 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12
+ EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+--
+1.8.3.1
+
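An example matching the new disperse-create.t case: disperse-data takes
the data-brick count directly, and the redundancy count is derived from
the remaining bricks (host and brick paths are placeholders; force is
only needed because all bricks sit on one host):

    gluster volume create testvol disperse-data 2 \
        server1:/bricks/b10 server1:/bricks/b11 server1:/bricks/b12 force
    # volume info then reports: Number of Bricks: 1 x (2 + 1) = 3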
diff --git a/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch b/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch
new file mode 100644
index 0000000..6694813
--- /dev/null
+++ b/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch
@@ -0,0 +1,52 @@
+From 73cef29731c0d7b8b4f3b880c032dc232b8fcc31 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Thu, 4 Jun 2020 16:06:44 +0530
+Subject: [PATCH 397/449] mgmt/brick-mux: Avoid sending two responses when
+ attach fails.
+
+We were sending two responses back to glusterd when an attach
+failed: one from the handler function glusterfs_handle_attach and
+another from rpcsvc_check_and_reply_error. This was causing problems
+like ref leaks, transport disconnects, etc.
+
+> Change-Id: I3bb5b59959530760b568d52becb519499b3dcd2b
+> updates: bz#1785143
+> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+> (Cherry pick from commit 42f484dcecd9942611396d9bd2ad3a39019b0e1f)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23906/)
+
+Change-Id: I3bb5b59959530760b568d52becb519499b3dcd2b
+BUG: 1776901
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202346
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfsd/src/glusterfsd-mgmt.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
+index 15acc10..61d1b21 100644
+--- a/glusterfsd/src/glusterfsd-mgmt.c
++++ b/glusterfsd/src/glusterfsd-mgmt.c
+@@ -954,7 +954,15 @@ glusterfs_handle_attach(rpcsvc_request_t *req)
+ ret = -1;
+ }
+
+- glusterfs_translator_info_response_send(req, ret, NULL, NULL);
++ ret = glusterfs_translator_info_response_send(req, ret, NULL, NULL);
++ if (ret) {
++ /* Response sent back to glusterd, req is already destroyed. So
++ * resetting the ret to 0. Otherwise another response will be
++ * send from rpcsvc_check_and_reply_error. Which will lead to
++ * double resource leak.
++ */
++ ret = 0;
++ }
+
+ out:
+ UNLOCK(&ctx->volfile_lock);
+--
+1.8.3.1
+
diff --git a/0398-ec-change-error-message-for-heal-commands-for-disper.patch b/0398-ec-change-error-message-for-heal-commands-for-disper.patch
new file mode 100644
index 0000000..5779539
--- /dev/null
+++ b/0398-ec-change-error-message-for-heal-commands-for-disper.patch
@@ -0,0 +1,75 @@
+From 03d2c7b52da5efd6ad660315a0548c8b91e51439 Mon Sep 17 00:00:00 2001
+From: Sheetal Pamecha <spamecha@redhat.com>
+Date: Sun, 22 Dec 2019 22:52:30 +0530
+Subject: [PATCH 398/449] ec: change error message for heal commands for
+ disperse volume
+
+Currently, when we issue heal statistics or similar commands
+for a disperse volume, they fail with the message "Volume is not of
+type replicate." Adding the message "this command is supported for
+volumes of type replicate" to reflect supportability and give a
+better understanding of heal functionality for disperse volumes.
+
+> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23916/
+> fixes: bz#1785998
+> Change-Id: I9688a9fdf427cb6f657cfd5b8db2f76a6c56f6e2
+> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+
+BUG: 1487177
+Change-Id: I9688a9fdf427cb6f657cfd5b8db2f76a6c56f6e2
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202344
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ heal/src/glfs-heal.c | 15 ++++++++++-----
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 +++-
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
+index 7e37e47..125b12c 100644
+--- a/heal/src/glfs-heal.c
++++ b/heal/src/glfs-heal.c
+@@ -1726,14 +1726,19 @@ main(int argc, char **argv)
+ goto out;
+ }
+
++ char *var_str = (heal_op == GF_SHD_OP_INDEX_SUMMARY ||
++ heal_op == GF_SHD_OP_HEAL_SUMMARY)
++ ? "replicate/disperse"
++ : "replicate";
++
+ ret = glfsh_validate_volume(top_subvol, heal_op);
+ if (ret < 0) {
+ ret = -EINVAL;
+- gf_asprintf(&op_errstr, "Volume %s is not of type %s", volname,
+- (heal_op == GF_SHD_OP_INDEX_SUMMARY ||
+- heal_op == GF_SHD_OP_HEAL_SUMMARY)
+- ? "replicate/disperse"
+- : "replicate");
++ gf_asprintf(&op_errstr,
++ "This command is supported "
++ "for only volumes of %s type. Volume %s "
++ "is not of type %s",
++ var_str, volname, var_str);
+ goto out;
+ }
+ rootloc.inode = inode_ref(top_subvol->itable->root);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 076bc80..93042ab 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -2008,7 +2008,9 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo,
+ if (!glusterd_is_volume_replicate(volinfo)) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+- "Volume %s is not of type "
++ "This command is supported "
++ "for only volume of replicated "
++ "type. Volume %s is not of type "
+ "replicate",
+ volinfo->volname);
+ *op_errstr = gf_strdup(msg);
+--
+1.8.3.1
+
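With the change, an unsupported heal sub-command on a disperse volume
names the supported volume type instead of a bare type error (volume
name is a placeholder; the message text follows the new gf_asprintf
above):

    gluster volume heal testvol statistics
    # before: "Volume testvol is not of type replicate"
    # after:  "This command is supported for only volumes of replicate
    #          type. Volume testvol is not of type replicate"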
diff --git a/0399-glusterd-coverity-fixes.patch b/0399-glusterd-coverity-fixes.patch
new file mode 100644
index 0000000..8052a46
--- /dev/null
+++ b/0399-glusterd-coverity-fixes.patch
@@ -0,0 +1,79 @@
+From 1ebd2a3227469b1775f19c8f78af7d3d19f749a3 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Fri, 26 Apr 2019 08:47:12 +0530
+Subject: [PATCH 399/449] glusterd: coverity fixes
+
+1400775 - USE_AFTER_FREE
+1400742 - Missing Unlock
+1400736 - CHECKED_RETURN
+1398470 - Missing Unlock
+
+Missing unlock is the tricky one: we had an annotation added, but
+coverity still continued to complain. Added pthread_mutex_unlock to
+release the lock before destroying it to see if it makes coverity
+happy.
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22634/
+> Updates: bz#789278
+> Change-Id: I1d892612a17f805144d96c1b15004a85a1639414
+> Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1787310
+Change-Id: I1d892612a17f805144d96c1b15004a85a1639414
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202343
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 1 +
+ xlators/mgmt/glusterd/src/glusterd-sm.c | 1 -
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +++++++-
+ 3 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
+index f24c86e..8c1feeb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
+@@ -48,6 +48,7 @@ glusterd_peerinfo_destroy(struct rcu_head *head)
+ }
+
+ glusterd_sm_tr_log_delete(&peerinfo->sm_log);
++ pthread_mutex_unlock(&peerinfo->delete_lock);
+ pthread_mutex_destroy(&peerinfo->delete_lock);
+ GF_FREE(peerinfo);
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
+index 54a7bd1..044da3d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
+@@ -868,7 +868,6 @@ glusterd_ac_friend_remove(glusterd_friend_sm_event_t *event, void *ctx)
+ "Cleanup returned: %d", ret);
+ }
+ out:
+- /* coverity[ LOCK] */
+ return 0;
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 1b78812..a1299bc 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -5840,7 +5840,13 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count,
+ /* PID file is copied once brick has attached
+ successfully
+ */
+- glusterd_copy_file(pidfile1, pidfile2);
++ ret = glusterd_copy_file(pidfile1, pidfile2);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++ "Could not copy file %s to %s", pidfile1, pidfile2);
++ goto out;
++ }
++
+ brickinfo->status = GF_BRICK_STARTED;
+ brickinfo->rpc = rpc_clnt_ref(other_brick->rpc);
+ gf_log(THIS->name, GF_LOG_INFO, "brick %s is attached successfully",
+--
+1.8.3.1
+
diff --git a/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch b/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch
new file mode 100644
index 0000000..dd1ea52
--- /dev/null
+++ b/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch
@@ -0,0 +1,98 @@
+From 12ed9226fa24d073ab2b89692194b454a194c379 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Thu, 4 Jun 2020 15:14:29 +0530
+Subject: [PATCH 400/449] cli: throw a warning if replica count greater than 3
+
+As volumes with replica count greater than 3 are not
+supported, a warning message is thrown to the user
+while creating a volume with replica count greater
+than 3 or while converting a volume to a replica > 3
+volume via add-brick/remove-brick operations.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1763129
+Change-Id: I5a32a5a2d99b5175fb692dfcab27396089f24b72
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202338
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 45 insertions(+)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index ac0a263..5e7ce53 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -619,6 +619,23 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
+ }
+ }
+ }
++
++ if (replica_count > 3) {
++ if (strcmp(words[wordcount - 1], "force")) {
++ question =
++ "Volumes with replica count greater than 3 are"
++ "not supported. \nDo you still want to continue?\n";
++ answer = cli_cmd_get_confirmation(state, question);
++ if (GF_ANSWER_NO == answer) {
++ gf_log("cli", GF_LOG_ERROR,
++ "Volume create "
++ "cancelled, exiting");
++ ret = -1;
++ goto out;
++ }
++ }
++ }
++
+ ret = dict_set_int32(dict, "replica-count", replica_count);
+ if (ret)
+ goto out;
+@@ -1815,6 +1832,20 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
+ goto out;
+ }
+ }
++ } else if (count > 3) {
++ if (strcmp(words[wordcount - 1], "force")) {
++ question =
++ "Volumes with replica count greater than 3 are"
++ "not supported. \nDo you still want to continue?\n";
++ answer = cli_cmd_get_confirmation(state, question);
++ if (GF_ANSWER_NO == answer) {
++ gf_log("cli", GF_LOG_ERROR,
++ "add-brick "
++ "cancelled, exiting");
++ ret = -1;
++ goto out;
++ }
++ }
+ }
+ } else if ((strcmp(w, "stripe")) == 0) {
+ cli_err("stripe option not supported");
+@@ -2082,6 +2113,20 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+ goto out;
+ }
+ }
++ } else if (count > 3) {
++ if (strcmp(words[wordcount - 1], "force")) {
++ ques =
++                "Volumes with replica count greater than 3 are "
++                "not supported.\nDo you still want to continue?\n";
++ answer = cli_cmd_get_confirmation(state, ques);
++ if (GF_ANSWER_NO == answer) {
++ gf_log("cli", GF_LOG_ERROR,
++ "Remove-brick "
++ "cancelled, exiting");
++ ret = -1;
++ goto out;
++ }
++ }
+ }
+
+ ret = dict_set_int32(dict, "replica-count", count);
+--
+1.8.3.1
+
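Note on the guard above: the prompt is raised only when the command's final
word is not "force". A hedged sketch of the resulting CLI behaviour (volume
and brick names are hypothetical; the exact prompt formatting produced by
cli_cmd_get_confirmation is an assumption):

    # Without "force", the CLI asks for confirmation and aborts on "n":
    $ gluster volume create testvol replica 4 host{1..4}:/bricks/b1
    Volumes with replica count greater than 3 are not supported.
    Do you still want to continue? (y/n) n
    # On "n" the parser returns -1 and logs "Volume create cancelled, exiting".

    # Appending "force" as the last word bypasses the prompt entirely:
    $ gluster volume create testvol replica 4 host{1..4}:/bricks/b1 force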
diff --git a/0401-cli-change-the-warning-message.patch b/0401-cli-change-the-warning-message.patch
new file mode 100644
index 0000000..5c3e895
--- /dev/null
+++ b/0401-cli-change-the-warning-message.patch
@@ -0,0 +1,70 @@
+From 704bf84d432e1eea1534e35ee27d4116a7273146 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Thu, 4 Jun 2020 16:15:35 +0530
+Subject: [PATCH 401/449] cli: change the warning message
+
+While creating a replica 2 volume or converting
+a volume to a replica 2 volume, we issue a warning
+saying "replica 2 volumes are prone to split-brain".
+As support for replica 2 volumes has been deprecated,
+the warning message should be changed accordingly to
+reflect this.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1763124
+Change-Id: If55e5412cda2e4a21a6359492d8d704dd702530d
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202348
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 5e7ce53..7446b95 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -603,8 +603,8 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
+ if (replica_count == 2) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ question =
+- "Replica 2 volumes are prone"
+- " to split-brain. Use "
++ "Support for replica 2 volumes stands deprecated as "
++ "they are prone to split-brain. Use "
+ "Arbiter or Replica 3 to "
+ "avoid this.\n"
+ "Do you still want to "
+@@ -1817,9 +1817,9 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
+ if (count == 2) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ question =
+- "Replica 2 volumes are prone to "
+- "split-brain. Use Arbiter or "
+- "Replica 3 to avaoid this. See: "
++ "Support for replica 2 volumes stands deprecated as they "
++ "are prone to split-brain. Use Arbiter or "
++ "Replica 3 to avoid this. See: "
+ "http://docs.gluster.org/en/latest/Administrator%20Guide/"
+ "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+ "\nDo you still want to continue?\n";
+@@ -2098,9 +2098,9 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+ if (count == 2) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ ques =
+- "Replica 2 volumes are prone to "
+- "split-brain. Use Arbiter or Replica 3 "
+- "to avaoid this. See: "
++ "Support for replica 2 volumes stands deprecated as they "
++ "are prone to split-brain. Use Arbiter or Replica 3 "
++ "to avoid this. See: "
+ "http://docs.gluster.org/en/latest/Administrator%20Guide/"
+ "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+ "\nDo you still want to continue?\n";
+--
+1.8.3.1
+
diff --git a/0402-afr-wake-up-index-healer-threads.patch b/0402-afr-wake-up-index-healer-threads.patch
new file mode 100644
index 0000000..34ca329
--- /dev/null
+++ b/0402-afr-wake-up-index-healer-threads.patch
@@ -0,0 +1,198 @@
+From ecaa0f10820f4b6e803021919ce59a43aedf356b Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Thu, 4 Jun 2020 16:15:35 +0530
+Subject: [PATCH 402/449] afr: wake up index healer threads
+
+...whenever shd is re-enabled after being disabled, or there is a change in
+`cluster.heal-timeout`, without needing to restart shd or wait for the
+current `cluster.heal-timeout` seconds to expire.
+
+> Upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23288/
+> Change-Id: Ia5ebd7c8e9f5b54cba3199c141fdd1af2f9b9bfe
+> fixes: bz#1744548
+> Reported-by: Glen Kiessling <glenk1973@hotmail.com>
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+BUG: 1764091
+Change-Id: I42aa0807f09b5a09510fe9efb4a1697dad3410a3
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202368
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/replicate/bug-1744548-heal-timeout.t | 42 +++++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-common.c | 6 ++--
+ xlators/cluster/afr/src/afr-self-heald.c | 14 ++++++---
+ xlators/cluster/afr/src/afr-self-heald.h | 3 --
+ xlators/cluster/afr/src/afr.c | 10 ++++++
+ xlators/cluster/afr/src/afr.h | 2 ++
+ 6 files changed, 66 insertions(+), 11 deletions(-)
+ create mode 100644 tests/bugs/replicate/bug-1744548-heal-timeout.t
+
+diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
+new file mode 100644
+index 0000000..3cb73bc
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
+@@ -0,0 +1,42 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
++TEST ! $CLI volume heal $V0
++
++# Enable shd and verify that index crawl is triggered immediately.
++TEST $CLI volume profile $V0 start
++TEST $CLI volume profile $V0 info clear
++TEST $CLI volume heal $V0 enable
++TEST $CLI volume heal $V0
++# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes
++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
++TEST [ "$COUNT" == "333" ]
++
++# Check that a change in heal-timeout is honoured immediately.
++TEST $CLI volume set $V0 cluster.heal-timeout 5
++sleep 10
++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
++# Two crawls must have happened.
++TEST [ "$COUNT" == "666" ]
++
++# shd must not heal if it is disabled and heal-timeout is changed.
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume profile $V0 info clear
++TEST $CLI volume set $V0 cluster.heal-timeout 6
++sleep 6
++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
++TEST [ -z $COUNT ]
++cleanup;
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 3690b84..eef7fd2 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -5613,10 +5613,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
+ * b) Already heard from everyone, but we now got a child-up
+ * event.
+ */
+- if (have_heard_from_all && priv->shd.iamshd) {
+- for (i = 0; i < priv->child_count; i++)
+- if (priv->child_up[i])
+- afr_selfheal_childup(this, i);
++ if (have_heard_from_all) {
++ afr_selfheal_childup(this, priv);
+ }
+ }
+ out:
+diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
+index 7eb1207..95ac5f2 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.c
++++ b/xlators/cluster/afr/src/afr-self-heald.c
+@@ -1258,12 +1258,18 @@ out:
+ return ret;
+ }
+
+-int
+-afr_selfheal_childup(xlator_t *this, int subvol)
++void
++afr_selfheal_childup(xlator_t *this, afr_private_t *priv)
+ {
+- afr_shd_index_healer_spawn(this, subvol);
++ int subvol = 0;
+
+- return 0;
++ if (!priv->shd.iamshd)
++ return;
++ for (subvol = 0; subvol < priv->child_count; subvol++)
++ if (priv->child_up[subvol])
++ afr_shd_index_healer_spawn(this, subvol);
++
++ return;
+ }
+
+ int
+diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
+index 7de7c43..1990539 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.h
++++ b/xlators/cluster/afr/src/afr-self-heald.h
+@@ -60,9 +60,6 @@ typedef struct {
+ } afr_self_heald_t;
+
+ int
+-afr_selfheal_childup(xlator_t *this, int subvol);
+-
+-int
+ afr_selfheal_daemon_init(xlator_t *this);
+
+ int
+diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
+index 33258a0..8f9e71f 100644
+--- a/xlators/cluster/afr/src/afr.c
++++ b/xlators/cluster/afr/src/afr.c
+@@ -141,6 +141,7 @@ reconfigure(xlator_t *this, dict_t *options)
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
++ int timeout_old = 0;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
+@@ -150,6 +151,7 @@ reconfigure(xlator_t *this, dict_t *options)
+ char *locking_scheme = NULL;
+ gf_boolean_t consistent_io = _gf_false;
+ gf_boolean_t choose_local_old = _gf_false;
++ gf_boolean_t enabled_old = _gf_false;
+
+ priv = this->private;
+
+@@ -255,11 +257,13 @@ reconfigure(xlator_t *this, dict_t *options)
+ GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+
++ enabled_old = priv->shd.enabled;
+ GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
+
+ GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
+ out);
+
++ timeout_old = priv->shd.timeout;
+ GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
+
+ GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
+@@ -283,6 +287,12 @@ reconfigure(xlator_t *this, dict_t *options)
+ consistent_io = _gf_false;
+ priv->consistent_io = consistent_io;
+
++ if (priv->shd.enabled) {
++ if ((priv->shd.enabled != enabled_old) ||
++ (timeout_old != priv->shd.timeout))
++ afr_selfheal_childup(this, priv);
++ }
++
+ ret = 0;
+ out:
+ return ret;
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index e731cfa..18f1a6a 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -1332,4 +1332,6 @@ afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this,
+ void
+ afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+
++void
++afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+ #endif /* __AFR_H__ */
+--
+1.8.3.1
+
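In practice this means both knobs now take effect immediately; an
illustrative sequence (volume name hypothetical):

    # Re-enabling shd kicks off an index crawl right away:
    gluster volume heal myvol enable
    # A new heal-timeout is honoured without restarting shd or waiting
    # out the previous interval:
    gluster volume set myvol cluster.heal-timeout 60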
diff --git a/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch b/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch
new file mode 100644
index 0000000..569bdc0
--- /dev/null
+++ b/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch
@@ -0,0 +1,84 @@
+From b311385a3c4bd56d69d1fa7e9bd3d9a2ae5c344e Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 7 Oct 2019 12:27:01 +0530
+Subject: [PATCH 403/449] Fix spurious failure in bug-1744548-heal-timeout.t
+
+The script was assuming that the heal would have been triggered
+by the time the test was executed, which may not be the case.
+It can lead to the following failures when the race happens:
+
+...
+18:29:45 not ok 14 [ 85/ 1] < 26> '[ 331 == 333 ]' -> ''
+...
+18:29:45 not ok 16 [ 10097/ 1] < 33> '[ 668 == 666 ]' -> ''
+
+Heal on the 3rd brick didn't start completely the first time the command was
+executed, so the extra count got added to the next profile info.
+
+Fixed it by depending on cumulative stats and waiting until the count is
+satisfied using EXPECT_WITHIN.
+
+> Upstream patch link: https://review.gluster.org/23523
+>fixes: bz#1759002
+>Change-Id: I3b410671c902d6b1458a757fa245613cb29d967d
+>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1764091
+Change-Id: Ic4d16b6c8a1bbc35735567d60fd0383456b9f534
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202369
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/replicate/bug-1744548-heal-timeout.t | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
+index 3cb73bc..0aaa3ea 100644
+--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t
++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
+@@ -4,6 +4,11 @@
+ . $(dirname $0)/../../volume.rc
+ . $(dirname $0)/../../afr.rc
+
++function get_cumulative_opendir_count {
++#sed 'n:d' prints odd-numbered lines
++ $CLI volume profile $V0 info |grep OPENDIR|sed 'n;d' | awk '{print $8}'|tr -d '\n'
++}
++
+ cleanup;
+
+ TEST glusterd;
+@@ -20,23 +25,23 @@ TEST ! $CLI volume heal $V0
+ TEST $CLI volume profile $V0 start
+ TEST $CLI volume profile $V0 info clear
+ TEST $CLI volume heal $V0 enable
+-TEST $CLI volume heal $V0
+ # Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes
+-COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+-TEST [ "$COUNT" == "333" ]
++EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count
+
+ # Check that a change in heal-timeout is honoured immediately.
+ TEST $CLI volume set $V0 cluster.heal-timeout 5
+ sleep 10
+-COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+ # Two crawls must have happened.
+-TEST [ "$COUNT" == "666" ]
++EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count
+
+ # shd must not heal if it is disabled and heal-timeout is changed.
+ TEST $CLI volume heal $V0 disable
++#Wait for configuration update and any opendir fops to complete
++sleep 10
+ TEST $CLI volume profile $V0 info clear
+ TEST $CLI volume set $V0 cluster.heal-timeout 6
+-sleep 6
++#Better to wait for more than 6 seconds to account for configuration updates
++sleep 10
+ COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+ TEST [ -z $COUNT ]
+ cleanup;
+--
+1.8.3.1
+
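The helper above uses sed 'n;d' to keep odd-numbered lines, on the
assumption that the grepped OPENDIR lines strictly alternate between each
brick's Cumulative and Interval stats. A quick standalone check of the sed
idiom (illustrative input only):

    $ printf '1\n2\n3\n4\n5\n' | sed 'n;d'
    1
    3
    5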
diff --git a/0404-tests-Fix-spurious-failure.patch b/0404-tests-Fix-spurious-failure.patch
new file mode 100644
index 0000000..9cbb6ea
--- /dev/null
+++ b/0404-tests-Fix-spurious-failure.patch
@@ -0,0 +1,38 @@
+From b65ca1045910bc18c601681788eb322dbb8ec2fa Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 14 Oct 2019 10:29:31 +0530
+Subject: [PATCH 404/449] tests: Fix spurious failure
+
+> Upstream patch: https://review.gluster.org/23546
+> fixes: bz#1759002
+> Change-Id: I4d49e1c2ca9b3c1d74b9dd5a30f1c66983a76529
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1764091
+Change-Id: I8b66f08cce7a87788867c6373aed71d6fc65155f
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202370
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/replicate/bug-1744548-heal-timeout.t | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
+index 0aaa3ea..c208112 100644
+--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t
++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
+@@ -5,8 +5,8 @@
+ . $(dirname $0)/../../afr.rc
+
+ function get_cumulative_opendir_count {
+-#sed 'n;d' prints odd-numbered lines
+- $CLI volume profile $V0 info |grep OPENDIR|sed 'n;d' | awk '{print $8}'|tr -d '\n'
++#The sed command prints content between Cumulative and Interval; this keeps the Cumulative stats
++ $CLI volume profile $V0 info |sed -n '/^Cumulative/,/^Interval/p'|grep OPENDIR| awk '{print $8}'|tr -d '\n'
+ }
+
+ cleanup;
+--
+1.8.3.1
+
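The replacement command relies on sed range addressing instead of line
parity: -n '/^Cumulative/,/^Interval/p' prints each block from a
"Cumulative" header through the next "Interval" header, so the subsequent
grep only sees OPENDIR counts from the cumulative stats. An illustrative
run on fake profile output:

    $ printf 'Cumulative Stats:\n OPENDIR 9\nInterval 0 Stats:\n OPENDIR 3\n' |
          sed -n '/^Cumulative/,/^Interval/p' | grep OPENDIR
     OPENDIR 9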
diff --git a/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch b/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch
new file mode 100644
index 0000000..765c154
--- /dev/null
+++ b/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch
@@ -0,0 +1,175 @@
+From 9c5f5b4ffd49e8c8631defb7b6873248bbfdaf9c Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Tue, 23 Jul 2019 13:16:04 +0000
+Subject: [PATCH 405/449] [core] fix return of local in __nlc_inode_ctx_get
+
+__nlc_inode_ctx_get assigns a value to nlc_pe_p which is never used by
+its parent function or any of its predecessors; hence remove the
+assignment and also that function argument, as it is not being used
+anywhere.
+
+> fixes: bz#1732496
+> Change-Id: I5b950e1e251bd50a646616da872a4efe9d2ff8c9
+> (Cherry pick from commit 84a55090123a7e3124100e5564da8c521c3c22ab )
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23093/)
+
+BUG: 1686897
+
+Change-Id: I5b950e1e251bd50a646616da872a4efe9d2ff8c9
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202372
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/nl-cache/src/nl-cache-helper.c | 36 +++++++++-------------
+ 1 file changed, 14 insertions(+), 22 deletions(-)
+
+diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c
+index 009f33a..4314038 100644
+--- a/xlators/performance/nl-cache/src/nl-cache-helper.c
++++ b/xlators/performance/nl-cache/src/nl-cache-helper.c
+@@ -145,12 +145,10 @@ nlc_disable_cache(xlator_t *this)
+ }
+
+ static int
+-__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p,
+- nlc_pe_t **nlc_pe_p)
++__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+ {
+ int ret = 0;
+ nlc_ctx_t *nlc_ctx = NULL;
+- nlc_pe_t *nlc_pe = NULL;
+ uint64_t nlc_ctx_int = 0;
+ uint64_t nlc_pe_int = 0;
+
+@@ -159,10 +157,6 @@ __nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p,
+ nlc_ctx = (void *)(long)(nlc_ctx_int);
+ *nlc_ctx_p = nlc_ctx;
+ }
+- if (ret == 0 && nlc_pe_p) {
+- nlc_pe = (void *)(long)(nlc_pe_int);
+- *nlc_pe_p = nlc_pe;
+- }
+ return ret;
+ }
+
+@@ -186,14 +180,13 @@ nlc_inode_ctx_set(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx,
+ }
+
+ static void
+-nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p,
+- nlc_pe_t **nlc_pe_p)
++nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+ {
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+- ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p, nlc_pe_p);
++ ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0,
+ "inode ctx get failed for "
+@@ -290,8 +283,7 @@ out:
+ }
+
+ static nlc_ctx_t *
+-nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p,
+- nlc_pe_t **nlc_pe_p)
++nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+ {
+ int ret = 0;
+ nlc_ctx_t *nlc_ctx = NULL;
+@@ -301,7 +293,7 @@ nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p,
+
+ LOCK(&inode->lock);
+ {
+- ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx, nlc_pe_p);
++ ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (nlc_ctx)
+ goto unlock;
+
+@@ -410,7 +402,7 @@ nlc_set_dir_state(xlator_t *this, inode_t *inode, uint64_t state)
+ goto out;
+ }
+
+- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL);
++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+@@ -430,7 +422,7 @@ nlc_cache_timeout_handler(struct gf_tw_timer_list *timer, void *data,
+ nlc_timer_data_t *tmp = data;
+ nlc_ctx_t *nlc_ctx = NULL;
+
+- nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx, NULL);
++ nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+@@ -696,7 +688,7 @@ nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason)
+ {
+ nlc_ctx_t *nlc_ctx = NULL;
+
+- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL);
++ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+@@ -883,7 +875,7 @@ nlc_dir_add_ne(xlator_t *this, inode_t *inode, const char *name)
+ goto out;
+ }
+
+- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL);
++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+@@ -914,7 +906,7 @@ nlc_dir_remove_pe(xlator_t *this, inode_t *parent, inode_t *entry_ino,
+ goto out;
+ }
+
+- nlc_inode_ctx_get(this, parent, &nlc_ctx, NULL);
++ nlc_inode_ctx_get(this, parent, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+@@ -945,7 +937,7 @@ nlc_dir_add_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino,
+ goto out;
+ }
+
+- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL);
++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+@@ -1051,7 +1043,7 @@ nlc_is_negative_lookup(xlator_t *this, loc_t *loc)
+ goto out;
+ }
+
+- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL);
++ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+@@ -1102,7 +1094,7 @@ nlc_get_real_file_name(xlator_t *this, loc_t *loc, const char *fname,
+ goto out;
+ }
+
+- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL);
++ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+@@ -1152,7 +1144,7 @@ nlc_dump_inodectx(xlator_t *this, inode_t *inode)
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp1 = NULL;
+
+- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL);
++ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+
+ if (!nlc_ctx)
+ goto out;
+--
+1.8.3.1
+
diff --git a/0406-afr-support-split-brain-CLI-for-replica-3.patch b/0406-afr-support-split-brain-CLI-for-replica-3.patch
new file mode 100644
index 0000000..4b57e8a
--- /dev/null
+++ b/0406-afr-support-split-brain-CLI-for-replica-3.patch
@@ -0,0 +1,185 @@
+From a75bb15fbe64f14580c44b8a33314c8bbeffdede Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Thu, 4 Jun 2020 18:54:46 +0530
+Subject: [PATCH 406/449] afr: support split-brain CLI for replica 3
+
+Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/23502/
+
+Ever since we added quorum checks for lookups in afr via commit
+bd44d59741bb8c0f5d7a62c5b1094179dd0ce8a4, the split-brain resolution
+commands would not work for replica 3 because there would be no
+readables for the lookup fop.
+
+The argument was that split-brains do not occur in replica 3 but we do
+see (data/metadata) split-brain cases once in a while which indicate that there are
+a few bugs/corner cases yet to be discovered and fixed.
+
+Fortunately, commit 8016d51a3bbd410b0b927ed66be50a09574b7982 added
+GF_CLIENT_PID_GLFS_HEALD as the pid for all fops made by glfsheal. If we
+leverage this and allow lookups in afr when pid is GF_CLIENT_PID_GLFS_HEALD,
+split-brain resolution commands will work for replica 3 volumes too.
+
+Likewise, the check is added in shard_lookup as well to permit resolving
+split-brains by specifying "/.shard/shard-file.xx" as the file name
+(which previously used to fail with EPERM).
+
+BUG: 1759875
+Change-Id: I203735b909c7d30fc4faaf3ecd4f5b6b379ab266
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202375
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../replicate/bug-1756938-replica-3-sbrain-cli.t | 111 +++++++++++++++++++++
+ xlators/cluster/afr/src/afr-common.c | 3 +-
+ xlators/features/shard/src/shard.c | 3 +-
+ 3 files changed, 115 insertions(+), 2 deletions(-)
+ create mode 100644 tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
+
+diff --git a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
+new file mode 100644
+index 0000000..c1bdf34
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
+@@ -0,0 +1,111 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 features.shard enable
++TEST $CLI volume set $V0 features.shard-block-size 4MB
++
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
++TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
++
++#Create split-brain by setting afr xattrs/gfids manually.
++#file1 is non-sharded and will be in data split-brain.
++#file2 will have one shard which will be in data split-brain.
++#file3 will have one shard which will be in gfid split-brain.
++#file4 will have one shard which will be in data & metadata split-brain.
++TEST dd if=/dev/zero of=$M0/file1 bs=1024 count=1024 oflag=direct
++TEST dd if=/dev/zero of=$M0/file2 bs=1M count=6 oflag=direct
++TEST dd if=/dev/zero of=$M0/file3 bs=1M count=6 oflag=direct
++TEST dd if=/dev/zero of=$M0/file4 bs=1M count=6 oflag=direct
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++
++#-------------------------------------------------------------------------------
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/file1
++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/file1
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/file1
++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/file1
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/file1
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/file1
++
++#-------------------------------------------------------------------------------
++gfid_f2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file2))
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1
++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1
++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1
++
++#-------------------------------------------------------------------------------
++TESTS_EXPECTED_IN_LOOP=5
++function assign_new_gfid {
++ brickpath=$1
++ filename=$2
++ gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/$filename))
++ gfid_shard=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/.shard/$gfid.1))
++
++ TEST rm $brickpath/.glusterfs/${gfid_shard:0:2}/${gfid_shard:2:2}/$gfid_shard
++ TEST setfattr -x trusted.gfid $brickpath/.shard/$gfid.1
++ new_gfid=$(get_random_gfid)
++ new_gfid_str=$(gf_gfid_xattr_to_str $new_gfid)
++ TEST setfattr -n trusted.gfid -v $new_gfid $brickpath/.shard/$gfid.1
++ TEST mkdir -p $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}
++ TEST ln $brickpath/.shard/$gfid.1 $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}/$new_gfid_str
++}
++assign_new_gfid $B0/$V0"1" file3
++assign_new_gfid $B0/$V0"2" file3
++
++#-------------------------------------------------------------------------------
++gfid_f4=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file4))
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1
++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1
++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1
++
++#-------------------------------------------------------------------------------
++#Add entry to xattrop dir on first brick and check for split-brain.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++
++gfid_f1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file1))
++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f1
++
++gfid_f2_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f2.1))
++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f2_shard1
++
++gfid_f3=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file3))
++gfid_f3_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f3.1))
++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f3_shard1
++
++gfid_f4_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f4.1))
++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f4_shard1
++
++#-------------------------------------------------------------------------------
++#gfid split-brain won't show up in split-brain count.
++EXPECT "3" afr_get_split_brain_count $V0
++EXPECT_NOT "^0$" get_pending_heal_count $V0
++
++#Resolve split-brains
++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file1
++GFIDSTR="gfid:$gfid_f2_shard1"
++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f3.1
++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f4.1
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++cleanup;
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index eef7fd2..32127c6 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -2250,7 +2250,8 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
+ if ((spb_choice >= 0) &&
+ (AFR_COUNT(success_replies, child_count) == child_count)) {
+ *read_subvol = spb_choice;
+- } else if (!priv->quorum_count) {
++ } else if (!priv->quorum_count ||
++ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
+ *read_subvol = afr_first_up_child(frame, this);
+ } else if (priv->quorum_count &&
+ afr_has_quorum(data_readable, this, NULL)) {
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 2e2ef5d..16d557b 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -1472,7 +1472,8 @@ int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ shard_local_t *local = NULL;
+
+ this->itable = loc->inode->table;
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) &&
++ (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) {
+ SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
+ }
+
+--
+1.8.3.1
+
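With heald's lookups now permitted, the split-brain CLI works on replica 3
for both plain and sharded files; illustrative invocations mirroring the
test above (volume, host, brick and gfid are hypothetical):

    gluster volume heal myvol split-brain source-brick host1:/bricks/b1 /file1
    gluster volume heal myvol split-brain source-brick host1:/bricks/b1 /.shard/<gfid-of-file>.1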
diff --git a/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch b/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch
new file mode 100644
index 0000000..459462d
--- /dev/null
+++ b/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch
@@ -0,0 +1,60 @@
+From de31f2b0cb09a59941892c9981cb8a8b3aced9ec Mon Sep 17 00:00:00 2001
+From: kshithijiyer <kshithij.ki@gmail.com>
+Date: Tue, 24 Dec 2019 13:02:21 +0530
+Subject: [PATCH 407/449] [geo-rep] Improving help message in
+ schedule_georep.py.in
+
+The SLAVE positional argument doesn't provide a clear
+picture of what it is when compared to mastervol and slavevol
+in schedule_georep.py.in. It would be better to change it to
+something like "Slave hostname (<username>@SLAVEHOST or SLAVEHOST)".
+
+Present:
+----------
+positional arguments:
+ mastervol Master Volume Name
+ SLAVE SLAVEHOST or root@SLAVEHOST or user@SLAVEHOST
+ slavevol Slave Volume Name
+
+Suggested:
+-----------
+positional arguments:
+ mastervol Master Volume Name
+ SLAVE Slave hostname (<username>@SLAVEHOST or SLAVEHOST)
+ slavevol Slave Volume Name
+
+Backport of:
+ >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/23919/
+ >fixes: bz#1786276
+ >Change-Id: I73d52247997d623f77d55e51cbb6eccc08eb95ff
+ >Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
+
+BUG: 1787994
+Change-Id: I73d52247997d623f77d55e51cbb6eccc08eb95ff
+Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202454
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/geo-rep/schedule_georep.py.in | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in
+index f29ae02..ac93716 100644
+--- a/extras/geo-rep/schedule_georep.py.in
++++ b/extras/geo-rep/schedule_georep.py.in
+@@ -459,8 +459,8 @@ if __name__ == "__main__":
+ description=__doc__)
+ parser.add_argument("mastervol", help="Master Volume Name")
+ parser.add_argument("slave",
+- help="SLAVEHOST or root@SLAVEHOST "
+- "or user@SLAVEHOST",
++ help="Slave hostname "
++ "(<username>@SLAVEHOST or SLAVEHOST)",
+ metavar="SLAVE")
+ parser.add_argument("slavevol", help="Slave Volume Name")
+ parser.add_argument("--interval", help="Interval in Seconds. "
+--
+1.8.3.1
+
diff --git a/0408-geo-rep-Fix-ssh-port-validation.patch b/0408-geo-rep-Fix-ssh-port-validation.patch
new file mode 100644
index 0000000..9fad8d1
--- /dev/null
+++ b/0408-geo-rep-Fix-ssh-port-validation.patch
@@ -0,0 +1,107 @@
+From 07ab5a460da007fc3809b1a943614d1c7f5fcfef Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Fri, 17 Jan 2020 11:03:46 +0000
+Subject: [PATCH 408/449] geo-rep: Fix ssh-port validation
+
+If a non-standard SSH port is used, Geo-rep can be configured to use it
+via the ssh-port config option; the value should be non-negative and within
+the allowed port range.
+
+At present the option accepts negative values and values outside the allowed
+port range, which is incorrect.
+
+Many Linux kernels use the port range 32768 to 61000.
+IANA suggests it should be in the range 1 to 2^16 - 1, so we keep that range.
+
+$ gluster volume geo-replication master 127.0.0.1::slave config ssh-port -22
+geo-replication config updated successfully
+$ gluster volume geo-replication master 127.0.0.1::slave config ssh-port 22222222
+geo-replication config updated successfully
+
+This patch fixes the above issue and adds a few validations around this
+in the test cases.
+Backport of:
+ >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24035/
+ >Change-Id: I9875ab3f00d7257370fbac6f5ed4356d2fed3f3c
+ >Fixes: bz#1792276
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1796814
+Change-Id: I9875ab3f00d7257370fbac6f5ed4356d2fed3f3c
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202453
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/gsyncd.conf.in | 4 +++-
+ tests/00-geo-rep/00-georep-verify-non-root-setup.t | 16 ++++++++++++++++
+ tests/00-geo-rep/georep-basic-dr-rsync.t | 13 +++++++++++++
+ 3 files changed, 32 insertions(+), 1 deletion(-)
+
+diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
+index 9155cd8..11e57fd 100644
+--- a/geo-replication/gsyncd.conf.in
++++ b/geo-replication/gsyncd.conf.in
+@@ -266,7 +266,9 @@ allowed_values=ERROR,INFO,WARNING,DEBUG
+
+ [ssh-port]
+ value=22
+-validation=int
++validation=minmax
++min=1
++max=65535
+ help=Set SSH port
+ type=int
+
+diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+index c9fd8b2..12f0c01 100644
+--- a/tests/00-geo-rep/00-georep-verify-non-root-setup.t
++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+@@ -223,6 +223,22 @@ TEST $GEOREP_CLI $master $slave_url resume
+ #Validate failure of volume stop when geo-rep is running
+ TEST ! $CLI volume stop $GMV0
+
++#Negative test for ssh-port
++#Port should be integer and between 1-65535 range
++
++TEST ! $GEOREP_CLI $master $slave_url config ssh-port -22
++
++TEST ! $GEOREP_CLI $master $slave_url config ssh-port abc
++
++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 6875943
++
++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 4.5
++
++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 22a
++
++#Config Set ssh-port to validate int validation
++TEST $GEOREP_CLI $master $slave config ssh-port 22
++
+ #Hybrid directory rename test BZ#1763439
+ TEST $GEOREP_CLI $master $slave_url config change_detector xsync
+ mkdir ${master_mnt}/dir1
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
+index b6fbf18..d785aa5 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
+@@ -71,6 +71,19 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created"
+ #Config gluster-command-dir
+ TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
++#Negative test for ssh-port
++#Port should be integer and between 1-65535 range
++
++TEST ! $GEOREP_CLI $master $slave config ssh-port -22
++
++TEST ! $GEOREP_CLI $master $slave config ssh-port abc
++
++TEST ! $GEOREP_CLI $master $slave config ssh-port 6875943
++
++TEST ! $GEOREP_CLI $master $slave config ssh-port 4.5
++
++TEST ! $GEOREP_CLI $master $slave config ssh-port 22a
++
+ #Config Set ssh-port to validate int validation
+ TEST $GEOREP_CLI $master $slave config ssh-port 22
+
+--
+1.8.3.1
+
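After this change the config set itself fails for out-of-range or
non-integer values; an illustrative session (error text approximate):

    $ gluster volume geo-replication master 127.0.0.1::slave config ssh-port -22
    # fails: below the allowed 1-65535 range
    $ gluster volume geo-replication master 127.0.0.1::slave config ssh-port 22a
    # fails: not an integer
    $ gluster volume geo-replication master 127.0.0.1::slave config ssh-port 2222
    geo-replication config updated successfully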
diff --git a/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch b/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch
new file mode 100644
index 0000000..ca1c25a
--- /dev/null
+++ b/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch
@@ -0,0 +1,52 @@
+From a92b4f6373cb18544325436cf86abfebd6780d79 Mon Sep 17 00:00:00 2001
+From: Homma <homma@allworks.co.jp>
+Date: Fri, 5 Jul 2019 16:10:41 +0530
+Subject: [PATCH 409/449] system/posix-acl: update ctx only if iatt is non-NULL
+
+We need to safeguard against possible zeroing out of the iatt
+structure in the ACL ctx, which can cause many issues.
+
+> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23003/
+> fixes: 1668286
+> Change-Id: Ie81a57d7453a6624078de3be8c0845bf4d432773
+> Signed-off-by: Amar Tumballi <amarts@redhat.com>
+
+BUG: 1781649
+Change-Id: I655b61551d30215b9f23cafc3ef9a5c0d98a43d0
+Signed-off-by: Raghavendra M <raghavendra@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202446
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/system/posix-acl/src/posix-acl.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/xlators/system/posix-acl/src/posix-acl.c b/xlators/system/posix-acl/src/posix-acl.c
+index 38e48b8..c6ba281 100644
+--- a/xlators/system/posix-acl/src/posix-acl.c
++++ b/xlators/system/posix-acl/src/posix-acl.c
+@@ -875,6 +875,13 @@ posix_acl_ctx_update(inode_t *inode, xlator_t *this, struct iatt *buf,
+ int ret = 0;
+ int i = 0;
+
++ if (!buf || !buf->ia_ctime) {
++ /* No need to update ctx if buf is empty */
++ gf_log_callingfn(this->name, GF_LOG_DEBUG, "iatt struct is empty (%d)",
++ fop);
++ goto out;
++ }
++
+ LOCK(&inode->lock);
+ {
+ ctx = __posix_acl_ctx_get(inode, this, _gf_true);
+@@ -928,6 +935,7 @@ posix_acl_ctx_update(inode_t *inode, xlator_t *this, struct iatt *buf,
+ }
+ unlock:
+ UNLOCK(&inode->lock);
++out:
+ return ret;
+ }
+
+--
+1.8.3.1
+
diff --git a/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch b/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch
new file mode 100644
index 0000000..97bdc78
--- /dev/null
+++ b/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch
@@ -0,0 +1,249 @@
+From 2b2eb846c49caba13ab92ec66af20292e7780fc1 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Tue, 11 Feb 2020 14:34:48 +0530
+Subject: [PATCH 410/449] afr: prevent spurious entry heals leading to gfid
+ split-brain
+
+Problem:
+In a hyperconverged setup with granular-entry-heal enabled, if a file is
+recreated while one of the bricks is down, and an index heal is triggered
+(with the brick still down), entry-self heal was doing a spurious heal
+with just the 2 good bricks. It was doing a post-op leading to removal
+of the filename from .glusterfs/indices/entry-changes as well as
+erroneous setting of afr xattrs on the parent. When the brick came up,
+the xattrs were cleared, resulting in the renamed file not getting
+healed and leading to gfid split-brain and EIO on the mount.
+
+Fix:
+Proceed with entry heal only when shd can connect to all bricks of the replica,
+just like in data and metadata heal.
+
+BUG: 1804164
+
+> Upstream patch:https://review.gluster.org/#/c/glusterfs/+/24109/
+> fixes: bz#1801624
+> Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+Change-Id: I23f57e543cff1e3f35eb8dbc60a2babfae6838c7
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202395
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bug-1433571-undo-pending-only-on-up-bricks.t | 18 ++-----
+ tests/bugs/replicate/bug-1801624-entry-heal.t | 58 ++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-common.c | 4 +-
+ xlators/cluster/afr/src/afr-self-heal-common.c | 8 +--
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 6 +--
+ xlators/cluster/afr/src/afr-self-heal-name.c | 2 +-
+ xlators/cluster/afr/src/afr-self-heal.h | 2 -
+ 7 files changed, 69 insertions(+), 29 deletions(-)
+ create mode 100644 tests/bugs/replicate/bug-1801624-entry-heal.t
+
+diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
+index 0767f47..10ce013 100644
+--- a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
++++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
+@@ -49,25 +49,15 @@ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+-#Kill brick 0 and turn on the client side heal and do ls to trigger the heal.
+-#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0.
+-TEST kill_brick $V0 $H0 $B0/${V0}0
++# We were killing one brick and checking that entry heal does not reset the
++# pending xattrs for the down brick. Now that we need all bricks to be up for
++# entry heal, I'm removing that test from the .t
++
+ TEST $CLI volume set $V0 cluster.data-self-heal on
+ TEST $CLI volume set $V0 cluster.metadata-self-heal on
+ TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+ TEST ls $M0
+-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
+-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+-
+-#Bring back all the bricks and trigger the heal again by doing ls. Now the
+-#pending xattrs on all the bricks should be 0.
+-TEST $CLI volume start $V0 force
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+-TEST ls $M0
+-
+ TEST cat $M0/f1
+ TEST cat $M0/f2
+ TEST cat $M0/f3
+diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t
+new file mode 100644
+index 0000000..94b4651
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1801624-entry-heal.t
+@@ -0,0 +1,58 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
++TEST $CLI volume set $V0 heal-timeout 5
++TEST $CLI volume start $V0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0 granular-entry-heal enable
++
++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++echo "Data">$M0/FILE
++ret=$?
++TEST [ $ret -eq 0 ]
++
++# Re-create the file when a brick is down.
++TEST kill_brick $V0 $H0 $B0/brick1
++TEST rm $M0/FILE
++echo "New Data">$M0/FILE
++ret=$?
++TEST [ $ret -eq 0 ]
++EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0
++
++# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices.
++$CLI volume heal $V0 # CLI will fail but heal is launched anyway.
++TEST sleep 5 # give index heal a chance to do one run.
++brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/)
++brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/)
++TEST [ $brick0_pending -eq "000000000000000000000002" ]
++TEST [ $brick2_pending -eq "000000000000000000000002" ]
++EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
++EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++$CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
++
++# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values.
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++TEST cat $M0/FILE
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++cleanup;
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 32127c6..5806556 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -6629,7 +6629,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
+ {
+- if (ret < AFR_SH_MIN_PARTICIPANTS)
++ if (ret < priv->child_count)
+ goto data_unlock;
+ ret = __afr_selfheal_data_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+@@ -6646,7 +6646,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ {
+- if (ret < AFR_SH_MIN_PARTICIPANTS)
++ if (ret < priv->child_count)
+ goto mdata_unlock;
+ ret = __afr_selfheal_metadata_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 81ef38a..ce1ea50 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ char *accused = NULL; /* Accused others without any self-accusal */
+ char *pending = NULL; /* Have pending operations on others */
+ char *self_accused = NULL; /* Accused itself */
+- int min_participants = -1;
+
+ priv = this->private;
+
+@@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ }
+ }
+
+- if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) {
+- min_participants = priv->child_count;
+- } else {
+- min_participants = AFR_SH_MIN_PARTICIPANTS;
+- }
+- if (afr_success_count(replies, priv->child_count) < min_participants) {
++ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
+ /* Treat this just like locks not being acquired */
+ return -ENOTCONN;
+ }
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index 3ce882e..40be898 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -597,7 +597,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ locked_on);
+ {
+- if (ret < AFR_SH_MIN_PARTICIPANTS) {
++ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes "
+@@ -991,7 +991,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ data_lock);
+ {
+- if (ret < AFR_SH_MIN_PARTICIPANTS) {
++ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+@@ -1115,7 +1115,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
+ ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
+ NULL, locked_on);
+ {
+- if (ret < AFR_SH_MIN_PARTICIPANTS) {
++ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
+index 36640b5..7d4f208 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-name.c
++++ b/xlators/cluster/afr/src/afr-self-heal-name.c
+@@ -514,7 +514,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
+ locked_on);
+ {
+- if (ret < AFR_SH_MIN_PARTICIPANTS) {
++ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
+index 6555ec5..8234cec 100644
+--- a/xlators/cluster/afr/src/afr-self-heal.h
++++ b/xlators/cluster/afr/src/afr-self-heal.h
+@@ -11,8 +11,6 @@
+ #ifndef _AFR_SELFHEAL_H
+ #define _AFR_SELFHEAL_H
+
+-#define AFR_SH_MIN_PARTICIPANTS 2
+-
+ /* Perform fop on all UP subvolumes and wait for all callbacks to return */
+
+ #define AFR_ONALL(frame, rfn, fop, args...) \
+--
+1.8.3.1
+
diff --git a/0411-tools-glusterfind-validate-session-name.patch b/0411-tools-glusterfind-validate-session-name.patch
new file mode 100644
index 0000000..db633f2
--- /dev/null
+++ b/0411-tools-glusterfind-validate-session-name.patch
@@ -0,0 +1,116 @@
+From 854defb4ff5e0d53f51545d20796aff662f9850f Mon Sep 17 00:00:00 2001
+From: Saravanakumar Arumugam <sarumuga@redhat.com>
+Date: Thu, 9 Jul 2015 15:56:28 +0530
+Subject: [PATCH 411/449] tools/glusterfind : validate session name
+
+Validate a session name (during create) for the following:
+1. Minimum 2 characters in length.
+2. Maximum 256 characters.
+3. No special characters apart from underscore and hyphen allowed.
+
+Also, validate the volume (except while using glusterfind list).
+
+>Change-Id: I1b1e64e218f93d0a531d3cf69fc2ce7e2ed11d01
+>BUG: 1241494
+>Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com>
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/11602/
+
+BUG: 1234220
+Change-Id: I1b1e64e218f93d0a531d3cf69fc2ce7e2ed11d01
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202469
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tools/glusterfind/src/main.py | 50 ++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 42 insertions(+), 8 deletions(-)
+
+diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
+index 5ca1fec..4b5466d 100644
+--- a/tools/glusterfind/src/main.py
++++ b/tools/glusterfind/src/main.py
+@@ -23,6 +23,7 @@ import tempfile
+ import signal
+ from datetime import datetime
+ import codecs
++import re
+
+ from utils import execute, is_host_local, mkdirp, fail
+ from utils import setup_logger, human_time, handle_rm_error
+@@ -520,11 +521,8 @@ def write_output(outfile, outfilemerger, field_separator):
+ else:
+ gfind_write(f, row[0], field_separator, p_rep)
+
+-def mode_create(session_dir, args):
+- logger.debug("Init is called - Session: %s, Volume: %s"
+- % (args.session, args.volume))
+-
+- cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
++def validate_volume(volume):
++ cmd = ["gluster", 'volume', 'info', volume, "--xml"]
+ _, data, _ = execute(cmd,
+ exit_msg="Failed to Run Gluster Volume Info",
+ logger=logger)
+@@ -532,11 +530,42 @@ def mode_create(session_dir, args):
+ tree = etree.fromstring(data)
+ statusStr = tree.find('volInfo/volumes/volume/statusStr').text
+ except (ParseError, AttributeError) as e:
+- fail("Invalid Volume: %s" % e, logger=logger)
+-
++ fail("Invalid Volume: Check the Volume name! %s" % e)
+ if statusStr != "Started":
+- fail("Volume %s is not online" % args.volume, logger=logger)
++ fail("Volume %s is not online" % volume)
++
++# The rules for a valid session name.
++SESSION_NAME_RULES = {
++ 'min_length': 2,
++ 'max_length': 256, # same as maximum volume length
++ # Specifies all alphanumeric characters, underscore, hyphen.
++ 'valid_chars': r'0-9a-zA-Z_-',
++}
++
++
++# checks valid session name, fail otherwise
++def validate_session_name(session):
++ # Check for minimum length
++ if len(session) < SESSION_NAME_RULES['min_length']:
++ fail('session_name must be at least ' +
++ str(SESSION_NAME_RULES['min_length']) + ' characters long.')
++ # Check for maximum length
++ if len(session) > SESSION_NAME_RULES['max_length']:
++ fail('session_name must not exceed ' +
++             str(SESSION_NAME_RULES['max_length']) + ' characters in length.')
++
++ # Matches strings composed entirely of characters specified within
++ if not re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] +
++ ']+$', session):
++ fail('Session name can only contain these characters: ' +
++ SESSION_NAME_RULES['valid_chars'])
++
++
++def mode_create(session_dir, args):
++ validate_session_name(args.session)
+
++ logger.debug("Init is called - Session: %s, Volume: %s"
++ % (args.session, args.volume))
+ mkdirp(session_dir, exit_on_err=True, logger=logger)
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+@@ -850,6 +879,11 @@ def main():
+ args.mode not in ["create", "list", "query"]:
+ fail("Invalid session %s" % args.session)
+
++ # volume involved, validate the volume first
++ if args.mode not in ["list"]:
++ validate_volume(args.volume)
++
++
+ # "default" is a system defined session name
+ if args.mode in ["create", "post", "pre", "delete"] and \
+ args.session == "default":
+--
+1.8.3.1
+
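Some illustrative session names against the rules above (volume name
hypothetical; exact error text omitted):

    glusterfind create nightly_backup myvol   # ok: alphanumerics plus underscore
    glusterfind create a myvol                # fails: fewer than 2 characters
    glusterfind create 'bad name' myvol       # fails: space is not allowed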
diff --git a/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch b/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch
new file mode 100644
index 0000000..865fddf
--- /dev/null
+++ b/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch
@@ -0,0 +1,46 @@
+From 0769c5ddc78ea37b9a43ac35dd71ec8cea4b8da8 Mon Sep 17 00:00:00 2001
+From: yinkui <13965432176@163.com>
+Date: Fri, 16 Aug 2019 10:15:07 +0800
+Subject: [PATCH 412/449] gluster-smb:add smb parameter when access gluster by
+ cifs
+
+Backport of https://review.gluster.org/23240
+
+Change-Id: I9ff54f2ca6f86bb5b2f4740485a0159e1fd7785f
+BUG: 1783232
+Signed-off-by: yinkui <13965432176@163.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202472
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/hook-scripts/set/post/S30samba-set.sh | 1 +
+ extras/hook-scripts/start/post/S30samba-start.sh | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh
+index d2a62d3..e73f00f 100755
+--- a/extras/hook-scripts/set/post/S30samba-set.sh
++++ b/extras/hook-scripts/set/post/S30samba-set.sh
+@@ -90,6 +90,7 @@ function add_samba_share () {
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="guest ok = yes\n"
++ STRING+="kernel share modes = no\n"
+ printf "$STRING" >> ${CONFIGFILE}
+ }
+
+diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh
+index 2854bdd..0d5a5ed 100755
+--- a/extras/hook-scripts/start/post/S30samba-start.sh
++++ b/extras/hook-scripts/start/post/S30samba-start.sh
+@@ -89,6 +89,7 @@ function add_samba_share () {
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="guest ok = yes\n"
++ STRING+="kernel share modes = no\n"
+ printf "$STRING" >> "${CONFIGFILE}"
+ }
+
+--
+1.8.3.1
+
diff --git a/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch b/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch
new file mode 100644
index 0000000..1ff6348
--- /dev/null
+++ b/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch
@@ -0,0 +1,46 @@
+From aec3dd00fa76547316fddd07e6ded428d945986c Mon Sep 17 00:00:00 2001
+From: Anoop C S <anoopcs@redhat.com>
+Date: Fri, 22 Nov 2019 17:36:55 +0530
+Subject: [PATCH 413/449] extras/hooks: Remove smb.conf parameter allowing
+ guest access
+
+Backport of https://review.gluster.org/23745
+
+Change-Id: I88f494f16153d27ab6e2f2faf4d557e075671b10
+BUG: 1775637
+Signed-off-by: Anoop C S <anoopcs@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202473
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/hook-scripts/set/post/S30samba-set.sh | 1 -
+ extras/hook-scripts/start/post/S30samba-start.sh | 1 -
+ 2 files changed, 2 deletions(-)
+
+diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh
+index e73f00f..854f131 100755
+--- a/extras/hook-scripts/set/post/S30samba-set.sh
++++ b/extras/hook-scripts/set/post/S30samba-set.sh
+@@ -89,7 +89,6 @@ function add_samba_share () {
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+- STRING+="guest ok = yes\n"
+ STRING+="kernel share modes = no\n"
+ printf "$STRING" >> ${CONFIGFILE}
+ }
+diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh
+index 0d5a5ed..cac0cbf 100755
+--- a/extras/hook-scripts/start/post/S30samba-start.sh
++++ b/extras/hook-scripts/start/post/S30samba-start.sh
+@@ -88,7 +88,6 @@ function add_samba_share () {
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+- STRING+="guest ok = yes\n"
+ STRING+="kernel share modes = no\n"
+ printf "$STRING" >> "${CONFIGFILE}"
+ }
+--
+1.8.3.1
+
diff --git a/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch b/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch
new file mode 100644
index 0000000..67b71dd
--- /dev/null
+++ b/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch
@@ -0,0 +1,62 @@
+From 5b549cbf3f1873054c6d187b09aa9f9313971b1f Mon Sep 17 00:00:00 2001
+From: Kinglong Mee <kinglongmee@gmail.com>
+Date: Mon, 18 Mar 2019 20:47:54 +0800
+Subject: [PATCH 414/449] cluster-syncop: avoid duplicate unlock of
+ inodelk/entrylk
+
+When using ec, there are many spam messages in brick and client
+log files.
+
+When shd does entry heal, it takes a lock on a directory using
+cluster_tiebreaker_inodelk(). If it does not get the lock on all
+the bricks because other clients hold locks on some of them,
+it unlocks the locks it did acquire and then tries blocking locks
+(if at least one of the previous attempts was successful).
+
+The problem arises here: when we do not get locks on all the
+required bricks, we send the unlock request twice to those
+bricks where we did get the locks.
+
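+For illustration only, a standalone sketch of the intended pattern,
+where try_lock()/unlock_one() are hypothetical stand-ins for the real
+inodelk calls:
+
+    #include <string.h>
+
+    static int try_lock(int brick)    { return brick % 2; /* pretend */ }
+    static void unlock_one(int brick) { (void)brick; }
+
+    /* On partial failure, roll back the locks we took and clear
+     * locked_on, so the caller cannot unlock the same bricks again. */
+    static int tiebreaker_lock(unsigned char *locked_on, int nbricks)
+    {
+        int i, num_success = 0;
+
+        for (i = 0; i < nbricks; i++) {
+            locked_on[i] = try_lock(i);
+            num_success += locked_on[i];
+        }
+        if (num_success == nbricks)
+            return 0;                  /* got them all */
+
+        for (i = 0; i < nbricks; i++)  /* roll back partial locks */
+            if (locked_on[i])
+                unlock_one(i);
+        memset(locked_on, 0, nbricks); /* the fix: forget them */
+        return 0;
+    }
+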
+BUG: 1750211
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22377/
+> Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a
+> Updates: bz#1689920
+> Signed-off-by: Kinglong Mee <mijinlong@open-fs.com>
+
+Change-Id: I1647548ba75fdd27fd4e20dec08db67774f43375
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202477
+Tested-by: Ashish Pandey <aspandey@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/cluster-syncop.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c
+index 5a08f26..6ee89dd 100644
+--- a/libglusterfs/src/cluster-syncop.c
++++ b/libglusterfs/src/cluster-syncop.c
+@@ -1203,6 +1203,10 @@ cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on,
+ if (num_success) {
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame,
+ inodelk, dom, &loc, F_SETLKW, &flock, NULL);
++ } else {
++ loc_wipe(&loc);
++ memset(locked_on, 0, numsubvols);
++ return 0;
+ }
+ break;
+ }
+@@ -1244,7 +1248,9 @@ cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on,
+ entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK,
+ NULL);
+ } else {
++ loc_wipe(&loc);
+ memset(locked_on, 0, numsubvols);
++ return 0;
+ }
+ break;
+ }
+--
+1.8.3.1
+
diff --git a/0415-dht-Fix-stale-layout-and-create-issue.patch b/0415-dht-Fix-stale-layout-and-create-issue.patch
new file mode 100644
index 0000000..476a8cc
--- /dev/null
+++ b/0415-dht-Fix-stale-layout-and-create-issue.patch
@@ -0,0 +1,523 @@
+From ba23e6d8f4eff11a228816149a8a1ccd6df41146 Mon Sep 17 00:00:00 2001
+From: Susant Palai <spalai@redhat.com>
+Date: Fri, 27 Dec 2019 12:06:19 +0530
+Subject: [PATCH 415/449] dht: Fix stale-layout and create issue
+
+Problem: With lookup-optimize set to on by default, a client with a
+stale layout can create a new file on the wrong subvol. This will lead
+to possible duplicate files if two different clients attempt to create
+the same file with two different layouts.
+
+Solution: Send the in-memory layout to be cross-checked at posix before
+committing a "create". In case of a mismatch, sync the client layout
+with that of the server and attempt the create fop one more time.
+
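+For illustration, a simplified sketch of the brick-side pre-op check
+(a hypothetical helper; the real code lives in posix_is_layout_stale()
+below and uses gluster's dict/xattr wrappers):
+
+    #include <string.h>
+    #include <sys/types.h>
+    #include <sys/xattr.h>
+
+    /* Compare the layout sent by the client with the on-disk parent
+     * xattr; a mismatch means the client's layout is stale. */
+    static int layout_is_stale(const char *par_path, const void *sent,
+                               size_t sent_len)
+    {
+        char ondisk[4096];
+        ssize_t size = lgetxattr(par_path, "trusted.glusterfs.dht",
+                                 ondisk, sizeof(ondisk));
+        if (size < 0)
+            return 0;          /* nothing on disk: nothing to check */
+        return ((size_t)size != sent_len ||
+                memcmp(ondisk, sent, size)) ? 1 : 0;
+    }
+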
+test: Manual, testcase(attached)
+
+(Backport of https://review.gluster.org/#/c/glusterfs/+/23927/)
+
+BUG: 1748865
+Change-Id: I6c82c97418654ae8eb3b81ab65f1247aa4002ceb
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202465
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/distribute/bug-1786679.t | 69 +++++++++++
+ xlators/cluster/dht/src/dht-common.c | 147 ++++++++++++++++++++---
+ xlators/cluster/dht/src/dht-common.h | 6 +
+ xlators/protocol/client/src/client-rpc-fops_v2.c | 9 +-
+ xlators/storage/posix/src/posix-entry-ops.c | 29 ++++-
+ xlators/storage/posix/src/posix-helpers.c | 76 ++++++++++++
+ xlators/storage/posix/src/posix.h | 4 +
+ 7 files changed, 321 insertions(+), 19 deletions(-)
+ create mode 100755 tests/bugs/distribute/bug-1786679.t
+
+diff --git a/tests/bugs/distribute/bug-1786679.t b/tests/bugs/distribute/bug-1786679.t
+new file mode 100755
+index 0000000..219ce51
+--- /dev/null
++++ b/tests/bugs/distribute/bug-1786679.t
+@@ -0,0 +1,69 @@
++#!/bin/bash
++
++SCRIPT_TIMEOUT=250
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../dht.rc
++
++
++# create 2 subvols
++# create a dir
++# create a file
++# change layout
++# remove the file
++# execute create from a different mount
++# Without the patch, the file will be present on both of the bricks
++
++cleanup
++
++function get_layout () {
++
++layout=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | gawk -F"=" '{print $2}'`
++
++echo $layout
++
++}
++
++function set_layout()
++{
++ setfattr -n "trusted.glusterfs.dht" -v $1 $2
++}
++
++TEST glusterd
++TEST pidof glusterd
++
++BRICK1=$B0/${V0}-0
++BRICK2=$B0/${V0}-1
++
++TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2
++TEST $CLI volume start $V0
++
++# Mount FUSE and create a directory with a file in it
++TEST glusterfs -s $H0 --volfile-id $V0 $M0
++TEST mkdir $M0/dir
++TEST touch $M0/dir/file
++TEST ! stat "$BRICK1/dir/file"
++TEST stat "$BRICK2/dir/file"
++
++layout1="$(get_layout "$BRICK1/dir")"
++layout2="$(get_layout "$BRICK2/dir")"
++
++TEST set_layout $layout1 "$BRICK2/dir"
++TEST set_layout $layout2 "$BRICK1/dir"
++
++TEST rm $M0/dir/file -f
++TEST gluster v set $V0 client-log-level DEBUG
++
++#Without the patch in place, this client will create the file in $BRICK2,
++#which will lead to the file being present on both bricks when a new
++#client creates the file with the same name
++TEST touch $M0/dir/file
++
++TEST glusterfs -s $H0 --volfile-id $V0 $M1
++TEST touch $M1/dir/file
++
++TEST stat "$BRICK1/dir/file"
++TEST ! stat "$BRICK2/dir/file"
++
++cleanup
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 7890e7a..6aa18f3 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -8262,6 +8262,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
++ gf_boolean_t parent_layout_changed = _gf_false;
++ char pgfid[GF_UUID_BUF_SIZE] = {0};
++ xlator_t *subvol = NULL;
++
++ local = frame->local;
+
+ local = frame->local;
+ if (!local) {
+@@ -8270,8 +8275,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ goto out;
+ }
+
+- if (op_ret == -1)
++ if (op_ret == -1) {
++ local->op_errno = op_errno;
++ parent_layout_changed = (xdata &&
++ dict_get(xdata, GF_PREOP_CHECK_FAILED))
++ ? _gf_true
++ : _gf_false;
++
++ if (parent_layout_changed) {
++ if (local && local->lock[0].layout.parent_layout.locks) {
++ /* Returning failure as the layout could not be fixed even under
++ * the lock */
++ goto out;
++ }
++
++ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
++ "create (%s/%s) (path: %s): parent layout "
++ "changed. Attempting a layout refresh and then a "
++ "retry",
++ pgfid, local->loc.name, local->loc.path);
++
++ /*
++ dht_refresh_layout needs directory info in local->loc.Hence,
++ storing the parent_loc in local->loc and storing the create
++ context in local->loc2. We will restore this information in
++ dht_creation_do.
++ */
++
++ loc_wipe(&local->loc2);
++
++ ret = loc_copy(&local->loc2, &local->loc);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
++ "loc_copy failed %s", local->loc.path);
++
++ goto out;
++ }
++
++ loc_wipe(&local->loc);
++
++ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
++ &op_errno);
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
++ "parent loc build failed");
++ goto out;
++ }
++
++ subvol = dht_subvol_get_hashed(this, &local->loc2);
++
++ ret = dht_create_lock(frame, subvol);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
++ "locking parent failed");
++ goto out;
++ }
++
++ return 0;
++ }
++
+ goto out;
++ }
+
+ prev = cookie;
+
+@@ -8392,6 +8458,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
+
++ dht_set_parent_layout_in_dict(loc, this, local);
++
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
+@@ -8400,10 +8468,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
+
+ if (avail_subvol != subvol) {
+- local->params = dict_ref(params);
+- local->flags = flags;
+- local->mode = mode;
+- local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+
+@@ -8419,6 +8483,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
+
++ dht_set_parent_layout_in_dict(loc, this, local);
++
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
+@@ -8680,6 +8746,60 @@ err:
+ }
+
+ int
++dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
++{
++ dht_conf_t *conf = this->private;
++ dht_layout_t *parent_layout = NULL;
++ int *parent_disk_layout = NULL;
++ xlator_t *hashed_subvol = NULL;
++ char pgfid[GF_UUID_BUF_SIZE] = {0};
++ int ret = 0;
++
++ gf_uuid_unparse(loc->parent->gfid, pgfid);
++
++ parent_layout = dht_layout_get(this, loc->parent);
++ hashed_subvol = dht_subvol_get_hashed(this, loc);
++
++ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
++ &parent_disk_layout);
++ if (ret == -1) {
++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
++ DHT_MSG_PARENT_LAYOUT_CHANGED,
++ "%s (%s/%s) (path: %s): "
++ "extracting in-memory layout of parent failed. ",
++ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
++ goto err;
++ }
++
++ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
++ conf->xattr_name);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
++ DHT_MSG_PARENT_LAYOUT_CHANGED,
++ "%s (%s/%s) (path: %s): "
++ "setting %s key in params dictionary failed. ",
++ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
++ GF_PREOP_PARENT_KEY);
++ goto err;
++ }
++
++ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
++ 4 * 4);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
++ DHT_MSG_PARENT_LAYOUT_CHANGED,
++ "%s (%s/%s) (path: %s): "
++ "setting parent-layout in params dictionary failed. ",
++ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
++ goto err;
++ }
++
++err:
++ dht_layout_unref(this, parent_layout);
++ return ret;
++}
++
++int
+ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
+ {
+@@ -8705,6 +8825,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ goto err;
+ }
+
++ local->params = dict_ref(params);
++ local->flags = flags;
++ local->mode = mode;
++ local->umask = umask;
++
+ if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "creating %s on %s (got create on %s)", local->loc.path,
+@@ -8720,10 +8845,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+
+ if (hashed_subvol && (hashed_subvol != subvol)) {
+ /* Create the linkto file and then the data file */
+- local->params = dict_ref(params);
+- local->flags = flags;
+- local->mode = mode;
+- local->umask = umask;
+ local->cached_subvol = subvol;
+ local->hashed_subvol = hashed_subvol;
+
+@@ -8736,6 +8857,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ * file as we expect a lookup everywhere if there are problems
+ * with the parent layout
+ */
++
++ dht_set_parent_layout_in_dict(loc, this, local);
++
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, &local->loc, flags, mode, umask,
+ fd, params);
+@@ -8787,11 +8911,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ goto err;
+ }
+
+- local->params = dict_ref(params);
+- local->flags = flags;
+- local->mode = mode;
+- local->umask = umask;
+-
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index 8e65111..1b3e826 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -1549,4 +1549,10 @@ dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+ int
+ dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
++int32_t
++dht_create_lock(call_frame_t *frame, xlator_t *subvol);
++
++int
++dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
++
+ #endif /* _DHT_H */
+diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
+index 2673b6e..613dda8 100644
+--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
+@@ -2094,11 +2094,12 @@ client4_0_create_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ goto out;
+ }
+
++ ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent,
++ local, &xdata);
++ if (ret < 0)
++ goto out;
++
+ if (-1 != rsp.op_ret) {
+- ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent,
+- local, &xdata);
+- if (ret < 0)
+- goto out;
+ ret = client_add_fd_to_saved_fds(frame->this, fd, &local->loc,
+ local->flags, rsp.fd, 0);
+ if (ret) {
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index bea0bbf..65650b3 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -2070,6 +2070,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
+ mode_t mode_bit = 0;
+
++ dict_t *xdata_rsp = dict_ref(xdata);
++
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+@@ -2118,6 +2120,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ was_present = 0;
+ }
+
++ if (!was_present) {
++ if (posix_is_layout_stale(xdata, par_path, this)) {
++ op_ret = -1;
++ op_errno = EIO;
++ if (!xdata_rsp) {
++ xdata_rsp = dict_new();
++ if (!xdata_rsp) {
++ op_errno = ENOMEM;
++ goto out;
++ }
++ }
++
++ if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) ==
++ -1) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED,
++ "setting key %s in dict failed", GF_PREOP_CHECK_FAILED);
++ }
++
++ goto out;
++ }
++ }
++
+ if (priv->o_direct)
+ _flags |= O_DIRECT;
+
+@@ -2239,7 +2263,10 @@ out:
+
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd,
+ (loc) ? loc->inode : NULL, &stbuf, &preparent,
+- &postparent, xdata);
++ &postparent, xdata_rsp);
++
++ if (xdata_rsp)
++ dict_unref(xdata_rsp);
+
+ return 0;
+ }
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 35dd3b6..2c27d22 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -3559,3 +3559,79 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req)
+ }
+ }
+ }
++
++gf_boolean_t
++posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
++{
++ int op_ret = 0;
++ ssize_t size = 0;
++ char value_buf[4096] = {
++ 0,
++ };
++ gf_boolean_t have_val = _gf_false;
++ data_t *arg_data = NULL;
++ char *xattr_name = NULL;
++ gf_boolean_t is_stale = _gf_false;
++
++ op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
++ if (xattr_name == NULL) {
++ op_ret = 0;
++ goto out;
++ }
++
++ arg_data = dict_get(xdata, xattr_name);
++ if (!arg_data) {
++ op_ret = 0;
++ goto out;
++ }
++
++ size = sys_lgetxattr(par_path, xattr_name, value_buf,
++ sizeof(value_buf) - 1);
++
++ if (size >= 0) {
++ have_val = _gf_true;
++ } else {
++ if (errno == ERANGE) {
++ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED,
++ "getxattr on key (%s) path (%s) failed due to"
++ " buffer overflow",
++ xattr_name, par_path);
++ size = sys_lgetxattr(par_path, xattr_name, NULL, 0);
++ }
++ if (size < 0) {
++ op_ret = -1;
++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED,
++ "getxattr on key (%s) failed, path : %s", xattr_name,
++ par_path);
++ goto out;
++ }
++ }
++
++ if (!have_val) {
++ size = sys_lgetxattr(par_path, xattr_name, value_buf, size);
++ if (size < 0) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED,
++ "getxattr on key (%s) failed (%s)", xattr_name,
++ strerror(errno));
++ goto out;
++ }
++ }
++
++ if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) {
++ gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED,
++ "failing preop as on-disk xattr value differs from argument "
++ "value for key %s",
++ xattr_name);
++ op_ret = -1;
++ }
++
++out:
++ dict_del_sizen(xdata, xattr_name);
++ dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
++
++ if (op_ret == -1) {
++ is_stale = _gf_true;
++ }
++
++ return is_stale;
++}
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index dd51062..ac9d83c 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -671,4 +671,8 @@ posix_spawn_ctx_janitor_thread(xlator_t *this);
+
+ void
+ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
++
++gf_boolean_t
++posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this);
++
+ #endif /* _POSIX_H */
+--
+1.8.3.1
+
diff --git a/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch b/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch
new file mode 100644
index 0000000..1954e6a
--- /dev/null
+++ b/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch
@@ -0,0 +1,72 @@
+From 63cfdd987b1dfbf97486f0f884380faee0ae25d0 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Wed, 4 Sep 2019 11:27:30 +0530
+Subject: [PATCH 416/449] tests: fix spurious failure of
+ bug-1402841.t-mt-dir-scan-race.t
+
+Upstream patch: https://review.gluster.org/23352
+
+Problem:
+Since commit 600ba94183333c4af9b4a09616690994fd528478, shd starts
+healing as soon as it is toggled from disabled to enabled. This was
+causing the following line in the .t to fail on a 'fast' machine (always
+on my laptop and sometimes on the jenkins slaves).
+
+EXPECT_NOT "^0$" get_pending_heal_count $V0
+
+because by the time shd was disabled, the heal was already completed.
+
+Fix:
+Increase the no. of files to be healed and make it a variable called
+FILE_COUNT, so it can be bumped up further should the machines become
+even faster. Also create pending metadata heals to increase the time
+taken to heal a file.
+
+>fixes: bz#1748744
+>Change-Id: I5a26b08e45b8c19bce3c01ce67bdcc28ed48198d
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+BUG: 1844359
+Change-Id: Ie3676c6c2c27e7574b958d2eaac23801dfaed3a9
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202481
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
+index 6351ba2..a1b9a85 100755
+--- a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
++++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
+@@ -3,6 +3,8 @@
+ . $(dirname $0)/../../volume.rc
+ cleanup;
+
++FILE_COUNT=500
++
+ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+@@ -11,15 +13,14 @@ TEST $CLI volume set $V0 cluster.shd-wait-qlength 100
+ TEST $CLI volume start $V0
+
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+-touch $M0/file{1..200}
+-
++for i in `seq 1 $FILE_COUNT`; do touch $M0/file$i; done
+ TEST kill_brick $V0 $H0 $B0/${V0}1
+-for i in {1..200}; do echo hello>$M0/file$i; done
++for i in `seq 1 $FILE_COUNT`; do echo hello>$M0/file$i; chmod -x $M0/file$i; done
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+-EXPECT "200" get_pending_heal_count $V0
++EXPECT "$FILE_COUNT" get_pending_heal_count $V0
+ TEST $CLI volume set $V0 self-heal-daemon on
+
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+--
+1.8.3.1
+
diff --git a/0417-events-fix-IPv6-memory-corruption.patch b/0417-events-fix-IPv6-memory-corruption.patch
new file mode 100644
index 0000000..cefb5bf
--- /dev/null
+++ b/0417-events-fix-IPv6-memory-corruption.patch
@@ -0,0 +1,153 @@
+From 5e231ceb35bb763d6fafc7c3efe1c3c582929cc2 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Tue, 14 Jan 2020 13:28:47 +0100
+Subject: [PATCH 417/449] events: fix IPv6 memory corruption
+
+When an event was generated and the target host was resolved to an IPv6
+address, there was a memory overflow when that address was copied to a
+fixed IPv4 structure (IPv6 addresses are longer than IPv4 ones).
+
+This fix correctly handles IPv4 and IPv6 addresses returned by
+getaddrinfo().
+
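+For reference, a self-contained sketch of the family-agnostic pattern
+the fix adopts (names are illustrative): let getaddrinfo() produce a
+sockaddr of the right size and pass it to sendto() unchanged, instead
+of copying the result into a fixed struct sockaddr_in:
+
+    #include <netdb.h>
+    #include <string.h>
+    #include <sys/socket.h>
+    #include <unistd.h>
+
+    static int send_event_dgram(const char *host, const char *port,
+                                const char *msg)
+    {
+        struct addrinfo hints, *res;
+        int sock, ret = -1;
+
+        memset(&hints, 0, sizeof(hints));
+        hints.ai_family = AF_UNSPEC;    /* IPv4 or IPv6 */
+        hints.ai_socktype = SOCK_DGRAM;
+
+        if (getaddrinfo(host, port, &hints, &res) != 0)
+            return -1;
+
+        sock = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
+        if (sock >= 0) {
+            if (sendto(sock, msg, strlen(msg), 0, res->ai_addr,
+                       res->ai_addrlen) > 0)
+                ret = 0;
+            close(sock);
+        }
+        freeaddrinfo(res);
+        return ret;
+    }
+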
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/24014
+> Change-Id: I5864a0c6e6f1b405bd85988529570140cf23b250
+> Fixes: bz#1790870
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1792873
+Change-Id: I5864a0c6e6f1b405bd85988529570140cf23b250
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202486
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/events.c | 56 +++++++++++++----------------------------------
+ 1 file changed, 15 insertions(+), 41 deletions(-)
+
+diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
+index 4e2f8f9..6d1e383 100644
+--- a/libglusterfs/src/events.c
++++ b/libglusterfs/src/events.c
+@@ -34,7 +34,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ int ret = 0;
+ int sock = -1;
+ char *eventstr = NULL;
+- struct sockaddr_in server;
+ va_list arguments;
+ char *msg = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+@@ -42,11 +41,10 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ struct addrinfo hints;
+ struct addrinfo *result = NULL;
+ xlator_t *this = THIS;
+- int sin_family = AF_INET;
+ char *volfile_server_transport = NULL;
+
+ /* Global context */
+- ctx = THIS->ctx;
++ ctx = this->ctx;
+
+ if (event < 0 || event >= EVENT_LAST) {
+ ret = EVENT_ERROR_INVALID_INPUTS;
+@@ -60,48 +58,31 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ goto out;
+ }
+
+- memset(&hints, 0, sizeof(hints));
+- hints.ai_family = AF_UNSPEC;
+-
+ if (ctx) {
+ volfile_server_transport = ctx->cmd_args.volfile_server_transport;
+ }
+-
+ if (!volfile_server_transport) {
+ volfile_server_transport = "tcp";
+ }
+- /* Get Host name to send message */
++
++ /* host = NULL returns localhost */
++ host = NULL;
+ if (ctx && ctx->cmd_args.volfile_server &&
+ (strcmp(volfile_server_transport, "unix"))) {
+ /* If it is client code then volfile_server is set
+ use that information to push the events. */
+- if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints,
+- &result)) != 0) {
+- ret = EVENT_ERROR_RESOLVE;
+- goto out;
+- }
+-
+- if (get_ip_from_addrinfo(result, &host) == NULL) {
+- ret = EVENT_ERROR_RESOLVE;
+- goto out;
+- }
+-
+- sin_family = result->ai_family;
+- } else {
+- /* Localhost, Use the defined IP for localhost */
+- host = gf_strdup(EVENT_HOST);
++ host = ctx->cmd_args.volfile_server;
+ }
+
+- /* Socket Configurations */
+- server.sin_family = sin_family;
+- server.sin_port = htons(EVENT_PORT);
+- ret = inet_pton(server.sin_family, host, &server.sin_addr);
+- if (ret <= 0) {
+- gf_msg(this->name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+- "inet_pton failed with return code %d", ret);
++ memset(&hints, 0, sizeof(hints));
++ hints.ai_family = AF_UNSPEC;
++ hints.ai_socktype = SOCK_DGRAM;
++ hints.ai_flags = AI_ADDRCONFIG;
++
++ if ((getaddrinfo(host, TOSTRING(EVENT_PORT), &hints, &result)) != 0) {
++ ret = EVENT_ERROR_RESOLVE;
+ goto out;
+ }
+- memset(&server.sin_zero, '\0', sizeof(server.sin_zero));
+
+ va_start(arguments, fmt);
+ ret = gf_vasprintf(&msg, fmt, arguments);
+@@ -113,15 +94,15 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ }
+
+ ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)time(NULL), event, msg);
+-
++ GF_FREE(msg);
+ if (ret <= 0) {
+ ret = EVENT_ERROR_MSG_FORMAT;
+ goto out;
+ }
+
+ /* Send Message */
+- if (sendto(sock, eventstr, strlen(eventstr), 0, (struct sockaddr *)&server,
+- sizeof(server)) <= 0) {
++ if (sendto(sock, eventstr, strlen(eventstr), 0, result->ai_addr,
++ result->ai_addrlen) <= 0) {
+ ret = EVENT_ERROR_SEND;
+ goto out;
+ }
+@@ -133,17 +114,10 @@ out:
+ sys_close(sock);
+ }
+
+- /* Allocated by gf_vasprintf */
+- if (msg)
+- GF_FREE(msg);
+-
+ /* Allocated by gf_asprintf */
+ if (eventstr)
+ GF_FREE(eventstr);
+
+- if (host)
+- GF_FREE(host);
+-
+ if (result)
+ freeaddrinfo(result);
+
+--
+1.8.3.1
+
diff --git a/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch b/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch
new file mode 100644
index 0000000..45622d9
--- /dev/null
+++ b/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch
@@ -0,0 +1,439 @@
+From 7ad8c03a28fca67150972cda964ebe9233766b54 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Mon, 30 Mar 2020 11:09:39 +0200
+Subject: [PATCH 418/449] md-cache: avoid clearing cache when not necessary
+
+mdc_inode_xatt_set() blindly cleared the current cache when dict was not
+NULL, even if there was no xattr requested.
+
+This patch fixes this by only calling mdc_inode_xatt_set() when we have
+explicitly requested something to cache.
+
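+A minimal sketch of the resulting guard (types and names here are
+hypothetical simplifications of mdc_local/mdc_prepare_request):
+
+    struct local_sketch {
+        int update_cache;  /* set only if the request loaded xattr keys */
+    };
+
+    static void iatt_cache_set(void *inode) { (void)inode; }
+    static void xatt_cache_set(void *inode, void *xdata)
+    { (void)inode; (void)xdata; }
+
+    static void lookup_cbk_sketch(struct local_sketch *local,
+                                  void *inode, void *xdata)
+    {
+        iatt_cache_set(inode);            /* stat data: always refresh */
+        if (local->update_cache)
+            xatt_cache_set(inode, xdata); /* xattrs: only if requested */
+    }
+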
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/24267
+> Change-Id: Idc91a4693f1ff39f7059acde26682ccc361b947d
+> Fixes: #1140
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1815434
+Change-Id: Idc91a4693f1ff39f7059acde26682ccc361b947d
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202487
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/md-cache/src/md-cache.c | 165 ++++++++++++++++------------
+ 1 file changed, 93 insertions(+), 72 deletions(-)
+
+diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
+index a6b363f..bbbee3b 100644
+--- a/xlators/performance/md-cache/src/md-cache.c
++++ b/xlators/performance/md-cache/src/md-cache.c
+@@ -133,6 +133,7 @@ struct mdc_local {
+ char *key;
+ dict_t *xattr;
+ uint64_t incident_time;
++ bool update_cache;
+ };
+
+ int
+@@ -969,7 +970,7 @@ out:
+ return ret;
+ }
+
+-void
++static bool
+ mdc_load_reqs(xlator_t *this, dict_t *dict)
+ {
+ struct mdc_conf *conf = this->private;
+@@ -978,6 +979,7 @@ mdc_load_reqs(xlator_t *this, dict_t *dict)
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ int ret = 0;
++ bool loaded = false;
+
+ tmp1 = conf->mdc_xattr_str;
+ if (!tmp1)
+@@ -995,13 +997,17 @@ mdc_load_reqs(xlator_t *this, dict_t *dict)
+ conf->mdc_xattr_str = NULL;
+ gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+ "Disabled cache for xattrs, dict_set failed");
++ goto out;
+ }
+ pattern = strtok_r(NULL, ",", &tmp);
+ }
+
+- GF_FREE(mdc_xattr_str);
++ loaded = true;
++
+ out:
+- return;
++ GF_FREE(mdc_xattr_str);
++
++ return loaded;
+ }
+
+ struct checkpair {
+@@ -1092,6 +1098,25 @@ err:
+ return ret;
+ }
+
++static dict_t *
++mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata)
++{
++ if (xdata == NULL) {
++ xdata = dict_new();
++ if (xdata == NULL) {
++ local->update_cache = false;
++
++ return NULL;
++ }
++ } else {
++ dict_ref(xdata);
++ }
++
++ local->update_cache = mdc_load_reqs(this, xdata);
++
++ return xdata;
++}
++
+ int
+ mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+@@ -1201,7 +1226,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, stbuf, local->incident_time);
+- mdc_inode_xatt_set(this, local->loc.inode, dict);
++ if (local->update_cache) {
++ mdc_inode_xatt_set(this, local->loc.inode, dict);
++ }
+ }
+ out:
+ MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict,
+@@ -1220,7 +1247,6 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+- dict_t *xattr_alloc = NULL;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+@@ -1271,18 +1297,18 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ return 0;
+
+ uncached:
+- if (!xdata)
+- xdata = xattr_alloc = dict_new();
+- if (xdata)
+- mdc_load_reqs(this, xdata);
++ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+- if (xattr_alloc)
+- dict_unref(xattr_alloc);
++
++ if (xdata != NULL) {
++ dict_unref(xdata);
++ }
++
+ return 0;
+ }
+
+@@ -1305,7 +1331,9 @@ mdc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ }
+
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+- mdc_inode_xatt_set(this, local->loc.inode, xdata);
++ if (local->update_cache) {
++ mdc_inode_xatt_set(this, local->loc.inode, xdata);
++ }
+
+ out:
+ MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
+@@ -1319,7 +1347,6 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ int ret;
+ struct iatt stbuf;
+ mdc_local_t *local = NULL;
+- dict_t *xattr_alloc = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+@@ -1343,17 +1370,16 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ return 0;
+
+ uncached:
+- if (!xdata)
+- xdata = xattr_alloc = dict_new();
+- if (xdata)
+- mdc_load_reqs(this, xdata);
++ xdata = mdc_prepare_request(this, local, xdata);
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+
+- if (xattr_alloc)
+- dict_unref(xattr_alloc);
++ if (xdata != NULL) {
++ dict_unref(xdata);
++ }
++
+ return 0;
+ }
+
+@@ -1376,7 +1402,9 @@ mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ }
+
+ mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time);
+- mdc_inode_xatt_set(this, local->fd->inode, xdata);
++ if (local->update_cache) {
++ mdc_inode_xatt_set(this, local->fd->inode, xdata);
++ }
+
+ out:
+ MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
+@@ -1390,7 +1418,6 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ int ret;
+ struct iatt stbuf;
+ mdc_local_t *local = NULL;
+- dict_t *xattr_alloc = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, fd->inode);
+@@ -1409,17 +1436,16 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ return 0;
+
+ uncached:
+- if (!xdata)
+- xdata = xattr_alloc = dict_new();
+- if (xdata)
+- mdc_load_reqs(this, xdata);
++ xdata = mdc_prepare_request(this, local, xdata);
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+
+- if (xattr_alloc)
+- dict_unref(xattr_alloc);
++ if (xdata != NULL) {
++ dict_unref(xdata);
++ }
++
+ return 0;
+ }
+
+@@ -2393,7 +2419,9 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ goto out;
+ }
+
+- mdc_inode_xatt_set(this, local->loc.inode, xdata);
++ if (local->update_cache) {
++ mdc_inode_xatt_set(this, local->loc.inode, xdata);
++ }
+
+ out:
+ MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
+@@ -2410,7 +2438,6 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ mdc_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ struct mdc_conf *conf = this->private;
+- dict_t *xattr_alloc = NULL;
+ gf_boolean_t key_satisfied = _gf_true;
+
+ local = mdc_local_get(frame, loc->inode);
+@@ -2443,18 +2470,17 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+
+ uncached:
+ if (key_satisfied) {
+- if (!xdata)
+- xdata = xattr_alloc = dict_new();
+- if (xdata)
+- mdc_load_reqs(this, xdata);
++ xdata = mdc_prepare_request(this, local, xdata);
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+
+- if (xattr_alloc)
+- dict_unref(xattr_alloc);
++ if (key_satisfied && (xdata != NULL)) {
++ dict_unref(xdata);
++ }
++
+ return 0;
+ }
+
+@@ -2481,7 +2507,9 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ goto out;
+ }
+
+- mdc_inode_xatt_set(this, local->fd->inode, xdata);
++ if (local->update_cache) {
++ mdc_inode_xatt_set(this, local->fd->inode, xdata);
++ }
+
+ out:
+ MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+@@ -2498,7 +2526,6 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xattr = NULL;
+ int op_errno = ENODATA;
+ struct mdc_conf *conf = this->private;
+- dict_t *xattr_alloc = NULL;
+ gf_boolean_t key_satisfied = _gf_true;
+
+ local = mdc_local_get(frame, fd->inode);
+@@ -2531,18 +2558,17 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+
+ uncached:
+ if (key_satisfied) {
+- if (!xdata)
+- xdata = xattr_alloc = dict_new();
+- if (xdata)
+- mdc_load_reqs(this, xdata);
++ xdata = mdc_prepare_request(this, local, xdata);
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
+
+- if (xattr_alloc)
+- dict_unref(xattr_alloc);
++ if (key_satisfied && (xdata != NULL)) {
++ dict_unref(xdata);
++ }
++
+ return 0;
+ }
+
+@@ -2752,27 +2778,22 @@ int
+ mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+ {
+- dict_t *xattr_alloc = NULL;
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+
+ loc_copy(&local->loc, loc);
+
+- if (!xdata)
+- xdata = xattr_alloc = dict_new();
+-
+- if (xdata) {
+- /* Tell readdir-ahead to include these keys in xdata when it
+- * internally issues readdirp() in it's opendir_cbk */
+- mdc_load_reqs(this, xdata);
+- }
++ /* Tell readdir-ahead to include these keys in xdata when it
++ * internally issues readdirp() in its opendir_cbk */
++ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+
+- if (xattr_alloc)
+- dict_unref(xattr_alloc);
++ if (xdata != NULL) {
++ dict_unref(xdata);
++ }
+
+ return 0;
+ }
+@@ -2800,7 +2821,9 @@ mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ continue;
+ mdc_inode_iatt_set(this, entry->inode, &entry->d_stat,
+ local->incident_time);
+- mdc_inode_xatt_set(this, entry->inode, entry->dict);
++ if (local->update_cache) {
++ mdc_inode_xatt_set(this, entry->inode, entry->dict);
++ }
+ }
+
+ unwind:
+@@ -2812,7 +2835,6 @@ int
+ mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+ {
+- dict_t *xattr_alloc = NULL;
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+@@ -2821,15 +2843,15 @@ mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+
+ local->fd = fd_ref(fd);
+
+- if (!xdata)
+- xdata = xattr_alloc = dict_new();
+- if (xdata)
+- mdc_load_reqs(this, xdata);
++ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
+- if (xattr_alloc)
+- dict_unref(xattr_alloc);
++
++ if (xdata != NULL) {
++ dict_unref(xdata);
++ }
++
+ return 0;
+ out:
+ MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
+@@ -2860,7 +2882,6 @@ int
+ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+ {
+- int need_unref = 0;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+@@ -2876,19 +2897,14 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ return 0;
+ }
+
+- if (!xdata) {
+- xdata = dict_new();
+- need_unref = 1;
+- }
+-
+- if (xdata)
+- mdc_load_reqs(this, xdata);
++ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
+
+- if (need_unref && xdata)
++ if (xdata != NULL) {
+ dict_unref(xdata);
++ }
+
+ return 0;
+ unwind:
+@@ -3468,7 +3484,12 @@ mdc_register_xattr_inval(xlator_t *this)
+ goto out;
+ }
+
+- mdc_load_reqs(this, xattr);
++ if (!mdc_load_reqs(this, xattr)) {
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
++ "failed to populate cache entries");
++ ret = -1;
++ goto out;
++ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+--
+1.8.3.1
+
diff --git a/0419-cluster-afr-fix-race-when-bricks-come-up.patch b/0419-cluster-afr-fix-race-when-bricks-come-up.patch
new file mode 100644
index 0000000..ea8c2ea
--- /dev/null
+++ b/0419-cluster-afr-fix-race-when-bricks-come-up.patch
@@ -0,0 +1,104 @@
+From b9b479de2a7fd1c5eefa7aa1142e0a39e0c96ca9 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Sun, 1 Mar 2020 19:49:04 +0100
+Subject: [PATCH 419/449] cluster/afr: fix race when bricks come up
+
+There was a problem when self-heal was sending lookups at the same time
+that one of the bricks was coming up. In this case there was a chance
+that the number of 'up' bricks changed in the middle of sending the
+requests to the subvolumes, which caused a discrepancy between the
+expected number of replies and the actual number of sent requests.
+
+This discrepancy caused AFR to continue executing requests before
+all requests were complete. Eventually, the frame of the pending
+request was destroyed when the operation terminated, causing a use-
+after-free issue when the answer was finally received.
+
+In theory the same thing could happen in the reverse way, i.e. AFR
+could wait for more replies than sent requests, causing a hang.
+
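+A standalone sketch of the fix's idea (simplified; the real code works
+on priv->child_up inside the AFR_ONLIST macro): snapshot the shared
+up-map first, so the reply count and the wind loop see the same view:
+
+    #include <string.h>
+
+    static void wind_to_child(int i) { (void)i; /* STACK_WIND here */ }
+
+    static int wind_on_up_children(const unsigned char *shared_up, int n,
+                                   unsigned char *snap)
+    {
+        int i, count = 0;
+
+        memcpy(snap, shared_up, n);  /* freeze the view once */
+        for (i = 0; i < n; i++)
+            count += snap[i];        /* expect exactly this many replies */
+        for (i = 0; i < n; i++)
+            if (snap[i])
+                wind_to_child(i);
+        return count;
+    }
+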
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/24191
+> Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+> Fixes: bz#1808875
+
+BUG: 1794663
+Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202489
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heal-common.c | 6 +++---
+ xlators/cluster/afr/src/afr-self-heal-name.c | 4 +++-
+ xlators/cluster/afr/src/afr-self-heal.h | 7 +++++--
+ 3 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index ce1ea50..d942ccf 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -1869,12 +1869,12 @@ int
+ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies)
+ {
+- afr_private_t *priv = NULL;
++ afr_local_t *local = NULL;
+
+- priv = frame->this->private;
++ local = frame->local;
+
+ return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
+- priv->child_up);
++ local->child_up);
+ }
+
+ unsigned int
+diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
+index 7d4f208..dace071 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-name.c
++++ b/xlators/cluster/afr/src/afr-self-heal-name.c
+@@ -560,13 +560,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+ int first_idx = -1;
++ afr_local_t *local = NULL;
+
+ priv = this->private;
++ local = frame->local;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+- priv->child_up, NULL);
++ local->child_up, NULL);
+ if (!inode)
+ return -ENOMEM;
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
+index 8234cec..f7ecf5d 100644
+--- a/xlators/cluster/afr/src/afr-self-heal.h
++++ b/xlators/cluster/afr/src/afr-self-heal.h
+@@ -46,13 +46,16 @@
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+- int __count = AFR_COUNT(list, __priv->child_count); \
++ int __count = 0; \
++ unsigned char *__list = alloca(__priv->child_count); \
+ \
++ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
++ __count = AFR_COUNT(__list, __priv->child_count); \
+ __local->barrier.waitfor = __count; \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+- if (!list[__i]) \
++ if (!__list[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+--
+1.8.3.1
+
diff --git a/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch b/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch
new file mode 100644
index 0000000..cb27b33
--- /dev/null
+++ b/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch
@@ -0,0 +1,46 @@
+From 42a05c7f8464f529f53bced31a64ea373e16f58b Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Thu, 24 Oct 2019 17:40:44 +0530
+Subject: [PATCH 420/449] scripts: quota_fsck script TypeError: %d format:not
+ dict
+
+Problem: One of the prints in the script has been using
+%i as the format specifier, which doesn't work when the
+argument is a dict.
+
+Fix: use %s as the format in place of %i.
+
+>Fixes: bz#1764129
+>Change-Id: I4480ede7bf62906ddedbe5f880a1e89c76946641
+>Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23586/
+
+BUG: 1786681
+Change-Id: I4480ede7bf62906ddedbe5f880a1e89c76946641
+Signed-off-by: hari gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202484
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
+---
+ extras/quota/quota_fsck.py | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py
+index 485a37a..174f2a2 100755
+--- a/extras/quota/quota_fsck.py
++++ b/extras/quota/quota_fsck.py
+@@ -58,10 +58,10 @@ def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
+ elif log_type == QUOTA_SIZE_MISMATCH:
+ print("mismatch")
+ if dir_size is not None:
+- print('%24s %60s %12s %12s' % ("Size Mismatch", path,
++ print('%24s %60s %12s %12s' % ("Size Mismatch", path,
+ xattr_dict, dir_size))
+ else:
+- print('%-24s %-60s %-12i %-12i' % ("Size Mismatch", path, xattr_dict,
++ print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict,
+ stbuf.st_size))
+
+ def size_differs_lot(s1, s2):
+--
+1.8.3.1
+
diff --git a/0421-Improve-logging-in-EC-client-and-lock-translator.patch b/0421-Improve-logging-in-EC-client-and-lock-translator.patch
new file mode 100644
index 0000000..06f0304
--- /dev/null
+++ b/0421-Improve-logging-in-EC-client-and-lock-translator.patch
@@ -0,0 +1,93 @@
+From 8267e5e97327633bf21fd02df8d52e3a97f0f9ea Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Wed, 4 Dec 2019 17:06:18 +0530
+Subject: [PATCH 421/449] Improve logging in EC, client and lock translator
+
+BUG: 1787294
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23814/
+> Change-Id: I98af8672a25ff9fd9dba91a2e1384719f9155255
+> Fixes: bz#1779760
+
+Change-Id: I5cb04993f12d6248f2349a0c5a9e2c0ceecaf528
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202533
+Tested-by: Ashish Pandey <aspandey@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-combine.c | 5 +++--
+ xlators/cluster/ec/src/ec-common.c | 2 +-
+ xlators/features/locks/src/inodelk.c | 12 ++++++++----
+ 3 files changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
+index c5af2ab..99e5534 100644
+--- a/xlators/cluster/ec/src/ec-combine.c
++++ b/xlators/cluster/ec/src/ec-combine.c
+@@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ "links: %u-%u, uid: %u-%u, gid: %u-%u, "
+ "rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64
+ ", "
+- "mode: %o-%o)",
++ "mode: %o-%o), %s",
+ dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
+ src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid,
+ src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev,
+ dst[i].ia_size, src[i].ia_size,
+ st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
+- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type));
++ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type),
++ ec_msg_str(fop));
+
+ return 0;
+ }
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 5cae37b..e580bfb 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -2240,7 +2240,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
+- "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id));
++ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop));
+ } else {
+ ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
+ }
+diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
+index df00ede..a9c42f1 100644
+--- a/xlators/features/locks/src/inodelk.c
++++ b/xlators/features/locks/src/inodelk.c
+@@ -502,22 +502,26 @@ static pl_inode_lock_t *
+ __inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
+ {
+ pl_inode_lock_t *conf = NULL;
++ inode_t *inode = NULL;
++
++ inode = lock->pl_inode->inode;
+
+ conf = find_matching_inodelk(lock, dom);
+ if (!conf) {
+ gf_log(this->name, GF_LOG_ERROR,
+ " Matching lock not found for unlock %llu-%llu, by %s "
+- "on %p",
++ "on %p for gfid:%s",
+ (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner),
+- lock->client);
++ lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
+ goto out;
+ }
+ __delete_inode_lock(conf);
+ gf_log(this->name, GF_LOG_DEBUG,
+- " Matching lock found for unlock %llu-%llu, by %s on %p",
++ " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s",
+ (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end,
+- lkowner_utoa(&lock->owner), lock->client);
++ lkowner_utoa(&lock->owner), lock->client,
++ inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
+
+ out:
+ return conf;
+--
+1.8.3.1
+
diff --git a/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch b/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch
new file mode 100644
index 0000000..400ba67
--- /dev/null
+++ b/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch
@@ -0,0 +1,236 @@
+From 8b11ac1575ef167af2a47a96f7b7ed0f32bb5897 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Fri, 5 Jun 2020 17:20:04 +0530
+Subject: [PATCH 422/449] cluster/afr: Prioritize ENOSPC over other errors
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/24477/
+
+Problem:
+In a replicate/arbiter volume, if file creations or writes fail on a
+quorum number of bricks, with the failure on one brick due to ENOSPC
+and the failure on another brick due to a different reason, the
+operation may fail with an error other than ENOSPC in some cases.
+
+Fix:
+Prioritize ENOSPC over other lower-priority errors and do not set
+op_errno in posix_gfid_set if op_ret is 0, to avoid receiving any
+error_no which can be misinterpreted by __afr_dir_write_finalize().
+
+Also remove the function afr_has_arbiter_fop_cbk_quorum(), which
+might consider a successful reply from a single brick as quorum
+success in some cases, whereas we always need the fop to be successful
+on a quorum number of bricks in arbiter configuration.
+
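+The resulting priority merge is essentially what afr_higher_errno()
+does after this patch; a standalone equivalent:
+
+    #include <errno.h>
+
+    /* ENODATA > ENOENT > ESTALE > ENOSPC > everything else */
+    static int higher_errno(int old_errno, int new_errno)
+    {
+        if (old_errno == ENODATA || new_errno == ENODATA)
+            return ENODATA;
+        if (old_errno == ENOENT || new_errno == ENOENT)
+            return ENOENT;
+        if (old_errno == ESTALE || new_errno == ESTALE)
+            return ESTALE;
+        if (old_errno == ENOSPC || new_errno == ENOSPC)
+            return ENOSPC;
+        return new_errno;
+    }
+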
+Change-Id: I4dd2bff17e6812bc7c8372130976e365e2407d88
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+BUG: 1837467
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202526
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bugs/replicate/issue-1254-prioritize-enospc.t | 80 ++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-common.c | 4 +-
+ xlators/cluster/afr/src/afr-transaction.c | 48 +------------
+ xlators/storage/posix/src/posix-helpers.c | 2 +-
+ 4 files changed, 86 insertions(+), 48 deletions(-)
+ create mode 100644 tests/bugs/replicate/issue-1254-prioritize-enospc.t
+
+diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
+new file mode 100644
+index 0000000..fab94b7
+--- /dev/null
++++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
+@@ -0,0 +1,80 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup
++
++function create_bricks {
++ TEST truncate -s 100M $B0/brick0
++ TEST truncate -s 100M $B0/brick1
++ TEST truncate -s 20M $B0/brick2
++ LO1=`SETUP_LOOP $B0/brick0`
++ TEST [ $? -eq 0 ]
++ TEST MKFS_LOOP $LO1
++ LO2=`SETUP_LOOP $B0/brick1`
++ TEST [ $? -eq 0 ]
++ TEST MKFS_LOOP $LO2
++ LO3=`SETUP_LOOP $B0/brick2`
++ TEST [ $? -eq 0 ]
++ TEST MKFS_LOOP $LO3
++ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
++ TEST MOUNT_LOOP $LO1 $B0/${V0}0
++ TEST MOUNT_LOOP $LO2 $B0/${V0}1
++ TEST MOUNT_LOOP $LO3 $B0/${V0}2
++}
++
++function create_files {
++ local i=1
++ while (true)
++ do
++ touch $M0/file$i
++ if [ -e $B0/${V0}2/file$i ];
++ then
++ ((i++))
++ else
++ break
++ fi
++ done
++}
++
++TESTS_EXPECTED_IN_LOOP=13
++
++#Arbiter volume: Check for ENOSPC when arbiter brick becomes full#
++TEST glusterd
++create_bricks
++TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume start $V0
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
++
++create_files
++TEST kill_brick $V0 $H0 $B0/${V0}1
++error1=$(touch $M0/file-1 2>&1)
++EXPECT "No space left on device" echo $error1
++error2=$(mkdir $M0/dir-1 2>&1)
++EXPECT "No space left on device" echo $error2
++error3=$((echo "Test" > $M0/file-3) 2>&1)
++EXPECT "No space left on device" echo $error3
++
++cleanup
++
++#Replica-3 volume: Check for ENOSPC when one of the brick becomes full#
++#Keeping the third brick of lower size to simulate disk full scenario#
++TEST glusterd
++create_bricks
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume start $V0
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
++
++create_files
++TEST kill_brick $V0 $H0 $B0/${V0}1
++error1=$(touch $M0/file-1 2>&1)
++EXPECT "No space left on device" echo $error1
++error2=$(mkdir $M0/dir-1 2>&1)
++EXPECT "No space left on device" echo $error2
++error3=$((cat /dev/zero > $M0/file1) 2>&1)
++EXPECT "No space left on device" echo $error3
++
++cleanup
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 5806556..59710aa 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -2464,7 +2464,7 @@ error:
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+- * The hierarchy is ENODATA > ENOENT > ESTALE > others
++ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC > others
+ */
+
+ int
+@@ -2476,6 +2476,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno)
+ return ENOENT;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
++ if (old_errno == ENOSPC || new_errno == ENOSPC)
++ return ENOSPC;
+
+ return new_errno;
+ }
+diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
+index 15f3a7e..8e65ae2 100644
+--- a/xlators/cluster/afr/src/afr-transaction.c
++++ b/xlators/cluster/afr/src/afr-transaction.c
+@@ -514,42 +514,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
+ local->transaction.pre_op_sources[j] = 0;
+ }
+
+-gf_boolean_t
+-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame)
+-{
+- afr_local_t *local = NULL;
+- afr_private_t *priv = NULL;
+- xlator_t *this = NULL;
+- gf_boolean_t fop_failed = _gf_false;
+- unsigned char *pre_op_sources = NULL;
+- int i = 0;
+-
+- local = frame->local;
+- this = frame->this;
+- priv = this->private;
+- pre_op_sources = local->transaction.pre_op_sources;
+-
+- /* If the fop failed on the brick, it is not a source. */
+- for (i = 0; i < priv->child_count; i++)
+- if (local->transaction.failed_subvols[i])
+- pre_op_sources[i] = 0;
+-
+- switch (AFR_COUNT(pre_op_sources, priv->child_count)) {
+- case 1:
+- if (pre_op_sources[ARBITER_BRICK_INDEX])
+- fop_failed = _gf_true;
+- break;
+- case 0:
+- fop_failed = _gf_true;
+- break;
+- }
+-
+- if (fop_failed)
+- return _gf_false;
+-
+- return _gf_true;
+-}
+-
+ void
+ afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
+ {
+@@ -968,12 +932,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
+ priv->child_count)
+ return _gf_false;
+
+- if (priv->arbiter_count) {
+- if (!afr_has_arbiter_fop_cbk_quorum(frame))
+- need_dirty = _gf_true;
+- } else if (!afr_has_fop_cbk_quorum(frame)) {
++ if (!afr_has_fop_cbk_quorum(frame))
+ need_dirty = _gf_true;
+- }
+
+ return need_dirty;
+ }
+@@ -1023,12 +983,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this)
+ * no split-brain with the fix. The problem is eliminated completely.
+ */
+
+- if (priv->arbiter_count) {
+- if (afr_has_arbiter_fop_cbk_quorum(frame))
+- return;
+- } else if (afr_has_fop_cbk_quorum(frame)) {
++ if (afr_has_fop_cbk_quorum(frame))
+ return;
+- }
+
+ if (afr_need_dirty_marking(frame, this))
+ goto set_response;
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 2c27d22..949c799 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1059,7 +1059,7 @@ verify_handle:
+ ret = posix_handle_soft(this, path, loc, uuid_curr, &stat);
+
+ out:
+- if (!(*op_errno))
++ if (ret && !(*op_errno))
+ *op_errno = errno;
+ return ret;
+ }
+--
+1.8.3.1
+
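The posix-helpers hunk above narrows when *op_errno is filled from errno: only on failure. A hedged micro-example of why that guard matters (do_stat is a hypothetical stand-in for the real work):

    #include <errno.h>

    extern int do_stat(const char *path);   /* hypothetical worker */

    int wrapper(const char *path, int *op_errno)
    {
        int ret = do_stat(path);

        /* errno is only meaningful after a failing call; a success path
         * must not export whatever stale value errno happens to hold. */
        if (ret && !(*op_errno))
            *op_errno = errno;
        return ret;
    }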
diff --git a/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch b/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch
new file mode 100644
index 0000000..6a547ea
--- /dev/null
+++ b/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch
@@ -0,0 +1,128 @@
+From c140d30382306d08eaf2bc5c53e5be26d3e381e1 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Mon, 18 Nov 2019 05:24:33 -0500
+Subject: [PATCH 423/449] ctime: Fix ctime inconsistency with utimensat
+
+Problem:
+When touch is used to create a file, the ctime does not match the
+atime and mtime, which ideally should all match. There is a difference
+in the nanoseconds.
+
+Cause:
+When touch is used to modify atime or mtime to the current time (UTIME_NOW),
+the current time is taken from the kernel. The ctime also gets updated, but
+the current time used for it is taken from the utime xlator instead. Hence
+the difference in nanoseconds.
+
+Fix:
+When utimensat uses UTIME_NOW, take the current time from the kernel for ctime too.
+
+>fixes: bz#1773530
+>Change-Id: I9ccfa47dcd39df23396852b4216f1773c49250ce
+>Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/23719/
+BUG: 1761932
+Change-Id: I9ccfa47dcd39df23396852b4216f1773c49250ce
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202541
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/xlator.h | 2 ++
+ tests/basic/ctime/ctime-utimesat.t | 28 ++++++++++++++++++++++++++
+ xlators/features/utime/src/utime-gen-fops-c.py | 10 +++++++++
+ xlators/mount/fuse/src/fuse-bridge.c | 8 ++++++++
+ 4 files changed, 48 insertions(+)
+ create mode 100644 tests/basic/ctime/ctime-utimesat.t
+
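A hedged client-side reproducer for the scenario the message above describes (touch_now is hypothetical; touch(1) issues an equivalent call). With tv_nsec set to UTIME_NOW the kernel supplies the timestamp, and this patch makes ctime use that same value:

    #include <fcntl.h>
    #include <sys/stat.h>

    int touch_now(const char *path)
    {
        struct timespec ts[2] = {
            { .tv_nsec = UTIME_NOW },   /* atime := kernel's current time */
            { .tv_nsec = UTIME_NOW },   /* mtime := kernel's current time */
        };

        return utimensat(AT_FDCWD, path, ts, 0);
    }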
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index da551e9..db04c4d 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -35,6 +35,8 @@
+ #define GF_SET_ATTR_ATIME 0x10
+ #define GF_SET_ATTR_MTIME 0x20
+ #define GF_SET_ATTR_CTIME 0x40
++#define GF_ATTR_ATIME_NOW 0x80
++#define GF_ATTR_MTIME_NOW 0x100
+
+ #define gf_attr_mode_set(mode) ((mode)&GF_SET_ATTR_MODE)
+ #define gf_attr_uid_set(mode) ((mode)&GF_SET_ATTR_UID)
+diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t
+new file mode 100644
+index 0000000..540e57a
+--- /dev/null
++++ b/tests/basic/ctime/ctime-utimesat.t
+@@ -0,0 +1,28 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.read-after-open off
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.io-cache off
++
++TEST $CLI volume start $V0
++
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
++
++touch $M0/FILE
++
++atime=$(stat -c "%.X" $M0/FILE)
++EXPECT $atime stat -c "%.Y" $M0/FILE
++EXPECT $atime stat -c "%.Z" $M0/FILE
++
++cleanup
+diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py
+index 8730a51..9fb3e1b 100755
+--- a/xlators/features/utime/src/utime-gen-fops-c.py
++++ b/xlators/features/utime/src/utime-gen-fops-c.py
+@@ -95,6 +95,16 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ frame->root->flags |= MDATA_CTIME;
+ }
+
++ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
++ if (valid & GF_ATTR_ATIME_NOW) {
++ frame->root->ctime.tv_sec = stbuf->ia_atime;
++ frame->root->ctime.tv_nsec = stbuf->ia_atime_nsec;
++ } else if (valid & GF_ATTR_MTIME_NOW) {
++ frame->root->ctime.tv_sec = stbuf->ia_mtime;
++ frame->root->ctime.tv_nsec = stbuf->ia_mtime_nsec;
++ }
++ }
++
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 6e99053..fdeec49 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -1706,6 +1706,14 @@ fattr_to_gf_set_attr(int32_t valid)
+ gf_valid |= GF_SET_ATTR_CTIME;
+ #endif
+
++#if FUSE_KERNEL_MINOR_VERSION >= 9
++ if (valid & FATTR_ATIME_NOW)
++ gf_valid |= GF_ATTR_ATIME_NOW;
++
++ if (valid & FATTR_MTIME_NOW)
++ gf_valid |= GF_ATTR_MTIME_NOW;
++#endif
++
+ if (valid & FATTR_SIZE)
+ gf_valid |= GF_SET_ATTR_SIZE;
+
+--
+1.8.3.1
+
diff --git a/0424-afr-make-heal-info-lockless.patch b/0424-afr-make-heal-info-lockless.patch
new file mode 100644
index 0000000..593fa34
--- /dev/null
+++ b/0424-afr-make-heal-info-lockless.patch
@@ -0,0 +1,884 @@
+From 54d4ea44fec96560aad9c41f7e4f5aad164ffb8b Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Fri, 5 Jun 2020 14:14:15 +0530
+Subject: [PATCH 424/449] afr: make heal info lockless
+
+Changes in locks xlator:
+Added support for per-domain inodelk count requests.
+Caller needs to set GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS key in the
+dict and then set each key with name
+'GLUSTERFS_INODELK_DOM_PREFIX:<domain name>'.
+In the response dict, the xlator will send the per domain count as
+values for each of these keys.
+
+Changes in AFR:
+Replaced afr_selfheal_locked_inspect() with afr_lockless_inspect(). Logic has
+been added to make the latter behave the same as the former, thus not
+breaking the current heal info output behaviour.
+
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23771/
+> fixes: bz#1774011
+> Change-Id: Ie9e83c162aa77f44a39c2ba7115de558120ada4d
+
+BUG: 1721355
+Change-Id: I8ed4b504880b19e00068312efd90cd0706787404
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202490
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ heal/src/glfs-heal.c | 17 +-
+ libglusterfs/src/glusterfs/glusterfs.h | 2 +
+ xlators/cluster/afr/src/afr-common.c | 367 +++++++++++--------------
+ xlators/cluster/afr/src/afr-self-heal-common.c | 43 ++-
+ xlators/cluster/afr/src/afr-self-heal.h | 3 +-
+ xlators/features/locks/src/common.h | 4 +
+ xlators/features/locks/src/locks.h | 8 +
+ xlators/features/locks/src/posix.c | 117 +++++++-
+ 8 files changed, 338 insertions(+), 223 deletions(-)
+
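A sketch of the request dict a caller builds for the per-domain counts described above, assuming the tree's dict API (the patch's own helper is afr_set_multi_dom_lock_count_request; "afr-dom" is a placeholder domain name):

    dict_t *xattr_req = dict_new();
    char key[128];

    /* Ask the locks xlator for per-domain inodelk counts... */
    dict_set_uint32(xattr_req, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1);
    /* ...and name each domain of interest with the prefixed key. */
    snprintf(key, sizeof(key), "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, "afr-dom");
    dict_set_uint32(xattr_req, key, 1);
    /* In the response dict, the same key carries that domain's count. */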
+diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
+index 125b12c..5af9e31 100644
+--- a/heal/src/glfs-heal.c
++++ b/heal/src/glfs-heal.c
+@@ -775,7 +775,8 @@ static int
+ glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
+ uint64_t *offset, num_entries_t *num_entries,
+ print_status glfsh_print_status,
+- gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode)
++ gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode,
++ dict_t *xattr_req)
+ {
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+@@ -807,7 +808,7 @@ glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
+
+ gf_uuid_parse(entry->d_name, gfid);
+ gf_uuid_copy(loc.gfid, gfid);
+- ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, NULL, NULL);
++ ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, xattr_req, NULL);
+ if (ret) {
+ if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && (ret == -ENOTCONN))
+ goto out;
+@@ -876,19 +877,19 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_heal_status,
+- ignore, mode);
++ ignore, mode, xattr_req);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_spb_status,
+- ignore, mode);
++ ignore, mode, xattr_req);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_summary_status,
+- ignore, mode);
++ ignore, mode, xattr_req);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
+@@ -897,7 +898,7 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_heal_status_boolean,
+- ignore, mode);
++ ignore, mode, xattr_req);
+ if (ret < 0)
+ goto out;
+ }
+@@ -951,6 +952,10 @@ glfsh_print_pending_heals_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ int32_t op_errno = 0;
+ gf_boolean_t ignore = _gf_false;
+
++ ret = dict_set_str(xattr_req, "index-vgfid", vgfid);
++ if (ret)
++ return ret;
++
+ if (!strcmp(vgfid, GF_XATTROP_DIRTY_GFID))
+ ignore = _gf_true;
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 3b594c0..177a020 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -217,6 +217,8 @@ enum gf_internal_fop_indicator {
+ #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
+ #define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
+ #define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count"
++#define GLUSTERFS_INODELK_DOM_PREFIX "glusterfs.inodelk-dom-prefix"
++#define GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS "glusterfs.multi-dom-lk-cnt-req"
+ #define GFID_TO_PATH_KEY "glusterfs.gfid2path"
+ #define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime"
+ #define GF_XATTR_XTIME_PATTERN "trusted.glusterfs.*.xtime"
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 59710aa..c355ec5 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -5908,259 +5908,218 @@ out:
+ return _gf_true;
+ }
+
+-int
+-afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this,
+- inode_t *inode, gf_boolean_t *msh,
+- unsigned char *pending)
++static dict_t *
++afr_set_heal_info(char *status)
+ {
++ dict_t *dict = NULL;
+ int ret = -1;
+- unsigned char *locked_on = NULL;
+- unsigned char *sources = NULL;
+- unsigned char *sinks = NULL;
+- unsigned char *healed_sinks = NULL;
+- unsigned char *undid_pending = NULL;
+- struct afr_reply *locked_replies = NULL;
+-
+- afr_private_t *priv = this->private;
+
+- locked_on = alloca0(priv->child_count);
+- sources = alloca0(priv->child_count);
+- sinks = alloca0(priv->child_count);
+- healed_sinks = alloca0(priv->child_count);
+- undid_pending = alloca0(priv->child_count);
++ dict = dict_new();
++ if (!dict) {
++ ret = -ENOMEM;
++ goto out;
++ }
+
+- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
++ ret = dict_set_dynstr_sizen(dict, "heal-info", status);
++ if (ret)
++ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
++ "Failed to set heal-info key to "
++ "%s",
++ status);
++out:
++ /* Any error other than EINVAL, dict_set_dynstr frees status */
++ if (ret == -ENOMEM || ret == -EINVAL) {
++ GF_FREE(status);
++ }
+
+- ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+- locked_on);
+- {
+- if (ret == 0) {
+- /* Not a single lock */
+- ret = -afr_final_errno(frame->local, priv);
+- if (ret == 0)
+- ret = -ENOTCONN; /* all invalid responses */
+- goto out;
+- }
+- ret = __afr_selfheal_metadata_prepare(
+- frame, this, inode, locked_on, sources, sinks, healed_sinks,
+- undid_pending, locked_replies, pending);
+- *msh = afr_decide_heal_info(priv, sources, ret);
++ if (ret && dict) {
++ dict_unref(dict);
++ dict = NULL;
+ }
+- afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+- locked_on);
+-out:
+- if (locked_replies)
+- afr_replies_wipe(locked_replies, priv->child_count);
+- return ret;
++ return dict;
+ }
+
+-int
+-afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- gf_boolean_t *dsh, unsigned char *pflag)
++static gf_boolean_t
++afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies,
++ afr_transaction_type type)
+ {
+- int ret = -1;
+- unsigned char *data_lock = NULL;
+- unsigned char *sources = NULL;
+- unsigned char *sinks = NULL;
+- unsigned char *healed_sinks = NULL;
+- unsigned char *undid_pending = NULL;
+- afr_private_t *priv = NULL;
+- struct afr_reply *locked_replies = NULL;
+- inode_t *inode = fd->inode;
++ afr_private_t *priv = this->private;
++ int *dirty = alloca0(priv->child_count * sizeof(int));
++ int i = 0;
+
+- priv = this->private;
+- data_lock = alloca0(priv->child_count);
+- sources = alloca0(priv->child_count);
+- sinks = alloca0(priv->child_count);
+- healed_sinks = alloca0(priv->child_count);
+- undid_pending = alloca0(priv->child_count);
++ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL);
++ for (i = 0; i < priv->child_count; i++) {
++ if (dirty[i] > 1)
++ return _gf_true;
++ }
+
+- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
++ return _gf_false;
++}
+
+- ret = afr_selfheal_inodelk(frame, this, inode, this->name, 0, 0, data_lock);
+- {
+- if (ret == 0) {
+- ret = -afr_final_errno(frame->local, priv);
+- if (ret == 0)
+- ret = -ENOTCONN; /* all invalid responses */
+- goto out;
+- }
+- ret = __afr_selfheal_data_prepare(frame, this, inode, data_lock,
+- sources, sinks, healed_sinks,
+- undid_pending, locked_replies, pflag);
+- *dsh = afr_decide_heal_info(priv, sources, ret);
++static gf_boolean_t
++afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
++ ia_type_t ia_type)
++{
++ gf_boolean_t data_chk = _gf_false;
++ gf_boolean_t mdata_chk = _gf_false;
++ gf_boolean_t entry_chk = _gf_false;
++
++ switch (ia_type) {
++ case IA_IFDIR:
++ mdata_chk = _gf_true;
++ entry_chk = _gf_true;
++ break;
++ case IA_IFREG:
++ mdata_chk = _gf_true;
++ data_chk = _gf_true;
++ break;
++ default:
++ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/
++ mdata_chk = _gf_true;
++ break;
+ }
+- afr_selfheal_uninodelk(frame, this, inode, this->name, 0, 0, data_lock);
+-out:
+- if (locked_replies)
+- afr_replies_wipe(locked_replies, priv->child_count);
+- return ret;
++
++ if (data_chk && afr_is_dirty_count_non_unary_for_txn(
++ this, replies, AFR_DATA_TRANSACTION)) {
++ return _gf_true;
++ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn(
++ this, replies, AFR_METADATA_TRANSACTION)) {
++ return _gf_true;
++ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn(
++ this, replies, AFR_ENTRY_TRANSACTION)) {
++ return _gf_true;
++ }
++
++ return _gf_false;
+ }
+
+-int
+-afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this,
+- inode_t *inode, gf_boolean_t *esh,
+- unsigned char *pflag)
++static int
++afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
++ char *index_vgfid, ia_type_t ia_type, gf_boolean_t *esh,
++ gf_boolean_t *dsh, gf_boolean_t *msh)
+ {
+ int ret = -1;
+- int source = -1;
++ GF_UNUSED int ret1 = 0;
++ int i = 0;
++ int io_domain_lk_count = 0;
++ int shd_domain_lk_count = 0;
+ afr_private_t *priv = NULL;
+- unsigned char *locked_on = NULL;
+- unsigned char *data_lock = NULL;
+- unsigned char *sources = NULL;
+- unsigned char *sinks = NULL;
+- unsigned char *healed_sinks = NULL;
+- struct afr_reply *locked_replies = NULL;
+- gf_boolean_t granular_locks = _gf_false;
++ char *key1 = NULL;
++ char *key2 = NULL;
+
+ priv = this->private;
+- granular_locks = priv->granular_locks; /*Assign to local variable so that
+- reconfigure doesn't change this
+- value between locking and unlocking
+- below*/
+- locked_on = alloca0(priv->child_count);
+- data_lock = alloca0(priv->child_count);
+- sources = alloca0(priv->child_count);
+- sinks = alloca0(priv->child_count);
+- healed_sinks = alloca0(priv->child_count);
+-
+- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
++ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
++ strlen(this->name));
++ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
++ strlen(priv->sh_domain));
++ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
++ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+
+- if (!granular_locks) {
+- ret = afr_selfheal_tryentrylk(frame, this, inode, priv->sh_domain, NULL,
+- locked_on);
+- }
+- {
+- if (!granular_locks && ret == 0) {
+- ret = -afr_final_errno(frame->local, priv);
+- if (ret == 0)
+- ret = -ENOTCONN; /* all invalid responses */
+- goto out;
++ for (i = 0; i < priv->child_count; i++) {
++ if ((replies[i].valid != 1) || (replies[i].op_ret != 0))
++ continue;
++ if (!io_domain_lk_count) {
++ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count);
+ }
++ if (!shd_domain_lk_count) {
++ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count);
++ }
++ }
+
+- ret = afr_selfheal_entrylk(frame, this, inode, this->name, NULL,
+- data_lock);
+- {
+- if (ret == 0) {
+- ret = -afr_final_errno(frame->local, priv);
+- if (ret == 0)
+- ret = -ENOTCONN;
+- /* all invalid responses */
+- goto unlock;
+- }
+- ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock,
+- sources, sinks, healed_sinks,
+- locked_replies, &source, pflag);
+- if ((ret == 0) && (*pflag & PFLAG_SBRAIN))
+- ret = -EIO;
+- *esh = afr_decide_heal_info(priv, sources, ret);
++ if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) {
++ if (shd_domain_lk_count) {
++ ret = -EAGAIN; /*For 'possibly-healing'. */
++ } else {
++ ret = 0; /*needs heal. Just set a non -ve value so that it is
++ assumed as the source index.*/
++ }
++ } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) {
++ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
++ (!io_domain_lk_count)) {
++ /* Needs heal. */
++ ret = 0;
++ } else {
++ /* No heal needed. */
++ *dsh = *esh = *msh = 0;
+ }
+- afr_selfheal_unentrylk(frame, this, inode, this->name, NULL, data_lock,
+- NULL);
+ }
+-unlock:
+- if (!granular_locks)
+- afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL,
+- locked_on, NULL);
+-out:
+- if (locked_replies)
+- afr_replies_wipe(locked_replies, priv->child_count);
+ return ret;
+ }
+
++/*return EIO, EAGAIN or pending*/
+ int
+-afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+- inode_t **inode, gf_boolean_t *entry_selfheal,
+- gf_boolean_t *data_selfheal,
+- gf_boolean_t *metadata_selfheal,
+- unsigned char *pending)
+-
++afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
++ inode_t **inode, char *index_vgfid,
++ gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal,
++ gf_boolean_t *metadata_selfheal, unsigned char *pending)
+ {
+ int ret = -1;
+- fd_t *fd = NULL;
++ int i = 0;
++ afr_private_t *priv = NULL;
++ struct afr_reply *replies = NULL;
+ gf_boolean_t dsh = _gf_false;
+ gf_boolean_t msh = _gf_false;
+ gf_boolean_t esh = _gf_false;
++ unsigned char *sources = NULL;
++ unsigned char *sinks = NULL;
++ unsigned char *valid_on = NULL;
++ uint64_t *witness = NULL;
++
++ priv = this->private;
++ replies = alloca0(sizeof(*replies) * priv->child_count);
++ sources = alloca0(sizeof(*sources) * priv->child_count);
++ sinks = alloca0(sizeof(*sinks) * priv->child_count);
++ witness = alloca0(sizeof(*witness) * priv->child_count);
++ valid_on = alloca0(sizeof(*valid_on) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh,
+- &esh);
++ &esh, replies);
+ if (ret)
+ goto out;
+-
+- /* For every heal type hold locks and check if it indeed needs heal */
+-
+- /* Heal-info does an open() on the file being examined so that the
+- * current eager-lock holding client, if present, at some point sees
+- * open-fd count being > 1 and releases the eager-lock so that heal-info
+- * doesn't remain blocked forever until IO completes.
+- */
+- if ((*inode)->ia_type == IA_IFREG) {
+- ret = afr_selfheal_data_open(this, *inode, &fd);
+- if (ret < 0) {
+- gf_msg_debug(this->name, -ret, "%s: Failed to open",
+- uuid_utoa((*inode)->gfid));
+- goto out;
++ for (i = 0; i < priv->child_count; i++) {
++ if (replies[i].valid && replies[i].op_ret == 0) {
++ valid_on[i] = 1;
+ }
+ }
+-
+ if (msh) {
+- ret = afr_selfheal_locked_metadata_inspect(frame, this, *inode, &msh,
+- pending);
+- if (ret == -EIO)
++ ret = afr_selfheal_find_direction(frame, this, replies,
++ AFR_METADATA_TRANSACTION, valid_on,
++ sources, sinks, witness, pending);
++ if (*pending & PFLAG_SBRAIN)
++ ret = -EIO;
++ if (ret)
+ goto out;
+ }
+-
+ if (dsh) {
+- ret = afr_selfheal_locked_data_inspect(frame, this, fd, &dsh, pending);
+- if (ret == -EIO || (ret == -EAGAIN))
++ ret = afr_selfheal_find_direction(frame, this, replies,
++ AFR_DATA_TRANSACTION, valid_on,
++ sources, sinks, witness, pending);
++ if (*pending & PFLAG_SBRAIN)
++ ret = -EIO;
++ if (ret)
+ goto out;
+ }
+-
+ if (esh) {
+- ret = afr_selfheal_locked_entry_inspect(frame, this, *inode, &esh,
+- pending);
++ ret = afr_selfheal_find_direction(frame, this, replies,
++ AFR_ENTRY_TRANSACTION, valid_on,
++ sources, sinks, witness, pending);
++ if (*pending & PFLAG_SBRAIN)
++ ret = -EIO;
++ if (ret)
++ goto out;
+ }
+
++ ret = afr_update_heal_status(this, replies, index_vgfid, (*inode)->ia_type,
++ &esh, &dsh, &msh);
+ out:
+ *data_selfheal = dsh;
+ *entry_selfheal = esh;
+ *metadata_selfheal = msh;
+- if (fd)
+- fd_unref(fd);
++ if (replies)
++ afr_replies_wipe(replies, priv->child_count);
+ return ret;
+ }
+
+-static dict_t *
+-afr_set_heal_info(char *status)
+-{
+- dict_t *dict = NULL;
+- int ret = -1;
+-
+- dict = dict_new();
+- if (!dict) {
+- ret = -ENOMEM;
+- goto out;
+- }
+-
+- ret = dict_set_dynstr_sizen(dict, "heal-info", status);
+- if (ret)
+- gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+- "Failed to set heal-info key to "
+- "%s",
+- status);
+-out:
+- /* Any error other than EINVAL, dict_set_dynstr frees status */
+- if (ret == -ENOMEM || ret == -EINVAL) {
+- GF_FREE(status);
+- }
+-
+- if (ret && dict) {
+- dict_unref(dict);
+- dict = NULL;
+- }
+- return dict;
+-}
+-
+ int
+ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
+ {
+@@ -6174,10 +6133,18 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
+ inode_t *inode = NULL;
+ char *substr = NULL;
+ char *status = NULL;
++ afr_local_t *local = NULL;
++ char *index_vgfid = NULL;
++
++ local = frame->local;
++ if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) {
++ ret = -1;
++ goto out;
++ }
+
+- ret = afr_selfheal_locked_inspect(frame, this, loc->gfid, &inode,
+- &entry_selfheal, &data_selfheal,
+- &metadata_selfheal, &pending);
++ ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, index_vgfid,
++ &entry_selfheal, &data_selfheal,
++ &metadata_selfheal, &pending);
+
+ if (ret == -ENOMEM) {
+ ret = -1;
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index d942ccf..1608f75 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -1827,6 +1827,37 @@ afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
+ return inode;
+ }
+
++static int
++afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict)
++{
++ int ret = 0;
++ afr_private_t *priv = NULL;
++ char *key1 = NULL;
++ char *key2 = NULL;
++
++ priv = this->private;
++ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
++ strlen(this->name));
++ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
++ strlen(priv->sh_domain));
++
++ ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1);
++ if (ret)
++ return ret;
++
++ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
++ ret = dict_set_uint32(dict, key1, 1);
++ if (ret)
++ return ret;
++
++ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
++ ret = dict_set_uint32(dict, key2, 1);
++ if (ret)
++ return ret;
++
++ return 0;
++}
++
+ int
+ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+@@ -1851,6 +1882,11 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ return -ENOMEM;
+ }
+
++ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) {
++ dict_unref(xattr_req);
++ return -1;
++ }
++
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, gfid);
+
+@@ -2241,7 +2277,8 @@ int
+ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **link_inode, gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+- gf_boolean_t *entry_selfheal)
++ gf_boolean_t *entry_selfheal,
++ struct afr_reply *replies_dst)
+ {
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+@@ -2377,6 +2414,8 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+
+ ret = 0;
+ out:
++ if (replies && replies_dst)
++ afr_replies_copy(replies_dst, replies, priv->child_count);
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+@@ -2493,7 +2532,7 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode,
+ &data_selfheal, &metadata_selfheal,
+- &entry_selfheal);
++ &entry_selfheal, NULL);
+ if (ret)
+ goto out;
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
+index f7ecf5d..b39af02 100644
+--- a/xlators/cluster/afr/src/afr-self-heal.h
++++ b/xlators/cluster/afr/src/afr-self-heal.h
+@@ -327,7 +327,8 @@ int
+ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **link_inode, gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+- gf_boolean_t *entry_selfheal);
++ gf_boolean_t *entry_selfheal,
++ struct afr_reply *replies);
+
+ int
+ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid);
+diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
+index 3a74967..ea86b96 100644
+--- a/xlators/features/locks/src/common.h
++++ b/xlators/features/locks/src/common.h
+@@ -45,6 +45,10 @@
+ fd_unref(__local->fd); \
+ if (__local->inode) \
+ inode_unref(__local->inode); \
++ if (__local->xdata) { \
++ dict_unref(__local->xdata); \
++ __local->xdata = NULL; \
++ } \
+ mem_put(__local); \
+ } \
+ } while (0)
+diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
+index b817960..aa267de 100644
+--- a/xlators/features/locks/src/locks.h
++++ b/xlators/features/locks/src/locks.h
+@@ -239,6 +239,7 @@ typedef struct {
+ gf_boolean_t inodelk_count_req;
+ gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
++ gf_boolean_t multiple_dom_lk_requests;
+ int update_mlock_enforced_flag;
+ } pl_local_t;
+
+@@ -260,6 +261,13 @@ typedef struct _locks_ctx {
+ struct list_head metalk_list;
+ } pl_ctx_t;
+
++typedef struct _multi_dom_lk_data {
++ xlator_t *this;
++ inode_t *inode;
++ dict_t *xdata_rsp;
++ gf_boolean_t keep_max;
++} multi_dom_lk_data;
++
+ typedef enum { DECREMENT, INCREMENT } pl_count_op_t;
+
+ pl_ctx_t *
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 4592240..9a14c64 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -150,13 +150,20 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
+ gf_boolean_t
+ pl_has_xdata_requests(dict_t *xdata)
+ {
+- static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT,
+- GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT,
+- GLUSTERFS_PARENT_ENTRYLK, NULL};
+- static int reqs_size[] = {
+- SLEN(GLUSTERFS_ENTRYLK_COUNT), SLEN(GLUSTERFS_INODELK_COUNT),
+- SLEN(GLUSTERFS_INODELK_DOM_COUNT), SLEN(GLUSTERFS_POSIXLK_COUNT),
+- SLEN(GLUSTERFS_PARENT_ENTRYLK), 0};
++ static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT,
++ GLUSTERFS_INODELK_COUNT,
++ GLUSTERFS_INODELK_DOM_COUNT,
++ GLUSTERFS_POSIXLK_COUNT,
++ GLUSTERFS_PARENT_ENTRYLK,
++ GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS,
++ NULL};
++ static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT),
++ SLEN(GLUSTERFS_INODELK_COUNT),
++ SLEN(GLUSTERFS_INODELK_DOM_COUNT),
++ SLEN(GLUSTERFS_POSIXLK_COUNT),
++ SLEN(GLUSTERFS_PARENT_ENTRYLK),
++ SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS),
++ 0};
+ int i = 0;
+
+ if (!xdata)
+@@ -169,12 +176,22 @@ pl_has_xdata_requests(dict_t *xdata)
+ return _gf_false;
+ }
+
++static int
++dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data)
++{
++ dict_del(dict, key);
++ return 0;
++}
++
+ void
+ pl_get_xdata_requests(pl_local_t *local, dict_t *xdata)
+ {
+ if (!local || !xdata)
+ return;
+
++ GF_ASSERT(local->xdata == NULL);
++ local->xdata = dict_copy_with_ref(xdata, NULL);
++
+ if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) {
+ local->entrylk_count_req = 1;
+ dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT);
+@@ -183,6 +200,12 @@ pl_get_xdata_requests(pl_local_t *local, dict_t *xdata)
+ local->inodelk_count_req = 1;
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT);
+ }
++ if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) {
++ local->multiple_dom_lk_requests = 1;
++ dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS);
++ dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
++ dict_delete_domain_key, NULL);
++ }
+
+ local->inodelk_dom_count_req = dict_get_sizen(xdata,
+ GLUSTERFS_INODELK_DOM_COUNT);
+@@ -210,7 +233,7 @@ pl_needs_xdata_response(pl_local_t *local)
+
+ if (local->parent_entrylk_req || local->entrylk_count_req ||
+ local->inodelk_dom_count_req || local->inodelk_count_req ||
+- local->posixlk_count_req)
++ local->posixlk_count_req || local->multiple_dom_lk_requests)
+ return _gf_true;
+
+ return _gf_false;
+@@ -411,6 +434,75 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
+ }
+
+ void
++pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict,
++ char *domname, gf_boolean_t keep_max, char *key)
++{
++ int32_t count = 0;
++ int32_t maxcount = -1;
++ int ret = -1;
++
++ if (keep_max) {
++ ret = dict_get_int32(dict, key, &maxcount);
++ if (ret < 0)
++ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
++ GLUSTERFS_INODELK_COUNT);
++ }
++ count = get_inodelk_count(this, inode, domname);
++ if (maxcount >= count)
++ return;
++
++ ret = dict_set_int32(dict, key, count);
++ if (ret < 0) {
++ gf_msg_debug(this->name, 0,
++ "Failed to set count for "
++ "key %s",
++ key);
++ }
++
++ return;
++}
++
++static int
++pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
++ void *data)
++{
++ multi_dom_lk_data *d = data;
++ char *tmp_key = NULL;
++ char *save_ptr = NULL;
++
++ tmp_key = gf_strdup(key);
++ strtok_r(tmp_key, ":", &save_ptr);
++ if (!*save_ptr) {
++ gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL,
++ "Could not tokenize domain string from key %s", key);
++ return -1;
++ }
++
++ pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr,
++ d->keep_max, key);
++ if (tmp_key)
++ GF_FREE(tmp_key);
++
++ return 0;
++}
++
++void
++pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local,
++ inode_t *inode, dict_t *dict,
++ gf_boolean_t keep_max)
++{
++ multi_dom_lk_data data;
++
++ data.this = this;
++ data.inode = inode;
++ data.xdata_rsp = dict;
++ data.keep_max = keep_max;
++
++ dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
++ pl_inodelk_xattr_fill_multiple, &data);
++}
++
++void
+ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
+ inode_t *inode, char *name, dict_t *xdata,
+ gf_boolean_t max_lock)
+@@ -437,6 +529,9 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
+
+ if (local->posixlk_count_req)
+ pl_posixlk_xattr_fill(this, inode, xdata, max_lock);
++
++ if (local->multiple_dom_lk_requests)
++ pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock);
+ }
+
+ /* Checks whether the region where fop is acting upon conflicts
+@@ -773,9 +868,6 @@ pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ {
+ pl_local_t *local = frame->local;
+
+- if (local->xdata)
+- dict_unref(local->xdata);
+-
+ pl_track_io_fop_count(local, this, DECREMENT);
+
+ if (local->op == GF_FOP_TRUNCATE)
+@@ -932,9 +1024,6 @@ unwind:
+ "ret: %d, error: %s",
+ op_ret, strerror(op_errno));
+
+- if (local->xdata)
+- dict_unref(local->xdata);
+-
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf,
+--
+1.8.3.1
+
diff --git a/0425-tests-Fix-spurious-self-heald.t-failure.patch b/0425-tests-Fix-spurious-self-heald.t-failure.patch
new file mode 100644
index 0000000..7bfc04a
--- /dev/null
+++ b/0425-tests-Fix-spurious-self-heald.t-failure.patch
@@ -0,0 +1,187 @@
+From 2c582ea6c76031463501b31d9250e739d5aeda79 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Fri, 5 Jun 2020 14:28:11 +0530
+Subject: [PATCH 425/449] tests: Fix spurious self-heald.t failure
+
+Problem:
+The heal-info code assumes that all indices in the xattrop directory
+definitely need heal. There is one corner case: the very first
+xattrop on a file adds its gfid to the 'xattrop' index in the fop
+path, and the gfid is removed in the _cbk path because the fop is a
+zero-xattr xattrop in the success case. These gfids could be read by
+heal-info in that window and shown as needing heal.
+
+Fix:
+Check the pending flag to see if the file definitely needs heal or
+not, instead of relying on which index is being crawled at the moment.
+
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24110/
+> fixes: bz#1801623
+> Change-Id: I79f00dc7366fedbbb25ec4bec838dba3b34c7ad5
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1721355
+Change-Id: I7efdf45a5158fadfdbdd21c91837f193d80fa6c7
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202491
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
+---
+ heal/src/glfs-heal.c | 17 ++++++----------
+ xlators/cluster/afr/src/afr-common.c | 38 ++++++++++++++----------------------
+ 2 files changed, 21 insertions(+), 34 deletions(-)
+
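Simplified shape of the verdict this patch moves into afr_update_heal_status (names mirror the hunks below; not a drop-in function, and dirty_non_unary abbreviates the afr_is_dirty_count_non_unary check):

    if (!pending) {
        /* Possibly the zero-xattr-xattrop leftover described above. */
        if (dirty_non_unary || !io_domain_lk_count)
            ret = 0;                  /* needs heal */
        else
            *dsh = *esh = *msh = 0;   /* no heal needed */
    } else if (shd_domain_lk_count) {
        ret = -EAGAIN;                /* shd holds a lock: 'possibly-healing' */
    } else {
        ret = 0;                      /* definitely needs heal */
    }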
+diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
+index 5af9e31..125b12c 100644
+--- a/heal/src/glfs-heal.c
++++ b/heal/src/glfs-heal.c
+@@ -775,8 +775,7 @@ static int
+ glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
+ uint64_t *offset, num_entries_t *num_entries,
+ print_status glfsh_print_status,
+- gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode,
+- dict_t *xattr_req)
++ gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode)
+ {
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+@@ -808,7 +807,7 @@ glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
+
+ gf_uuid_parse(entry->d_name, gfid);
+ gf_uuid_copy(loc.gfid, gfid);
+- ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, xattr_req, NULL);
++ ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, NULL, NULL);
+ if (ret) {
+ if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && (ret == -ENOTCONN))
+ goto out;
+@@ -877,19 +876,19 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_heal_status,
+- ignore, mode, xattr_req);
++ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_spb_status,
+- ignore, mode, xattr_req);
++ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_summary_status,
+- ignore, mode, xattr_req);
++ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
+@@ -898,7 +897,7 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_heal_status_boolean,
+- ignore, mode, xattr_req);
++ ignore, mode);
+ if (ret < 0)
+ goto out;
+ }
+@@ -952,10 +951,6 @@ glfsh_print_pending_heals_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ int32_t op_errno = 0;
+ gf_boolean_t ignore = _gf_false;
+
+- ret = dict_set_str(xattr_req, "index-vgfid", vgfid);
+- if (ret)
+- return ret;
+-
+ if (!strcmp(vgfid, GF_XATTROP_DIRTY_GFID))
+ ignore = _gf_true;
+
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index c355ec5..89e2483 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -5995,8 +5995,8 @@ afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
+
+ static int
+ afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+- char *index_vgfid, ia_type_t ia_type, gf_boolean_t *esh,
+- gf_boolean_t *dsh, gf_boolean_t *msh)
++ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
++ gf_boolean_t *msh, unsigned char pending)
+ {
+ int ret = -1;
+ GF_UNUSED int ret1 = 0;
+@@ -6026,14 +6026,7 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ }
+ }
+
+- if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) {
+- if (shd_domain_lk_count) {
+- ret = -EAGAIN; /*For 'possibly-healing'. */
+- } else {
+- ret = 0; /*needs heal. Just set a non -ve value so that it is
+- assumed as the source index.*/
+- }
+- } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) {
++ if (!pending) {
+ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
+ (!io_domain_lk_count)) {
+ /* Needs heal. */
+@@ -6042,6 +6035,13 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ /* No heal needed. */
+ *dsh = *esh = *msh = 0;
+ }
++ } else {
++ if (shd_domain_lk_count) {
++ ret = -EAGAIN; /*For 'possibly-healing'. */
++ } else {
++ ret = 0; /*needs heal. Just set a non -ve value so that it is
++ assumed as the source index.*/
++ }
+ }
+ return ret;
+ }
+@@ -6049,8 +6049,8 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ /*return EIO, EAGAIN or pending*/
+ int
+ afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+- inode_t **inode, char *index_vgfid,
+- gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal,
++ inode_t **inode, gf_boolean_t *entry_selfheal,
++ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal, unsigned char *pending)
+ {
+ int ret = -1;
+@@ -6109,8 +6109,8 @@ afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ goto out;
+ }
+
+- ret = afr_update_heal_status(this, replies, index_vgfid, (*inode)->ia_type,
+- &esh, &dsh, &msh);
++ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
++ &msh, *pending);
+ out:
+ *data_selfheal = dsh;
+ *entry_selfheal = esh;
+@@ -6133,16 +6133,8 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
+ inode_t *inode = NULL;
+ char *substr = NULL;
+ char *status = NULL;
+- afr_local_t *local = NULL;
+- char *index_vgfid = NULL;
+-
+- local = frame->local;
+- if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) {
+- ret = -1;
+- goto out;
+- }
+
+- ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, index_vgfid,
++ ret = afr_lockless_inspect(frame, this, loc->gfid, &inode,
+ &entry_selfheal, &data_selfheal,
+ &metadata_selfheal, &pending);
+
+--
+1.8.3.1
+
diff --git a/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch b/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch
new file mode 100644
index 0000000..a96b66e
--- /dev/null
+++ b/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch
@@ -0,0 +1,216 @@
+From 91936fe5ef854bd9d2f91e643795d0e7791b97ba Mon Sep 17 00:00:00 2001
+From: Harpreet Kaur <hlalwani@redhat.com>
+Date: Mon, 7 Jan 2019 16:38:25 +0530
+Subject: [PATCH 426/449] geo-rep: Fix for "Transport End Point not connected"
+ issue
+
+problem: Geo-rep's gsyncd process mounts the master and slave volumes
+         on the master and slave nodes respectively and starts the
+         sync, but it doesn't wait for the mount to be in a ready
+         state to accept I/O. The gluster mount is considered to be
+         ready when all the distribute sub-volumes are up. If all
+         the distribute subvolumes are not up, a lookup can fail
+         with ENOTCONN when it targets a file on a subvol that
+         is down.
+
+solution: Added a virtual xattr "dht.subvol.status" which returns "1"
+          if all subvols are up and "0" otherwise.
+          Geo-rep then uses this virtual xattr after a fresh mount to
+          check whether all subvols are up and only then starts
+          the I/O.
+
+>fixes: bz#1664335
+>Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
+>Signed-off-by: Harpreet Kaur <hlalwani@redhat.com>
+>Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/22001/
+BUG: 1640573
+Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202554
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/resource.py | 11 ++++++
+ geo-replication/syncdaemon/syncdutils.py | 20 +++++++++--
+ xlators/cluster/dht/src/dht-common.c | 59 ++++++++++++++++++++++++++++++++
+ xlators/cluster/dht/src/dht-common.h | 4 +++
+ 4 files changed, 91 insertions(+), 3 deletions(-)
+
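A hedged C equivalent of the retry loop the patch adds in Python (wait_subvols_up is hypothetical; the virtual xattr key is the one the patch defines):

    #include <sys/xattr.h>
    #include <unistd.h>

    static int wait_subvols_up(const char *mnt, int retries)
    {
        char val[16];

        while (retries-- > 0) {
            ssize_t n = getxattr(mnt, "dht.subvol.status", val, sizeof(val) - 1);
            if (n > 0) {
                val[n] = '\0';
                if (val[0] == '1')
                    return 0;            /* all DHT subvolumes are up */
            }
            usleep(200 * 1000);          /* 0.2s, like the Python loop */
        }
        return -1;                       /* subvols still not up */
    }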
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index 189d8a1..0c61de9 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -37,6 +37,7 @@ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
+ from syncdutils import get_changelog_log_level, get_rsync_version
+ from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
+ from syncdutils import GX_GFID_CANONICAL_LEN
++from syncdutils import gf_mount_ready
+ from gsyncdstatus import GeorepStatus
+ from syncdutils import lf, Popen, sup
+ from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt
+@@ -950,6 +951,16 @@ class Mounter(object):
+ logging.exception('mount cleanup failure:')
+ rv = 200
+ os._exit(rv)
++
++ #Polling the dht.subvol.status value.
++ RETRIES = 10
++ while not gf_mount_ready():
++ if RETRIES < 0:
++ logging.error('Subvols are not up')
++ break
++ RETRIES -= 1
++ time.sleep(0.2)
++
+ logging.debug('auxiliary glusterfs mount prepared')
+
+
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index b08098e..7560fa1 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -21,8 +21,8 @@ import subprocess
+ import socket
+ from subprocess import PIPE
+ from threading import Lock, Thread as baseThread
+-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED
+-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode
++from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED
++from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode
+ from signal import signal, SIGTERM
+ import select as oselect
+ from os import waitpid as owaitpid
+@@ -55,6 +55,8 @@ from rconf import rconf
+
+ from hashlib import sha256 as sha256
+
++ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
++
+ # auxiliary gfid based access prefix
+ _CL_AUX_GFID_PFX = ".gfid/"
+ ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+@@ -100,6 +102,19 @@ def unescape_space_newline(s):
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
++# gf_mount_ready() returns 1 if all subvols are up, else 0
++def gf_mount_ready():
++ ret = errno_wrap(Xattr.lgetxattr,
++ ['.', 'dht.subvol.status', 16],
++ [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
++
++ if isinstance(ret, int):
++ logging.error("failed to get the xattr value")
++ return 1
++ ret = ret.rstrip('\x00')
++ if ret == "1":
++ return 1
++ return 0
+
+ def norm(s):
+ if s:
+@@ -564,7 +579,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
+ def lstat(e):
+ return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
+
+-
+ def get_gfid_from_mnt(gfidpath):
+ return errno_wrap(Xattr.lgetxattr,
+ [gfidpath, 'glusterfs.gfid.string',
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 6aa18f3..23cc80c 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -4858,6 +4858,60 @@ out:
+ return 0;
+ }
+
++/* Virtual Xattr which returns 1 if all subvols are up,
++ else returns 0. Geo-rep then uses this virtual xattr
++ after a fresh mount and starts the I/O.
++*/
++
++enum dht_vxattr_subvol {
++ DHT_VXATTR_SUBVOLS_UP = 1,
++ DHT_VXATTR_SUBVOLS_DOWN = 0,
++};
++
++int
++dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
++ const char *key)
++{
++ dht_local_t *local = NULL;
++ int ret = -1;
++ int op_errno = ENODATA;
++ int value = DHT_VXATTR_SUBVOLS_UP;
++ int i = 0;
++ dht_conf_t *conf = NULL;
++
++ conf = this->private;
++ local = frame->local;
++
++ if (!key) {
++ op_errno = EINVAL;
++ goto out;
++ }
++ local->xattr = dict_new();
++ if (!local->xattr) {
++ op_errno = ENOMEM;
++ goto out;
++ }
++ for (i = 0; i < conf->subvolume_cnt; i++) {
++ if (!conf->subvolume_status[i]) {
++ value = DHT_VXATTR_SUBVOLS_DOWN;
++ gf_msg_debug(this->name, 0, "subvol %s is down ",
++ conf->subvolumes[i]->name);
++ break;
++ }
++ }
++ ret = dict_set_int8(local->xattr, (char *)key, value);
++ if (ret < 0) {
++ op_errno = -ret;
++ ret = -1;
++ goto out;
++ }
++ ret = 0;
++
++out:
++ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
++ return 0;
++}
++
+ int
+ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+@@ -4915,6 +4969,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ goto err;
+ }
+
++ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
++ dht_vgetxattr_subvol_status(frame, this, key);
++ return 0;
++ }
++
+ /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
+ if (!DHT_IS_DIR(layout))
+ goto no_dht_is_dir;
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index 1b3e826..9ec5b51 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -45,6 +45,10 @@
+ #define DHT_DIR_STAT_BLOCKS 8
+ #define DHT_DIR_STAT_SIZE 4096
+
++/* Virtual xattr for subvols status */
++
++#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
++
+ /* Virtual xattrs for debugging */
+
+ #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
+--
+1.8.3.1
+
diff --git a/0427-storage-posix-Fixing-a-coverity-issue.patch b/0427-storage-posix-Fixing-a-coverity-issue.patch
new file mode 100644
index 0000000..ebeb556
--- /dev/null
+++ b/0427-storage-posix-Fixing-a-coverity-issue.patch
@@ -0,0 +1,38 @@
+From 3943fce5818a353117fc1c492e6383434d742979 Mon Sep 17 00:00:00 2001
+From: Barak Sason <bsasonro@redhat.com>
+Date: Sun, 18 Aug 2019 17:52:04 +0300
+Subject: [PATCH 427/449] storage/posix - Fixing a coverity issue
+
+Fixed a resource leak of variable 'pfd'
+
+backport of https://review.gluster.org/#/c/glusterfs/+/23261/
+>CID: 1400673
+>Updates: bz#789278
+>Change-Id: I78e1e8a89e0604b56e35a75c25d436b35db096c3
+>Signed-off-by: Barak Sason <bsasonro@redhat.com>
+
+BUG: 1787310
+Change-Id: I78e1e8a89e0604b56e35a75c25d436b35db096c3
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202563
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index bcce06e..5748b9f 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -1603,6 +1603,7 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
++ GF_FREE(pfd);
+ goto out;
+ }
+
+--
+1.8.3.1
+
diff --git a/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch b/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch
new file mode 100644
index 0000000..dba8f3c
--- /dev/null
+++ b/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch
@@ -0,0 +1,48 @@
+From 7e1bf1e338a6effe209f57b1b92a70d5d25a73bf Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Mon, 26 Aug 2019 11:32:18 +0530
+Subject: [PATCH 428/449] glusterd/ganesha: fixing resource leak in
+ tear_down_cluster()
+
+backport of https://review.gluster.org/#/c/glusterfs/+/23295/
+>CID: 1370947
+>Updates: bz#789278
+>Change-Id: Ib694056430ff0536ed705a0e77e5ace22486891e
+>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+
+BUG: 1787310
+Change-Id: Ib694056430ff0536ed705a0e77e5ace22486891e
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202561
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index 0a16925..06f028f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -659,10 +659,18 @@ tear_down_cluster(gf_boolean_t run_teardown)
+ "Failed to close dir %s. Reason :"
+ " %s",
+ CONFDIR, strerror(errno));
++ goto exit;
+ }
+ }
+
+ out:
++ if (dir && sys_closedir(dir)) {
++ gf_msg_debug(THIS->name, 0,
++ "Failed to close dir %s. Reason :"
++ " %s",
++ CONFDIR, strerror(errno));
++ }
++exit:
+ return ret;
+ }
+
+--
+1.8.3.1
+
diff --git a/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch b/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch
new file mode 100644
index 0000000..8ac6529
--- /dev/null
+++ b/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch
@@ -0,0 +1,61 @@
+From 1370db202a2a60810409f74c390448bf8fbd6998 Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Sun, 9 Feb 2020 15:09:30 +0200
+Subject: [PATCH 429/449] dht/rebalance - fixing failure occurrence due to
+ rebalance stop
+
+Problem description:
+When stopping rebalance, the following error messages appear in the
+rebalance log file:
+[2020-01-28 14:31:42.452070] W [dht-rebalance.c:3447:gf_defrag_process_dir] 0-distrep-dht: Found error from gf_defrag_get_entry
+[2020-01-28 14:31:42.452764] E [MSGID: 109111] [dht-rebalance.c:3971:gf_defrag_fix_layout] 0-distrep-dht: gf_defrag_process_dir failed for directory: /0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31
+[2020-01-28 14:31:42.453498] E [MSGID: 109016] [dht-rebalance.c:3906:gf_defrag_fix_layout] 0-distrep-dht: Fix layout failed for /0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30
+
+In order to avoid seeing these error messages, a modification to the
+error handling mechanism has been made.
+In addition, several log messages have been added to improve debugging efficiency.
+
+backport of https://review.gluster.org/#/c/glusterfs/+/24103/
+>fixes: bz#1800956
+>Change-Id: Ifc82dae79ab3da9fe22ee25088a2a6b855afcfcf
+>Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+
+BUG: 1286171
+Change-Id: Ifc82dae79ab3da9fe22ee25088a2a6b855afcfcf
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202562
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
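Shape of the fix (simplified from the hunks below): the stopped status is checked right after the recursive call returns, before the error branch can log the stop as a failure:

    ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
                               migrate_data);
    if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED)
        goto out;                     /* stopped by admin: not an error */
    if (ret && ret != 2)
        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
               "Fix layout failed for %s", entry_loc.path);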
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 8f31dca..88b6b54 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -3479,6 +3479,10 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ migrate_data, dir_dfmeta, xattr_req,
+ &should_commit_hash, perrno);
+
++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
++ goto out;
++ }
++
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Found "
+@@ -3935,6 +3939,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
+ migrate_data);
+
++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
++ goto out;
++ }
++
+ if (ret && ret != 2) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Fix layout failed for %s", entry_loc.path);
+--
+1.8.3.1
+
diff --git a/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch b/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch
new file mode 100644
index 0000000..6ff69e8
--- /dev/null
+++ b/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch
@@ -0,0 +1,291 @@
+From 7fe500a03d42dba6082c28ef7284c950c44fbfa3 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Wed, 22 May 2019 17:46:19 +0200
+Subject: [PATCH 430/449] Fix some "Null pointer dereference" coverity issues
+
+This patch fixes the following CID's:
+
+ * 1124829
+ * 1274075
+ * 1274083
+ * 1274128
+ * 1274135
+ * 1274141
+ * 1274143
+ * 1274197
+ * 1274205
+ * 1274210
+ * 1274211
+ * 1288801
+ * 1398629
+
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/22767
+> Change-Id: Ia7c86cfab3245b20777ffa296e1a59748040f558
+> Updates: bz#789278
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1787310
+Change-Id: Ia7c86cfab3245b20777ffa296e1a59748040f558
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202616
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-system.c | 2 +-
+ cli/src/cli-xml-output.c | 2 +-
+ glusterfsd/src/glusterfsd.c | 24 +++++++++++++-----------
+ libglusterfs/src/inode.c | 3 +++
+ rpc/rpc-lib/src/rpcsvc.c | 4 ++++
+ xlators/cluster/dht/src/dht-shared.c | 4 ++++
+ xlators/cluster/dht/src/switch.c | 9 +++++++++
+ xlators/features/trash/src/trash.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 7 +++++--
+ xlators/nfs/server/src/mount3.c | 6 ++++++
+ xlators/protocol/client/src/client.c | 7 ++++++-
+ xlators/storage/posix/src/posix-helpers.c | 3 +++
+ 12 files changed, 56 insertions(+), 17 deletions(-)
+
+diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c
+index 8cd1542..cb3a9ea 100644
+--- a/cli/src/cli-cmd-system.c
++++ b/cli/src/cli-cmd-system.c
+@@ -446,7 +446,7 @@ cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+- if (wordcount < 3) {
++ if ((wordcount < 3) || (words[2] == NULL)) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
+index 006e2fb..903997c 100644
+--- a/cli/src/cli-xml-output.c
++++ b/cli/src/cli-xml-output.c
+@@ -64,7 +64,7 @@ cli_begin_xml_output(xmlTextWriterPtr *writer, xmlDocPtr *doc)
+ int ret = -1;
+
+ *writer = xmlNewTextWriterDoc(doc, 0);
+- if (writer == NULL) {
++ if (*writer == NULL) {
+ ret = -1;
+ goto out;
+ }
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 974fb88..9821180 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -1235,19 +1235,21 @@ parse_opts(int key, char *arg, struct argp_state *state)
+ case ARGP_BRICK_PORT_KEY:
+ n = 0;
+
+- port_str = strtok_r(arg, ",", &tmp_str);
+- if (gf_string2uint_base10(port_str, &n) == 0) {
+- cmd_args->brick_port = n;
+- port_str = strtok_r(NULL, ",", &tmp_str);
+- if (port_str) {
+- if (gf_string2uint_base10(port_str, &n) == 0) {
+- cmd_args->brick_port2 = n;
+- break;
++ if (arg != NULL) {
++ port_str = strtok_r(arg, ",", &tmp_str);
++ if (gf_string2uint_base10(port_str, &n) == 0) {
++ cmd_args->brick_port = n;
++ port_str = strtok_r(NULL, ",", &tmp_str);
++ if (port_str) {
++ if (gf_string2uint_base10(port_str, &n) == 0) {
++ cmd_args->brick_port2 = n;
++ break;
++ }
++ argp_failure(state, -1, 0,
++ "wrong brick (listen) port %s", arg);
+ }
+- argp_failure(state, -1, 0, "wrong brick (listen) port %s",
+- arg);
++ break;
+ }
+- break;
+ }
+
+ argp_failure(state, -1, 0, "unknown brick (listen) port %s", arg);
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 9dbb25b..4c3c546 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -899,6 +899,9 @@ inode_resolve(inode_table_t *table, char *path)
+
+ parent = inode_ref(table->root);
+ str = tmp = gf_strdup(path);
++ if (str == NULL) {
++ goto out;
++ }
+
+ while (1) {
+ bname = strtok_r(str, "/", &saveptr);
+diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
+index 5a35139..b058932 100644
+--- a/rpc/rpc-lib/src/rpcsvc.c
++++ b/rpc/rpc-lib/src/rpcsvc.c
+@@ -2874,6 +2874,10 @@ rpcsvc_transport_peer_check_search(dict_t *options, char *pattern, char *ip,
+ }
+
+ dup_addrstr = gf_strdup(addrstr);
++ if (dup_addrstr == NULL) {
++ ret = -1;
++ goto err;
++ }
+ addrtok = strtok_r(dup_addrstr, ",", &svptr);
+ while (addrtok) {
+ /* CASEFOLD not present on Solaris */
+diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
+index ea4b7c6..58e3339 100644
+--- a/xlators/cluster/dht/src/dht-shared.c
++++ b/xlators/cluster/dht/src/dht-shared.c
+@@ -278,6 +278,10 @@ dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
+ goto out;
+
+ dup_brick = gf_strdup(bricks);
++ if (dup_brick == NULL) {
++ goto out;
++ }
++
+ node = strtok_r(dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
+index a782fcd..207d109 100644
+--- a/xlators/cluster/dht/src/switch.c
++++ b/xlators/cluster/dht/src/switch.c
+@@ -610,9 +610,15 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
+ /* Get the pattern for considering switch case.
+ "option block-size *avi:10MB" etc */
+ option_string = gf_strdup(pattern_str);
++ if (option_string == NULL) {
++ goto err;
++ }
+ switch_str = strtok_r(option_string, ";", &tmp_str);
+ while (switch_str) {
+ dup_str = gf_strdup(switch_str);
++ if (dup_str == NULL) {
++ goto err;
++ }
+ switch_opt = GF_CALLOC(1, sizeof(struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt) {
+@@ -647,6 +653,9 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
+
+ if (childs) {
+ dup_childs = gf_strdup(childs);
++ if (dup_childs == NULL) {
++ goto err;
++ }
+ child = strtok_r(dup_childs, ",", &tmp);
+ while (child) {
+ if (gf_switch_valid_child(this, child)) {
+diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
+index d668436..f96ed73 100644
+--- a/xlators/features/trash/src/trash.c
++++ b/xlators/features/trash/src/trash.c
+@@ -170,7 +170,7 @@ store_eliminate_path(char *str, trash_elim_path **eliminate)
+ int ret = 0;
+ char *strtokptr = NULL;
+
+- if (eliminate == NULL) {
++ if ((str == NULL) || (eliminate == NULL)) {
+ ret = EINVAL;
+ goto out;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+index 0f40bea..85c06c1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+@@ -5981,7 +5981,7 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname,
+ GF_ASSERT(this);
+
+ ret = glusterd_urltransform_single(slave, "normalize", &linearr);
+- if (ret == -1) {
++ if ((ret == -1) || (linearr[0] == NULL)) {
+ ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Url: %s", slave);
+ errmsg[ret] = '\0';
+ *op_errstr = gf_strdup(errmsg);
+@@ -5992,7 +5992,10 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname,
+
+ tmp = strtok_r(linearr[0], "/", &save_ptr);
+ tmp = strtok_r(NULL, "/", &save_ptr);
+- slave = strtok_r(tmp, ":", &save_ptr);
++ slave = NULL;
++ if (tmp != NULL) {
++ slave = strtok_r(tmp, ":", &save_ptr);
++ }
+ if (slave) {
+ ret = glusterd_geo_rep_parse_slave(slave, hostname, op_errstr);
+ if (ret) {
+diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
+index 396809c..734453c 100644
+--- a/xlators/nfs/server/src/mount3.c
++++ b/xlators/nfs/server/src/mount3.c
+@@ -3205,6 +3205,12 @@ mnt3_export_parse_auth_param(struct mnt3_export *exp, char *exportpath)
+ struct host_auth_spec *host = NULL;
+ int ret = 0;
+
++ if (exportpath == NULL) {
++ gf_msg(GF_MNT, GF_LOG_ERROR, EINVAL, NFS_MSG_PARSE_HOSTSPEC_FAIL,
++ "Export path is NULL");
++ return -1;
++ }
++
+ /* Using exportpath directly in strtok_r because we want
+ * to strip off AUTH parameter from exportpath. */
+ token = strtok_r(exportpath, "(", &savPtr);
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index e156d4d..ed855ca 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -1222,9 +1222,12 @@ client_set_remote_options(char *value, xlator_t *this)
+ char *remote_port_str = NULL;
+ char *tmp = NULL;
+ int remote_port = 0;
+- int ret = 0;
++ int ret = -1;
+
+ dup_value = gf_strdup(value);
++ if (dup_value == NULL) {
++ goto out;
++ }
+ host = strtok_r(dup_value, ":", &tmp);
+ subvol = strtok_r(NULL, ":", &tmp);
+ remote_port_str = strtok_r(NULL, ":", &tmp);
+@@ -1238,6 +1241,7 @@ client_set_remote_options(char *value, xlator_t *this)
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED,
+ "failed to set remote-host with %s", host);
++ GF_FREE(host_dup);
+ goto out;
+ }
+ }
+@@ -1252,6 +1256,7 @@ client_set_remote_options(char *value, xlator_t *this)
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED,
+ "failed to set remote-host with %s", host);
++ GF_FREE(subvol_dup);
+ goto out;
+ }
+ }
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 949c799..2336add 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -390,6 +390,9 @@ _posix_get_marker_quota_contributions(posix_xattr_filler_t *filler, char *key)
+ int i = 0, ret = 0;
+
+ tmp_key = ptr = gf_strdup(key);
++ if (tmp_key == NULL) {
++ return -1;
++ }
+ for (i = 0; i < 4; i++) {
+ token = strtok_r(tmp_key, ".", &saveptr);
+ tmp_key = NULL;
+--
+1.8.3.1
+
diff --git a/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch b/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch
new file mode 100644
index 0000000..341cfc1
--- /dev/null
+++ b/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch
@@ -0,0 +1,638 @@
+From d7c52ddd2cbadb1d9a55767c2f7fe6ba38d9a2ed Mon Sep 17 00:00:00 2001
+From: Sheetal Pamecha <spamecha@redhat.com>
+Date: Wed, 20 Nov 2019 12:42:12 +0530
+Subject: [PATCH 431/449] glusterd: check for same node while adding bricks in
+ disperse volume
+
+The optimal way to configure disperse and replicate volumes
+is to have all bricks on different nodes.
+
+During the create operation this already fails, saying the layout is
+not optimal, and the user must use 'force' to override the behavior.
+This patch implements the same check during the add-brick operation,
+to avoid a situation where all the added bricks end up on the same
+host. The operation errors out accordingly, and, as with create, it
+can be overridden by using 'force'.
+
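+Illustrative session (volume and brick names are made up; the error
+text comes from found_string in the code below):
+
+    # gluster volume add-brick testvol replica 3 \
+          host1:/bricks/b4 host1:/bricks/b5 host1:/bricks/b6
+    volume add-brick: failed: Multiple bricks of a replicate volume
+    are present on the same server. This setup is not optimal. Bricks
+    should be on different nodes to have best fault tolerant
+    configuration. Use 'force' at the end of the command if you want
+    to override this behavior.
+
+    # gluster volume add-brick testvol replica 3 \
+          host1:/bricks/b4 host1:/bricks/b5 host1:/bricks/b6 force
+    volume add-brick: success
+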
+> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23729
+> fixes: #1047
+> Change-Id: I3ee9c97c1a14b73f4532893bc00187ef9355238b
+> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+
+BUG: 1524457
+Change-Id: I3ee9c97c1a14b73f4532893bc00187ef9355238b
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202621
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 20 +-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 224 ++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-utils.h | 4 +
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 293 +++---------------------
+ 4 files changed, 276 insertions(+), 265 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+index c5141de..d424f31 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+@@ -21,7 +21,6 @@
+ #include "glusterd-messages.h"
+ #include "glusterd-server-quorum.h"
+ #include <glusterfs/run.h>
+-#include "glusterd-volgen.h"
+ #include <glusterfs/syscall.h>
+ #include <sys/signal.h>
+
+@@ -1575,6 +1574,25 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+
+ is_force = dict_get_str_boolean(dict, "force", _gf_false);
+
++ /* Check brick order if the volume type is replicate or disperse. If
++ * force at the end of command not given then check brick order.
++ */
++
++ if (!is_force) {
++ if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) ||
++ (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) {
++ ret = glusterd_check_brick_order(dict, msg, volinfo->type);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
++ "Not adding brick because of "
++ "bad brick order. %s",
++ msg);
++ *op_errstr = gf_strdup(msg);
++ goto out;
++ }
++ }
++ }
++
+ if (volinfo->replica_count < replica_count && !is_force) {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index a1299bc..14e23d1 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -14759,3 +14759,227 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo)
+ return _gf_true;
+ return _gf_false;
+ }
++
++static gf_ai_compare_t
++glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next)
++{
++ int ret = -1;
++ struct addrinfo *tmp1 = NULL;
++ struct addrinfo *tmp2 = NULL;
++ char firstip[NI_MAXHOST] = {0,};
++ char nextip[NI_MAXHOST] = {
++ 0,
++ };
++
++ for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
++ ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST,
++ NULL, 0, NI_NUMERICHOST);
++ if (ret)
++ return GF_AI_COMPARE_ERROR;
++ for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
++ ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip,
++ NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
++ if (ret)
++ return GF_AI_COMPARE_ERROR;
++ if (!strcmp(firstip, nextip)) {
++ return GF_AI_COMPARE_MATCH;
++ }
++ }
++ }
++ return GF_AI_COMPARE_NO_MATCH;
++}
++
++/* Check for non optimal brick order for Replicate/Disperse :
++ * Checks if bricks belonging to a replicate or disperse
++ * volume are present on the same server
++ */
++int32_t
++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type)
++{
++ int ret = -1;
++ int i = 0;
++ int j = 0;
++ int k = 0;
++ xlator_t *this = NULL;
++ addrinfo_list_t *ai_list = NULL;
++ addrinfo_list_t *ai_list_tmp1 = NULL;
++ addrinfo_list_t *ai_list_tmp2 = NULL;
++ char *brick = NULL;
++ char *brick_list = NULL;
++ char *brick_list_dup = NULL;
++ char *brick_list_ptr = NULL;
++ char *tmpptr = NULL;
++ char *volname = NULL;
++ int32_t brick_count = 0;
++ int32_t sub_count = 0;
++ struct addrinfo *ai_info = NULL;
++ char brick_addr[128] = {
++ 0,
++ };
++ int addrlen = 0;
++
++ const char failed_string[2048] =
++ "Failed to perform brick order "
++ "check. Use 'force' at the end of the command"
++ " if you want to override this behavior. ";
++ const char found_string[2048] =
++ "Multiple bricks of a %s "
++ "volume are present on the same server. This "
++ "setup is not optimal. Bricks should be on "
++ "different nodes to have best fault tolerant "
++ "configuration. Use 'force' at the end of the "
++ "command if you want to override this "
++ "behavior. ";
++
++ this = THIS;
++
++ GF_ASSERT(this);
++
++ ai_list = MALLOC(sizeof(addrinfo_list_t));
++ ai_list->info = NULL;
++ CDS_INIT_LIST_HEAD(&ai_list->list);
++
++ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
++ "Unable to get volume name");
++ goto out;
++ }
++
++ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
++ "Bricks check : Could not "
++ "retrieve bricks list");
++ goto out;
++ }
++
++ ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
++ "Bricks check : Could not "
++ "retrieve brick count");
++ goto out;
++ }
++
++ if (type != GF_CLUSTER_TYPE_DISPERSE) {
++ ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
++ &sub_count);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
++ "Bricks check : Could"
++ " not retrieve replica count");
++ goto out;
++ }
++ gf_msg_debug(this->name, 0,
++ "Replicate cluster type "
++ "found. Checking brick order.");
++ } else {
++ ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"),
++ &sub_count);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
++ "Bricks check : Could"
++ " not retrieve disperse count");
++ goto out;
++ }
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND,
++ "Disperse cluster type"
++ " found. Checking brick order.");
++ }
++ brick_list_dup = brick_list_ptr = gf_strdup(brick_list);
++ /* Resolve hostnames and get addrinfo */
++ while (i < brick_count) {
++ ++i;
++ brick = strtok_r(brick_list_dup, " \n", &tmpptr);
++ brick_list_dup = tmpptr;
++ if (brick == NULL)
++ goto check_failed;
++ tmpptr = strrchr(brick, ':');
++ if (tmpptr == NULL)
++ goto check_failed;
++ addrlen = strlen(brick) - strlen(tmpptr);
++ strncpy(brick_addr, brick, addrlen);
++ brick_addr[addrlen] = '\0';
++ ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info);
++ if (ret != 0) {
++ ret = 0;
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL,
++ "unable to resolve host name for addr %s", brick_addr);
++ goto out;
++ }
++ ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t));
++ if (ai_list_tmp1 == NULL) {
++ ret = 0;
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++ "failed to allocate "
++ "memory");
++ freeaddrinfo(ai_info);
++ goto out;
++ }
++ ai_list_tmp1->info = ai_info;
++ cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list);
++ ai_list_tmp1 = NULL;
++ }
++
++ i = 0;
++ ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list);
++
++ /* Check for bad brick order */
++ while (i < brick_count) {
++ ++i;
++ ai_info = ai_list_tmp1->info;
++ ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t,
++ list);
++ if (0 == i % sub_count) {
++ j = 0;
++ continue;
++ }
++ ai_list_tmp2 = ai_list_tmp1;
++ k = j;
++ while (k < sub_count - 1) {
++ ++k;
++ ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info);
++ if (GF_AI_COMPARE_ERROR == ret)
++ goto check_failed;
++ if (GF_AI_COMPARE_MATCH == ret)
++ goto found_bad_brick_order;
++ ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next,
++ addrinfo_list_t, list);
++ }
++ ++j;
++ }
++ gf_msg_debug(this->name, 0, "Brick order okay");
++ ret = 0;
++ goto out;
++
++check_failed:
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL,
++ "Failed bad brick order check");
++ snprintf(err_str, sizeof(failed_string), failed_string);
++ ret = -1;
++ goto out;
++
++found_bad_brick_order:
++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER,
++ "Bad brick order found");
++ if (type == GF_CLUSTER_TYPE_DISPERSE) {
++ snprintf(err_str, sizeof(found_string), found_string, "disperse");
++ } else {
++ snprintf(err_str, sizeof(found_string), found_string, "replicate");
++ }
++
++ ret = -1;
++out:
++ ai_list_tmp2 = NULL;
++ GF_FREE(brick_list_ptr);
++ cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list)
++ {
++ if (ai_list_tmp1->info)
++ freeaddrinfo(ai_list_tmp1->info);
++ free(ai_list_tmp2);
++ ai_list_tmp2 = ai_list_tmp1;
++ }
++ free(ai_list_tmp2);
++ return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index ead16b2..e2e2454 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -881,4 +881,8 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo);
+
+ char *
+ search_brick_path_from_proc(pid_t brick_pid, char *brickpath);
++
++int32_t
++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type);
++
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 93042ab..8da2ff3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -41,240 +41,6 @@
+ #define glusterd_op_start_volume_args_get(dict, volname, flags) \
+ glusterd_op_stop_volume_args_get(dict, volname, flags)
+
+-gf_ai_compare_t
+-glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next)
+-{
+- int ret = -1;
+- struct addrinfo *tmp1 = NULL;
+- struct addrinfo *tmp2 = NULL;
+- char firstip[NI_MAXHOST] = {0.};
+- char nextip[NI_MAXHOST] = {
+- 0,
+- };
+-
+- for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
+- ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST,
+- NULL, 0, NI_NUMERICHOST);
+- if (ret)
+- return GF_AI_COMPARE_ERROR;
+- for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
+- ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip,
+- NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+- if (ret)
+- return GF_AI_COMPARE_ERROR;
+- if (!strcmp(firstip, nextip)) {
+- return GF_AI_COMPARE_MATCH;
+- }
+- }
+- }
+- return GF_AI_COMPARE_NO_MATCH;
+-}
+-
+-/* Check for non optimal brick order for replicate :
+- * Checks if bricks belonging to a replicate volume
+- * are present on the same server
+- */
+-int32_t
+-glusterd_check_brick_order(dict_t *dict, char *err_str)
+-{
+- int ret = -1;
+- int i = 0;
+- int j = 0;
+- int k = 0;
+- xlator_t *this = NULL;
+- addrinfo_list_t *ai_list = NULL;
+- addrinfo_list_t *ai_list_tmp1 = NULL;
+- addrinfo_list_t *ai_list_tmp2 = NULL;
+- char *brick = NULL;
+- char *brick_list = NULL;
+- char *brick_list_dup = NULL;
+- char *brick_list_ptr = NULL;
+- char *tmpptr = NULL;
+- char *volname = NULL;
+- int32_t brick_count = 0;
+- int32_t type = GF_CLUSTER_TYPE_NONE;
+- int32_t sub_count = 0;
+- struct addrinfo *ai_info = NULL;
+- char brick_addr[128] = {
+- 0,
+- };
+- int addrlen = 0;
+-
+- const char failed_string[2048] =
+- "Failed to perform brick order "
+- "check. Use 'force' at the end of the command"
+- " if you want to override this behavior. ";
+- const char found_string[2048] =
+- "Multiple bricks of a %s "
+- "volume are present on the same server. This "
+- "setup is not optimal. Bricks should be on "
+- "different nodes to have best fault tolerant "
+- "configuration. Use 'force' at the end of the "
+- "command if you want to override this "
+- "behavior. ";
+-
+- this = THIS;
+-
+- GF_ASSERT(this);
+-
+- ai_list = MALLOC(sizeof(addrinfo_list_t));
+- ai_list->info = NULL;
+- CDS_INIT_LIST_HEAD(&ai_list->list);
+-
+- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+- "Unable to get volume name");
+- goto out;
+- }
+-
+- ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
+- if (ret) {
+- snprintf(err_str, 512, "Unable to get type of volume %s", volname);
+- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s",
+- err_str);
+- goto out;
+- }
+-
+- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+- "Bricks check : Could not "
+- "retrieve bricks list");
+- goto out;
+- }
+-
+- ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+- "Bricks check : Could not "
+- "retrieve brick count");
+- goto out;
+- }
+-
+- if (type != GF_CLUSTER_TYPE_DISPERSE) {
+- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
+- &sub_count);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+- "Bricks check : Could"
+- " not retrieve replica count");
+- goto out;
+- }
+- gf_msg_debug(this->name, 0,
+- "Replicate cluster type "
+- "found. Checking brick order.");
+- } else {
+- ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"),
+- &sub_count);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+- "Bricks check : Could"
+- " not retrieve disperse count");
+- goto out;
+- }
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND,
+- "Disperse cluster type"
+- " found. Checking brick order.");
+- }
+-
+- brick_list_dup = brick_list_ptr = gf_strdup(brick_list);
+- /* Resolve hostnames and get addrinfo */
+- while (i < brick_count) {
+- ++i;
+- brick = strtok_r(brick_list_dup, " \n", &tmpptr);
+- brick_list_dup = tmpptr;
+- if (brick == NULL)
+- goto check_failed;
+- tmpptr = strrchr(brick, ':');
+- if (tmpptr == NULL)
+- goto check_failed;
+- addrlen = strlen(brick) - strlen(tmpptr);
+- strncpy(brick_addr, brick, addrlen);
+- brick_addr[addrlen] = '\0';
+- ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info);
+- if (ret != 0) {
+- ret = 0;
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL,
+- "unable to resolve host name for addr %s", brick_addr);
+- goto out;
+- }
+- ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t));
+- if (ai_list_tmp1 == NULL) {
+- ret = 0;
+- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+- "failed to allocate "
+- "memory");
+- freeaddrinfo(ai_info);
+- goto out;
+- }
+- ai_list_tmp1->info = ai_info;
+- cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list);
+- ai_list_tmp1 = NULL;
+- }
+-
+- i = 0;
+- ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list);
+-
+- /* Check for bad brick order */
+- while (i < brick_count) {
+- ++i;
+- ai_info = ai_list_tmp1->info;
+- ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t,
+- list);
+- if (0 == i % sub_count) {
+- j = 0;
+- continue;
+- }
+- ai_list_tmp2 = ai_list_tmp1;
+- k = j;
+- while (k < sub_count - 1) {
+- ++k;
+- ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info);
+- if (GF_AI_COMPARE_ERROR == ret)
+- goto check_failed;
+- if (GF_AI_COMPARE_MATCH == ret)
+- goto found_bad_brick_order;
+- ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next,
+- addrinfo_list_t, list);
+- }
+- ++j;
+- }
+- gf_msg_debug(this->name, 0, "Brick order okay");
+- ret = 0;
+- goto out;
+-
+-check_failed:
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL,
+- "Failed bad brick order check");
+- snprintf(err_str, sizeof(failed_string), failed_string);
+- ret = -1;
+- goto out;
+-
+-found_bad_brick_order:
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER,
+- "Bad brick order found");
+- if (type == GF_CLUSTER_TYPE_DISPERSE) {
+- snprintf(err_str, sizeof(found_string), found_string, "disperse");
+- } else {
+- snprintf(err_str, sizeof(found_string), found_string, "replicate");
+- }
+-
+- ret = -1;
+-out:
+- ai_list_tmp2 = NULL;
+- GF_FREE(brick_list_ptr);
+- cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list)
+- {
+- if (ai_list_tmp1->info)
+- freeaddrinfo(ai_list_tmp1->info);
+- free(ai_list_tmp2);
+- ai_list_tmp2 = ai_list_tmp1;
+- }
+- free(ai_list_tmp2);
+- return ret;
+-}
+-
+ int
+ __glusterd_handle_create_volume(rpcsvc_request_t *req)
+ {
+@@ -1337,6 +1103,35 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
+ }
+ }
+
++ /*Check brick order if the volume type is replicate or disperse. If
++ * force at the end of command not given then check brick order.
++ */
++ if (is_origin_glusterd(dict)) {
++ ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
++ if (ret) {
++ snprintf(msg, sizeof(msg),
++ "Unable to get type of "
++ "volume %s",
++ volname);
++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s",
++ msg);
++ goto out;
++ }
++
++ if (!is_force) {
++ if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
++ (type == GF_CLUSTER_TYPE_DISPERSE)) {
++ ret = glusterd_check_brick_order(dict, msg, type);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
++ "Not creating volume because of "
++ "bad brick order");
++ goto out;
++ }
++ }
++ }
++ }
++
+ while (i < brick_count) {
+ i++;
+ brick = strtok_r(brick_list, " \n", &tmpptr);
+@@ -1423,36 +1218,6 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
+ brick_info = NULL;
+ }
+
+- /*Check brick order if the volume type is replicate or disperse. If
+- * force at the end of command not given then check brick order.
+- */
+- if (is_origin_glusterd(dict)) {
+- ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
+- if (ret) {
+- snprintf(msg, sizeof(msg),
+- "Unable to get type of "
+- "volume %s",
+- volname);
+- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s",
+- msg);
+- goto out;
+- }
+-
+- if (!is_force) {
+- if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
+- (type == GF_CLUSTER_TYPE_DISPERSE)) {
+- ret = glusterd_check_brick_order(dict, msg);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+- "Not "
+- "creating volume because of "
+- "bad brick order");
+- goto out;
+- }
+- }
+- }
+- }
+-
+ ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"),
+ local_brick_count);
+ if (ret) {
+--
+1.8.3.1
+
diff --git a/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch b/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch
new file mode 100644
index 0000000..ef589de
--- /dev/null
+++ b/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch
@@ -0,0 +1,503 @@
+From aa215163cb7d806dc98bef2386a4e282a5e54a31 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Thu, 25 Apr 2019 12:00:52 +0530
+Subject: [PATCH 432/449] glusterd: Fix coverity defects & put coverity
+ annotations
+
+Along with fixing a few defects, this puts the required annotations on
+the defects that are marked ignore/false positive/intentional as per
+the Coverity defect sheet. This should keep the per-component graph
+from showing many defects as open on the Coverity glusterfs web page.
+
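+For reference, the annotations are one-line comments placed directly
+above the flagged statement; Coverity then treats the finding as
+reviewed. A self-contained illustration of the SLEEP case (the
+function below is hypothetical; the annotation syntax is not):
+
+    #include <pthread.h>
+    #include <unistd.h>
+
+    static pthread_mutex_t restart_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+    static void
+    restart_one_brick(void)
+    {
+        pthread_mutex_lock(&restart_mutex);
+        /* coverity[SLEEP] */
+        sleep(1); /* intentionally blocking while the lock is held */
+        pthread_mutex_unlock(&restart_mutex);
+    }
+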
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22619/
+> Updates: bz#789278
+> Change-Id: I19461dc3603a3bd8f88866a1ab3db43d783af8e4
+> Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1787310
+Change-Id: I19461dc3603a3bd8f88866a1ab3db43d783af8e4
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202631
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 7 +++--
+ xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 3 +-
+ .../glusterd/src/glusterd-gfproxyd-svc-helper.c | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 8 ++++-
+ xlators/mgmt/glusterd/src/glusterd-mountbroker.c | 5 ++-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 8 +++++
+ xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 2 ++
+ xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 1 +
+ xlators/mgmt/glusterd/src/glusterd-store.c | 4 ---
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 4 +--
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 +
+ .../mgmt/glusterd/src/glusterd-tierd-svc-helper.c | 4 +--
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 9 ++++--
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 36 +++++++++++++---------
+ 14 files changed, 63 insertions(+), 31 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+index d424f31..121346c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+@@ -2032,7 +2032,6 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
+ case GF_OP_CMD_STATUS:
+ ret = 0;
+ goto out;
+-
+ case GF_OP_CMD_DETACH_START:
+ if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+ snprintf(msg, sizeof(msg),
+@@ -2044,7 +2043,7 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
+ errstr);
+ goto out;
+ }
+-
++ /* Fall through */
+ case GF_OP_CMD_START: {
+ if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+ dict_getn(dict, "replica-count", SLEN("replica-count"))) {
+@@ -2259,7 +2258,8 @@ out:
+ if (op_errstr)
+ *op_errstr = errstr;
+ }
+-
++ if (!op_errstr && errstr)
++ GF_FREE(errstr);
+ return ret;
+ }
+
+@@ -2687,6 +2687,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
+ * Update defrag_cmd as well or it will only be done
+ * for nodes on which the brick to be removed exists.
+ */
++ /* coverity[MIXED_ENUMS] */
+ volinfo->rebal.defrag_cmd = cmd;
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+ ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+index 85c06c1..5a91df4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+@@ -4107,6 +4107,7 @@ gd_pause_or_resume_gsync(dict_t *dict, char *master, char *slave,
+
+ out:
+ sys_close(pfd);
++ /* coverity[INTEGER_OVERFLOW] */
+ return ret;
+ }
+
+@@ -4183,7 +4184,7 @@ stop_gsync(char *master, char *slave, char **msg, char *conf_path,
+
+ out:
+ sys_close(pfd);
+-
++ /* coverity[INTEGER_OVERFLOW] */
+ return ret;
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
+index 67e3f41..e338bf4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
+@@ -111,7 +111,7 @@ glusterd_svc_get_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *svc_name,
+ goto out;
+ }
+
+- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(*tmpvol);
+ if (tmp_fd < 0) {
+ gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 2e73c98..1f31e72 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -930,6 +930,7 @@ __glusterd_handle_cluster_lock(rpcsvc_request_t *req)
+
+ op_ctx = dict_new();
+ if (!op_ctx) {
++ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to set new dict");
+ goto out;
+@@ -956,6 +957,9 @@ out:
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
++ if (ret)
++ GF_FREE(ctx);
++
+ return ret;
+ }
+
+@@ -3470,6 +3474,7 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options,
+ GF_ASSERT(this);
+
+ GF_ASSERT(options);
++ GF_VALIDATE_OR_GOTO(this->name, rpc, out);
+
+ if (force && rpc && *rpc) {
+ (void)rpc_clnt_unref(*rpc);
+@@ -3482,7 +3487,6 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options,
+ goto out;
+
+ ret = rpc_clnt_register_notify(new_rpc, notify_fn, notify_data);
+- *rpc = new_rpc;
+ if (ret)
+ goto out;
+ ret = rpc_clnt_start(new_rpc);
+@@ -3491,6 +3495,8 @@ out:
+ if (new_rpc) {
+ (void)rpc_clnt_unref(new_rpc);
+ }
++ } else {
++ *rpc = new_rpc;
+ }
+
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
+index 332ddef..c017ccb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
++++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
+@@ -334,7 +334,10 @@ make_ghadoop_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user,
+ if (ret == -1)
+ return ret;
+
+- return parse_mount_pattern_desc(mspec, hadoop_mnt_desc);
++ ret = parse_mount_pattern_desc(mspec, hadoop_mnt_desc);
++ GF_FREE(hadoop_mnt_desc);
++
++ return ret;
+ }
+
+ static gf_boolean_t
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 6475611..46fc607 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -2467,6 +2467,7 @@ glusterd_start_bricks(glusterd_volinfo_t *volinfo)
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock(&brickinfo->restart_mutex);
+ {
++ /* coverity[SLEEP] */
+ ret = glusterd_brick_start(volinfo, brickinfo, _gf_false,
+ _gf_false);
+ }
+@@ -3466,6 +3467,7 @@ _add_task_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index)
+
+ switch (op) {
+ case GD_OP_REMOVE_TIER_BRICK:
++ /* Fall through */
+ case GD_OP_REMOVE_BRICK:
+ snprintf(key, sizeof(key), "task%d", index);
+ ret = _add_remove_bricks_to_dict(dict, volinfo, key);
+@@ -7550,6 +7552,7 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
+ glusterd_op_t op = GD_OP_NONE;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ char *op_errstr = NULL;
++ gf_boolean_t free_req_ctx = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+@@ -7558,6 +7561,9 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
+ req_ctx = ctx;
+ } else {
+ req_ctx = GF_CALLOC(1, sizeof(*req_ctx), gf_gld_mt_op_allack_ctx_t);
++ if (!req_ctx)
++ goto out;
++ free_req_ctx = _gf_true;
+ op = glusterd_op_get_op();
+ req_ctx->op = op;
+ gf_uuid_copy(req_ctx->uuid, MY_UUID);
+@@ -7588,6 +7594,8 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
+ }
+
+ out:
++ if (ret && req_ctx && free_req_ctx)
++ GF_FREE(req_ctx);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
+ return ret;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
+index 8c1feeb..1a65359 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
+@@ -82,6 +82,7 @@ glusterd_peerinfo_cleanup(glusterd_peerinfo_t *peerinfo)
+ call_rcu(&peerinfo->rcu_head.head, glusterd_peerinfo_destroy);
+
+ if (quorum_action)
++ /* coverity[SLEEP] */
+ glusterd_do_quorum_action();
+ return 0;
+ }
+@@ -358,6 +359,7 @@ glusterd_uuid_to_hostname(uuid_t uuid)
+
+ if (!gf_uuid_compare(MY_UUID, uuid)) {
+ hostname = gf_strdup("localhost");
++ return hostname;
+ }
+ RCU_READ_LOCK;
+ if (!cds_list_empty(&priv->peers)) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+index fd334e6..f378187 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
++++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+@@ -372,6 +372,7 @@ glusterd_do_volume_quorum_action(xlator_t *this, glusterd_volinfo_t *volinfo,
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock(&brickinfo->restart_mutex);
+ {
++ /* coverity[SLEEP] */
+ ret = glusterd_brick_start(volinfo, brickinfo, _gf_false,
+ _gf_false);
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index b3b5ee9..4fa8116 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -4764,10 +4764,6 @@ glusterd_store_retrieve_peers(xlator_t *this)
+ */
+ address = cds_list_entry(peerinfo->hostnames.next,
+ glusterd_peer_hostname_t, hostname_list);
+- if (!address) {
+- ret = -1;
+- goto next;
+- }
+ peerinfo->hostname = gf_strdup(address->hostname);
+
+ ret = glusterd_friend_add_from_peerinfo(peerinfo, 1, NULL);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+index ca19a75..1d1f42d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+@@ -179,7 +179,7 @@ glusterd_svc_check_volfile_identical(char *svc_name,
+ goto out;
+ }
+
+- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(tmpvol);
+ if (tmp_fd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+@@ -241,7 +241,7 @@ glusterd_svc_check_topology_identical(char *svc_name,
+ goto out;
+ }
+
+- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmpfd = mkstemp(tmpvol);
+ if (tmpfd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 618d8bc..9e47d14 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1752,6 +1752,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ if (dict_get(op_ctx, "client-count"))
+ break;
+ }
++ /* coverity[MIXED_ENUMS] */
+ } else if (cmd == GF_OP_CMD_DETACH_START) {
+ op = GD_OP_REMOVE_BRICK;
+ dict_del(req_dict, "rebalance-command");
+diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c
+index 922eae7..59843a0 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c
++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c
+@@ -116,7 +116,7 @@ glusterd_svc_check_tier_volfile_identical(char *svc_name,
+ goto out;
+ }
+
+- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(tmpvol);
+ if (tmp_fd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+@@ -177,7 +177,7 @@ glusterd_svc_check_tier_topology_identical(char *svc_name,
+ goto out;
+ }
+
+- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmpfd = mkstemp(tmpvol);
+ if (tmpfd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 14e23d1..8b0fc9a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -908,6 +908,7 @@ glusterd_create_sub_tier_volinfo(glusterd_volinfo_t *volinfo,
+ (*dup_volinfo)->brick_count = tier_info->cold_brick_count;
+ }
+ out:
++ /* coverity[REVERSE_NULL] */
+ if (ret && *dup_volinfo) {
+ glusterd_volinfo_delete(*dup_volinfo);
+ *dup_volinfo = NULL;
+@@ -2738,6 +2739,7 @@ glusterd_readin_file(const char *filepath, int *line_count)
+ /* Reduce allocation to minimal size. */
+ p = GF_REALLOC(lines, (counter + 1) * sizeof(char *));
+ if (!p) {
++ /* coverity[TAINTED_SCALAR] */
+ free_lines(lines, counter);
+ lines = NULL;
+ goto out;
+@@ -6782,6 +6784,7 @@ glusterd_restart_bricks(void *opaque)
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock(&brickinfo->restart_mutex);
+ {
++ /* coverity[SLEEP] */
+ glusterd_brick_start(volinfo, brickinfo, _gf_false,
+ _gf_false);
+ }
+@@ -8886,7 +8889,7 @@ glusterd_nfs_statedump(char *options, int option_cnt, char **op_errstr)
+ kill(pid, SIGUSR1);
+
+ sleep(1);
+-
++ /* coverity[TAINTED_STRING] */
+ sys_unlink(dumpoptions_path);
+ ret = 0;
+ out:
+@@ -9012,6 +9015,7 @@ glusterd_quotad_statedump(char *options, int option_cnt, char **op_errstr)
+
+ sleep(1);
+
++ /* coverity[TAINTED_STRING] */
+ sys_unlink(dumpoptions_path);
+ ret = 0;
+ out:
+@@ -13423,7 +13427,7 @@ glusterd_get_global_options_for_all_vols(rpcsvc_request_t *req, dict_t *ctx,
+ if (key_fixed)
+ key = key_fixed;
+ }
+-
++ /* coverity[CONSTANT_EXPRESSION_RESULT] */
+ ALL_VOLUME_OPTION_CHECK("all", _gf_true, key, ret, op_errstr, out);
+
+ for (i = 0; valid_all_vol_opts[i].option; i++) {
+@@ -14153,6 +14157,7 @@ glusterd_disallow_op_for_tier(glusterd_volinfo_t *volinfo, glusterd_op_t op,
+ break;
+ case GD_OP_REMOVE_BRICK:
+ switch (cmd) {
++ /* coverity[MIXED_ENUMS] */
+ case GF_DEFRAG_CMD_DETACH_START:
+ case GF_OP_CMD_DETACH_COMMIT_FORCE:
+ case GF_OP_CMD_DETACH_COMMIT:
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 539e8a5..6852f8e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -322,7 +322,7 @@ volopt_trie_cbk(char *word, void *param)
+ }
+
+ static int
+-process_nodevec(struct trienodevec *nodevec, char **hint)
++process_nodevec(struct trienodevec *nodevec, char **outputhint, char *inputhint)
+ {
+ int ret = 0;
+ char *hint1 = NULL;
+@@ -331,14 +331,14 @@ process_nodevec(struct trienodevec *nodevec, char **hint)
+ trienode_t **nodes = nodevec->nodes;
+
+ if (!nodes[0]) {
+- *hint = NULL;
++ *outputhint = NULL;
+ return 0;
+ }
+
+ #if 0
+ /* Limit as in git */
+ if (trienode_get_dist (nodes[0]) >= 6) {
+- *hint = NULL;
++ *outputhint = NULL;
+ return 0;
+ }
+ #endif
+@@ -347,23 +347,30 @@ process_nodevec(struct trienodevec *nodevec, char **hint)
+ return -1;
+
+ if (nodevec->cnt < 2 || !nodes[1]) {
+- *hint = hint1;
++ *outputhint = hint1;
+ return 0;
+ }
+
+- if (trienode_get_word(nodes[1], &hint2))
++ if (trienode_get_word(nodes[1], &hint2)) {
++ GF_FREE(hint1);
+ return -1;
++ }
+
+- if (*hint)
+- hintinfx = *hint;
+- ret = gf_asprintf(hint, "%s or %s%s", hint1, hintinfx, hint2);
++ if (inputhint)
++ hintinfx = inputhint;
++ ret = gf_asprintf(outputhint, "%s or %s%s", hint1, hintinfx, hint2);
+ if (ret > 0)
+ ret = 0;
++ if (hint1)
++ GF_FREE(hint1);
++ if (hint2)
++ GF_FREE(hint2);
+ return ret;
+ }
+
+ static int
+-volopt_trie_section(int lvl, char **patt, char *word, char **hint, int hints)
++volopt_trie_section(int lvl, char **patt, char *word, char **outputhint,
++ char *inputhint, int hints)
+ {
+ trienode_t *nodes[] = {NULL, NULL};
+ struct trienodevec nodevec = {nodes, 2};
+@@ -384,7 +391,7 @@ volopt_trie_section(int lvl, char **patt, char *word, char **hint, int hints)
+ nodevec.cnt = hints;
+ ret = trie_measure_vec(trie, word, &nodevec);
+ if (!ret && nodevec.nodes[0])
+- ret = process_nodevec(&nodevec, hint);
++ ret = process_nodevec(&nodevec, outputhint, inputhint);
+
+ trie_destroy(trie);
+
+@@ -396,6 +403,7 @@ volopt_trie(char *key, char **hint)
+ {
+ char *patt[] = {NULL};
+ char *fullhint = NULL;
++ char *inputhint = NULL;
+ char *dot = NULL;
+ char *dom = NULL;
+ int len = 0;
+@@ -405,7 +413,7 @@ volopt_trie(char *key, char **hint)
+
+ dot = strchr(key, '.');
+ if (!dot)
+- return volopt_trie_section(1, patt, key, hint, 2);
++ return volopt_trie_section(1, patt, key, hint, inputhint, 2);
+
+ len = dot - key;
+ dom = gf_strdup(key);
+@@ -413,7 +421,7 @@ volopt_trie(char *key, char **hint)
+ return -1;
+ dom[len] = '\0';
+
+- ret = volopt_trie_section(0, NULL, dom, patt, 1);
++ ret = volopt_trie_section(0, NULL, dom, patt, inputhint, 1);
+ GF_FREE(dom);
+ if (ret) {
+ patt[0] = NULL;
+@@ -422,8 +430,8 @@ volopt_trie(char *key, char **hint)
+ if (!patt[0])
+ goto out;
+
+- *hint = "...";
+- ret = volopt_trie_section(1, patt, dot + 1, hint, 2);
++ inputhint = "...";
++ ret = volopt_trie_section(1, patt, dot + 1, hint, inputhint, 2);
+ if (ret)
+ goto out;
+ if (*hint) {
+--
+1.8.3.1
+
diff --git a/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch b/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch
new file mode 100644
index 0000000..d980eb5
--- /dev/null
+++ b/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch
@@ -0,0 +1,54 @@
+From 0e3871a57b7a621444dc5cfd49935a1e412f6436 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Mon, 8 Jun 2020 13:27:50 +0530
+Subject: [PATCH 433/449] socket: Resolve ssl_ctx leak for a brick while only
+ mgmt SSL is enabled
+
+Problem: When only management SSL is enabled for a brick process, its
+         use_ssl flag is false, but the socket APIs clean up ssl_ctx
+         only when both use_ssl and ssl_ctx are valid, so ssl_ctx
+         leaks.
+
+Solution: To avoid the leak, check only ssl_ctx and clean it up
+          whenever it is valid.
+
+> Fixes: #1196
+> Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit 9873baee34afdf0c20f5fc98a7dbf2a9f07447e2)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24366/)
+
+BUG: 1810924
+Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202625
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ rpc/rpc-transport/socket/src/socket.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index 226b2e2..54cd5df 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -1163,7 +1163,7 @@ __socket_reset(rpc_transport_t *this)
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+- if (priv->use_ssl && priv->ssl_ctx) {
++ if (priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
+ }
+@@ -4685,7 +4685,7 @@ fini(rpc_transport_t *this)
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+- if (priv->use_ssl && priv->ssl_ctx) {
++ if (priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
+ }
+--
+1.8.3.1
+
diff --git a/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch b/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch
new file mode 100644
index 0000000..3f038a3
--- /dev/null
+++ b/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch
@@ -0,0 +1,39 @@
+From 2ea3fc203671429d0aa9994e5bbd57f6a604523d Mon Sep 17 00:00:00 2001
+From: Xie Changlong <xiechanglong@cmss.chinamobile.com>
+Date: Mon, 28 Oct 2019 17:43:28 +0800
+Subject: [PATCH 434/449] glusterd/ganesha: fix Coverity CID 1405785
+
+To avoid a double free.
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23630/
+> updates: bz#789278
+> Change-Id: I15ae54ed696295d4cb015668722e77983b062ccb
+> Signed-off-by: Xie Changlong <xiechanglong@cmss.chinamobile.com>
+
+BUG: 1787310
+Change-Id: I15ae54ed696295d4cb015668722e77983b062ccb
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202623
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index 06f028f..caba34f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -659,8 +659,8 @@ tear_down_cluster(gf_boolean_t run_teardown)
+ "Failed to close dir %s. Reason :"
+ " %s",
+ CONFDIR, strerror(errno));
+- goto exit;
+ }
++ goto exit;
+ }
+
+ out:
+--
+1.8.3.1
+
diff --git a/0435-glusterd-coverity-fix.patch b/0435-glusterd-coverity-fix.patch
new file mode 100644
index 0000000..f587107
--- /dev/null
+++ b/0435-glusterd-coverity-fix.patch
@@ -0,0 +1,38 @@
+From 9425fd5a49a17a8f91c13632ae055a6510b0b44c Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Fri, 17 May 2019 14:27:58 +0530
+Subject: [PATCH 435/449] glusterd: coverity fix
+
+CID: 1401345 - Unused value
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22737/
+> updates: bz#789278
+> Change-Id: I6b8f2611151ce0174042384b7632019c312ebae3
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1787310
+Change-Id: I6b8f2611151ce0174042384b7632019c312ebae3
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202622
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 8b0fc9a..2eb2a76 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -3740,7 +3740,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+ * deleting a volume, so we shouldn't be trying to create a
+ * fresh volume here which would lead to a stale entry
+ */
+- if (stage_deleted == 0)
++ if (!ret && stage_deleted == 0)
+ *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
+ ret = 0;
+ goto out;
+--
+1.8.3.1
+
diff --git a/0436-glusterd-coverity-fixes.patch b/0436-glusterd-coverity-fixes.patch
new file mode 100644
index 0000000..799681f
--- /dev/null
+++ b/0436-glusterd-coverity-fixes.patch
@@ -0,0 +1,187 @@
+From 179213798496448316547506da65dbd9fd741dfa Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Wed, 24 Apr 2019 22:02:51 +0530
+Subject: [PATCH 436/449] glusterd: coverity fixes
+
+Addresses the following:
+
+* CID 1124776: Resource leaks (RESOURCE_LEAK) - Variable "aa" going out
+of scope leaks the storage it points to in glusterd-volgen.c
+
+* A bunch of CHECKED_RETURN defects in the callers of synctask_barrier_init
+
+* CID 1400742: Program hangs (LOCK) - adding annotation to fix this
+false positive
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22615
+> Updates: bz#789278
+> Change-Id: I02f16e7eeb8c5cf72f7d0b29d00df4f03b3718b3
+> Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+
+BUG: 1787310
+Change-Id: I02f16e7eeb8c5cf72f7d0b29d00df4f03b3718b3
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202626
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 6 ++++++
+ xlators/mgmt/glusterd/src/glusterd-mgmt.c | 24 +++++++++++++++++++-----
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 22 ++++++++++++++++++----
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 5 +++--
+ 4 files changed, 46 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 1f31e72..b8799ab 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -3458,6 +3458,12 @@ glusterd_friend_remove(uuid_t uuid, char *hostname)
+ ret = glusterd_peerinfo_cleanup(peerinfo);
+ out:
+ gf_msg_debug(THIS->name, 0, "returning %d", ret);
++ /* We don't need to do a mutex unlock of peerinfo->delete_lock as the same
++ * will be anyway destroyed within glusterd_peerinfo_cleanup, coverity
++ * though cries about it
++ */
++ /* coverity[LOCK] */
++
+ return ret;
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+index a4915f3..1e185d7 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+@@ -757,7 +757,10 @@ glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict,
+
+ /* Sending mgmt_v3 lock req to other nodes in the cluster */
+ gd_syncargs_init(&args, NULL);
+- synctask_barrier_init((&args));
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
++
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+@@ -1108,7 +1111,10 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
+
+ /* Sending Pre Validation req to other nodes in the cluster */
+ gd_syncargs_init(&args, req_dict);
+- synctask_barrier_init((&args));
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
++
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+@@ -1458,7 +1464,10 @@ glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+
+ /* Sending brick op req to other nodes in the cluster */
+ gd_syncargs_init(&args, op_ctx);
+- synctask_barrier_init((&args));
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
++
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+@@ -1722,7 +1731,9 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+
+ /* Sending commit req to other nodes in the cluster */
+ gd_syncargs_init(&args, op_ctx);
+- synctask_barrier_init((&args));
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+@@ -1963,7 +1974,10 @@ glusterd_mgmt_v3_post_validate(glusterd_op_t op, int32_t op_ret, dict_t *dict,
+
+ /* Sending Post Validation req to other nodes in the cluster */
+ gd_syncargs_init(&args, req_dict);
+- synctask_barrier_init((&args));
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
++
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 9e47d14..c78983a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1191,7 +1191,12 @@ gd_lock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx,
+ struct syncargs args = {0};
+
+ this = THIS;
+- synctask_barrier_init((&args));
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
++
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+@@ -1321,7 +1326,10 @@ stage_done:
+ }
+
+ gd_syncargs_init(&args, aggr_dict);
+- synctask_barrier_init((&args));
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
++
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+@@ -1449,7 +1457,10 @@ commit_done:
+ }
+
+ gd_syncargs_init(&args, op_ctx);
+- synctask_barrier_init((&args));
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
++
+ peer_cnt = 0;
+ origin_glusterd = is_origin_glusterd(req_dict);
+
+@@ -1541,7 +1552,10 @@ gd_unlock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, int *op_ret,
+ goto out;
+ }
+
+- synctask_barrier_init((&args));
++ ret = synctask_barrier_init((&args));
++ if (ret)
++ goto out;
++
+ peer_cnt = 0;
+
+ if (cluster_lock) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 6852f8e..16346e7 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4808,9 +4808,10 @@ nfs_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme,
+
+ if (ret != -1) {
+ ret = gf_canonicalize_path(vme->value);
+- if (ret)
++ if (ret) {
++ GF_FREE(aa);
+ return -1;
+-
++ }
+ ret = xlator_set_option(xl, aa, ret, vme->value);
+ GF_FREE(aa);
+ }
+--
+1.8.3.1
+
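The recurring change in this patch is the same hardening idiom at every call site: treat synctask_barrier_init() as fallible and bail out instead of proceeding with a half-initialized barrier. A minimal standalone sketch of that idiom, using POSIX pthread_barrier_init() as a stand-in for the gluster-internal call:

    #include <pthread.h>

    static int
    run_phase(void)
    {
        pthread_barrier_t barrier;
        int ret = -1;

        /* The fix above: propagate init failure through the function's
         * single exit path instead of ignoring the return value. */
        ret = pthread_barrier_init(&barrier, NULL, 1);
        if (ret)
            goto out;

        pthread_barrier_wait(&barrier); /* phase work would go here */
        pthread_barrier_destroy(&barrier);
        ret = 0;
    out:
        return ret;
    }

    int
    main(void)
    {
        return run_phase() ? 1 : 0;
    }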
diff --git a/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch b/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch
new file mode 100644
index 0000000..21fcd8a
--- /dev/null
+++ b/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch
@@ -0,0 +1,48 @@
+From ffd428d07036531b7ed98c7393b87490aaa223ec Mon Sep 17 00:00:00 2001
+From: Niels de Vos <ndevos@redhat.com>
+Date: Fri, 3 May 2019 09:18:31 +0200
+Subject: [PATCH 437/449] glusterd: prevent use-after-free in
+ glusterd_op_ac_send_brick_op()
+
+Coverity reported that GF_FREE(req_ctx) could be called twice on the same req_ctx.
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22656/
+> Change-Id: I9120686e5920de8c27688e10de0db6aa26292064
+> CID: 1401115
+> Updates: bz#789278
+> Signed-off-by: Niels de Vos <ndevos@redhat.com>
+
+BUG: 1787310
+Change-Id: I9120686e5920de8c27688e10de0db6aa26292064
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202619
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 46fc607..1e84f5f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -7575,7 +7575,6 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+- GF_FREE(req_ctx);
+ goto out;
+ }
+ }
+@@ -7594,7 +7593,7 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
+ }
+
+ out:
+- if (ret && req_ctx && free_req_ctx)
++ if (ret && free_req_ctx)
+ GF_FREE(req_ctx);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
+--
+1.8.3.1
+
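The fix is a standard Coverity-driven cleanup: release req_ctx on exactly one path (the guarded free at the 'out' label) rather than both at the error site and at the exit. The shape of the bug and the cure, in a minimal standalone sketch with a generic context instead of glusterd's req_ctx:

    #include <stdlib.h>

    struct ctx {
        int payload;
    };

    static int
    do_op(int fail_early)
    {
        int ret = -1;
        struct ctx *c = calloc(1, sizeof(*c));

        if (!c)
            return -1;

        if (fail_early)
            goto out; /* buggy version freed c here AND at 'out' */

        c->payload = 42;
        ret = 0;
    out:
        free(c); /* single cleanup site: c is freed exactly once */
        return ret;
    }

    int
    main(void)
    {
        do_op(1);
        return do_op(0);
    }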
diff --git a/0438-dht-sparse-files-rebalance-enhancements.patch b/0438-dht-sparse-files-rebalance-enhancements.patch
new file mode 100644
index 0000000..6e10ce6
--- /dev/null
+++ b/0438-dht-sparse-files-rebalance-enhancements.patch
@@ -0,0 +1,324 @@
+From 7b2f1bd4e5a57ea3abd5f14a7d81b120735faecd Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Wed, 6 May 2020 13:28:40 +0300
+Subject: [PATCH 438/449] dht - sparse files rebalance enhancements
+
+Currently, data migration in rebalance reads a sparse file sequentially,
+disregarding which segments are holes and which are data. This can lead
+to extremely long migration times for large sparse files.
+The data migration mechanism needs to be enhanced so that only data
+segments are read and migrated. This can be achieved using lseek to seek
+for holes and data in the file.
+This enhancement is a consequence of
+https://bugzilla.redhat.com/show_bug.cgi?id=1823703
+
+> fixes: #1222
+> Change-Id: If5f448a0c532926464e1f34f504c5c94749b08c3
+> Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+> (Cherry pick from commit 7b7559733ca0c25c63f9d56cb7f4650dbd694c40)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24409/)
+
+BUG: 1836099
+Change-Id: If5f448a0c532926464e1f34f504c5c94749b08c3
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202647
+Reviewed-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/distribute/spare_file_rebalance.t | 51 ++++++++
+ xlators/cluster/dht/src/dht-rebalance.c | 172 ++++++++++++--------------
+ 2 files changed, 130 insertions(+), 93 deletions(-)
+ create mode 100644 tests/basic/distribute/spare_file_rebalance.t
+
+diff --git a/tests/basic/distribute/spare_file_rebalance.t b/tests/basic/distribute/spare_file_rebalance.t
+new file mode 100644
+index 0000000..061c02f
+--- /dev/null
++++ b/tests/basic/distribute/spare_file_rebalance.t
+@@ -0,0 +1,51 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../dht.rc
++
++# Initialize
++#------------------------------------------------------------
++cleanup;
++
++# Start glusterd
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++# Create a volume
++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
++
++# Verify volume creation
++EXPECT "$V0" volinfo_field $V0 'Volume Name';
++EXPECT 'Created' volinfo_field $V0 'Status';
++
++# Start volume and verify successful start
++TEST $CLI volume start $V0;
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
++
++#------------------------------------------------------------
++
++# Test case - Create sparse files on MP and verify
++# file info after rebalance
++#------------------------------------------------------------
++
++# Create some sparse files and get their size
++TEST cd $M0;
++dd if=/dev/urandom of=sparse_file bs=10k count=1 seek=2M
++cp --sparse=always sparse_file sparse_file_3;
++
++# Add a 3rd brick
++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3;
++
++# Trigger rebalance
++TEST $CLI volume rebalance $V0 start force;
++EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed;
++
++# Compare original and rebalanced files
++TEST cd $B0/${V0}2
++TEST cmp sparse_file $B0/${V0}3/sparse_file_3
++EXPECT_WITHIN 30 "";
++
++cleanup;
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 88b6b54..d0c21b4 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -18,8 +18,8 @@
+ #include <glusterfs/events.h>
+
+ #define GF_DISK_SECTOR_SIZE 512
+-#define DHT_REBALANCE_PID 4242 /* Change it if required */
+-#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
++#define DHT_REBALANCE_PID 4242 /* Change it if required */
++#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
+ #define MAX_MIGRATE_QUEUE_COUNT 500
+ #define MIN_MIGRATE_QUEUE_COUNT 200
+ #define MAX_REBAL_TYPE_SIZE 16
+@@ -178,75 +178,6 @@ dht_strip_out_acls(dict_t *dict)
+ }
+ }
+
+-static int
+-dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count,
+- int32_t size, off_t offset, struct iobref *iobref,
+- int *fop_errno)
+-{
+- int i = 0;
+- int ret = -1;
+- int start_idx = 0;
+- int tmp_offset = 0;
+- int write_needed = 0;
+- int buf_len = 0;
+- int size_pending = 0;
+- char *buf = NULL;
+-
+- /* loop through each vector */
+- for (i = 0; i < count; i++) {
+- buf = vec[i].iov_base;
+- buf_len = vec[i].iov_len;
+-
+- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
+- start_idx += GF_DISK_SECTOR_SIZE) {
+- if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
+- write_needed = 1;
+- continue;
+- }
+-
+- if (write_needed) {
+- ret = syncop_write(
+- to, fd, (buf + tmp_offset), (start_idx - tmp_offset),
+- (offset + tmp_offset), iobref, 0, NULL, NULL);
+- /* 'path' will be logged in calling function */
+- if (ret < 0) {
+- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
+- strerror(-ret));
+- *fop_errno = -ret;
+- ret = -1;
+- goto out;
+- }
+-
+- write_needed = 0;
+- }
+- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
+- }
+-
+- if ((start_idx < buf_len) || write_needed) {
+- /* This means, last chunk is not yet written.. write it */
+- ret = syncop_write(to, fd, (buf + tmp_offset),
+- (buf_len - tmp_offset), (offset + tmp_offset),
+- iobref, 0, NULL, NULL);
+- if (ret < 0) {
+- /* 'path' will be logged in calling function */
+- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
+- strerror(-ret));
+- *fop_errno = -ret;
+- ret = -1;
+- goto out;
+- }
+- }
+-
+- size_pending = (size - buf_len);
+- if (!size_pending)
+- break;
+- }
+-
+- ret = size;
+-out:
+- return ret;
+-}
+-
+ /*
+ return values:
+ -1 : failure
+@@ -1101,32 +1032,97 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+ int ret = 0;
+ int count = 0;
+ off_t offset = 0;
++ off_t data_offset = 0;
++ off_t hole_offset = 0;
+ struct iovec *vector = NULL;
+ struct iobref *iobref = NULL;
+ uint64_t total = 0;
+ size_t read_size = 0;
++ size_t data_block_size = 0;
+ dict_t *xdata = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
++
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+- ? DHT_REBALANCE_BLKSIZE
+- : (ia_size - total));
++ /* This is a regular file - read it sequentially */
++ if (!hole_exists) {
++ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
++ ? DHT_REBALANCE_BLKSIZE
++ : (ia_size - total));
++ } else {
++ /* This is a sparse file - read only the data segments in the file
++ */
++
++ /* If the previous data block is fully copied, find the next data
++ * segment
++ * starting at the offset of the last read and written byte, */
++ if (data_block_size <= 0) {
++ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
++ &data_offset);
++ if (ret) {
++ if (ret == -ENXIO)
++ ret = 0; /* No more data segments */
++ else
++ *fop_errno = -ret; /* Error occurred */
++
++ break;
++ }
++
++ /* If the position of the current data segment is greater than
++ * the position of the next hole, find the next hole in order to
++ * calculate the length of the new data segment */
++ if (data_offset > hole_offset) {
++ /* Starting at the offset of the last data segment, find the
++ * next hole */
++ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
++ NULL, &hole_offset);
++ if (ret) {
++ /* If an error occurred here it's a real error because
++ * if the seek for a data segment was successful then
++ * necessarily another hole must exist (EOF is a hole)
++ */
++ *fop_errno = -ret;
++ break;
++ }
++
++ /* Calculate the total size of the current data block */
++ data_block_size = hole_offset - data_offset;
++ }
++ } else {
++ /* There is still data in the current segment, move the
++ * data_offset to the position of the last written byte */
++ data_offset = offset;
++ }
++
++ /* Calculate how much data needs to be read and written. If the data
++ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
++ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
++ * next iteration(s) */
++ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
++ ? DHT_REBALANCE_BLKSIZE
++ : data_block_size);
++
++ /* Calculate the remaining size of the data block - maybe there's no
++ * need to seek for data in the next iteration */
++ data_block_size -= read_size;
++
++ /* Set offset to the offset of the data segment so read and write
++ * will have the correct position */
++ offset = data_offset;
++ }
+
+ ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
+ &iobref, NULL, NULL, NULL);
++
+ if (!ret || (ret < 0)) {
+ *fop_errno = -ret;
+ break;
+ }
+
+- if (hole_exists) {
+- ret = dht_write_with_holes(to, dst, vector, count, ret, offset,
+- iobref, fop_errno);
+- } else {
+- if (!conf->force_migration && !dht_is_tier_xlator(this)) {
++ if (!conf->force_migration && !dht_is_tier_xlator(this)) {
++ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata) {
+ gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+@@ -1146,7 +1142,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+ * https://github.com/gluster/glusterfs/issues/308
+ * for more details.
+ */
+- ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1);
++ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
+ "failed to set dict");
+@@ -1155,22 +1151,12 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+ break;
+ }
+ }
+-
+- ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+- NULL, xdata, NULL);
+- if (ret < 0) {
+- *fop_errno = -ret;
+- }
+- }
+-
+- if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) &&
+- (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) {
+- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
+- "Migrate file paused");
+- ret = -1;
+ }
+
++ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
++ NULL, xdata, NULL);
+ if (ret < 0) {
++ *fop_errno = -ret;
+ break;
+ }
+
+--
+1.8.3.1
+
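The heart of the patch is the SEEK_DATA/SEEK_HOLE loop done through syncop_seek(). The same idea expressed over plain POSIX lseek(), as a sketch (assumes the filesystem supports SEEK_DATA/SEEK_HOLE, which the patch also relies on):

    #include <errno.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <unistd.h>

    #define BLKSIZE 1048576 /* 1 MB, as in DHT_REBALANCE_BLKSIZE */

    /* Copy only the allocated (data) segments of a sparse file,
     * skipping holes entirely. */
    static int
    copy_sparse(int src, int dst)
    {
        char *buf = malloc(BLKSIZE);
        off_t data, hole = 0;
        int ret = -1;

        if (!buf)
            return -1;

        for (;;) {
            data = lseek(src, hole, SEEK_DATA); /* next data segment */
            if (data < 0) {
                ret = (errno == ENXIO) ? 0 : -1; /* ENXIO: no more data */
                break;
            }
            hole = lseek(src, data, SEEK_HOLE); /* segment end (EOF is a hole) */
            if (hole < 0)
                break;

            while (data < hole) { /* migrate the segment block by block */
                size_t n = (size_t)(hole - data);
                ssize_t r;

                if (n > BLKSIZE)
                    n = BLKSIZE;
                r = pread(src, buf, n, data);
                if (r <= 0 || pwrite(dst, buf, r, data) != r)
                    goto done;
                data += r;
            }
        }
    done:
        free(buf);
        return ret;
    }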
diff --git a/0439-cluster-afr-Delay-post-op-for-fsync.patch b/0439-cluster-afr-Delay-post-op-for-fsync.patch
new file mode 100644
index 0000000..dc1593b
--- /dev/null
+++ b/0439-cluster-afr-Delay-post-op-for-fsync.patch
@@ -0,0 +1,438 @@
+From 3ed98fc9dcb39223032e343fd5b0ad17fa3cae14 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Fri, 29 May 2020 14:24:53 +0530
+Subject: [PATCH 439/449] cluster/afr: Delay post-op for fsync
+
+Problem:
+AFR doesn't delay post-op for the fsync fop. For fsync-heavy workloads
+this leads to an unnecessary fxattrop/finodelk for every fsync, leading
+to bad performance.
+
+Fix:
+Have delayed post-op for fsync. Add a special flag in xdata to indicate
+that afr shouldn't delay post-op in cases where either the
+process will terminate or a graph-switch would happen. Otherwise it leads
+to unnecessary heals when the graph-switch/process-termination
+happens before the delayed post-op completes.
+
+> Upstream-patch: https://review.gluster.org/c/glusterfs/+/24473
+> Fixes: #1253
+
+BUG: 1838479
+Change-Id: I531940d13269a111c49e0510d49514dc169f4577
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202676
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/glfs-resolve.c | 14 ++-
+ tests/basic/afr/durability-off.t | 2 +
+ tests/basic/gfapi/gfapi-graph-switch-open-fd.t | 44 +++++++++
+ tests/basic/gfapi/gfapi-keep-writing.c | 129 +++++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-inode-write.c | 11 ++-
+ xlators/cluster/afr/src/afr-transaction.c | 9 +-
+ xlators/cluster/afr/src/afr.h | 2 +-
+ xlators/cluster/dht/src/dht-rebalance.c | 15 ++-
+ xlators/mount/fuse/src/fuse-bridge.c | 23 ++++-
+ 9 files changed, 239 insertions(+), 10 deletions(-)
+ create mode 100644 tests/basic/gfapi/gfapi-graph-switch-open-fd.t
+ create mode 100644 tests/basic/gfapi/gfapi-keep-writing.c
+
+diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
+index a79f490..062b7dc 100644
+--- a/api/src/glfs-resolve.c
++++ b/api/src/glfs-resolve.c
+@@ -722,6 +722,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
+ 0,
+ };
+ char uuid1[64];
++ dict_t *xdata = NULL;
+
+ oldinode = oldfd->inode;
+ oldsubvol = oldinode->table->xl;
+@@ -730,7 +731,15 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
+ return fd_ref(oldfd);
+
+ if (!oldsubvol->switched) {
+- ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, NULL, NULL);
++ xdata = dict_new();
++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
++ gf_msg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED,
++ "last-fsync set failed on %s graph %s (%d)",
++ uuid_utoa_r(oldfd->inode->gfid, uuid1),
++ graphid_str(oldsubvol), oldsubvol->graph->id);
++ }
++
++ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret) {
+ gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED,
+@@ -809,6 +818,9 @@ out:
+ newfd = NULL;
+ }
+
++ if (xdata)
++ dict_unref(xdata);
++
+ return newfd;
+ }
+
+diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t
+index 155ffa0..6e0f18b 100644
+--- a/tests/basic/afr/durability-off.t
++++ b/tests/basic/afr/durability-off.t
+@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0
+ EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+ EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
+
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+ #Test that fsyncs happen when durability is on
+ TEST $CLI volume set $V0 cluster.ensure-durability on
+ TEST $CLI volume set $V0 performance.strict-write-ordering on
+diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
+new file mode 100644
+index 0000000..2e666be
+--- /dev/null
++++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
+@@ -0,0 +1,44 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup;
++
++TEST glusterd
++
++TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2};
++EXPECT 'Created' volinfo_field $V0 'Status';
++
++TEST $CLI volume start $V0;
++EXPECT 'Started' volinfo_field $V0 'Status';
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
++TEST touch $M0/sync
++logdir=`gluster --print-logdir`
++
++TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi
++
++
++#Launch a program to keep doing writes on an fd
++./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync &
++p=$!
++sleep 1 #Let some writes go through
++#Check if graph switch will lead to any pending markers for ever
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume set $V0 performance.read-ahead off
++
++
++TEST rm -f $M0/sync #Make sure the glfd is closed
++TEST wait #Wait for background process to die
++#Goal is to check if there is permanent FOOL changelog
++sleep 5
++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty
++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty
++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty
++
++cleanup_tester $(dirname $0)/gfapi-async-calls-test
++
++cleanup;
+diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c
+new file mode 100644
+index 0000000..91b59ce
+--- /dev/null
++++ b/tests/basic/gfapi/gfapi-keep-writing.c
+@@ -0,0 +1,129 @@
++#include <fcntl.h>
++#include <unistd.h>
++#include <time.h>
++#include <limits.h>
++#include <string.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <errno.h>
++#include <glusterfs/api/glfs.h>
++#include <glusterfs/api/glfs-handles.h>
++
++#define LOG_ERR(msg) \
++ do { \
++ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
++ } while (0)
++
++glfs_t *
++init_glfs(const char *hostname, const char *volname, const char *logfile)
++{
++ int ret = -1;
++ glfs_t *fs = NULL;
++
++ fs = glfs_new(volname);
++ if (!fs) {
++ LOG_ERR("glfs_new failed");
++ return NULL;
++ }
++
++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
++ if (ret < 0) {
++ LOG_ERR("glfs_set_volfile_server failed");
++ goto out;
++ }
++
++ ret = glfs_set_logging(fs, logfile, 7);
++ if (ret < 0) {
++ LOG_ERR("glfs_set_logging failed");
++ goto out;
++ }
++
++ ret = glfs_init(fs);
++ if (ret < 0) {
++ LOG_ERR("glfs_init failed");
++ goto out;
++ }
++
++ ret = 0;
++out:
++ if (ret) {
++ glfs_fini(fs);
++ fs = NULL;
++ }
++
++ return fs;
++}
++
++int
++glfs_test_function(const char *hostname, const char *volname,
++ const char *logfile, const char *syncfile)
++{
++ int ret = -1;
++ int flags = O_CREAT | O_RDWR;
++ glfs_t *fs = NULL;
++ glfs_fd_t *glfd = NULL;
++ const char *buff = "This is from my prog\n";
++ const char *filename = "glfs_test.txt";
++ struct stat buf = {0};
++
++ fs = init_glfs(hostname, volname, logfile);
++ if (fs == NULL) {
++ LOG_ERR("init_glfs failed");
++ return -1;
++ }
++
++ glfd = glfs_creat(fs, filename, flags, 0644);
++ if (glfd == NULL) {
++ LOG_ERR("glfs_creat failed");
++ goto out;
++ }
++
++ while (glfs_stat(fs, syncfile, &buf) == 0) {
++ ret = glfs_write(glfd, buff, strlen(buff), flags);
++ if (ret < 0) {
++ LOG_ERR("glfs_write failed");
++ goto out;
++ }
++ }
++
++ ret = glfs_close(glfd);
++ if (ret < 0) {
++ LOG_ERR("glfs_write failed");
++ goto out;
++ }
++
++out:
++ ret = glfs_fini(fs);
++ if (ret) {
++ LOG_ERR("glfs_fini failed");
++ }
++
++ return ret;
++}
++
++int
++main(int argc, char *argv[])
++{
++ int ret = 0;
++ char *hostname = NULL;
++ char *volname = NULL;
++ char *logfile = NULL;
++ char *syncfile = NULL;
++
++ if (argc != 5) {
++ fprintf(stderr, "Invalid argument\n");
++ exit(1);
++ }
++
++ hostname = argv[1];
++ volname = argv[2];
++ logfile = argv[3];
++ syncfile = argv[4];
++
++ ret = glfs_test_function(hostname, volname, logfile, syncfile);
++ if (ret) {
++ LOG_ERR("glfs_test_function failed");
++ }
++
++ return ret;
++}
+diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
+index 7fcc9d4..df82b6e 100644
+--- a/xlators/cluster/afr/src/afr-inode-write.c
++++ b/xlators/cluster/afr/src/afr-inode-write.c
+@@ -2492,6 +2492,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int32_t op_errno = ENOMEM;
++ int8_t last_fsync = 0;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+@@ -2501,10 +2502,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ if (!local)
+ goto out;
+
+- if (xdata)
++ if (xdata) {
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+- else
++ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
++ if (last_fsync) {
++ local->transaction.disable_delayed_post_op = _gf_true;
++ }
++ }
++ } else {
+ local->xdata_req = dict_new();
++ }
+
+ if (!local->xdata_req)
+ goto out;
+diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
+index 8e65ae2..ffd0ab8 100644
+--- a/xlators/cluster/afr/src/afr-transaction.c
++++ b/xlators/cluster/afr/src/afr-transaction.c
+@@ -2385,8 +2385,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
+ goto out;
+ }
+
+- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) {
+- /*Only allow writes but shard does [f]xattrops on writes, so
++ if (local->transaction.disable_delayed_post_op) {
++ goto out;
++ }
++
++ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
++ (local->op != GF_FOP_FSYNC)) {
++ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
+ * they are fine too*/
+ goto out;
+ }
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index 18f1a6a..ff96246 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -854,7 +854,7 @@ typedef struct _afr_local {
+
+ int (*unwind)(call_frame_t *frame, xlator_t *this);
+
+- /* post-op hook */
++ gf_boolean_t disable_delayed_post_op;
+ } transaction;
+
+ syncbarrier_t barrier;
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index d0c21b4..e9974cd 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -1550,6 +1550,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ xlator_t *old_target = NULL;
+ xlator_t *hashed_subvol = NULL;
+ fd_t *linkto_fd = NULL;
++ dict_t *xdata = NULL;
+
+ if (from == to) {
+ gf_msg_debug(this->name, 0,
+@@ -1868,7 +1869,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+
+ /* TODO: Sync the locks */
+
+- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL);
++ xdata = dict_new();
++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
++ gf_log(this->name, GF_LOG_ERROR,
++ "%s: failed to set last-fsync flag on "
++ "%s (%s)",
++ loc->path, to->name, strerror(ENOMEM));
++ }
++
++ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
+ loc->path, to->name, strerror(-ret));
+@@ -2342,11 +2351,15 @@ out:
+
+ if (dst_fd)
+ syncop_close(dst_fd);
++
+ if (src_fd)
+ syncop_close(src_fd);
+ if (linkto_fd)
+ syncop_close(linkto_fd);
+
++ if (xdata)
++ dict_unref(xdata);
++
+ loc_wipe(&tmp_loc);
+ loc_wipe(&parent_loc);
+
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index fdeec49..4264fad 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -5559,6 +5559,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
+ char create_in_progress = 0;
+ fuse_fd_ctx_t *basefd_ctx = NULL;
+ fd_t *oldfd = NULL;
++ dict_t *xdata = NULL;
+
+ basefd_ctx = fuse_fd_ctx_get(this, basefd);
+ GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out);
+@@ -5595,10 +5596,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
+ }
+
+ if (oldfd->inode->table->xl == old_subvol) {
+- if (IA_ISDIR(oldfd->inode->ia_type))
++ if (IA_ISDIR(oldfd->inode->ia_type)) {
+ ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL);
+- else
+- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL);
++ } else {
++ xdata = dict_new();
++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
++ gf_log("glusterfs-fuse", GF_LOG_WARNING,
++ "last-fsync set failed (%s) on fd (%p)"
++ "(basefd:%p basefd-inode.gfid:%s) "
++ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
++ strerror(ENOMEM), oldfd, basefd,
++ uuid_utoa(basefd->inode->gfid), old_subvol->name,
++ old_subvol->graph->id, new_subvol->name,
++ new_subvol->graph->id);
++ }
++
++ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL);
++ }
+
+ if (ret < 0) {
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+@@ -5653,6 +5667,9 @@ out:
+
+ fd_unref(oldfd);
+
++ if (xdata)
++ dict_unref(xdata);
++
+ return ret;
+ }
+
+--
+1.8.3.1
+
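The caller-side pattern repeats three times above (gfapi fd migration, DHT rebalance, FUSE fd migration): build an xdata dict, set the "last-fsync" hint so AFR skips the delayed post-op, fsync, unref. A condensed sketch of that idiom using the same gluster calls as the patch; this is not standalone code, it needs the glusterfs tree headers:

    /* Final fsync on an fd before process exit or graph switch. */
    static int
    final_fsync(xlator_t *subvol, fd_t *fd)
    {
        int ret = -1;
        dict_t *xdata = dict_new();

        /* The hint tells AFR not to defer the post-op for this fsync. */
        if (!xdata || dict_set_int8(xdata, "last-fsync", 1))
            gf_log(subvol->name, GF_LOG_WARNING, "last-fsync set failed");

        /* Even if setting the hint failed, still fsync (as the patch does). */
        ret = syncop_fsync(subvol, fd, 0, NULL, NULL, xdata, NULL);

        if (xdata)
            dict_unref(xdata);
        return ret;
    }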
diff --git a/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch b/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch
new file mode 100644
index 0000000..a7c1869
--- /dev/null
+++ b/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch
@@ -0,0 +1,62 @@
+From 9cbab9110523cfafe23d6c6b3080d0d744062b85 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 21 May 2020 16:04:33 +0530
+Subject: [PATCH 440/449] glusterd/snapshot: Improve log message during
+ snapshot clone
+
+While taking a snapshot clone, if the snapshot is not activated,
+the cli was returning an error saying that the bricks are down.
+This patch clearly prints that the error is due to the snapshot
+state.
+
+>Change-Id: Ia840e6e071342e061ad38bf15e2e2ff2b0dacdfa
+>Fixes: #1255
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24478/
+
+BUG: 1837926
+Change-Id: Ia840e6e071342e061ad38bf15e2e2ff2b0dacdfa
+Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202707
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-snapshot.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+index c56be91..5b8ae97 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+@@ -2238,7 +2238,6 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
+ char *clonename = NULL;
+ char *snapname = NULL;
+ char device_name[64] = "";
+- char key[PATH_MAX] = "";
+ glusterd_snap_t *snap = NULL;
+ char err_str[PATH_MAX] = "";
+ int ret = -1;
+@@ -2299,8 +2298,18 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
+ goto out;
+ }
+
+- snprintf(key, sizeof(key) - 1, "vol1_volid");
+- ret = dict_get_bin(dict, key, (void **)&snap_volid);
++
++ if (!glusterd_is_volume_started(snap_vol)) {
++ snprintf(err_str, sizeof(err_str),
++ "Snapshot %s is "
++ "not activated",
++ snap->snapname);
++ loglevel = GF_LOG_WARNING;
++ *op_errno = EG_VOLSTP;
++ goto out;
++ }
++
++ ret = dict_get_bin(dict, "vol1_volid", (void **)&snap_volid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_volid");
+--
+1.8.3.1
+
diff --git a/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch b/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch
new file mode 100644
index 0000000..1e49684
--- /dev/null
+++ b/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch
@@ -0,0 +1,195 @@
+From 1bde083cbd1e06be66d00e4ca52075687cee0d60 Mon Sep 17 00:00:00 2001
+From: Csaba Henk <csaba@redhat.com>
+Date: Fri, 8 May 2020 23:01:04 +0200
+Subject: [PATCH 441/449] fuse: occasional logging for fuse device 'weird'
+ write errors
+
+This change is a followup to
+I510158843e4b1d482bdc496c2e97b1860dc1ba93.
+
+In the referred change we pushed log messages about 'weird'
+write errors to the fuse device out of sight, by reporting
+them at Debug loglevel instead of Error (where
+'weird' means the errno is not POSIX compliant but has
+meaningful semantics for the FUSE protocol).
+
+This solved the issue of spurious error reporting.
+And so far so good: these messages don't indicate
+an error condition by themselves. However, when they
+come in high repetitions, that indicates a suboptimal
+condition which should be reported.[1]
+
+Therefore now we shall emit a Warning if a certain
+errno occurs a certain number of times[2] as the
+outcome of a write to the fuse device.
+
+___
+[1] typically ENOENTs and ENOTDIRs accumulate
+when glusterfs' inode invalidation lags behind
+the kernel's internal inode garbage collection
+(in this case the above errnos mean that the inode
+which we requested to be invalidated is not found
+in the kernel). This can be mitigated with the
+invalidate-limit command line / mount option,
+cf. bz#1732717.
+
+[2] 256, as of the current implementation.
+
+Upstream on https://review.gluster.org/24415
+> Change-Id: I8cc7fe104da43a88875f93b0db49d5677cc16045
+> Updates: #1000
+> Signed-off-by: Csaba Henk <csaba@redhat.com>
+
+BUG: 1839137
+Change-Id: I8448d6d328d47cb01d560cd99a2f43cd8dab312d
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202646
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mount/fuse/src/fuse-bridge.c | 36 +++++++++++++++++++++++++++++++++++-
+ xlators/mount/fuse/src/fuse-bridge.h | 18 ++++++++++++++++++
+ 2 files changed, 53 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 4264fad..2e7584c 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -218,14 +218,30 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
+ if (res == -1) {
+ const char *errdesc = NULL;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
++ gf_boolean_t errno_degraded = _gf_false;
++ gf_boolean_t errno_promoted = _gf_false;
++
++#define ACCOUNT_ERRNO(eno) \
++ do { \
++ if (errno_degraded) { \
++ pthread_mutex_lock(&priv->fusedev_errno_cnt_mutex); \
++ { \
++ if (!++priv->fusedev_errno_cnt[FUSEDEV_##eno]) \
++ errno_promoted = _gf_true; \
++ } \
++ pthread_mutex_unlock(&priv->fusedev_errno_cnt_mutex); \
++ } \
++ } while (0)
+
+ /* If caller masked the errno, then it
+ * does not indicate an error at the application
+ * level, so we degrade the log severity to DEBUG.
+ */
+ if (errnomask && errno < ERRNOMASK_MAX &&
+- GET_ERRNO_MASK(errnomask, errno))
++ GET_ERRNO_MASK(errnomask, errno)) {
+ loglevel = GF_LOG_DEBUG;
++ errno_degraded = _gf_true;
++ }
+
+ switch (errno) {
+ /* The listed errnos are FUSE status indicators,
+@@ -235,33 +251,43 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
+ */
+ case ENOENT:
+ errdesc = "ENOENT";
++ ACCOUNT_ERRNO(ENOENT);
+ break;
+ case ENOTDIR:
+ errdesc = "ENOTDIR";
++ ACCOUNT_ERRNO(ENOTDIR);
+ break;
+ case ENODEV:
+ errdesc = "ENODEV";
++ ACCOUNT_ERRNO(ENODEV);
+ break;
+ case EPERM:
+ errdesc = "EPERM";
++ ACCOUNT_ERRNO(EPERM);
+ break;
+ case ENOMEM:
+ errdesc = "ENOMEM";
++ ACCOUNT_ERRNO(ENOMEM);
+ break;
+ case ENOTCONN:
+ errdesc = "ENOTCONN";
++ ACCOUNT_ERRNO(ENOTCONN);
+ break;
+ case ECONNREFUSED:
+ errdesc = "ECONNREFUSED";
++ ACCOUNT_ERRNO(ECONNREFUSED);
+ break;
+ case EOVERFLOW:
+ errdesc = "EOVERFLOW";
++ ACCOUNT_ERRNO(EOVERFLOW);
+ break;
+ case EBUSY:
+ errdesc = "EBUSY";
++ ACCOUNT_ERRNO(EBUSY);
+ break;
+ case ENOTEMPTY:
+ errdesc = "ENOTEMPTY";
++ ACCOUNT_ERRNO(ENOTEMPTY);
+ break;
+ default:
+ errdesc = strerror(errno);
+@@ -269,7 +295,13 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
+
+ gf_log_callingfn("glusterfs-fuse", loglevel,
+ "writing to fuse device failed: %s", errdesc);
++ if (errno_promoted)
++ gf_log("glusterfs-fuse", GF_LOG_WARNING,
++ "writing to fuse device yielded %s %d times", errdesc,
++ UINT8_MAX + 1);
+ return errno;
++
++#undef ACCOUNT_ERRNO
+ }
+
+ fouh = iov_out[0].iov_base;
+@@ -6584,6 +6616,8 @@ init(xlator_t *this_xl)
+ INIT_LIST_HEAD(&priv->interrupt_list);
+ pthread_mutex_init(&priv->interrupt_mutex, NULL);
+
++ pthread_mutex_init(&priv->fusedev_errno_cnt_mutex, NULL);
++
+ /* get options from option dictionary */
+ ret = dict_get_str(options, ZR_MOUNTPOINT_OPT, &value_string);
+ if (ret == -1 || value_string == NULL) {
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
+index d2d462c..2fb15a6 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.h
++++ b/xlators/mount/fuse/src/fuse-bridge.h
+@@ -78,6 +78,20 @@ typedef struct fuse_in_header fuse_in_header_t;
+ typedef void(fuse_handler_t)(xlator_t *this, fuse_in_header_t *finh, void *msg,
+ struct iobuf *iobuf);
+
++enum fusedev_errno {
++ FUSEDEV_ENOENT,
++ FUSEDEV_ENOTDIR,
++ FUSEDEV_ENODEV,
++ FUSEDEV_EPERM,
++ FUSEDEV_ENOMEM,
++ FUSEDEV_ENOTCONN,
++ FUSEDEV_ECONNREFUSED,
++ FUSEDEV_EOVERFLOW,
++ FUSEDEV_EBUSY,
++ FUSEDEV_ENOTEMPTY,
++ FUSEDEV_EMAXPLUS
++};
++
+ struct fuse_private {
+ int fd;
+ uint32_t proto_minor;
+@@ -192,6 +206,10 @@ struct fuse_private {
+ /* LRU Limit, if not set, default is 64k for now */
+ uint32_t lru_limit;
+ uint32_t invalidate_limit;
++
++ /* counters for fusdev errnos */
++ uint8_t fusedev_errno_cnt[FUSEDEV_EMAXPLUS];
++ pthread_mutex_t fusedev_errno_cnt_mutex;
+ };
+ typedef struct fuse_private fuse_private_t;
+
+--
+1.8.3.1
+
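The ACCOUNT_ERRNO() machinery hinges on an 8-bit counter wrapping: `!++cnt` is true exactly once per 256 increments, so every 256th degraded errno promotes one log line to a Warning (the "UINT8_MAX + 1 times" in the message). The counting idiom in isolation, as a standalone sketch:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint8_t cnt; /* wraps modulo 256 by design */
    static pthread_mutex_t cnt_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Returns 1 on every 256th call: the wrap back to zero is the
     * "promote this occurrence to a warning" signal. */
    static int
    account_error(void)
    {
        int promoted = 0;

        pthread_mutex_lock(&cnt_mutex);
        if (!++cnt)
            promoted = 1;
        pthread_mutex_unlock(&cnt_mutex);
        return promoted;
    }

    int
    main(void)
    {
        int i;

        for (i = 1; i <= 600; i++)
            if (account_error())
                printf("errno seen %d times, emit warning\n", i);
        return 0;
    }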
diff --git a/0442-fuse-correctly-handle-setxattr-values.patch b/0442-fuse-correctly-handle-setxattr-values.patch
new file mode 100644
index 0000000..4be3b85
--- /dev/null
+++ b/0442-fuse-correctly-handle-setxattr-values.patch
@@ -0,0 +1,139 @@
+From 56c8ef4a64506c64aeb95d5a2c38d7107f90ac3a Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Tue, 5 Feb 2019 16:57:52 +0100
+Subject: [PATCH 442/449] fuse: correctly handle setxattr values
+
+The setxattr function receives a pointer to raw data, which may not be
+null-terminated. When this data needs to be interpreted as a string, an
+explicit null termination needs to be added before using the value.
+
+Upstream patch https://review.gluster.org/#/c/glusterfs/+/22157
+> Change-Id: Id110f9b215b22786da5782adec9449ce38d0d563
+> updates: bz#1193929
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Note: this change is not addressing the issue of bz 1787310;
+indeed, it is a prerequisite for other changes that do.
+
+BUG: 1787310
+Change-Id: I56417b130eb2a1f388108456c905a577eb658793
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202758
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/xlator.h | 2 +-
+ libglusterfs/src/xlator.c | 28 +++++++++++++++++++++++++---
+ xlators/mount/fuse/src/fuse-bridge.c | 20 ++++++++++++++++----
+ 3 files changed, 42 insertions(+), 8 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index db04c4d..8650ccc 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -1043,7 +1043,7 @@ xlator_mem_acct_init(xlator_t *xl, int num_types);
+ void
+ xlator_mem_acct_unref(struct mem_acct *mem_acct);
+ int
+-is_gf_log_command(xlator_t *trans, const char *name, char *value);
++is_gf_log_command(xlator_t *trans, const char *name, char *value, size_t size);
+ int
+ glusterd_check_log_level(const char *value);
+ int
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index 6bd4f09..108b96a 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -1278,8 +1278,21 @@ xlator_destroy(xlator_t *xl)
+ return 0;
+ }
+
++static int32_t
++gf_bin_to_string(char *dst, size_t size, void *src, size_t len)
++{
++ if (len >= size) {
++ return EINVAL;
++ }
++
++ memcpy(dst, src, len);
++ dst[len] = 0;
++
++ return 0;
++}
++
+ int
+-is_gf_log_command(xlator_t *this, const char *name, char *value)
++is_gf_log_command(xlator_t *this, const char *name, char *value, size_t size)
+ {
+ xlator_t *trav = NULL;
+ char key[1024] = {
+@@ -1291,7 +1304,11 @@ is_gf_log_command(xlator_t *this, const char *name, char *value)
+ glusterfs_ctx_t *ctx = NULL;
+
+ if (!strcmp("trusted.glusterfs.syslog", name)) {
+- ret = gf_string2boolean(value, &syslog_flag);
++ ret = gf_bin_to_string(key, sizeof(key), value, size);
++ if (ret != 0) {
++ goto out;
++ }
++ ret = gf_string2boolean(key, &syslog_flag);
+ if (ret) {
+ ret = EOPNOTSUPP;
+ goto out;
+@@ -1307,7 +1324,12 @@ is_gf_log_command(xlator_t *this, const char *name, char *value)
+ if (fnmatch("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE))
+ goto out;
+
+- log_level = glusterd_check_log_level(value);
++ ret = gf_bin_to_string(key, sizeof(key), value, size);
++ if (ret != 0) {
++ goto out;
++ }
++
++ log_level = glusterd_check_log_level(key);
+ if (log_level == -1) {
+ ret = EOPNOTSUPP;
+ goto out;
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 2e7584c..cfad2b4 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -4112,7 +4112,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
+
+ /* Check if the command is for changing the log
+ level of process or specific xlator */
+- ret = is_gf_log_command(this, name, value);
++ ret = is_gf_log_command(this, name, value, fsi->size);
+ if (ret >= 0) {
+ op_errno = ret;
+ goto done;
+@@ -4159,11 +4159,23 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
+ * fixups to make sure that's the case. To avoid nasty
+ * surprises, allocate an extra byte and add a NUL here.
+ */
+- dict_value = memdup(value, fsi->size + 1);
++ dict_value = GF_MALLOC(fsi->size + 1, gf_common_mt_char);
++ if (dict_value == NULL) {
++ gf_log("glusterfs-fuse", GF_LOG_ERROR,
++ "%" PRIu64 ": SETXATTR value allocation failed",
++ finh->unique);
++ op_errno = ENOMEM;
++ goto done;
++ }
++ memcpy(dict_value, value, fsi->size);
+ dict_value[fsi->size] = '\0';
+ }
+- dict_set(state->xattr, newkey,
+- data_from_dynptr((void *)dict_value, fsi->size));
++ ret = dict_set_dynptr(state->xattr, newkey, dict_value, fsi->size);
++ if (ret < 0) {
++ op_errno = -ret;
++ GF_FREE(dict_value);
++ goto done;
++ }
+
+ state->flags = fsi->flags;
+ state->name = newkey;
+--
+1.8.3.1
+
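gf_bin_to_string() above is simply a bounded copy plus an explicit NUL, with the important detail that values too large for the terminator are rejected rather than truncated. The same helper and a caller in a standalone sketch:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Copy len raw bytes into dst and NUL-terminate; fail if the
     * terminator would not fit (mirrors gf_bin_to_string above). */
    static int
    bin_to_string(char *dst, size_t size, const void *src, size_t len)
    {
        if (len >= size)
            return EINVAL;
        memcpy(dst, src, len);
        dst[len] = '\0';
        return 0;
    }

    int
    main(void)
    {
        const char raw[2] = {'o', 'n'}; /* setxattr-style value: no NUL */
        char key[1024];

        if (bin_to_string(key, sizeof(key), raw, sizeof(raw)) == 0)
            printf("value as string: %s\n", key); /* prints "on" */
        return 0;
    }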
diff --git a/0443-fuse-fix-high-sev-coverity-issue.patch b/0443-fuse-fix-high-sev-coverity-issue.patch
new file mode 100644
index 0000000..7c5e9c0
--- /dev/null
+++ b/0443-fuse-fix-high-sev-coverity-issue.patch
@@ -0,0 +1,55 @@
+From 3ac3312d63b9dc3c15cd8765ab8b7c601b007500 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Tue, 19 Mar 2019 22:51:14 +0530
+Subject: [PATCH 443/449] fuse : fix high sev coverity issue
+
+This patch fixes the following coverity issues in fuse-bridge.c.
+
+CID : 1398630 : Resource leak
+CID : 1399757 : Uninitialized pointer read
+
+Upstream patch https://review.gluster.org/c/glusterfs/+/22382
+> updates: bz#789278
+>
+> Change-Id: I69f8591400ee56a5d215eeac443a8e3d7777db27
+> Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1787310
+Change-Id: Ib2c9af25019ee57131b3d384fc4b557437e75d3e
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202759
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mount/fuse/src/fuse-bridge.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index cfad2b4..d17320b 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -4174,6 +4174,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
+ if (ret < 0) {
+ op_errno = -ret;
+ GF_FREE(dict_value);
++ GF_FREE(newkey);
+ goto done;
+ }
+
+@@ -5963,7 +5964,12 @@ fuse_thread_proc(void *data)
+ ssize_t res = 0;
+ struct iobuf *iobuf = NULL;
+ fuse_in_header_t *finh = NULL;
+- struct iovec iov_in[2];
++ struct iovec iov_in[2] = {
++ {
++ 0,
++ },
++ };
++
+ void *msg = NULL;
+ /* we need 512 extra buffer size for BATCH_FORGET fop. By tests, it is
+ found to be reduces 'REALLOC()' in the loop */
+--
+1.8.3.1
+
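The second CID above (the uninitialized pointer read) gets the classic cure: zero the aggregate at declaration so no field is ever read as garbage. The idiom in isolation:

    #include <stdio.h>
    #include <sys/uio.h>

    int
    main(void)
    {
        /* Partial initializer zeroes every member of both iovecs, so a
         * later read of iov_in[1] cannot observe indeterminate values. */
        struct iovec iov_in[2] = {
            {
                0,
            },
        };

        printf("%p %zu\n", iov_in[1].iov_base, iov_in[1].iov_len);
        return 0;
    }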
diff --git a/0444-mount-fuse-Fixing-a-coverity-issue.patch b/0444-mount-fuse-Fixing-a-coverity-issue.patch
new file mode 100644
index 0000000..c8e3e8c
--- /dev/null
+++ b/0444-mount-fuse-Fixing-a-coverity-issue.patch
@@ -0,0 +1,40 @@
+From 53a6aed98aad73ff51f884bf815bccfa337eb524 Mon Sep 17 00:00:00 2001
+From: Barak Sason <bsasonro@redhat.com>
+Date: Sun, 18 Aug 2019 17:38:09 +0300
+Subject: [PATCH 444/449] mount/fuse - Fixing a coverity issue
+
+Fixed resource leak of dict_value and newkey variables
+
+CID: 1398630
+
+Upstream patch https://review.gluster.org/c/glusterfs/+/23260
+> Updates: bz#789278
+>
+> Change-Id: I589fdc0aecaeb4f446cd29f95bad36ccd7a35beb
+> Signed-off-by: Barak Sason <bsasonro@redhat.com>
+
+BUG: 1787310
+Change-Id: Id191face7b082e2e8d6e62f60b56248688d396f6
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202760
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mount/fuse/src/fuse-bridge.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index d17320b..f61fa39 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -4165,6 +4165,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
+ "%" PRIu64 ": SETXATTR value allocation failed",
+ finh->unique);
+ op_errno = ENOMEM;
++ GF_FREE(newkey);
+ goto done;
+ }
+ memcpy(dict_value, value, fsi->size);
+--
+1.8.3.1
+
diff --git a/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch b/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch
new file mode 100644
index 0000000..dea23f2
--- /dev/null
+++ b/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch
@@ -0,0 +1,481 @@
+From dc03340654d921916ac3890d713fc84ef4bb1e28 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Sat, 29 Sep 2018 13:15:35 +0530
+Subject: [PATCH 445/449] feature/changelog: Avoid thread creation if xlator is
+ not enabled
+
+Problem:
+Changelog creates threads even if the changelog is not enabled
+
+Background:
+Changelog xlator broadly does two things
+ 1. Journalling - Consumers are geo-rep and glusterfind
+ 2. Event Notification for registered events like (open, release etc) -
+ Consumers are bitrot, geo-rep
+
+The existing option "changelog.changelog" controls journalling; there is
+no option to control event notification, which is enabled by default. So
+when bitrot/geo-rep is not enabled on the volume, the threads and
+resources (rpc and rbuf) related to event notifications consume memory
+and cpu cycles unnecessarily.
+
+Solution:
+The solution is to have two different options as below.
+ 1. changelog-notification : Event notifications
+ 2. changelog : Journalling
+
+This patch introduces the option "changelog-notification" which is
+not exposed to the user. When either bitrot or changelog (journalling)
+is enabled, it internally enables 'changelog-notification'. But
+once 'changelog-notification' is enabled, it will not be disabled
+for the lifetime of the brick process even after bitrot and changelog
+are disabled. As of now, rpc resource cleanup has a lot of races and is
+difficult to do cleanly. If allowed, it leads to memory leaks
+and crashes on enable/disable of bitrot or changelog (journal) in a
+loop. Hence, to be safer, the event notification is not disabled within
+the lifetime of the process once enabled.
+
+> Change-Id: Ifd00286e0966049e8eb9f21567fe407cf11bb02a
+> Updates: #475
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry pick from commit 6de80bcd6366778ac34ce58ec496fa08cc02bd0b)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21896/)
+
+BUG: 1790336
+Change-Id: Ifd00286e0966049e8eb9f21567fe407cf11bb02a
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202778
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ rpc/rpc-lib/src/rpcsvc.c | 26 ++--
+ tests/basic/changelog/changelog-history.t | 12 +-
+ tests/bugs/bitrot/bug-1227996.t | 1 -
+ tests/bugs/bitrot/bug-1245981.t | 4 +-
+ xlators/features/changelog/src/changelog-helpers.h | 4 +
+ .../features/changelog/src/changelog-rpc-common.c | 3 +
+ xlators/features/changelog/src/changelog.c | 149 +++++++++++++++------
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 13 ++
+ 8 files changed, 154 insertions(+), 58 deletions(-)
+
+diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
+index b058932..3f184bf 100644
+--- a/rpc/rpc-lib/src/rpcsvc.c
++++ b/rpc/rpc-lib/src/rpcsvc.c
+@@ -1865,6 +1865,18 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
+ goto out;
+ }
+
++ pthread_rwlock_rdlock(&svc->rpclock);
++ {
++ list_for_each_entry(prog, &svc->programs, program)
++ {
++ if ((prog->prognum == program->prognum) &&
++ (prog->progver == program->progver)) {
++ break;
++ }
++ }
++ }
++ pthread_rwlock_unlock(&svc->rpclock);
++
+ ret = rpcsvc_program_unregister_portmap(program);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+@@ -1881,17 +1893,6 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
+ goto out;
+ }
+ #endif
+- pthread_rwlock_rdlock(&svc->rpclock);
+- {
+- list_for_each_entry(prog, &svc->programs, program)
+- {
+- if ((prog->prognum == program->prognum) &&
+- (prog->progver == program->progver)) {
+- break;
+- }
+- }
+- }
+- pthread_rwlock_unlock(&svc->rpclock);
+
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "Program unregistered: %s, Num: %d,"
+@@ -1912,6 +1913,9 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
+
+ ret = 0;
+ out:
++ if (prog)
++ GF_FREE(prog);
++
+ if (ret == -1) {
+ if (program) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+diff --git a/tests/basic/changelog/changelog-history.t b/tests/basic/changelog/changelog-history.t
+index 3ce4098..b56e247 100644
+--- a/tests/basic/changelog/changelog-history.t
++++ b/tests/basic/changelog/changelog-history.t
+@@ -5,6 +5,7 @@
+
+ cleanup;
+
++SCRIPT_TIMEOUT=300
+ HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog
+ build_tester $HISTORY_BIN_PATH/get-history.c -lgfchangelog
+
+@@ -68,18 +69,21 @@ TEST $CLI volume set $V0 changelog.changelog off
+ sleep 3
+ time_after_disable=$(date '+%s')
+
++TEST $CLI volume set $V0 changelog.changelog on
++sleep 5
++
+ #Passes, gives the changelogs till continuous changelogs are available
+ # but returns 1
+-EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2
++EXPECT_WITHIN 10 "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2
+
+ #Fails as start falls between htime files
+-EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1
++EXPECT_WITHIN 10 "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1
+
+ #Passes as start and end falls in same htime file
+-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2
++EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2
+
+ #Passes, gives the changelogs till continuous changelogs are available
+-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable
++EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable
+
+ TEST rm $HISTORY_BIN_PATH/get-history
+
+diff --git a/tests/bugs/bitrot/bug-1227996.t b/tests/bugs/bitrot/bug-1227996.t
+index 47ebc42..121c7b5 100644
+--- a/tests/bugs/bitrot/bug-1227996.t
++++ b/tests/bugs/bitrot/bug-1227996.t
+@@ -17,7 +17,6 @@ TEST pidof glusterd;
+ ## Lets create and start the volume
+ TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+ TEST $CLI volume start $V0
+-
+ ## Enable bitrot on volume $V0
+ TEST $CLI volume bitrot $V0 enable
+
+diff --git a/tests/bugs/bitrot/bug-1245981.t b/tests/bugs/bitrot/bug-1245981.t
+index 2bed4d9..f395525 100644
+--- a/tests/bugs/bitrot/bug-1245981.t
++++ b/tests/bugs/bitrot/bug-1245981.t
+@@ -47,9 +47,9 @@ touch $M0/5
+ sleep `expr $SLEEP_TIME \* 2`
+
+ backpath=$(get_backend_paths $fname)
+-TEST getfattr -m . -n trusted.bit-rot.signature $backpath
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
+
+ backpath=$(get_backend_paths $M0/new_file)
+-TEST getfattr -m . -n trusted.bit-rot.signature $backpath
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
+
+ cleanup;
+diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
+index 517c4dc..3afacc9 100644
+--- a/xlators/features/changelog/src/changelog-helpers.h
++++ b/xlators/features/changelog/src/changelog-helpers.h
+@@ -190,8 +190,12 @@ typedef struct changelog_ev_selector {
+
+ /* changelog's private structure */
+ struct changelog_priv {
++ /* changelog journalling */
+ gf_boolean_t active;
+
++ /* changelog live notifications */
++ gf_boolean_t rpc_active;
++
+ /* to generate unique socket file per brick */
+ char *changelog_brick;
+
+diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c
+index dcdcfb1..f2d1853 100644
+--- a/xlators/features/changelog/src/changelog-rpc-common.c
++++ b/xlators/features/changelog/src/changelog-rpc-common.c
+@@ -263,6 +263,9 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile,
+ struct rpcsvc_program *prog = NULL;
+ rpc_transport_t *trans = NULL;
+
++ if (!rpc)
++ return;
++
+ while (*progs) {
+ prog = *progs;
+ (void)rpcsvc_program_unregister(rpc, prog);
+diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
+index d9025f3..ff06c09 100644
+--- a/xlators/features/changelog/src/changelog.c
++++ b/xlators/features/changelog/src/changelog.c
+@@ -34,6 +34,12 @@ static struct changelog_bootstrap cb_bootstrap[] = {
+ },
+ };
+
++static int
++changelog_init_rpc(xlator_t *this, changelog_priv_t *priv);
++
++static int
++changelog_init(xlator_t *this, changelog_priv_t *priv);
++
+ /* Entry operations - TYPE III */
+
+ /**
+@@ -2008,6 +2014,11 @@ notify(xlator_t *this, int event, void *data, ...)
+ uint64_t clntcnt = 0;
+ changelog_clnt_t *conn = NULL;
+ gf_boolean_t cleanup_notify = _gf_false;
++ char sockfile[UNIX_PATH_MAX] = {
++ 0,
++ };
++ rpcsvc_listener_t *listener = NULL;
++ rpcsvc_listener_t *next = NULL;
+
+ INIT_LIST_HEAD(&queue);
+
+@@ -2021,23 +2032,40 @@ notify(xlator_t *this, int event, void *data, ...)
+ "cleanup changelog rpc connection of brick %s",
+ priv->victim->name);
+
+- this->cleanup_starting = 1;
+- changelog_destroy_rpc_listner(this, priv);
+- conn = &priv->connections;
+- if (conn)
+- changelog_ev_cleanup_connections(this, conn);
+- xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
+- clntcnt = GF_ATOMIC_GET(priv->clntcnt);
+-
+- if (!xprtcnt && !clntcnt) {
+- LOCK(&priv->lock);
+- {
+- cleanup_notify = priv->notify_down;
+- priv->notify_down = _gf_true;
++ if (priv->rpc_active) {
++ this->cleanup_starting = 1;
++ changelog_destroy_rpc_listner(this, priv);
++ conn = &priv->connections;
++ if (conn)
++ changelog_ev_cleanup_connections(this, conn);
++ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
++ clntcnt = GF_ATOMIC_GET(priv->clntcnt);
++ if (!xprtcnt && !clntcnt) {
++ LOCK(&priv->lock);
++ {
++ cleanup_notify = priv->notify_down;
++ priv->notify_down = _gf_true;
++ }
++ UNLOCK(&priv->lock);
++ list_for_each_entry_safe(listener, next, &priv->rpc->listeners,
++ list)
++ {
++ if (listener->trans) {
++ rpc_transport_unref(listener->trans);
++ }
++ }
++ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
++ UNIX_PATH_MAX);
++ sys_unlink(sockfile);
++ if (priv->rpc) {
++ rpcsvc_destroy(priv->rpc);
++ priv->rpc = NULL;
++ }
++ if (!cleanup_notify)
++ default_notify(this, GF_EVENT_PARENT_DOWN, data);
+ }
+- UNLOCK(&priv->lock);
+- if (!cleanup_notify)
+- default_notify(this, GF_EVENT_PARENT_DOWN, data);
++ } else {
++ default_notify(this, GF_EVENT_PARENT_DOWN, data);
+ }
+ goto out;
+ }
+@@ -2425,6 +2453,22 @@ changelog_barrier_pthread_destroy(changelog_priv_t *priv)
+ LOCK_DESTROY(&priv->bflags.lock);
+ }
+
++static void
++changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
++{
++ /* terminate rpc server */
++ if (!this->cleanup_starting)
++ changelog_destroy_rpc_listner(this, priv);
++
++ (void)changelog_cleanup_rpc_threads(this, priv);
++ /* cleanup rot buffs */
++ rbuf_dtor(priv->rbuf);
++
++ /* cleanup poller thread */
++ if (priv->poller)
++ (void)changelog_thread_cleanup(this, priv->poller);
++}
++
+ int
+ reconfigure(xlator_t *this, dict_t *options)
+ {
+@@ -2433,6 +2477,9 @@ reconfigure(xlator_t *this, dict_t *options)
+ changelog_priv_t *priv = NULL;
+ gf_boolean_t active_earlier = _gf_true;
+ gf_boolean_t active_now = _gf_true;
++ gf_boolean_t rpc_active_earlier = _gf_true;
++ gf_boolean_t rpc_active_now = _gf_true;
++ gf_boolean_t iniate_rpc = _gf_false;
+ changelog_time_slice_t *slice = NULL;
+ changelog_log_data_t cld = {
+ 0,
+@@ -2454,6 +2501,7 @@ reconfigure(xlator_t *this, dict_t *options)
+
+ ret = -1;
+ active_earlier = priv->active;
++ rpc_active_earlier = priv->rpc_active;
+
+ /* first stop the rollover and the fsync thread */
+ changelog_cleanup_helper_threads(this, priv);
+@@ -2487,6 +2535,29 @@ reconfigure(xlator_t *this, dict_t *options)
+ goto out;
+
+ GF_OPTION_RECONF("changelog", active_now, options, bool, out);
++ GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool,
++ out);
++
++ /* If journalling is enabled, enable rpc notifications */
++ if (active_now && !active_earlier) {
++ if (!rpc_active_earlier)
++ iniate_rpc = _gf_true;
++ }
++
++ if (rpc_active_now && !rpc_active_earlier) {
++ iniate_rpc = _gf_true;
++ }
++
++ /* TODO: Disable of changelog-notifications is not supported for now
++ * as there is no clean way of cleaning up of rpc resources
++ */
++
++ if (iniate_rpc) {
++ ret = changelog_init_rpc(this, priv);
++ if (ret)
++ goto out;
++ priv->rpc_active = _gf_true;
++ }
+
+ /**
+ * changelog_handle_change() handles changes that could possibly
+@@ -2618,6 +2689,7 @@ changelog_init_options(xlator_t *this, changelog_priv_t *priv)
+ goto dealloc_2;
+
+ GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2);
++ GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2);
+ GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2);
+
+ GF_OPTION_INIT("op-mode", tmp, str, dealloc_2);
+@@ -2656,22 +2728,6 @@ error_return:
+ return -1;
+ }
+
+-static void
+-changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
+-{
+- /* terminate rpc server */
+- if (!this->cleanup_starting)
+- changelog_destroy_rpc_listner(this, priv);
+-
+- (void)changelog_cleanup_rpc_threads(this, priv);
+- /* cleanup rot buffs */
+- rbuf_dtor(priv->rbuf);
+-
+- /* cleanup poller thread */
+- if (priv->poller)
+- (void)changelog_thread_cleanup(this, priv->poller);
+-}
+-
+ static int
+ changelog_init_rpc(xlator_t *this, changelog_priv_t *priv)
+ {
+@@ -2768,10 +2824,13 @@ init(xlator_t *this)
+ INIT_LIST_HEAD(&priv->queue);
+ priv->barrier_enabled = _gf_false;
+
+- /* RPC ball rolling.. */
+- ret = changelog_init_rpc(this, priv);
+- if (ret)
+- goto cleanup_barrier;
++ if (priv->rpc_active || priv->active) {
++ /* RPC ball rolling.. */
++ ret = changelog_init_rpc(this, priv);
++ if (ret)
++ goto cleanup_barrier;
++ priv->rpc_active = _gf_true;
++ }
+
+ ret = changelog_init(this, priv);
+ if (ret)
+@@ -2783,7 +2842,9 @@ init(xlator_t *this)
+ return 0;
+
+ cleanup_rpc:
+- changelog_cleanup_rpc(this, priv);
++ if (priv->rpc_active) {
++ changelog_cleanup_rpc(this, priv);
++ }
+ cleanup_barrier:
+ changelog_barrier_pthread_destroy(priv);
+ cleanup_options:
+@@ -2808,9 +2869,10 @@ fini(xlator_t *this)
+ priv = this->private;
+
+ if (priv) {
+- /* terminate RPC server/threads */
+- changelog_cleanup_rpc(this, priv);
+-
++ if (priv->active || priv->rpc_active) {
++ /* terminate RPC server/threads */
++ changelog_cleanup_rpc(this, priv);
++ }
+ /* call barrier_disable to cancel timer */
+ if (priv->barrier_enabled)
+ __chlog_barrier_disable(this, &queue);
+@@ -2879,6 +2941,13 @@ struct volume_options options[] = {
+ .flags = OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_BASIC,
+ .tags = {"journal", "georep", "glusterfind"}},
++ {.key = {"changelog-notification"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .default_value = "off",
++ .description = "enable/disable changelog live notification",
++ .op_version = {3},
++ .level = OPT_STATUS_BASIC,
++ .tags = {"bitrot", "georep"}},
+ {.key = {"changelog-brick"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "brick path to generate unique socket file name."
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 16346e7..13f84ea 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -1876,6 +1876,19 @@ brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ ret = xlator_set_fixed_option(xl, "changelog-dir", changelog_basepath);
+ if (ret)
+ goto out;
++
++ ret = glusterd_is_bitrot_enabled(volinfo);
++ if (ret == -1) {
++ goto out;
++ } else if (ret) {
++ ret = xlator_set_fixed_option(xl, "changelog-notification", "on");
++ if (ret)
++ goto out;
++ } else {
++ ret = xlator_set_fixed_option(xl, "changelog-notification", "off");
++ if (ret)
++ goto out;
++ }
+ out:
+ return ret;
+ }
+--
+1.8.3.1
+
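A minimal usage sketch for the changelog-notification option added by the
patch above. Per the volgen hunk, glusterd sets the option automatically
from the bitrot state, so it is not meant to be toggled by hand; the volume
name testvol and the default workdir /var/lib/glusterd are illustrative
assumptions:

    # enabling bitrot flips the fixed option on in the brick volfile
    gluster volume bitrot testvol enable
    grep changelog-notification /var/lib/glusterd/vols/testvol/*.vol
    # expected line:  option changelog-notification on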
diff --git a/0446-bitrot-Make-number-of-signer-threads-configurable.patch b/0446-bitrot-Make-number-of-signer-threads-configurable.patch
new file mode 100644
index 0000000..8eb2089
--- /dev/null
+++ b/0446-bitrot-Make-number-of-signer-threads-configurable.patch
@@ -0,0 +1,594 @@
+From 866a4c49ad9c5a9125814a9f843d4c7fd967ab2b Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Mon, 3 Feb 2020 18:10:17 +0530
+Subject: [PATCH 446/449] bitrot: Make number of signer threads configurable
+
+The number of signing process threads (glfs_brpobj)
+is set to 4 by default. The recommendation is to set
+it to the number of cores available. This patch makes
+it configurable as follows:
+
+gluster vol bitrot <volname> signer-threads <count>
+
+> fixes: bz#1797869
+> Change-Id: Ia883b3e5e34e0bc8d095243508d320c9c9c58adc
+> Signed-off-by: Kotresh HR <khiremat@redhat.com>
+> (Cherry pick from commit 8fad76650bd85463708f59d2518f5b764ae4c702)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24091/)
+
+BUG: 1790336
+Change-Id: Ia883b3e5e34e0bc8d095243508d320c9c9c58adc
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202780
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 29 +++++++-
+ cli/src/cli-cmd-volume.c | 12 +++
+ doc/gluster.8 | 6 ++
+ libglusterfs/src/glusterfs/common-utils.h | 1 +
+ rpc/xdr/src/cli1-xdr.x | 1 +
+ tests/bitrot/br-signer-threads-config-1797869.t | 73 +++++++++++++++++++
+ xlators/features/bit-rot/src/bitd/bit-rot.c | 45 +++++++++---
+ xlators/features/bit-rot/src/bitd/bit-rot.h | 20 ++---
+ .../bit-rot/src/stub/bit-rot-stub-mem-types.h | 1 +
+ xlators/mgmt/glusterd/src/glusterd-bitrot.c | 85 ++++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 16 ++--
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 9 +++
+ 12 files changed, 270 insertions(+), 28 deletions(-)
+ create mode 100644 tests/bitrot/br-signer-threads-config-1797869.t
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 7446b95..5fd05f4 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -5661,7 +5661,7 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
+ char *volname = NULL;
+ char *opwords[] = {
+ "enable", "disable", "scrub-throttle", "scrub-frequency", "scrub",
+- "signing-time", NULL};
++ "signing-time", "signer-threads", NULL};
+ char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL};
+ char *scrub_freq_values[] = {"hourly", "daily", "weekly", "biweekly",
+ "monthly", "minute", NULL};
+@@ -5669,6 +5669,7 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
+ dict_t *dict = NULL;
+ gf_bitrot_type type = GF_BITROT_OPTION_TYPE_NONE;
+ int32_t expiry_time = 0;
++ int32_t signer_th_count = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+@@ -5849,6 +5850,31 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
+ }
+ goto set_type;
+ }
++ } else if (!strcmp(words[3], "signer-threads")) {
++ if (!words[4]) {
++ cli_err(
++ "Missing signer-thread value for bitrot "
++ "option");
++ ret = -1;
++ goto out;
++ } else {
++ type = GF_BITROT_OPTION_TYPE_SIGNER_THREADS;
++
++ signer_th_count = strtol(words[4], NULL, 0);
++ if (signer_th_count < 1) {
++ cli_err("signer-thread count should not be less than 1");
++ ret = -1;
++ goto out;
++ }
++
++ ret = dict_set_uint32(dict, "signer-threads",
++ (unsigned int)signer_th_count);
++ if (ret) {
++ cli_out("Failed to set dict for bitrot");
++ goto out;
++ }
++ goto set_type;
++ }
+ } else {
+ cli_err(
+ "Invalid option %s for bitrot. Please enter valid "
+@@ -5857,7 +5883,6 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
+ ret = -1;
+ goto out;
+ }
+-
+ set_type:
+ ret = dict_set_int32(dict, "type", type);
+ if (ret < 0)
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index f33fc99..72504ca 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -3236,6 +3236,16 @@ struct cli_cmd bitrot_cmds[] = {
+ {"volume bitrot <VOLNAME> {enable|disable}", NULL, /*cli_cmd_bitrot_cbk,*/
+ "Enable/disable bitrot for volume <VOLNAME>"},
+
++ {"volume bitrot <VOLNAME> signing-time <time-in-secs>",
++ NULL, /*cli_cmd_bitrot_cbk,*/
++ "Waiting time for an object after last fd is closed to start signing "
++ "process"},
++
++ {"volume bitrot <VOLNAME> signer-threads <count>",
++ NULL, /*cli_cmd_bitrot_cbk,*/
++ "Number of signing process threads. Usually set to number of available "
++ "cores"},
++
+ {"volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Set the speed of the scrubber for volume <VOLNAME>"},
+@@ -3251,6 +3261,8 @@ struct cli_cmd bitrot_cmds[] = {
+ "the scrubber. ondemand starts the scrubber immediately."},
+
+ {"volume bitrot <VOLNAME> {enable|disable}\n"
++ "volume bitrot <VOLNAME> signing-time <time-in-secs>\n"
++ "volume bitrot <VOLNAME> signer-threads <count>\n"
+ "volume bitrot <volname> scrub-throttle {lazy|normal|aggressive}\n"
+ "volume bitrot <volname> scrub-frequency {hourly|daily|weekly|biweekly"
+ "|monthly}\n"
+diff --git a/doc/gluster.8 b/doc/gluster.8
+index 66bdb48..084346d 100644
+--- a/doc/gluster.8
++++ b/doc/gluster.8
+@@ -244,6 +244,12 @@ Use "!<OPTION>" to reset option <OPTION> to default value.
+ \fB\ volume bitrot <VOLNAME> {enable|disable} \fR
+ Enable/disable bitrot for volume <VOLNAME>
+ .TP
++\fB\ volume bitrot <VOLNAME> signing-time <time-in-secs> \fR
++Waiting time for an object after last fd is closed to start signing process.
++.TP
++\fB\ volume bitrot <VOLNAME> signer-threads <count> \fR
++Number of signing process threads. Usually set to number of available cores.
++.TP
+ \fB\ volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive} \fR
+ Scrub-throttle value is a measure of how fast or slow the scrubber scrubs the filesystem for volume <VOLNAME>
+ .TP
+diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
+index 0e2ecc7..f0a0a41 100644
+--- a/libglusterfs/src/glusterfs/common-utils.h
++++ b/libglusterfs/src/glusterfs/common-utils.h
+@@ -126,6 +126,7 @@ trap(void);
+
+ /* Default value of signing waiting time to sign a file for bitrot */
+ #define SIGNING_TIMEOUT "120"
++#define BR_WORKERS "4"
+
+ /* xxhash */
+ #define GF_XXH64_DIGEST_LENGTH 8
+diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
+index a32c864..777cb00 100644
+--- a/rpc/xdr/src/cli1-xdr.x
++++ b/rpc/xdr/src/cli1-xdr.x
+@@ -68,6 +68,7 @@ enum gf_bitrot_type {
+ GF_BITROT_OPTION_TYPE_EXPIRY_TIME,
+ GF_BITROT_CMD_SCRUB_STATUS,
+ GF_BITROT_CMD_SCRUB_ONDEMAND,
++ GF_BITROT_OPTION_TYPE_SIGNER_THREADS,
+ GF_BITROT_OPTION_TYPE_MAX
+ };
+
+diff --git a/tests/bitrot/br-signer-threads-config-1797869.t b/tests/bitrot/br-signer-threads-config-1797869.t
+new file mode 100644
+index 0000000..657ef3e
+--- /dev/null
++++ b/tests/bitrot/br-signer-threads-config-1797869.t
+@@ -0,0 +1,73 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../cluster.rc
++
++function get_bitd_count_1 {
++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | wc -l
++}
++
++function get_bitd_count_2 {
++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | wc -l
++}
++
++function get_bitd_pid_1 {
++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | awk '{print $2}'
++}
++
++function get_bitd_pid_2 {
++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | awk '{print $2}'
++}
++
++function get_signer_th_count_1 {
++ ps -eL | grep $(get_bitd_pid_1) | grep glfs_brpobj | wc -l
++}
++
++function get_signer_th_count_2 {
++ ps -eL | grep $(get_bitd_pid_2) | grep glfs_brpobj | wc -l
++}
++
++cleanup;
++
++TEST launch_cluster 2
++
++TEST $CLI_1 peer probe $H2;
++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
++
++TEST $CLI_1 volume create $V0 $H1:$B1
++TEST $CLI_1 volume create $V1 $H2:$B2
++EXPECT 'Created' volinfo_field_1 $V0 'Status';
++EXPECT 'Created' volinfo_field_1 $V1 'Status';
++
++TEST $CLI_1 volume start $V0
++TEST $CLI_1 volume start $V1
++EXPECT 'Started' volinfo_field_1 $V0 'Status';
++EXPECT 'Started' volinfo_field_1 $V1 'Status';
++
++#Enable bitrot
++TEST $CLI_1 volume bitrot $V0 enable
++TEST $CLI_1 volume bitrot $V1 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2
++
++old_bitd_pid_1=$(get_bitd_pid_1)
++old_bitd_pid_2=$(get_bitd_pid_2)
++TEST $CLI_1 volume bitrot $V0 signer-threads 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1
++EXPECT_NOT "$old_bitd_pid_1" get_bitd_pid_1;
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2
++EXPECT "$old_bitd_pid_2" get_bitd_pid_2;
++
++old_bitd_pid_1=$(get_bitd_pid_1)
++old_bitd_pid_2=$(get_bitd_pid_2)
++TEST $CLI_1 volume bitrot $V1 signer-threads 2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_signer_th_count_2
++EXPECT_NOT "$old_bitd_pid_2" get_bitd_pid_2;
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1
++EXPECT "$old_bitd_pid_1" get_bitd_pid_1;
++
++cleanup;
+diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
+index 7b1c5dc..b8feef7 100644
+--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
+@@ -1734,22 +1734,26 @@ out:
+ return 0;
+ }
+
+-/**
+- * Initialize signer specific structures, spawn worker threads.
+- */
+-
+ static void
+ br_fini_signer(xlator_t *this, br_private_t *priv)
+ {
+ int i = 0;
+
+- for (; i < BR_WORKERS; i++) {
++ if (priv == NULL)
++ return;
++
++ for (; i < priv->signer_th_count; i++) {
+ (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
+ }
++ GF_FREE(priv->obj_queue->workers);
+
+ pthread_cond_destroy(&priv->object_cond);
+ }
+
++/**
++ * Initialize signer specific structures, spawn worker threads.
++ */
++
+ static int32_t
+ br_init_signer(xlator_t *this, br_private_t *priv)
+ {
+@@ -1769,7 +1773,12 @@ br_init_signer(xlator_t *this, br_private_t *priv)
+ goto cleanup_cond;
+ INIT_LIST_HEAD(&priv->obj_queue->objects);
+
+- for (i = 0; i < BR_WORKERS; i++) {
++ priv->obj_queue->workers = GF_CALLOC(
++ priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t);
++ if (!priv->obj_queue->workers)
++ goto cleanup_obj_queue;
++
++ for (i = 0; i < priv->signer_th_count; i++) {
+ ret = gf_thread_create(&priv->obj_queue->workers[i], NULL,
+ br_process_object, this, "brpobj");
+ if (ret != 0) {
+@@ -1787,7 +1796,9 @@ cleanup_threads:
+ for (i--; i >= 0; i--) {
+ (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
+ }
++ GF_FREE(priv->obj_queue->workers);
+
++cleanup_obj_queue:
+ GF_FREE(priv->obj_queue);
+
+ cleanup_cond:
+@@ -1840,7 +1851,7 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks)
+ if (contribution == 0)
+ contribution = 1;
+ spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
+- spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE;
++ spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE;
+
+ #endif
+
+@@ -1860,11 +1871,16 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks)
+ static int32_t
+ br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options)
+ {
+- if (options)
++ if (options) {
+ GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32,
+ error_return);
+- else
++ GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options,
++ uint32, error_return);
++ } else {
+ GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
++ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
++ error_return);
++ }
+
+ return 0;
+
+@@ -1880,6 +1896,8 @@ br_signer_init(xlator_t *this, br_private_t *priv)
+
+ GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
+ GF_OPTION_INIT("brick-count", numbricks, int32, error_return);
++ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
++ error_return);
+
+ ret = br_rate_limit_signer(this, priv->child_count, numbricks);
+ if (ret)
+@@ -2210,6 +2228,15 @@ struct volume_options options[] = {
+ .description = "Pause/Resume scrub. Upon resume, scrubber "
+ "continues from where it left off.",
+ },
++ {
++ .key = {"signer-threads"},
++ .type = GF_OPTION_TYPE_INT,
++ .default_value = BR_WORKERS,
++ .op_version = {GD_OP_VERSION_7_0},
++ .flags = OPT_FLAG_SETTABLE,
++ .description = "Number of signing process threads. As a best "
++ "practice, set this to the number of processor cores",
++ },
+ {.key = {NULL}},
+ };
+
+diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
+index a4d4fd7..8ac7dcd 100644
+--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
++++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
+@@ -30,12 +30,6 @@
+
+ #include <openssl/sha.h>
+
+-/**
+- * TODO: make this configurable. As a best practice, set this to the
+- * number of processor cores.
+- */
+-#define BR_WORKERS 4
+-
+ typedef enum scrub_throttle {
+ BR_SCRUB_THROTTLE_VOID = -1,
+ BR_SCRUB_THROTTLE_LAZY = 0,
+@@ -108,12 +102,12 @@ struct br_child {
+ typedef struct br_child br_child_t;
+
+ struct br_obj_n_workers {
+- struct list_head objects; /* queue of objects expired from the
+- timer wheel and ready to be picked
+- up for signing */
+- pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects
+- from the above queue and start
+- signing each object */
++ struct list_head objects; /* queue of objects expired from the
++ timer wheel and ready to be picked
++ up for signing */
++ pthread_t *workers; /* Threads which pick up the objects
++ from the above queue and start
++ signing each object */
+ };
+
+ struct br_scrubber {
+@@ -209,6 +203,8 @@ struct br_private {
+
+ uint32_t expiry_time; /* objects "wait" time */
+
++ uint32_t signer_th_count; /* Number of signing process threads */
++
+ tbf_t *tbf; /* token bucket filter */
+
+ gf_boolean_t iamscrubber; /* function as a fs scrubber */
+diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+index 40bcda1..9d93caf 100644
+--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+@@ -29,6 +29,7 @@ enum br_mem_types {
+ gf_br_stub_mt_sigstub_t,
+ gf_br_mt_br_child_event_t,
+ gf_br_stub_mt_misc,
++ gf_br_mt_br_worker_t,
+ gf_br_stub_mt_end,
+ };
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
+index c653249..f79af2d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-bitrot.c
++++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
+@@ -34,6 +34,7 @@ const char *gd_bitrot_op_list[GF_BITROT_OPTION_TYPE_MAX] = {
+ [GF_BITROT_OPTION_TYPE_SCRUB_FREQ] = "scrub-frequency",
+ [GF_BITROT_OPTION_TYPE_SCRUB] = "scrub",
+ [GF_BITROT_OPTION_TYPE_EXPIRY_TIME] = "expiry-time",
++ [GF_BITROT_OPTION_TYPE_SIGNER_THREADS] = "signer-threads",
+ };
+
+ int
+@@ -354,6 +355,81 @@ out:
+ return ret;
+ }
+
++static gf_boolean_t
++is_bitd_configure_noop(xlator_t *this, glusterd_volinfo_t *volinfo)
++{
++ gf_boolean_t noop = _gf_true;
++ glusterd_brickinfo_t *brickinfo = NULL;
++
++ if (!glusterd_is_bitrot_enabled(volinfo))
++ goto out;
++ else if (volinfo->status != GLUSTERD_STATUS_STARTED)
++ goto out;
++ else {
++ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
++ {
++ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
++ continue;
++ noop = _gf_false;
++ return noop;
++ }
++ }
++out:
++ return noop;
++}
++
++static int
++glusterd_bitrot_signer_threads(glusterd_volinfo_t *volinfo, dict_t *dict,
++ char *key, char **op_errstr)
++{
++ int32_t ret = -1;
++ uint32_t signer_th_count = 0;
++ uint32_t existing_th_count = 0;
++ xlator_t *this = NULL;
++ glusterd_conf_t *priv = NULL;
++ char dkey[32] = {
++ 0,
++ };
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ priv = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++ ret = dict_get_uint32(dict, "signer-threads", &signer_th_count);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "Unable to get bitrot signer thread count.");
++ goto out;
++ }
++
++ ret = dict_get_uint32(volinfo->dict, key, &existing_th_count);
++ if (ret == 0 && signer_th_count == existing_th_count) {
++ goto out;
++ }
++
++ snprintf(dkey, sizeof(dkey), "%d", signer_th_count);
++ ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
++ "Failed to set option %s", key);
++ goto out;
++ }
++
++ if (!is_bitd_configure_noop(this, volinfo)) {
++ ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL,
++ PROC_START_NO_WAIT);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL,
++ "Failed to reconfigure bitrot services");
++ goto out;
++ }
++ }
++out:
++ return ret;
++}
++
+ static int
+ glusterd_bitrot_enable(glusterd_volinfo_t *volinfo, char **op_errstr)
+ {
+@@ -594,6 +670,15 @@ glusterd_op_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+ volinfo, dict, "features.expiry-time", op_errstr);
+ if (ret)
+ goto out;
++ break;
++
++ case GF_BITROT_OPTION_TYPE_SIGNER_THREADS:
++ ret = glusterd_bitrot_signer_threads(
++ volinfo, dict, "features.signer-threads", op_errstr);
++ if (ret)
++ goto out;
++ break;
++
+ case GF_BITROT_CMD_SCRUB_STATUS:
+ case GF_BITROT_CMD_SCRUB_ONDEMAND:
+ break;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 13f84ea..094a71f 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4658,6 +4658,12 @@ bitrot_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme,
+ return -1;
+ }
+
++ if (!strcmp(vme->option, "signer-threads")) {
++ ret = xlator_set_fixed_option(xl, "signer-threads", vme->value);
++ if (ret)
++ return -1;
++ }
++
+ return ret;
+ }
+
+@@ -4940,18 +4946,18 @@ glusterd_prepare_shd_volume_options_for_tier(glusterd_volinfo_t *volinfo,
+ dict_t *set_dict)
+ {
+ int ret = -1;
+- char *key = NULL;
++ char *key = NULL;
+
+- key = volgen_get_shd_key (volinfo->tier_info.cold_type);
++ key = volgen_get_shd_key(volinfo->tier_info.cold_type);
+ if (key) {
+- ret = dict_set_str (set_dict, key, "enable");
++ ret = dict_set_str(set_dict, key, "enable");
+ if (ret)
+ goto out;
+ }
+
+- key = volgen_get_shd_key (volinfo->tier_info.hot_type);
++ key = volgen_get_shd_key(volinfo->tier_info.hot_type);
+ if (key) {
+- ret = dict_set_str (set_dict, key, "enable");
++ ret = dict_set_str(set_dict, key, "enable");
+ if (ret)
+ goto out;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 9001b88..62acadf 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3379,6 +3379,15 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .op_version = GD_OP_VERSION_3_7_0,
+ .type = NO_DOC,
+ },
++ {
++ .key = "features.signer-threads",
++ .voltype = "features/bit-rot",
++ .value = BR_WORKERS,
++ .option = "signer-threads",
++ .op_version = GD_OP_VERSION_7_0,
++ .type = NO_DOC,
++ },
++ /* Upcall translator options */
+ /* Upcall translator options */
+ {
+ .key = "features.cache-invalidation",
+--
+1.8.3.1
+
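A short usage sketch for the signer-threads CLI introduced by the patch
above; the volume name testvol and the count 8 are illustrative (per the
parser hunk, values below 1 are rejected, and the default stays at 4):

    # set signer threads to the number of available cores, e.g. 8
    gluster volume bitrot testvol signer-threads 8
    # count signer threads on the bitd process (thread name as in the test)
    ps -eL -p $(pgrep -f bitd.pid) | grep -c glfs_brpobj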
diff --git a/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch b/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch
new file mode 100644
index 0000000..a39b61b
--- /dev/null
+++ b/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch
@@ -0,0 +1,359 @@
+From 51090a4b3cb000d601083f12d1875547819fc03f Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Wed, 4 Mar 2020 09:17:26 +0530
+Subject: [PATCH 447/449] core[brick_mux]: brick crashed when creating and
+ deleting volumes over time
+
+Problem: In a brick_mux environment, while volumes are created/stopped in a loop
+ over a long period, the main brick process crashes. It crashes
+ because it did not clean up memory for all objects at the time of
+ detaching a volume.
+ Objects missed at the time of detaching a volume:
+ 1) xlator object for a brick graph
+ 2) local_pool for the posix_lock xlator
+ 3) rpc object cleanup at the quota xlator
+ 4) inode leak at the brick xlator
+
+Solution: To avoid the crash, resolve all leaks at the time of detaching a brick.
+> Change-Id: Ibb6e46c5fba22b9441a88cbaf6b3278823235913
+> updates: #977
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit e589d8de66d3325da8fbbbe44d1a5bd6335e08ab)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24209/)
+
+BUG: 1790336
+Change-Id: Ibb6e46c5fba22b9441a88cbaf6b3278823235913
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202782
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ libglusterfs/src/glusterfs/glusterfs.h | 1 +
+ libglusterfs/src/graph.c | 1 +
+ libglusterfs/src/graph.y | 2 +-
+ libglusterfs/src/xlator.c | 29 ++++++++----
+ xlators/features/changelog/src/changelog.c | 1 +
+ xlators/features/locks/src/posix.c | 4 ++
+ xlators/features/quota/src/quota-enforcer-client.c | 14 +++++-
+ xlators/features/quota/src/quota.c | 54 ++++++++++++++++++++--
+ xlators/features/quota/src/quota.h | 3 ++
+ xlators/protocol/server/src/server.c | 12 +++--
+ 10 files changed, 103 insertions(+), 18 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 177a020..584846e 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -603,6 +603,7 @@ struct _glusterfs_graph {
+ int used; /* Should be set when fuse gets
+ first CHILD_UP */
+ uint32_t volfile_checksum;
++ pthread_mutex_t mutex;
+ };
+ typedef struct _glusterfs_graph glusterfs_graph_t;
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index bb5e67a..1cd92db 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1092,6 +1092,7 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph)
+ ret = xlator_tree_free_memacct(graph->first);
+
+ list_del_init(&graph->list);
++ pthread_mutex_destroy(&graph->mutex);
+ GF_FREE(graph);
+
+ return ret;
+diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y
+index 5b92985..5733515 100644
+--- a/libglusterfs/src/graph.y
++++ b/libglusterfs/src/graph.y
+@@ -541,7 +541,7 @@ glusterfs_graph_new ()
+ return NULL;
+
+ INIT_LIST_HEAD (&graph->list);
+-
++ pthread_mutex_init(&graph->mutex, NULL);
+ gettimeofday (&graph->dob, NULL);
+
+ return graph;
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index 108b96a..36cc32c 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -938,6 +938,8 @@ xlator_mem_cleanup(xlator_t *this)
+ xlator_list_t **trav_p = NULL;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
++ glusterfs_graph_t *graph = NULL;
++ gf_boolean_t graph_cleanup = _gf_false;
+
+ if (this->call_cleanup || !this->ctx)
+ return;
+@@ -945,6 +947,12 @@ xlator_mem_cleanup(xlator_t *this)
+ this->call_cleanup = 1;
+ ctx = this->ctx;
+
++ inode_table = this->itable;
++ if (inode_table) {
++ inode_table_destroy(inode_table);
++ this->itable = NULL;
++ }
++
+ xlator_call_fini(trav);
+
+ while (prev) {
+@@ -953,12 +961,6 @@ xlator_mem_cleanup(xlator_t *this)
+ prev = trav;
+ }
+
+- inode_table = this->itable;
+- if (inode_table) {
+- inode_table_destroy(inode_table);
+- this->itable = NULL;
+- }
+-
+ if (this->fini) {
+ this->fini(this);
+ }
+@@ -968,17 +970,28 @@ xlator_mem_cleanup(xlator_t *this)
+ if (ctx->active) {
+ top = ctx->active->first;
+ LOCK(&ctx->volfile_lock);
+- /* TODO here we have leak for xlator node in a graph */
+- /* Need to move only top xlator from a graph */
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (victim->call_cleanup && !strcmp(victim->name, this->name)) {
++ graph_cleanup = _gf_true;
+ (*trav_p) = (*trav_p)->next;
+ break;
+ }
+ }
+ UNLOCK(&ctx->volfile_lock);
+ }
++
++ if (graph_cleanup) {
++ prev = this;
++ graph = ctx->active;
++ pthread_mutex_lock(&graph->mutex);
++ while (prev) {
++ trav = prev->next;
++ GF_FREE(prev);
++ prev = trav;
++ }
++ pthread_mutex_unlock(&graph->mutex);
++ }
+ }
+
+ void
+diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
+index ff06c09..b54112c 100644
+--- a/xlators/features/changelog/src/changelog.c
++++ b/xlators/features/changelog/src/changelog.c
+@@ -2872,6 +2872,7 @@ fini(xlator_t *this)
+ if (priv->active || priv->rpc_active) {
+ /* terminate RPC server/threads */
+ changelog_cleanup_rpc(this, priv);
++ GF_FREE(priv->ev_dispatcher);
+ }
+ /* call barrier_disable to cancel timer */
+ if (priv->barrier_enabled)
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 9a14c64..50f1265 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -4102,6 +4102,10 @@ fini(xlator_t *this)
+ if (!priv)
+ return;
+ this->private = NULL;
++ if (this->local_pool) {
++ mem_pool_destroy(this->local_pool);
++ this->local_pool = NULL;
++ }
+ GF_FREE(priv->brickname);
+ GF_FREE(priv);
+
+diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
+index 1a4c2e3..097439d 100644
+--- a/xlators/features/quota/src/quota-enforcer-client.c
++++ b/xlators/features/quota/src/quota-enforcer-client.c
+@@ -362,16 +362,28 @@ quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata,
+ {
+ xlator_t *this = NULL;
+ int ret = 0;
++ quota_priv_t *priv = NULL;
+
+ this = mydata;
+-
++ priv = this->private;
+ switch (event) {
+ case RPC_CLNT_CONNECT: {
++ pthread_mutex_lock(&priv->conn_mutex);
++ {
++ priv->conn_status = _gf_true;
++ }
++ pthread_mutex_unlock(&priv->conn_mutex);
+ gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT");
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT: {
++ pthread_mutex_lock(&priv->conn_mutex);
++ {
++ priv->conn_status = _gf_false;
++ pthread_cond_signal(&priv->conn_cond);
++ }
++ pthread_mutex_unlock(&priv->conn_mutex);
+ gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT");
+ break;
+ }
+diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
+index a0c236d..d1123ce 100644
+--- a/xlators/features/quota/src/quota.c
++++ b/xlators/features/quota/src/quota.c
+@@ -5014,6 +5014,43 @@ quota_forget(xlator_t *this, inode_t *inode)
+ return 0;
+ }
+
++int
++notify(xlator_t *this, int event, void *data, ...)
++{
++ quota_priv_t *priv = NULL;
++ int ret = 0;
++ rpc_clnt_t *rpc = NULL;
++ gf_boolean_t conn_status = _gf_true;
++ xlator_t *victim = data;
++
++ priv = this->private;
++ if (!priv || !priv->is_quota_on)
++ goto out;
++
++ if (event == GF_EVENT_PARENT_DOWN) {
++ rpc = priv->rpc_clnt;
++ if (rpc) {
++ rpc_clnt_disable(rpc);
++ pthread_mutex_lock(&priv->conn_mutex);
++ {
++ conn_status = priv->conn_status;
++ while (conn_status) {
++ (void)pthread_cond_wait(&priv->conn_cond,
++ &priv->conn_mutex);
++ conn_status = priv->conn_status;
++ }
++ }
++ pthread_mutex_unlock(&priv->conn_mutex);
++ gf_log(this->name, GF_LOG_INFO,
++ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
++ }
++ }
++
++out:
++ ret = default_notify(this, event, data);
++ return ret;
++}
++
+ int32_t
+ init(xlator_t *this)
+ {
+@@ -5056,6 +5093,10 @@ init(xlator_t *this)
+ goto err;
+ }
+
++ pthread_mutex_init(&priv->conn_mutex, NULL);
++ pthread_cond_init(&priv->conn_cond, NULL);
++ priv->conn_status = _gf_false;
++
+ if (priv->is_quota_on) {
+ rpc = quota_enforcer_init(this, this->options);
+ if (rpc == NULL) {
+@@ -5169,20 +5210,22 @@ fini(xlator_t *this)
+ {
+ quota_priv_t *priv = NULL;
+ rpc_clnt_t *rpc = NULL;
+- int i = 0, cnt = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+ rpc = priv->rpc_clnt;
+ priv->rpc_clnt = NULL;
+- this->private = NULL;
+ if (rpc) {
+- cnt = GF_ATOMIC_GET(rpc->refcount);
+- for (i = 0; i < cnt; i++)
+- rpc_clnt_unref(rpc);
++ rpc_clnt_connection_cleanup(&rpc->conn);
++ rpc_clnt_unref(rpc);
+ }
++
++ this->private = NULL;
+ LOCK_DESTROY(&priv->lock);
++ pthread_mutex_destroy(&priv->conn_mutex);
++ pthread_cond_destroy(&priv->conn_cond);
++
+ GF_FREE(priv);
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+@@ -5314,6 +5357,7 @@ struct volume_options options[] = {
+ xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
++ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
+index a5a99ca..e51ffd4 100644
+--- a/xlators/features/quota/src/quota.h
++++ b/xlators/features/quota/src/quota.h
+@@ -217,6 +217,9 @@ struct quota_priv {
+ char *volume_uuid;
+ uint64_t validation_count;
+ int32_t quotad_conn_status;
++ pthread_mutex_t conn_mutex;
++ pthread_cond_t conn_cond;
++ gf_boolean_t conn_status;
+ };
+ typedef struct quota_priv quota_priv_t;
+
+diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
+index a5f09fe..54d9c0f 100644
+--- a/xlators/protocol/server/src/server.c
++++ b/xlators/protocol/server/src/server.c
+@@ -409,7 +409,13 @@ server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name)
+
+ arg = calloc(1, sizeof(*arg));
+ arg->this = this;
+- arg->victim_name = gf_strdup(victim_name);
++ arg->victim_name = strdup(victim_name);
++ if (!arg->victim_name) {
++ gf_smsg(this->name, GF_LOG_CRITICAL, ENOMEM, LG_MSG_NO_MEMORY,
++ "Memory allocation is failed");
++ return;
++ }
++
+ th_ret = gf_thread_create_detached(&th_id, server_graph_janitor_threads,
+ arg, "graphjanitor");
+ if (th_ret) {
+@@ -417,7 +423,7 @@ server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name)
+ "graph janitor Thread"
+ " creation is failed for brick %s",
+ victim_name);
+- GF_FREE(arg->victim_name);
++ free(arg->victim_name);
+ free(arg);
+ }
+ }
+@@ -628,7 +634,7 @@ server_graph_janitor_threads(void *data)
+ }
+
+ out:
+- GF_FREE(arg->victim_name);
++ free(arg->victim_name);
+ free(arg);
+ return NULL;
+ }
+--
+1.8.3.1
+
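A rough shell sketch of the scenario the patch above addresses, assuming a
brick-multiplex setup; the host name, brick paths, and loop count are
illustrative:

    gluster volume set all cluster.brick-multiplex on
    for i in $(seq 1 50); do
        gluster volume create vol$i server1:/bricks/vol$i force
        gluster volume start vol$i
        gluster --mode=script volume stop vol$i
        gluster --mode=script volume delete vol$i
    done
    # watch the RSS of the surviving multiplexed brick process between runs;
    # without the fix it grows on every detach
    pmap -x $(pgrep -f glusterfsd | head -1) | tail -n 1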
diff --git a/0448-Posix-Use-simple-approach-to-close-fd.patch b/0448-Posix-Use-simple-approach-to-close-fd.patch
new file mode 100644
index 0000000..f030358
--- /dev/null
+++ b/0448-Posix-Use-simple-approach-to-close-fd.patch
@@ -0,0 +1,341 @@
+From 175c99dccc47d2b4267a8819404e5cbeb8cfba11 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Thu, 12 Mar 2020 21:12:13 +0530
+Subject: [PATCH 448/449] Posix: Use simple approach to close fd
+
+Problem: The posix_release(dir) functions add fds to ctx->janitor_fds,
+ and a janitor thread closes them. In a brick_mux environment it is
+ difficult to handle race conditions in the janitor thread because
+ a single janitor thread is spawned for all bricks.
+
+Solution: Use a synctask to execute the posix_release(dir) functions
+ instead of using a background thread to close fds.
+
+> Credits: Pranith Karampuri <pkarampu@redhat.com>
+> Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e
+> Fixes: bz#1811631
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry pick from commit fb20713b380e1df8d7f9e9df96563be2f9144fd6)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24221/)
+
+BUG: 1790336
+Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202791
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/glusterfs.h | 6 +-
+ libglusterfs/src/glusterfs/syncop.h | 7 +-
+ rpc/rpc-lib/src/rpcsvc.c | 6 ++
+ run-tests.sh | 2 +-
+ tests/features/ssl-authz.t | 7 +-
+ xlators/storage/posix/src/posix-common.c | 4 --
+ xlators/storage/posix/src/posix-helpers.c | 98 --------------------------
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 28 ++------
+ xlators/storage/posix/src/posix.h | 3 -
+ 9 files changed, 20 insertions(+), 141 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 584846e..495a4d7 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -734,11 +734,7 @@ struct _glusterfs_ctx {
+
+ struct list_head volfile_list;
+
+- /* Add members to manage janitor threads for cleanup fd */
+- struct list_head janitor_fds;
+- pthread_cond_t janitor_cond;
+- pthread_mutex_t janitor_lock;
+- pthread_t janitor;
++ char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
+ };
+ typedef struct _glusterfs_ctx glusterfs_ctx_t;
+
+diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h
+index 3011b4c..1e4c73b 100644
+--- a/libglusterfs/src/glusterfs/syncop.h
++++ b/libglusterfs/src/glusterfs/syncop.h
+@@ -254,7 +254,7 @@ struct syncopctx {
+ task = synctask_get(); \
+ stb->task = task; \
+ if (task) \
+- frame = task->opframe; \
++ frame = copy_frame(task->opframe); \
+ else \
+ frame = syncop_create_frame(THIS); \
+ \
+@@ -269,10 +269,7 @@ struct syncopctx {
+ STACK_WIND_COOKIE(frame, cbk, (void *)stb, subvol, fn_op, params); \
+ \
+ __yield(stb); \
+- if (task) \
+- STACK_RESET(frame->root); \
+- else \
+- STACK_DESTROY(frame->root); \
++ STACK_DESTROY(frame->root); \
+ } while (0)
+
+ /*
+diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
+index 3f184bf..23ca1fd 100644
+--- a/rpc/rpc-lib/src/rpcsvc.c
++++ b/rpc/rpc-lib/src/rpcsvc.c
+@@ -375,6 +375,12 @@ rpcsvc_program_actor(rpcsvc_request_t *req)
+
+ req->ownthread = program->ownthread;
+ req->synctask = program->synctask;
++ if (((req->procnum == GFS3_OP_RELEASE) ||
++ (req->procnum == GFS3_OP_RELEASEDIR)) &&
++ (program->prognum == GLUSTER_FOP_PROGRAM)) {
++ req->ownthread = _gf_false;
++ req->synctask = _gf_true;
++ }
+
+ err = SUCCESS;
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s",
+diff --git a/run-tests.sh b/run-tests.sh
+index 5683b21..c835d93 100755
+--- a/run-tests.sh
++++ b/run-tests.sh
+@@ -356,7 +356,7 @@ function run_tests()
+ selected_tests=$((selected_tests+1))
+ echo
+ echo $section_separator$section_separator
+- if [[ $(get_test_status $t) == "BAD_TEST" ]] && \
++ if [[ $(get_test_status $t) =~ "BAD_TEST" ]] && \
+ [[ $skip_bad_tests == "yes" ]]
+ then
+ skipped_bad_tests=$((skipped_bad_tests+1))
+diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
+index 132b598..497083e 100755
+--- a/tests/features/ssl-authz.t
++++ b/tests/features/ssl-authz.t
+@@ -67,13 +67,14 @@ echo "Memory consumption for glusterfsd process"
+ for i in $(seq 1 100); do
+ gluster v heal $V0 info >/dev/null
+ done
+-
++#Wait to cleanup memory
++sleep 10
+ end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+ diff=$((end-start))
+
+-# If memory consumption is more than 5M some leak in SSL code path
++# If memory consumption is more than 15M some leak in SSL code path
+
+-TEST [ $diff -lt 5000 ]
++TEST [ $diff -lt 15000 ]
+
+
+ # Set ssl-allow to a wildcard that includes our identity.
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index 2cb58ba..ac53796 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -1041,10 +1041,6 @@ posix_init(xlator_t *this)
+ pthread_mutex_init(&_private->janitor_mutex, NULL);
+ pthread_cond_init(&_private->janitor_cond, NULL);
+ INIT_LIST_HEAD(&_private->fsyncs);
+- ret = posix_spawn_ctx_janitor_thread(this);
+- if (ret)
+- goto out;
+-
+ ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this,
+ "posixfsy");
+ if (ret) {
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 2336add..39dbcce 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1582,104 +1582,6 @@ unlock:
+ return;
+ }
+
+-static struct posix_fd *
+-janitor_get_next_fd(glusterfs_ctx_t *ctx, int32_t janitor_sleep)
+-{
+- struct posix_fd *pfd = NULL;
+-
+- struct timespec timeout;
+-
+- pthread_mutex_lock(&ctx->janitor_lock);
+- {
+- if (list_empty(&ctx->janitor_fds)) {
+- time(&timeout.tv_sec);
+- timeout.tv_sec += janitor_sleep;
+- timeout.tv_nsec = 0;
+-
+- pthread_cond_timedwait(&ctx->janitor_cond, &ctx->janitor_lock,
+- &timeout);
+- goto unlock;
+- }
+-
+- pfd = list_entry(ctx->janitor_fds.next, struct posix_fd, list);
+-
+- list_del(ctx->janitor_fds.next);
+- }
+-unlock:
+- pthread_mutex_unlock(&ctx->janitor_lock);
+-
+- return pfd;
+-}
+-
+-static void *
+-posix_ctx_janitor_thread_proc(void *data)
+-{
+- xlator_t *this = NULL;
+- struct posix_fd *pfd;
+- glusterfs_ctx_t *ctx = NULL;
+- struct posix_private *priv = NULL;
+- int32_t sleep_duration = 0;
+-
+- this = data;
+- ctx = THIS->ctx;
+- THIS = this;
+-
+- priv = this->private;
+- sleep_duration = priv->janitor_sleep_duration;
+- while (1) {
+- pfd = janitor_get_next_fd(ctx, sleep_duration);
+- if (pfd) {
+- if (pfd->dir == NULL) {
+- gf_msg_trace(this->name, 0, "janitor: closing file fd=%d",
+- pfd->fd);
+- sys_close(pfd->fd);
+- } else {
+- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p",
+- pfd->dir);
+- sys_closedir(pfd->dir);
+- }
+-
+- GF_FREE(pfd);
+- }
+- }
+-
+- return NULL;
+-}
+-
+-int
+-posix_spawn_ctx_janitor_thread(xlator_t *this)
+-{
+- struct posix_private *priv = NULL;
+- int ret = 0;
+- glusterfs_ctx_t *ctx = NULL;
+-
+- priv = this->private;
+- ctx = THIS->ctx;
+-
+- LOCK(&priv->lock);
+- {
+- if (!ctx->janitor) {
+- pthread_mutex_init(&ctx->janitor_lock, NULL);
+- pthread_cond_init(&ctx->janitor_cond, NULL);
+- INIT_LIST_HEAD(&ctx->janitor_fds);
+-
+- ret = gf_thread_create(&ctx->janitor, NULL,
+- posix_ctx_janitor_thread_proc, this,
+- "posixctxjan");
+-
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
+- "spawning janitor "
+- "thread failed");
+- goto unlock;
+- }
+- }
+- }
+-unlock:
+- UNLOCK(&priv->lock);
+- return ret;
+-}
+-
+ static int
+ is_fresh_file(int64_t ctime_sec)
+ {
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 5748b9f..d135d8b 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -1358,7 +1358,6 @@ posix_releasedir(xlator_t *this, fd_t *fd)
+ struct posix_fd *pfd = NULL;
+ uint64_t tmp_pfd = 0;
+ int ret = 0;
+- glusterfs_ctx_t *ctx = NULL;
+
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+@@ -1376,21 +1375,11 @@ posix_releasedir(xlator_t *this, fd_t *fd)
+ goto out;
+ }
+
+- ctx = THIS->ctx;
+-
+- pthread_mutex_lock(&ctx->janitor_lock);
+- {
+- INIT_LIST_HEAD(&pfd->list);
+- list_add_tail(&pfd->list, &ctx->janitor_fds);
+- pthread_cond_signal(&ctx->janitor_cond);
+- }
+- pthread_mutex_unlock(&ctx->janitor_lock);
+-
+- /*gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
++ gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
+
+ sys_closedir(pfd->dir);
+ GF_FREE(pfd);
+- */
++
+ out:
+ return 0;
+ }
+@@ -2510,13 +2499,11 @@ posix_release(xlator_t *this, fd_t *fd)
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ uint64_t tmp_pfd = 0;
+- glusterfs_ctx_t *ctx = NULL;
+
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+- ctx = THIS->ctx;
+
+ ret = fd_ctx_del(fd, this, &tmp_pfd);
+ if (ret < 0) {
+@@ -2531,13 +2518,10 @@ posix_release(xlator_t *this, fd_t *fd)
+ "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
+ }
+
+- pthread_mutex_lock(&ctx->janitor_lock);
+- {
+- INIT_LIST_HEAD(&pfd->list);
+- list_add_tail(&pfd->list, &ctx->janitor_fds);
+- pthread_cond_signal(&ctx->janitor_cond);
+- }
+- pthread_mutex_unlock(&ctx->janitor_lock);
++ gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
++
++ sys_close(pfd->fd);
++ GF_FREE(pfd);
+
+ if (!priv)
+ goto out;
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index ac9d83c..61495a7 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -666,9 +666,6 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
+ int
+ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
+
+-int
+-posix_spawn_ctx_janitor_thread(xlator_t *this);
+-
+ void
+ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
+
+--
+1.8.3.1
+
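One observable effect of the patch above, sketched below; the thread name
comes from the removed posix_spawn_ctx_janitor_thread code, and the exact
ps output is environment-dependent:

    # before the patch each brick process carried a "posixctxjan" thread
    ps -eL -p $(pgrep -f glusterfsd | head -1) | grep posixctxjan
    # with the patch, release/releasedir run as synctasks and the
    # per-ctx janitor thread is gone, so the grep matches nothing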
diff --git a/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch b/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch
new file mode 100644
index 0000000..6a161bf
--- /dev/null
+++ b/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch
@@ -0,0 +1,107 @@
+From 6e15fca1621b06270983f57ac146f0f8e52f0797 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Tue, 9 Jun 2020 15:38:12 +0530
+Subject: [PATCH 449/449] test: Test case brick-mux-validation-in-cluster.t is
+ failing on RHEL-8
+
+Problem: Brick processes are not properly attached on a cluster node when
+some volume options are changed on a peer node while glusterd is down on
+that specific node.
+
+Solution: On restart, glusterd gets a friend update request from a peer
+node if the peer has changes for a volume. If the brick process starts
+before that friend update request is received, brick_mux does not behave
+properly: all bricks get attached to the same process even though the
+volume options are not the same. To avoid the issue, introduce an atomic
+flag, volpeerupdate, and set it when glusterd receives a friend update
+request from a peer for a specific volume. If volpeerupdate is 1, the
+volume is started by the glusterd_import_friend_volume synctask.
+
+> Change-Id: I4c026f1e7807ded249153670e6967a2be8d22cb7
+> Credit: Sanju Rakaonde <srakonde@redhat.com>
+> fixes: #1290
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24540/)
+> (Cherry pick from commit 955bfd567329cf7fe63e9c3b89d333a55e5e9a20)
+
+BUG: 1844359
+Change-Id: I4c026f1e7807ded249153670e6967a2be8d22cb7
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202812
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/brick-mux-validation-in-cluster.t | 4 +---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 7 +++++--
+ xlators/mgmt/glusterd/src/glusterd.h | 4 ++++
+ 3 files changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
+index f088dbb..b6af487 100644
+--- a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
++++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
+@@ -100,10 +100,8 @@ $CLI_2 volume set $V0 performance.readdir-ahead on
+ $CLI_2 volume set $V1 performance.readdir-ahead on
+
+ TEST $glusterd_1;
++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+-sleep 10
+-
+-EXPECT 4 count_brick_processes
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 2eb2a76..6f904ae 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -3758,6 +3758,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+ "Version of volume %s differ. local version = %d, "
+ "remote version = %d on peer %s",
+ volinfo->volname, volinfo->version, version, hostname);
++ GF_ATOMIC_INIT(volinfo->volpeerupdate, 1);
+ *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
+ goto out;
+ } else if (version < volinfo->version) {
+@@ -4784,7 +4785,8 @@ glusterd_volinfo_stop_stale_bricks(glusterd_volinfo_t *new_volinfo,
+ * or if it's part of the new volume and is pending a snap,
+ * then stop the brick process
+ */
+- if (ret || (new_brickinfo->snap_status == -1)) {
++ if (ret || (new_brickinfo->snap_status == -1) ||
++ GF_ATOMIC_GET(old_volinfo->volpeerupdate)) {
+ /*TODO: may need to switch to 'atomic' flavour of
+ * brick_stop, once we make peer rpc program also
+ * synctask enabled*/
+@@ -6490,7 +6492,8 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo,
+ * three different triggers for an attempt to start the brick process
+ * due to the quorum handling code in glusterd_friend_sm.
+ */
+- if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered) {
++ if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered ||
++ GF_ATOMIC_GET(volinfo->volpeerupdate)) {
+ gf_msg_debug(this->name, 0,
+ "brick %s is already in starting "
+ "phase",
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 1c6c3b1..f739b5d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -523,6 +523,10 @@ struct glusterd_volinfo_ {
+ pthread_mutex_t store_volinfo_lock; /* acquire lock for
+ * updating the volinfo
+ */
++ gf_atomic_t volpeerupdate;
++ /* Flag to check about volume has received updates
++ from peer
++ */
+ };
+
+ typedef enum gd_snap_status_ {
+--
+1.8.3.1
+
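The failure scenario of the patch above, sketched as shell steps on a
two-node cluster; node and volume names are illustrative, and the bundled
brick-mux-validation-in-cluster.t exercises the same flow:

    systemctl stop glusterd                                # on node1
    gluster volume set vol1 performance.readdir-ahead on   # on node2
    systemctl start glusterd                               # on node1
    # with the fix, bricks on node1 whose volumes now differ in options
    # come up in separate processes instead of all attaching to one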
diff --git a/0450-tests-basic-ctime-enable-ctime-before-testing.patch b/0450-tests-basic-ctime-enable-ctime-before-testing.patch
new file mode 100644
index 0000000..96de5a1
--- /dev/null
+++ b/0450-tests-basic-ctime-enable-ctime-before-testing.patch
@@ -0,0 +1,35 @@
+From 09dce9ce8e946a86209b6f057bf14323036fa12a Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Wed, 10 Jun 2020 11:44:56 +0530
+Subject: [PATCH 450/451] tests/basic/ctime: enable ctime before testing
+
+This is to ensure that this test runs successfully even if
+ctime is disabled by default (which is the case downstream).
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1844359
+Change-Id: I91e80b3d8a56fc089aeb58b0254812111d394842
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202874
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/ctime/ctime-utimesat.t | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t
+index 540e57a..da12fbe 100644
+--- a/tests/basic/ctime/ctime-utimesat.t
++++ b/tests/basic/ctime/ctime-utimesat.t
+@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.read-after-open off
+ TEST $CLI volume set $V0 performance.open-behind off
+ TEST $CLI volume set $V0 performance.write-behind off
+ TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 ctime on
+
+ TEST $CLI volume start $V0
+
+--
+1.8.3.1
+
diff --git a/0451-extras-Modify-group-virt-to-include-network-related-.patch b/0451-extras-Modify-group-virt-to-include-network-related-.patch
new file mode 100644
index 0000000..bba69e1
--- /dev/null
+++ b/0451-extras-Modify-group-virt-to-include-network-related-.patch
@@ -0,0 +1,44 @@
+From 96d9b659fd0367abe1666a5ac6203208e0dc056d Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay <kdhananj@redhat.com>
+Date: Mon, 4 May 2020 14:30:57 +0530
+Subject: [PATCH 451/451] extras: Modify group 'virt' to include
+ network-related options
+
+This is needed to work around an issue seen where VMs running on
+online hosts get killed when a different host is rebooted
+in ovirt-gluster hyperconverged environments. The actual RCA is quite
+lengthy and documented in the GitHub issue; please refer to it
+for more details.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24400
+> Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1
+> Fixes: #1217
+> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+
+Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1
+BUG: 1845064
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/203291
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/group-virt.example | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/extras/group-virt.example b/extras/group-virt.example
+index c2ce89d..3a441eb 100644
+--- a/extras/group-virt.example
++++ b/extras/group-virt.example
+@@ -16,3 +16,8 @@ cluster.choose-local=off
+ client.event-threads=4
+ server.event-threads=4
+ performance.client-io-threads=on
++network.ping-timeout=20
++server.tcp-user-timeout=20
++server.keepalive-time=10
++server.keepalive-interval=2
++server.keepalive-count=5
+--
+1.8.3.1
+
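A usage sketch for the profile touched by the patch above (volume name is
illustrative; "group virt" applies every key=value pair from the example
file in one step):

    gluster volume set testvol group virt
    # spot-check one of the newly added network options
    gluster volume get testvol network.ping-timeout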
diff --git a/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch b/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch
new file mode 100644
index 0000000..0b115bb
--- /dev/null
+++ b/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch
@@ -0,0 +1,48 @@
+From c184943bdf38de5b4cbf165fd1cd98ce7bd9e976 Mon Sep 17 00:00:00 2001
+From: hari gowtham <hgowtham@redhat.com>
+Date: Tue, 16 Jun 2020 14:47:53 +0530
+Subject: [PATCH 452/456] Tier/DHT: Handle the pause case missed out
+
+Problem: While backporting a change from master, the changes related
+to tier were removed, which broke the tier pause functionality.
+Backporting it to downstream left this use case broken, as we still
+support tier.
+Patch that caused this: https://code.engineering.redhat.com/gerrit/#/c/202647/2
+
+Fix: Add the condition back for tier pause to work.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1844359
+Change-Id: I46c6c179b09c7e1a729be9fd257fa4a490f0287e
+Signed-off-by: hari gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/203560
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index e9974cd..abc10fc 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -1160,6 +1160,15 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+ break;
+ }
+
++ if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) &&
++ (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) {
++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
++ "Migrate file paused");
++ ret = -1;
++ break;
++ }
++
++
+ offset += ret;
+ total += ret;
+
+--
+1.8.3.1
+
diff --git a/0453-glusterd-add-brick-command-failure.patch b/0453-glusterd-add-brick-command-failure.patch
new file mode 100644
index 0000000..dd21350
--- /dev/null
+++ b/0453-glusterd-add-brick-command-failure.patch
@@ -0,0 +1,300 @@
+From a04592cce9aaa6ccb8a038bc3b4e31bc125d1d10 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Tue, 16 Jun 2020 18:03:21 +0530
+Subject: [PATCH 453/456] glusterd: add-brick command failure
+
+Problem: The add-brick operation fails when the replica or disperse
+count is not mentioned in the add-brick command.
+
+Reason: Since commit a113d93 we check the brick order while doing an
+add-brick operation for replica and disperse volumes. If the replica
+or disperse count is not mentioned in the command, the dict get fails,
+resulting in add-brick operation failure.
+
+> upstream patch: https://review.gluster.org/#/c/glusterfs/+/24581/
+> fixes: #1306
+> Change-Id: Ie957540e303bfb5f2d69015661a60d7e72557353
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+BUG: 1847081
+Change-Id: Ie957540e303bfb5f2d69015661a60d7e72557353
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/203867
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/brick-order-check-add-brick.t | 40 ++++++++++++++++++++++
+ tests/cluster.rc | 11 ++++--
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 39 ++++++++++++++-------
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 30 ++---------------
+ xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 41 +++++++++++++++++++----
+ 6 files changed, 115 insertions(+), 49 deletions(-)
+ create mode 100644 tests/bugs/glusterd/brick-order-check-add-brick.t
+
+diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t
+new file mode 100644
+index 0000000..29f0ed1
+--- /dev/null
++++ b/tests/bugs/glusterd/brick-order-check-add-brick.t
+@@ -0,0 +1,40 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../cluster.rc
++. $(dirname $0)/../../snapshot.rc
++
++cleanup;
++
++TEST verify_lvm_version;
++#Create cluster with 3 nodes
++TEST launch_cluster 3 -NO_DEBUG -NO_FORCE
++TEST setup_lvm 3
++
++TEST $CLI_1 peer probe $H2
++TEST $CLI_1 peer probe $H3
++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
++
++TEST $CLI_1 volume create $V0 replica 3 $H1:$L1/$V0 $H2:$L2/$V0 $H3:$L3/$V0
++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
++EXPECT 'Created' volinfo_field $V0 'Status'
++
++TEST $CLI_1 volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status'
++
++#add-brick with or without mentioning the replica count should not fail
++TEST $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}_1 $H2:$L2/${V0}_1 $H3:$L3/${V0}_1
++EXPECT '2 x 3 = 6' volinfo_field $V0 'Number of Bricks'
++
++TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_2 $H2:$L2/${V0}_2 $H3:$L3/${V0}_2
++EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
++
++#adding bricks from same host should fail the brick order check
++TEST ! $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5
++EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
++
++#adding bricks from same host with force should succeed
++TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force
++EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks'
++
++cleanup
+diff --git a/tests/cluster.rc b/tests/cluster.rc
+index 99be8e7..8b73153 100644
+--- a/tests/cluster.rc
++++ b/tests/cluster.rc
+@@ -11,7 +11,7 @@ function launch_cluster() {
+ define_backends $count;
+ define_hosts $count;
+ define_glusterds $count $2;
+- define_clis $count;
++ define_clis $count $3;
+
+ start_glusterds;
+ }
+@@ -133,8 +133,13 @@ function define_clis() {
+ lopt1="--log-file=$logdir/$logfile1"
+
+
+- eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
+- eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
++ if [ "$2" == "-NO_FORCE" ]; then
++ eval "CLI_$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
++ eval "CLI$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
++ else
++ eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
++ eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
++ fi
+ done
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+index 121346c..5ae577a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+@@ -1576,20 +1576,35 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+
+ /* Check brick order if the volume type is replicate or disperse. If
+ * force at the end of command not given then check brick order.
++ * Doing this check at the originator node is sufficient.
+ */
+
+- if (!is_force) {
+- if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) ||
+- (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) {
+- ret = glusterd_check_brick_order(dict, msg, volinfo->type);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+- "Not adding brick because of "
+- "bad brick order. %s",
+- msg);
+- *op_errstr = gf_strdup(msg);
+- goto out;
+- }
++ if (is_origin_glusterd(dict) && !is_force) {
++ ret = 0;
++ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
++ gf_msg_debug(this->name, 0,
++ "Replicate cluster type "
++ "found. Checking brick order.");
++ if (replica_count)
++ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
++ replica_count);
++ else
++ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
++ volinfo->replica_count);
++ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
++ gf_msg_debug(this->name, 0,
++ "Disperse cluster type"
++ " found. Checking brick order.");
++ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
++ volinfo->disperse_count);
++ }
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
++ "Not adding brick because of "
++ "bad brick order. %s",
++ msg);
++ *op_errstr = gf_strdup(msg);
++ goto out;
+ }
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 6f904ae..545e688 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -14802,7 +14802,8 @@ glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next)
+ * volume are present on the same server
+ */
+ int32_t
+-glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type)
++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
++ int32_t sub_count)
+ {
+ int ret = -1;
+ int i = 0;
+@@ -14819,7 +14820,6 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type)
+ char *tmpptr = NULL;
+ char *volname = NULL;
+ int32_t brick_count = 0;
+- int32_t sub_count = 0;
+ struct addrinfo *ai_info = NULL;
+ char brick_addr[128] = {
+ 0,
+@@ -14870,31 +14870,6 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type)
+ goto out;
+ }
+
+- if (type != GF_CLUSTER_TYPE_DISPERSE) {
+- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
+- &sub_count);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+- "Bricks check : Could"
+- " not retrieve replica count");
+- goto out;
+- }
+- gf_msg_debug(this->name, 0,
+- "Replicate cluster type "
+- "found. Checking brick order.");
+- } else {
+- ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"),
+- &sub_count);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+- "Bricks check : Could"
+- " not retrieve disperse count");
+- goto out;
+- }
+- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND,
+- "Disperse cluster type"
+- " found. Checking brick order.");
+- }
+ brick_list_dup = brick_list_ptr = gf_strdup(brick_list);
+ /* Resolve hostnames and get addrinfo */
+ while (i < brick_count) {
+@@ -14989,5 +14964,6 @@ out:
+ ai_list_tmp2 = ai_list_tmp1;
+ }
+ free(ai_list_tmp2);
++ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+ }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index e2e2454..5f5de82 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -883,6 +883,7 @@ char *
+ search_brick_path_from_proc(pid_t brick_pid, char *brickpath);
+
+ int32_t
+-glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type);
++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
++ int32_t sub_count);
+
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 8da2ff3..134b04c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -1024,6 +1024,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
+ int32_t local_brick_count = 0;
+ int32_t i = 0;
+ int32_t type = 0;
++ int32_t replica_count = 0;
++ int32_t disperse_count = 0;
+ char *brick = NULL;
+ char *tmpptr = NULL;
+ xlator_t *this = NULL;
+@@ -1119,15 +1121,42 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
+ }
+
+ if (!is_force) {
+- if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
+- (type == GF_CLUSTER_TYPE_DISPERSE)) {
+- ret = glusterd_check_brick_order(dict, msg, type);
++ if (type == GF_CLUSTER_TYPE_REPLICATE) {
++ ret = dict_get_int32n(dict, "replica-count",
++ SLEN("replica-count"), &replica_count);
+ if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+- "Not creating volume because of "
+- "bad brick order");
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
++ "Bricks check : Could"
++ " not retrieve replica count");
++ goto out;
++ }
++ gf_msg_debug(this->name, 0,
++ "Replicate cluster type "
++ "found. Checking brick order.");
++ ret = glusterd_check_brick_order(dict, msg, type,
++ replica_count);
++ } else if (type == GF_CLUSTER_TYPE_DISPERSE) {
++ ret = dict_get_int32n(dict, "disperse-count",
++ SLEN("disperse-count"), &disperse_count);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
++ "Bricks check : Could"
++ " not retrieve disperse count");
+ goto out;
+ }
++ gf_msg_debug(this->name, 0,
++ "Disperse cluster type"
++ " found. Checking brick order.");
++ ret = glusterd_check_brick_order(dict, msg, type,
++ disperse_count);
++ }
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
++ "Not creating the volume because of "
++ "bad brick order. %s",
++ msg);
++ *op_errstr = gf_strdup(msg);
++ goto out;
+ }
+ }
+ }
+--
+1.8.3.1
+
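The fix above moves the replica/disperse count lookup out of glusterd_check_brick_order() and into its callers, which fall back to the counts already stored in volinfo when the CLI did not pass one; that is exactly the case whose failed dict get broke add-brick. A hedged sketch of that fallback decision, with hypothetical types standing in for the glusterd structures:

    #include <stdio.h>

    enum vol_type { T_REPLICATE, T_DISPERSE, T_OTHER };

    struct volinfo {
        enum vol_type type;
        int replica_count;   /* counts already stored for the volume */
        int disperse_count;
    };

    /* Stand-in for glusterd_check_brick_order(): just reports the
     * sub_count it would use to group bricks into subvolumes. */
    static int check_brick_order(enum vol_type type, int sub_count) {
        printf("type=%d: checking brick order with sub_count=%d\n",
               (int)type, sub_count);
        return 0;
    }

    /* cli_count is what the user passed on the command line, or 0 when
     * omitted (the case the original code mishandled via a dict get). */
    static int stage_add_brick(const struct volinfo *vol, int cli_count) {
        switch (vol->type) {
        case T_REPLICATE:
            return check_brick_order(vol->type,
                                     cli_count ? cli_count
                                               : vol->replica_count);
        case T_DISPERSE:
            return check_brick_order(vol->type, vol->disperse_count);
        default:
            return 0; /* no ordering constraint for plain distribute */
        }
    }

    int main(void) {
        struct volinfo vol = {T_REPLICATE, 3, 0};
        stage_add_brick(&vol, 0); /* count omitted: falls back to 3 */
        stage_add_brick(&vol, 2); /* count given: use it */
        return 0;
    }
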
diff --git a/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch b/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch
new file mode 100644
index 0000000..6ad460d
--- /dev/null
+++ b/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch
@@ -0,0 +1,152 @@
+From cddd253c5e3f0a7c3b91c35cea8ad1921cb43b98 Mon Sep 17 00:00:00 2001
+From: Kinglong Mee <kinglongmee@gmail.com>
+Date: Thu, 18 Jul 2019 11:43:01 +0800
+Subject: [PATCH 454/456] features/locks: avoid use after freed of frame for
+ blocked lock
+
+A fop that contains a blocked lock may use freed frame info after
+another unlock fop has unwound the blocked lock.
+
+Because the blocked lock is added to the blocked list while the
+inode lock (or another lock) is held, the fop that contains the
+blocked lock must not use it once it is outside the inode lock.
+
+Upstream Patch - https://review.gluster.org/#/c/glusterfs/+/23155/
+
+>Change-Id: Icb309a1cc78380dc982b26d50c18d67e4f2c8915
+>fixes: bz#1737291
+>Signed-off-by: Kinglong Mee <mijinlong@horiscale.com>
+
+Change-Id: Icb309a1cc78380dc982b26d50c18d67e4f2c8915
+BUG: 1812789
+Reviewed-on: https://code.engineering.redhat.com/gerrit/206465
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ xlators/features/locks/src/common.c | 4 ++++
+ xlators/features/locks/src/entrylk.c | 4 ++--
+ xlators/features/locks/src/inodelk.c | 7 +++++--
+ xlators/features/locks/src/posix.c | 5 +++--
+ xlators/features/locks/src/reservelk.c | 2 --
+ 5 files changed, 14 insertions(+), 8 deletions(-)
+
+diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
+index 6e7fb4b..1406e70 100644
+--- a/xlators/features/locks/src/common.c
++++ b/xlators/features/locks/src/common.c
+@@ -1080,6 +1080,10 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
++
++ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW,
++ &lock->user_flock, NULL);
++
+ lock->blocked = 1;
+ __insert_lock(pl_inode, lock);
+ ret = -1;
+diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
+index ced5eca..93c649c 100644
+--- a/xlators/features/locks/src/entrylk.c
++++ b/xlators/features/locks/src/entrylk.c
+@@ -552,6 +552,8 @@ __lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, lock->basename);
+
++ entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename,
++ ENTRYLK_LOCK, lock->type);
+ out:
+ return -EAGAIN;
+ }
+@@ -932,8 +934,6 @@ out:
+ op_ret, op_errno);
+ unwind:
+ STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL);
+- } else {
+- entrylk_trace_block(this, frame, volume, fd, loc, basename, cmd, type);
+ }
+
+ if (pcontend != NULL) {
+diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
+index a9c42f1..24dee49 100644
+--- a/xlators/features/locks/src/inodelk.c
++++ b/xlators/features/locks/src/inodelk.c
+@@ -420,6 +420,8 @@ __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
++ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
++ lock->volume);
+ out:
+ return -EAGAIN;
+ }
+@@ -959,6 +961,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ int ret = -1;
+ GF_UNUSED int dict_ret = -1;
+ int can_block = 0;
++ short lock_type = 0;
+ pl_inode_t *pinode = NULL;
+ pl_inode_lock_t *reqlock = NULL;
+ pl_dom_list_t *dom = NULL;
+@@ -1024,13 +1027,13 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ /* fall through */
+
+ case F_SETLK:
++ lock_type = flock->l_type;
+ memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock));
+ ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom,
+ inode);
+
+ if (ret < 0) {
+- if ((can_block) && (F_UNLCK != flock->l_type)) {
+- pl_trace_block(this, frame, fd, loc, cmd, flock, volume);
++ if ((can_block) && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 50f1265..7887b82 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -2557,6 +2557,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ uint32_t lk_flags = 0;
+ posix_locks_private_t *priv = this->private;
+ pl_local_t *local = NULL;
++ short lock_type = 0;
+
+ int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags);
+ if (ret == 0) {
+@@ -2701,6 +2702,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ case F_SETLK:
+ reqlock->frame = frame;
+ reqlock->this = this;
++ lock_type = flock->l_type;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+@@ -2738,8 +2740,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+
+ ret = pl_setlk(this, pl_inode, reqlock, can_block);
+ if (ret == -1) {
+- if ((can_block) && (F_UNLCK != flock->l_type)) {
+- pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL);
++ if ((can_block) && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
+diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
+index 51076d7..604691f 100644
+--- a/xlators/features/locks/src/reservelk.c
++++ b/xlators/features/locks/src/reservelk.c
+@@ -312,8 +312,6 @@ grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode)
+ ret = pl_setlk(this, pl_inode, lock, can_block);
+ if (ret == -1) {
+ if (can_block) {
+- pl_trace_block(this, lock->frame, fd, NULL, cmd,
+- &lock->user_flock, NULL);
+ continue;
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
+--
+1.8.3.1
+
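The rule this patch enforces: once pl_setlk()/pl_inode_setlk() may have queued the request on the blocked list, another thread can grant, unwind and free it, so the caller copies any field it still needs (the lock type) into a local beforehand, and the blocking trace is emitted inside the locked region before the hand-off. A minimal sketch of that copy-before-handoff discipline, assuming hypothetical types:

    #include <stdio.h>
    #include <stdlib.h>

    struct lock_req {
        int type; /* e.g. F_RDLCK or F_UNLCK */
    };

    /* Simulates the request being queued and then granted, unwound and
     * freed by another thread: ownership of req transfers here. */
    static int try_lock_or_enqueue(struct lock_req *req) {
        free(req);
        return -1; /* "blocked": the caller must not touch req anymore */
    }

    static int do_lock(struct lock_req *req) {
        int type = req->type; /* cache BEFORE the hand-off ... */

        int ret = try_lock_or_enqueue(req);
        if (ret < 0) {
            /* ... because reading req->type here would be exactly the
             * use-after-free the patch removes. */
            printf("blocked, cached type=%d\n", type);
        }
        return ret;
    }

    int main(void) {
        struct lock_req *req = malloc(sizeof(*req));
        if (!req)
            return 1;
        req->type = 1;
        do_lock(req); /* -1 here just means "queued", not an error */
        return 0;
    }
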
diff --git a/0455-locks-prevent-deletion-of-locked-entries.patch b/0455-locks-prevent-deletion-of-locked-entries.patch
new file mode 100644
index 0000000..5960690
--- /dev/null
+++ b/0455-locks-prevent-deletion-of-locked-entries.patch
@@ -0,0 +1,1253 @@
+From 3f6ff474db3934f43d9963dfe4dda7d201211e75 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 12 Jun 2020 00:06:36 +0200
+Subject: [PATCH 455/456] locks: prevent deletion of locked entries
+
+To keep consistency inside transactions started by locking an entry or
+an inode, this change delays the removal of entries that are currently
+locked by one or more clients. Once all locks are released, the removal
+is processed.
+
+The detection of stale inodes in the locking code of EC has also
+been improved.
+
+>Upstream patch - https://review.gluster.org/#/c/glusterfs/+/20025/
+>Fixes: #990
+
+Change-Id: Ic8ba23d9480f80c7f74e7a310bf8a15922320fd5
+BUG: 1812789
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/206442
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/cluster/ec/src/ec-locks.c | 69 ++++++--
+ xlators/features/locks/src/common.c | 316 ++++++++++++++++++++++++++++++++++-
+ xlators/features/locks/src/common.h | 43 +++++
+ xlators/features/locks/src/entrylk.c | 19 +--
+ xlators/features/locks/src/inodelk.c | 150 ++++++++++-------
+ xlators/features/locks/src/locks.h | 23 ++-
+ xlators/features/locks/src/posix.c | 183 ++++++++++++++++++--
+ 7 files changed, 689 insertions(+), 114 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
+index ffcac07..db86296 100644
+--- a/xlators/cluster/ec/src/ec-locks.c
++++ b/xlators/cluster/ec/src/ec-locks.c
+@@ -28,9 +28,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
+ ec_t *ec = fop->xl->private;
+ ec_cbk_data_t *ans = NULL;
+ ec_cbk_data_t *cbk = NULL;
+- uintptr_t locked = 0, notlocked = 0;
++ uintptr_t locked = 0;
++ int32_t good = 0;
++ int32_t eagain = 0;
++ int32_t estale = 0;
+ int32_t error = -1;
+
++ /* There are some errors that we'll handle in a special way while trying
++ * to acquire a lock.
++ *
++ * EAGAIN: If it's found during a parallel non-blocking lock request, we
++ * consider that there's contention on the inode, so we consider
++ * the acquisition a failure and try again with a sequential
++ * blocking lock request. This will ensure that we get a lock on
++ * as many bricks as possible (ignoring EAGAIN here would cause
++ * unnecessary triggers of self-healing).
++ *
++ * If it's found during a sequential blocking lock request, it's
++ * considered an error. Lock will only succeed if there are
++ * enough other bricks locked.
++ *
++ * ESTALE: This can appear during parallel or sequential lock request if
++ * the inode has just been unlinked. We consider this error is
++ * not recoverable, but we also don't consider it as fatal. So,
++ * if it happens during parallel lock, we won't attempt a
++ * sequential one unless there are EAGAIN errors on other
++ * bricks (and are enough to form a quorum), but if we reach
++ * quorum counting the ESTALE bricks, we consider the whole
++ * result of the operation is ESTALE instead of EIO.
++ */
++
+ list_for_each_entry(ans, &fop->cbk_list, list)
+ {
+ if (ans->op_ret >= 0) {
+@@ -38,24 +65,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
+ error = EIO;
+ }
+ locked |= ans->mask;
++ good = ans->count;
+ cbk = ans;
+- } else {
+- if (ans->op_errno == EAGAIN) {
+- switch (fop->uint32) {
+- case EC_LOCK_MODE_NONE:
+- case EC_LOCK_MODE_ALL:
+- /* Goal is to treat non-blocking lock as failure
+- * even if there is a single EAGAIN*/
+- notlocked |= ans->mask;
+- break;
+- }
+- }
++ } else if (ans->op_errno == ESTALE) {
++ estale += ans->count;
++ } else if ((ans->op_errno == EAGAIN) &&
++ (fop->uint32 != EC_LOCK_MODE_INC)) {
++ eagain += ans->count;
+ }
+ }
+
+ if (error == -1) {
+- if (gf_bits_count(locked | notlocked) >= ec->fragments) {
+- if (notlocked == 0) {
++ /* If we have enough quorum with succeeded and EAGAIN answers, we
++ * ignore for now any ESTALE answer. If there are EAGAIN answers,
++ * we retry with a sequential blocking lock request if needed.
++ * Otherwise we succeed. */
++ if ((good + eagain) >= ec->fragments) {
++ if (eagain == 0) {
+ if (fop->answer == NULL) {
+ fop->answer = cbk;
+ }
+@@ -68,21 +94,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
+ case EC_LOCK_MODE_NONE:
+ error = EAGAIN;
+ break;
+-
+ case EC_LOCK_MODE_ALL:
+ fop->uint32 = EC_LOCK_MODE_INC;
+ break;
+-
+ default:
++ /* This shouldn't happen because eagain cannot be > 0
++ * when fop->uint32 is EC_LOCK_MODE_INC. */
+ error = EIO;
+ break;
+ }
+ }
+ } else {
+- if (fop->answer && fop->answer->op_ret < 0)
++ /* We have been unable to find enough candidates that will be able
++ * to take the lock. If we have quorum on some answer, we return
++ * it. Otherwise we check if ESTALE answers allow us to reach
++ * quorum. If so, we return ESTALE. */
++ if (fop->answer && fop->answer->op_ret < 0) {
+ error = fop->answer->op_errno;
+- else
++ } else if ((good + eagain + estale) >= ec->fragments) {
++ error = ESTALE;
++ } else {
+ error = EIO;
++ }
+ }
+ }
+
+diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
+index 1406e70..0c52853 100644
+--- a/xlators/features/locks/src/common.c
++++ b/xlators/features/locks/src/common.c
+@@ -462,11 +462,16 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
+ INIT_LIST_HEAD(&pl_inode->blocked_calls);
+ INIT_LIST_HEAD(&pl_inode->metalk_list);
+ INIT_LIST_HEAD(&pl_inode->queued_locks);
++ INIT_LIST_HEAD(&pl_inode->waiting);
+ gf_uuid_copy(pl_inode->gfid, inode->gfid);
+
+ pl_inode->check_mlock_info = _gf_true;
+ pl_inode->mlock_enforced = _gf_false;
+
++ /* -2 means never looked up. -1 means something went wrong and link
++ * tracking is disabled. */
++ pl_inode->links = -2;
++
+ ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
+ if (ret) {
+ pthread_mutex_destroy(&pl_inode->mutex);
+@@ -1276,4 +1281,313 @@ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+ }
+
+ return 0;
+-}
+\ No newline at end of file
++}
++
++gf_boolean_t
++pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client)
++{
++ if (client && (client->opversion < GD_OP_VERSION_7_0)) {
++ return _gf_true;
++ }
++
++ if (is_lk_owner_null(owner)) {
++ return _gf_false;
++ }
++ return _gf_true;
++}
++
++static int32_t
++pl_inode_from_loc(loc_t *loc, inode_t **pinode)
++{
++ inode_t *inode = NULL;
++ int32_t error = 0;
++
++ if (loc->inode != NULL) {
++ inode = inode_ref(loc->inode);
++ goto done;
++ }
++
++ if (loc->parent == NULL) {
++ error = EINVAL;
++ goto done;
++ }
++
++ if (!gf_uuid_is_null(loc->gfid)) {
++ inode = inode_find(loc->parent->table, loc->gfid);
++ if (inode != NULL) {
++ goto done;
++ }
++ }
++
++ if (loc->name == NULL) {
++ error = EINVAL;
++ goto done;
++ }
++
++ inode = inode_grep(loc->parent->table, loc->parent, loc->name);
++ if (inode == NULL) {
++ /* We haven't found any inode. This means that the file doesn't exist
++ * or that even if it exists, we don't have any knowledge about it, so
++ * we don't have locks on it either, which is fine for our purposes. */
++ goto done;
++ }
++
++done:
++ *pinode = inode;
++
++ return error;
++}
++
++static gf_boolean_t
++pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode,
++ struct timespec *now, struct list_head *contend)
++{
++ pl_dom_list_t *dom;
++ pl_inode_lock_t *lock;
++ gf_boolean_t has_owners = _gf_false;
++
++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
++ {
++ list_for_each_entry(lock, &dom->inodelk_list, list)
++ {
++ /* If the lock belongs to the same client, we assume it's related
++ * to the same operation, so we allow the removal to continue. */
++ if (lock->client == client) {
++ continue;
++ }
++ /* If the lock belongs to an internal process, we don't block the
++ * removal. */
++ if (lock->client_pid < 0) {
++ continue;
++ }
++ if (contend == NULL) {
++ return _gf_true;
++ }
++ has_owners = _gf_true;
++ inodelk_contention_notify_check(xl, lock, now, contend);
++ }
++ }
++
++ return has_owners;
++}
++
++int32_t
++pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
++ pl_inode_t **ppl_inode, struct list_head *contend)
++{
++ struct timespec now;
++ inode_t *inode;
++ pl_inode_t *pl_inode;
++ int32_t error;
++
++ pl_inode = NULL;
++
++ error = pl_inode_from_loc(loc, &inode);
++ if ((error != 0) || (inode == NULL)) {
++ goto done;
++ }
++
++ pl_inode = pl_inode_get(xl, inode, NULL);
++ if (pl_inode == NULL) {
++ inode_unref(inode);
++ error = ENOMEM;
++ goto done;
++ }
++
++ /* pl_inode_from_loc() already increments ref count for inode, so
++ * we only assign here our reference. */
++ pl_inode->inode = inode;
++
++ timespec_now(&now);
++
++ pthread_mutex_lock(&pl_inode->mutex);
++
++ if (pl_inode->removed) {
++ error = ESTALE;
++ goto unlock;
++ }
++
++ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
++ error = -1;
++ /* We skip the unlock here because the caller must create a stub when
++ * we return -1 and do a call to pl_inode_remove_complete(), which
++ * assumes the lock is still acquired and will release it once
++ * everything else is prepared. */
++ goto done;
++ }
++
++ pl_inode->is_locked = _gf_true;
++ pl_inode->remove_running++;
++
++unlock:
++ pthread_mutex_unlock(&pl_inode->mutex);
++
++done:
++ *ppl_inode = pl_inode;
++
++ return error;
++}
++
++int32_t
++pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
++ struct list_head *contend)
++{
++ pl_inode_lock_t *lock;
++ int32_t error = -1;
++
++ if (stub != NULL) {
++ list_add_tail(&stub->list, &pl_inode->waiting);
++ pl_inode->is_locked = _gf_true;
++ } else {
++ error = ENOMEM;
++
++ while (!list_empty(contend)) {
++ lock = list_first_entry(contend, pl_inode_lock_t, list);
++ list_del_init(&lock->list);
++ __pl_inodelk_unref(lock);
++ }
++ }
++
++ pthread_mutex_unlock(&pl_inode->mutex);
++
++ if (error < 0) {
++ inodelk_contention_notify(xl, contend);
++ }
++
++ inode_unref(pl_inode->inode);
++
++ return error;
++}
++
++void
++pl_inode_remove_wake(struct list_head *list)
++{
++ call_stub_t *stub;
++
++ while (!list_empty(list)) {
++ stub = list_first_entry(list, call_stub_t, list);
++ list_del_init(&stub->list);
++
++ call_resume(stub);
++ }
++}
++
++void
++pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
++{
++ struct list_head contend, granted;
++ struct timespec now;
++ pl_dom_list_t *dom;
++
++ if (pl_inode == NULL) {
++ return;
++ }
++
++ INIT_LIST_HEAD(&contend);
++ INIT_LIST_HEAD(&granted);
++ timespec_now(&now);
++
++ pthread_mutex_lock(&pl_inode->mutex);
++
++ if (error == 0) {
++ if (pl_inode->links >= 0) {
++ pl_inode->links--;
++ }
++ if (pl_inode->links == 0) {
++ pl_inode->removed = _gf_true;
++ }
++ }
++
++ pl_inode->remove_running--;
++
++ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
++ pl_inode->is_locked = _gf_false;
++
++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
++ {
++ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
++ &contend);
++ }
++ }
++
++ pthread_mutex_unlock(&pl_inode->mutex);
++
++ unwind_granted_inodes(xl, pl_inode, &granted);
++
++ inodelk_contention_notify(xl, &contend);
++
++ inode_unref(pl_inode->inode);
++}
++
++void
++pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
++ struct list_head *list)
++{
++ call_stub_t *stub, *tmp;
++
++ if (!pl_inode->is_locked) {
++ return;
++ }
++
++ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list)
++ {
++ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL,
++ NULL)) {
++ list_move_tail(&stub->list, list);
++ }
++ }
++}
++
++/* This function determines if an inodelk attempt can be done now or it needs
++ * to wait.
++ *
++ * Possible return values:
++ * < 0: An error occurred. Currently only -ESTALE can be returned if the
++ * inode has been deleted previously by unlink/rmdir/rename
++ * = 0: The lock can be attempted.
++ * > 0: The lock needs to wait because a conflicting remove operation is
++ * ongoing.
++ */
++int32_t
++pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
++{
++ pl_dom_list_t *dom;
++ pl_inode_lock_t *ilock;
++
++ /* If the inode has been deleted, we won't allow any lock. */
++ if (pl_inode->removed) {
++ return -ESTALE;
++ }
++
++ /* We only synchronize with locks made for regular operations coming from
++ * the user. Locks done for internal purposes are hard to control and could
++ * lead to long delays or deadlocks quite easily. */
++ if (lock->client_pid < 0) {
++ return 0;
++ }
++ if (!pl_inode->is_locked) {
++ return 0;
++ }
++ if (pl_inode->remove_running > 0) {
++ return 1;
++ }
++
++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
++ {
++ list_for_each_entry(ilock, &dom->inodelk_list, list)
++ {
++ /* If a lock from the same client is already granted, we allow this
++ * one to continue. This is necessary to prevent deadlocks when
++ * multiple locks are taken for the same operation.
++ *
++ * On the other side it's unlikely that the same client sends
++ * completely unrelated locks for the same inode.
++ */
++ if (ilock->client == lock->client) {
++ return 0;
++ }
++ }
++ }
++
++ return 1;
++}
+diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
+index ea86b96..6c81ac3 100644
+--- a/xlators/features/locks/src/common.h
++++ b/xlators/features/locks/src/common.h
+@@ -105,6 +105,15 @@ void
+ __pl_inodelk_unref(pl_inode_lock_t *lock);
+
+ void
++__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
++ struct list_head *granted, pl_dom_list_t *dom,
++ struct timespec *now, struct list_head *contend);
++
++void
++unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
++ struct list_head *granted);
++
++void
+ grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend);
+@@ -204,6 +213,16 @@ pl_metalock_is_active(pl_inode_t *pl_inode);
+ void
+ __pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
++void
++inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock,
++ struct timespec *now,
++ struct list_head *contend);
++
++void
++entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock,
++ struct timespec *now,
++ struct list_head *contend);
++
+ gf_boolean_t
+ pl_does_monkey_want_stuck_lock();
+
+@@ -216,4 +235,28 @@ pl_clean_local(pl_local_t *local);
+ int
+ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd);
+
++gf_boolean_t
++pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client);
++
++int32_t
++pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
++ pl_inode_t **ppl_inode, struct list_head *contend);
++
++int32_t
++pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
++ struct list_head *contend);
++
++void
++pl_inode_remove_wake(struct list_head *list);
++
++void
++pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error);
++
++void
++pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
++ struct list_head *list);
++
++int32_t
++pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock);
++
+ #endif /* __COMMON_H__ */
+diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
+index 93c649c..b97836f 100644
+--- a/xlators/features/locks/src/entrylk.c
++++ b/xlators/features/locks/src/entrylk.c
+@@ -197,9 +197,9 @@ out:
+ return revoke_lock;
+ }
+
+-static gf_boolean_t
+-__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
+- struct timespec *now)
++void
++entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock,
++ struct timespec *now, struct list_head *contend)
+ {
+ posix_locks_private_t *priv;
+ int64_t elapsed;
+@@ -209,7 +209,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+- return _gf_false;
++ return;
+ }
+
+ elapsed = now->tv_sec;
+@@ -218,7 +218,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+- return _gf_false;
++ return;
+ }
+
+ /* All contention notifications will be sent outside of the locked
+@@ -231,7 +231,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
+
+ lock->contention_time = *now;
+
+- return _gf_true;
++ list_add_tail(&lock->contend, contend);
+ }
+
+ void
+@@ -325,9 +325,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
+ break;
+ }
+ }
+- if (__entrylk_needs_contention_notify(this, tmp, now)) {
+- list_add_tail(&tmp->contend, contend);
+- }
++ entrylk_contention_notify_check(this, tmp, now, contend);
+ }
+ }
+
+@@ -690,10 +688,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
+ bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend);
+
+ if (bl_ret == 0) {
+- list_add(&bl->blocked_locks, granted);
++ list_add_tail(&bl->blocked_locks, granted);
+ }
+ }
+- return;
+ }
+
+ /* Grants locks if possible which are blocked on a lock */
+diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
+index 24dee49..1a07243 100644
+--- a/xlators/features/locks/src/inodelk.c
++++ b/xlators/features/locks/src/inodelk.c
+@@ -231,9 +231,9 @@ out:
+ return revoke_lock;
+ }
+
+-static gf_boolean_t
+-__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
+- struct timespec *now)
++void
++inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock,
++ struct timespec *now, struct list_head *contend)
+ {
+ posix_locks_private_t *priv;
+ int64_t elapsed;
+@@ -243,7 +243,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+- return _gf_false;
++ return;
+ }
+
+ elapsed = now->tv_sec;
+@@ -252,7 +252,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+- return _gf_false;
++ return;
+ }
+
+ /* All contention notifications will be sent outside of the locked
+@@ -265,7 +265,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
+
+ lock->contention_time = *now;
+
+- return _gf_true;
++ list_add_tail(&lock->contend, contend);
+ }
+
+ void
+@@ -353,9 +353,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ break;
+ }
+ }
+- if (__inodelk_needs_contention_notify(this, l, now)) {
+- list_add_tail(&l->contend, contend);
+- }
++ inodelk_contention_notify_check(this, l, now, contend);
+ }
+ }
+
+@@ -435,12 +433,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
+ struct list_head *contend)
+ {
+ pl_inode_lock_t *conf = NULL;
+- int ret = -EINVAL;
++ int ret;
+
+- conf = __inodelk_grantable(this, dom, lock, now, contend);
+- if (conf) {
+- ret = __lock_blocked_add(this, dom, lock, can_block);
+- goto out;
++ ret = pl_inode_remove_inodelk(pl_inode, lock);
++ if (ret < 0) {
++ return ret;
++ }
++ if (ret == 0) {
++ conf = __inodelk_grantable(this, dom, lock, now, contend);
++ }
++ if ((ret > 0) || (conf != NULL)) {
++ return __lock_blocked_add(this, dom, lock, can_block);
+ }
+
+ /* To prevent blocked locks starvation, check if there are any blocked
+@@ -462,17 +465,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
+ "starvation");
+ }
+
+- ret = __lock_blocked_add(this, dom, lock, can_block);
+- goto out;
++ return __lock_blocked_add(this, dom, lock, can_block);
+ }
+ __pl_inodelk_ref(lock);
+ gettimeofday(&lock->granted_time, NULL);
+ list_add(&lock->list, &dom->inodelk_list);
+
+- ret = 0;
+-
+-out:
+- return ret;
++ return 0;
+ }
+
+ /* Return true if the two inodelks have exactly same lock boundaries */
+@@ -529,12 +528,11 @@ out:
+ return conf;
+ }
+
+-static void
++void
+ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend)
+ {
+- int bl_ret = 0;
+ pl_inode_lock_t *bl = NULL;
+ pl_inode_lock_t *tmp = NULL;
+
+@@ -547,52 +545,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ {
+ list_del_init(&bl->blocked_locks);
+
+- bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
++ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
+
+- if (bl_ret == 0) {
+- list_add(&bl->blocked_locks, granted);
++ if (bl->status != -EAGAIN) {
++ list_add_tail(&bl->blocked_locks, granted);
+ }
+ }
+- return;
+ }
+
+-/* Grant all inodelks blocked on a lock */
+ void
+-grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+- pl_dom_list_t *dom, struct timespec *now,
+- struct list_head *contend)
++unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
++ struct list_head *granted)
+ {
+- struct list_head granted;
+ pl_inode_lock_t *lock;
+ pl_inode_lock_t *tmp;
++ int32_t op_ret;
++ int32_t op_errno;
+
+- INIT_LIST_HEAD(&granted);
+-
+- pthread_mutex_lock(&pl_inode->mutex);
+- {
+- __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
+- contend);
+- }
+- pthread_mutex_unlock(&pl_inode->mutex);
+-
+- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
++ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
+ {
+- gf_log(this->name, GF_LOG_TRACE,
+- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted",
+- lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+- lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+- lock->user_flock.l_len);
+-
++ if (lock->status == 0) {
++ op_ret = 0;
++ op_errno = 0;
++ gf_log(this->name, GF_LOG_TRACE,
++ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
++ " => Granted",
++ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
++ lock->client_pid, lkowner_utoa(&lock->owner),
++ lock->user_flock.l_start, lock->user_flock.l_len);
++ } else {
++ op_ret = -1;
++ op_errno = -lock->status;
++ }
+ pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+- 0, 0, lock->volume);
++ op_ret, op_errno, lock->volume);
+
+- STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL);
++ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL);
+ lock->frame = NULL;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
++ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
+ {
+ list_del_init(&lock->blocked_locks);
+ __pl_inodelk_unref(lock);
+@@ -601,6 +595,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
++/* Grant all inodelks blocked on a lock */
++void
++grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
++ pl_dom_list_t *dom, struct timespec *now,
++ struct list_head *contend)
++{
++ struct list_head granted;
++
++ INIT_LIST_HEAD(&granted);
++
++ pthread_mutex_lock(&pl_inode->mutex);
++ {
++ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
++ contend);
++ }
++ pthread_mutex_unlock(&pl_inode->mutex);
++
++ unwind_granted_inodes(this, pl_inode, &granted);
++}
++
+ static void
+ pl_inodelk_log_cleanup(pl_inode_lock_t *lock)
+ {
+@@ -662,7 +676,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
+ * and blocked lists, then this means that a parallel
+ * unlock on another inodelk (L2 say) may have 'granted'
+ * L1 and added it to 'granted' list in
+- * __grant_blocked_node_locks() (although using the
++ * __grant_blocked_inode_locks() (although using the
+ * 'blocked_locks' member). In that case, the cleanup
+ * codepath must try and grant other overlapping
+ * blocked inodelks from other clients, now that L1 is
+@@ -747,6 +761,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ gf_boolean_t need_inode_unref = _gf_false;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
++ struct list_head wake;
+ struct timespec now = {};
+ short fl_type;
+
+@@ -798,6 +813,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ timespec_now(&now);
+ }
+
++ INIT_LIST_HEAD(&wake);
++
+ if (ctx)
+ pthread_mutex_lock(&ctx->lock);
+ pthread_mutex_lock(&pl_inode->mutex);
+@@ -820,18 +837,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+- if (can_block)
++ if (can_block) {
+ unref = _gf_false;
+- /* For all but the case where a non-blocking
+- * lock attempt fails, the extra ref taken at
+- * the start of this function must be negated.
+- */
+- else
+- need_inode_unref = _gf_true;
++ }
+ }
+-
+- if (ctx && (!ret || can_block))
++ /* For all but the case where a non-blocking lock attempt fails
++ * with -EAGAIN, the extra ref taken at the start of this function
++ * must be negated. */
++ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block);
++ if (ctx && !need_inode_unref) {
+ list_add_tail(&lock->client_list, &ctx->inodelk_lockers);
++ }
+ } else {
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done at the start of
+@@ -849,6 +865,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ list_del_init(&retlock->client_list);
+ __pl_inodelk_unref(retlock);
+
++ pl_inode_remove_unlocked(this, pl_inode, &wake);
++
+ ret = 0;
+ }
+ out:
+@@ -859,6 +877,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ if (ctx)
+ pthread_mutex_unlock(&ctx->lock);
+
++ pl_inode_remove_wake(&wake);
++
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+@@ -1033,10 +1053,14 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ inode);
+
+ if (ret < 0) {
+- if ((can_block) && (F_UNLCK != lock_type)) {
+- goto out;
++ if (ret == -EAGAIN) {
++ if (can_block && (F_UNLCK != lock_type)) {
++ goto out;
++ }
++ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
++ } else {
++ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret);
+ }
+- gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+ op_errno = -ret;
+ goto unwind;
+ }
+diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
+index aa267de..6666feb 100644
+--- a/xlators/features/locks/src/locks.h
++++ b/xlators/features/locks/src/locks.h
+@@ -102,6 +102,9 @@ struct __pl_inode_lock {
+
+ struct list_head client_list; /* list of all locks from a client */
+ short fl_type;
++
++ int32_t status; /* Error code when we try to grant a lock in blocked
++ state */
+ };
+ typedef struct __pl_inode_lock pl_inode_lock_t;
+
+@@ -164,13 +167,14 @@ struct __pl_inode {
+ struct list_head rw_list; /* list of waiting r/w requests */
+ struct list_head reservelk_list; /* list of reservelks */
+ struct list_head blocked_reservelks; /* list of blocked reservelks */
+- struct list_head
+- blocked_calls; /* List of blocked lock calls while a reserve is held*/
+- struct list_head metalk_list; /* Meta lock list */
+- /* This is to store the incoming lock
+- requests while meta lock is enabled */
+- struct list_head queued_locks;
+- int mandatory; /* if mandatory locking is enabled */
++ struct list_head blocked_calls; /* List of blocked lock calls while a
++ reserve is held*/
++ struct list_head metalk_list; /* Meta lock list */
++ struct list_head queued_locks; /* This is to store the incoming lock
++ requests while meta lock is enabled */
++ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir
++ the inode. */
++ int mandatory; /* if mandatory locking is enabled */
+
+ inode_t *refkeeper; /* hold refs on an inode while locks are
+ held to prevent pruning */
+@@ -197,6 +201,11 @@ struct __pl_inode {
+ */
+ int fop_wind_count;
+ pthread_cond_t check_fop_wind_count;
++
++ int32_t links; /* Number of hard links the inode has. */
++ uint32_t remove_running; /* Number of remove operations running. */
++ gf_boolean_t is_locked; /* Regular locks will be blocked. */
++ gf_boolean_t removed; /* The inode has been deleted. */
+ };
+ typedef struct __pl_inode pl_inode_t;
+
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 7887b82..5ae0125 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -147,6 +147,29 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
+ } \
+ } while (0)
+
++#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \
++ _args...) \
++ ({ \
++ struct list_head contend; \
++ pl_inode_t *__pl_inode; \
++ call_stub_t *__stub; \
++ int32_t __error; \
++ INIT_LIST_HEAD(&contend); \
++ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \
++ &__pl_inode, &contend); \
++ if (__error < 0) { \
++ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \
++ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \
++ &contend); \
++ } else if (__error == 0) { \
++ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \
++ _loc2); \
++ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \
++ FIRST_CHILD(_xl)->fops->_fop, ##_args); \
++ } \
++ __error; \
++ })
++
+ gf_boolean_t
+ pl_has_xdata_requests(dict_t *xdata)
+ {
+@@ -2969,11 +2992,85 @@ out:
+ return ret;
+ }
+
++static int32_t
++pl_request_link_count(dict_t **pxdata)
++{
++ dict_t *xdata;
++
++ xdata = *pxdata;
++ if (xdata == NULL) {
++ xdata = dict_new();
++ if (xdata == NULL) {
++ return ENOMEM;
++ }
++ } else {
++ dict_ref(xdata);
++ }
++
++ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
++ dict_unref(xdata);
++ return ENOMEM;
++ }
++
++ *pxdata = xdata;
++
++ return 0;
++}
++
++static int32_t
++pl_check_link_count(dict_t *xdata)
++{
++ int32_t count;
++
++ /* In case we are unable to read the link count from xdata, we take a
++ * conservative approach and return -2, which will prevent the inode from
++ * being considered deleted. In fact it will cause link tracking for this
++ * inode to be disabled completely to avoid races. */
++
++ if (xdata == NULL) {
++ return -2;
++ }
++
++ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
++ return -2;
++ }
++
++ return count;
++}
++
+ int32_t
+ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+ {
++ pl_inode_t *pl_inode;
++
++ if (op_ret >= 0) {
++ pl_inode = pl_inode_get(this, inode, NULL);
++ if (pl_inode == NULL) {
++ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
++ NULL);
++ return 0;
++ }
++
++ pthread_mutex_lock(&pl_inode->mutex);
++
++ /* We only update the link count if we previously didn't know it.
++ * Doing it always can lead to races since lookup is not executed
++ * atomically most of the times. */
++ if (pl_inode->links == -2) {
++ pl_inode->links = pl_check_link_count(xdata);
++ if (buf->ia_type == IA_IFDIR) {
++ /* Directories have at least 2 links. To avoid special handling
++ * for directories, we simply decrement the value here to make
++ * them equivalent to regular files. */
++ pl_inode->links--;
++ }
++ }
++
++ pthread_mutex_unlock(&pl_inode->mutex);
++ }
++
+ PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+@@ -2982,9 +3079,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t
+ pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ {
+- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->lookup, loc, xdata);
++ int32_t error;
++
++ error = pl_request_link_count(&xdata);
++ if (error == 0) {
++ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
++ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->lookup, loc, xdata);
++ dict_unref(xdata);
++ } else {
++ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
++ }
+ return 0;
+ }
+
+@@ -3792,6 +3897,10 @@ unlock:
+ gf_proc_dump_write("posixlk-count", "%d", count);
+ __dump_posixlks(pl_inode);
+ }
++
++ gf_proc_dump_write("links", "%d", pl_inode->links);
++ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
++ gf_proc_dump_write("removed", "%u", pl_inode->removed);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+@@ -4137,8 +4246,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ struct iatt *postoldparent, struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+ {
++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
++
+ PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
++
+ return 0;
+ }
+
+@@ -4146,10 +4258,15 @@ int32_t
+ pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+ {
+- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
++ int32_t error;
++
++ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename,
++ pl_rename_cbk, oldloc, newloc, xdata);
++ if (error > 0) {
++ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL,
++ NULL, NULL);
++ }
+
+- STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+ }
+
+@@ -4273,8 +4390,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+ {
++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
++
+ PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
++
+ return 0;
+ }
+
+@@ -4282,9 +4402,14 @@ int32_t
+ pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+ {
+- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+- STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
++ int32_t error;
++
++ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink,
++ pl_unlink_cbk, loc, xflag, xdata);
++ if (error > 0) {
++ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL);
++ }
++
+ return 0;
+ }
+
+@@ -4351,8 +4476,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+ {
++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
++
+ PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
++
+ return 0;
+ }
+
+@@ -4360,9 +4488,14 @@ int
+ pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+ {
+- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+- STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
++ int32_t error;
++
++ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir,
++ pl_rmdir_cbk, loc, xflags, xdata);
++ if (error > 0) {
++ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL);
++ }
++
+ return 0;
+ }
+
+@@ -4392,6 +4525,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+ {
++ pl_inode_t *pl_inode = (pl_inode_t *)cookie;
++
++ if (op_ret >= 0) {
++ pthread_mutex_lock(&pl_inode->mutex);
++
++ /* TODO: can pl_inode->links == 0 happen? */
++ if (pl_inode->links >= 0) {
++ pl_inode->links++;
++ }
++
++ pthread_mutex_unlock(&pl_inode->mutex);
++ }
++
+ PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+@@ -4401,9 +4547,18 @@ int
+ pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+ {
++ pl_inode_t *pl_inode;
++
++ pl_inode = pl_inode_get(this, oldloc->inode, NULL);
++ if (pl_inode == NULL) {
++ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
++ NULL);
++ return 0;
++ }
++
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
+- STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
++ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+ }
+
+--
+1.8.3.1
+
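Structurally, the patch above makes unlink/rmdir/rename three-phase: prepare (if other clients hold inodelks, park the operation as a call stub and return), wind (no foreign owners, so mark the removal as running and forward the fop), and wake (on unlock, resume parked stubs whose blockers are gone). The compact sketch below models that park-and-resume discipline with a plain mutex and a fixed-size stub queue; the names are illustrative, not the pl_* API.

    #include <pthread.h>
    #include <stdio.h>

    #define MAX_WAITERS 8

    struct inode_state {
        pthread_mutex_t mtx;
        int foreign_owners;               /* granted locks of other clients */
        int n_waiting;
        const char *waiting[MAX_WAITERS]; /* parked removals ("stubs") */
    };

    /* Phases 1+2: either run the removal now or park it until unlock. */
    static void remove_entry(struct inode_state *st, const char *op) {
        pthread_mutex_lock(&st->mtx);
        if (st->foreign_owners > 0) {
            st->waiting[st->n_waiting++] = op; /* park as a stub */
            pthread_mutex_unlock(&st->mtx);
            printf("%s parked (inode locked)\n", op);
            return;
        }
        pthread_mutex_unlock(&st->mtx);
        printf("%s executed\n", op);
    }

    /* Phase 3: on unlock, wake parked removals outside the mutex. */
    static void unlock_inode(struct inode_state *st) {
        const char *wake[MAX_WAITERS];
        int n = 0;
        pthread_mutex_lock(&st->mtx);
        if (--st->foreign_owners == 0) {
            while (st->n_waiting > 0)
                wake[n++] = st->waiting[--st->n_waiting];
        }
        pthread_mutex_unlock(&st->mtx);
        while (n > 0)
            printf("%s resumed\n", wake[--n]); /* call_resume() in reality */
    }

    int main(void) {
        struct inode_state st = {PTHREAD_MUTEX_INITIALIZER, 1, 0, {0}};
        remove_entry(&st, "unlink(file)"); /* parks: one foreign lock held */
        unlock_inode(&st);                 /* releases, resumes the unlink */
        return 0;
    }
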
diff --git a/0456-add-clean-local-after-grant-lock.patch b/0456-add-clean-local-after-grant-lock.patch
new file mode 100644
index 0000000..6b8210b
--- /dev/null
+++ b/0456-add-clean-local-after-grant-lock.patch
@@ -0,0 +1,74 @@
+From c38b38249fdf951565f6501ce8e9a4d01142d43e Mon Sep 17 00:00:00 2001
+From: l17zhou <cynthia.zhou@nokia-sbell.com>
+Date: Tue, 3 Dec 2019 07:43:35 +0200
+Subject: [PATCH 456/456] add clean local after grant lock
+
+Found by the flock test: without a correct fd ref count, the
+lock will not be correctly released.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23794
+> Fixes: bz#1779089
+> Change-Id: I3e466b17c852eb219c8778e43af8ad670a8449cc
+> Signed-off-by: l17zhou <cynthia.zhou@nokia-sbell.com>
+
+BUG: 1854165
+Change-Id: I3e466b17c852eb219c8778e43af8ad670a8449cc
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/206673
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/locks/src/common.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
+index 0c52853..cddbfa6 100644
+--- a/xlators/features/locks/src/common.c
++++ b/xlators/features/locks/src/common.c
+@@ -961,7 +961,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
+-
++ pl_local_t *local = NULL;
+ INIT_LIST_HEAD(&granted_list);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+@@ -976,9 +976,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
+
+ pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ 0, 0, NULL);
+-
+- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
+-
++ local = lock->frame->local;
++ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
++ &lock->user_flock, NULL);
+ __destroy_lock(lock);
+ }
+
+@@ -997,6 +997,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
++ pl_local_t *local = NULL;
+
+ int ret = -1;
+
+@@ -1024,9 +1025,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
+
+ pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ 0, 0, NULL);
+-
+- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
+-
++ local = lock->frame->local;
++ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
++ &lock->user_flock, NULL);
+ __destroy_lock(lock);
+ }
+
+--
+1.8.3.1
+
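The leak fixed above comes from unwinding granted blocked locks with STACK_UNWIND_STRICT alone, which never released the pl_local_t hanging off lock->frame->local and so kept an fd reference alive; PL_STACK_UNWIND_AND_FREE unwinds and frees it in one step. A small sketch of the underlying rule, with hypothetical types: every path that retires a frame must also release the per-frame context it owns.

    #include <stdio.h>
    #include <stdlib.h>

    struct local { int fd_refs; };         /* per-request context (pl_local_t) */
    struct frame { struct local *local; }; /* call frame */

    static void local_free(struct local *l) {
        if (!l)
            return;
        l->fd_refs--; /* drop the fd ref taken when the request was wound */
        printf("local freed, fd_refs now %d\n", l->fd_refs);
        free(l);
    }

    /* Unwind that also frees the per-frame context: the moral
     * equivalent of PL_STACK_UNWIND_AND_FREE in the patch. */
    static void unwind_and_free(struct frame *f, int op_ret) {
        printf("unwound with op_ret=%d\n", op_ret);
        local_free(f->local);
        f->local = NULL;
    }

    int main(void) {
        struct frame f;
        f.local = calloc(1, sizeof(*f.local));
        if (!f.local)
            return 1;
        f.local->fd_refs = 1; /* ref taken at wind time */
        /* a blocked lock just got granted on the unlock path: */
        unwind_and_free(&f, 0);
        return 0;
    }
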
diff --git a/0457-cluster-ec-Improve-detection-of-new-heals.patch b/0457-cluster-ec-Improve-detection-of-new-heals.patch
new file mode 100644
index 0000000..be9202a
--- /dev/null
+++ b/0457-cluster-ec-Improve-detection-of-new-heals.patch
@@ -0,0 +1,409 @@
+From 3e8b3a2c2c6f83635486035fc8040c87d89813d2 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Thu, 2 Jul 2020 18:08:52 +0200
+Subject: [PATCH 457/465] cluster/ec: Improve detection of new heals
+
+When EC successfully healed a directory it assumed that maybe other
+entries inside that directory could have been created, which could
+require additional heal cycles. For this reason, when the heal happened
+as part of one index heal iteration, it triggered a new iteration.
+
+The problem happened when the directory was healthy, so no new entries
+were added, but its index entry was not removed for some reason. In
+this case self-heal entered an endless loop, healing the same directory
+continuously and causing high CPU utilization.
+
+This patch improves detection of new files added to the heal index so
+that a new index heal iteration is only triggered if there is new work
+to do.
+
+>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24665/
+>Fixes: #1354
+
+Change-Id: I2355742b85fbfa6de758bccc5d2e1a283c82b53f
+BUG: 1852736
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/208041
+Tested-by: Ashish Pandey <aspandey@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ashish Pandey <aspandey@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 2 +-
+ xlators/cluster/ec/src/ec-heal.c | 58 +++++++++++++++++++++++-----------
+ xlators/cluster/ec/src/ec-heald.c | 24 ++++++++++----
+ xlators/cluster/ec/src/ec-inode-read.c | 27 ++++++++++++++--
+ xlators/cluster/ec/src/ec-types.h | 4 +--
+ xlators/cluster/ec/src/ec.h | 1 +
+ 6 files changed, 86 insertions(+), 30 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index e580bfb..e3f8769 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
+ int32_t
+ ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
+- uintptr_t bad, dict_t *xdata)
++ uintptr_t bad, uint32_t pending, dict_t *xdata)
+ {
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 06a7016..e2de879 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -72,6 +72,7 @@ struct ec_name_data {
+ char *name;
+ inode_t *parent;
+ default_args_cbk_t *replies;
++ uint32_t heal_pending;
+ };
+
+ static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
+@@ -996,6 +997,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
+ ret = -ENOTCONN;
+ goto out;
+ }
++
+ out:
+ if (xattr)
+ dict_unref(xattr);
+@@ -1164,6 +1166,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ dict_t *xdata = NULL;
+ char *linkname = NULL;
+ ec_config_t config;
++
+ /* There should be just one gfid key */
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ if (gfid_db->count != 1) {
+@@ -1408,6 +1411,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+
+ ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
+ participants);
++ if (ret >= 0) {
++ /* If ec_create_name() succeeded we return 1 to indicate that a new
++ * file has been created and it will need to be healed. */
++ ret = 1;
++ }
+ out:
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+@@ -1485,18 +1493,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
+ name_on);
+
+- if (ret < 0)
++ if (ret < 0) {
+ memset(name_on, 0, ec->nodes);
++ } else {
++ name_data->heal_pending += ret;
++ }
+
+ for (i = 0; i < ec->nodes; i++)
+ if (name_data->participants[i] && !name_on[i])
+ name_data->failed_on[i] = 1;
++
+ return 0;
+ }
+
+ int
+ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
+- unsigned char *participants)
++ unsigned char *participants, uint32_t *pending)
+ {
+ int i = 0;
+ int j = 0;
+@@ -1509,7 +1521,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ name_data.frame = frame;
+ name_data.participants = participants;
+ name_data.failed_on = alloca0(ec->nodes);
+- ;
++ name_data.heal_pending = 0;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!participants[i])
+@@ -1528,6 +1540,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ break;
+ }
+ }
++ *pending += name_data.heal_pending;
++
+ loc_wipe(&loc);
+ return ret;
+ }
+@@ -1535,7 +1549,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ int
+ __ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *heal_on, unsigned char *sources,
+- unsigned char *healed_sinks)
++ unsigned char *healed_sinks, uint32_t *pending)
+ {
+ unsigned char *locked_on = NULL;
+ unsigned char *output = NULL;
+@@ -1580,7 +1594,7 @@ unlock:
+ if (sources[i] || healed_sinks[i])
+ participants[i] = 1;
+ }
+- ret = ec_heal_names(frame, ec, inode, participants);
++ ret = ec_heal_names(frame, ec, inode, participants, pending);
+
+ if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
+ goto out;
+@@ -1601,7 +1615,8 @@ out:
+
+ int
+ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
+- unsigned char *sources, unsigned char *healed_sinks)
++ unsigned char *sources, unsigned char *healed_sinks,
++ uint32_t *pending)
+ {
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+@@ -1632,7 +1647,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ goto unlock;
+ }
+ ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
+- healed_sinks);
++ healed_sinks, pending);
+ }
+ unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+@@ -1953,14 +1968,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
+ (heal->good | heal->bad), heal->good, heal->bad,
+- NULL);
++ 0, NULL);
+ }
+
+ return EC_STATE_END;
+ case -EC_STATE_REPORT:
+ if (fop->cbks.heal) {
+- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
+- 0, 0, NULL);
++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
++ fop->error, 0, 0, 0, 0, NULL);
+ }
+
+ return EC_STATE_END;
+@@ -1997,14 +2012,15 @@ out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+- func(frame, NULL, this, -1, error, 0, 0, 0, NULL);
++ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
+ }
+ }
+
+ int32_t
+ ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask,
+- uintptr_t good, uintptr_t bad, dict_t *xdata)
++ uintptr_t good, uintptr_t bad, uint32_t pending,
++ dict_t *xdata)
+ {
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+@@ -2489,6 +2505,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ intptr_t mbad = 0;
+ intptr_t good = 0;
+ intptr_t bad = 0;
++ uint32_t pending = 0;
+ ec_fop_data_t *fop = data;
+ gf_boolean_t blocking = _gf_false;
+ ec_heal_need_t need_heal = EC_HEAL_NONEED;
+@@ -2524,7 +2541,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ if (loc->name && strlen(loc->name)) {
+ ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
+ participants);
+- if (ret == 0) {
++ if (ret >= 0) {
+ gf_msg_debug(this->name, 0,
+ "%s: name heal "
+ "successful on %" PRIXPTR,
+@@ -2542,7 +2559,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+
+ /* Mount triggers heal only when it detects that it must need heal, shd
+ * triggers heals periodically which need not be thorough*/
+- if (ec->shd.iamshd) {
++ if (ec->shd.iamshd && (ret <= 0)) {
+ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+ &need_heal);
+
+@@ -2552,13 +2569,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ goto out;
+ }
+ }
++
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ if (IA_ISREG(loc->inode->ia_type)) {
+ ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
+ healed_sinks);
+ } else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
+- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
++ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
++ &pending);
+ } else {
+ ret = 0;
+ memcpy(sources, participants, ec->nodes);
+@@ -2588,10 +2607,11 @@ out:
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
+ ec_char_array_to_mask(participants, ec->nodes),
+- mgood & good, mbad & bad, NULL);
++ mgood & good, mbad & bad, pending, NULL);
+ }
+ if (frame)
+ STACK_DESTROY(frame->root);
++
+ return;
+ }
+
+@@ -2638,8 +2658,8 @@ void
+ ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
+ {
+ if (fop->cbks.heal) {
+- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0,
+- NULL);
++ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
++ 0, 0, NULL);
+ }
+ ec_fop_data_release(fop);
+ }
+@@ -2826,7 +2846,7 @@ fail:
+ if (fop)
+ ec_fop_data_release(fop);
+ if (func)
+- func(frame, NULL, this, -1, err, 0, 0, 0, NULL);
++ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
+ }
+
+ int
+diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
+index cba111a..4f4b6aa 100644
+--- a/xlators/cluster/ec/src/ec-heald.c
++++ b/xlators/cluster/ec/src/ec-heald.c
+@@ -156,15 +156,27 @@ int
+ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
+ gf_boolean_t full)
+ {
++ dict_t *xdata = NULL;
++ uint32_t count;
+ int32_t ret;
+
+- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
+- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
++ ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata);
++ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
+ /* If we have just healed a directory, it's possible that
+- * other index entries have appeared to be healed. We put a
+- * mark so that we can check it later and restart a scan
+- * without delay. */
+- healer->rerun = _gf_true;
++ * other index entries that need healing have appeared. */
++ if ((xdata != NULL) &&
++ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
++ (count > 0)) {
++ /* Force a rerun of the index healer. */
++ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
++ count);
++
++ healer->rerun = _gf_true;
++ }
++ }
++
++ if (xdata != NULL) {
++ dict_unref(xdata);
+ }
+
+ return ret;
+diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
+index f87a94a..e82e8f6 100644
+--- a/xlators/cluster/ec/src/ec-inode-read.c
++++ b/xlators/cluster/ec/src/ec-inode-read.c
+@@ -393,7 +393,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
+ int32_t
+ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask,
+- uintptr_t good, uintptr_t bad, dict_t *xdata)
++ uintptr_t good, uintptr_t bad, uint32_t pending,
++ dict_t *xdata)
+ {
+ ec_fop_data_t *fop = cookie;
+ fop_getxattr_cbk_t func = fop->data;
+@@ -402,6 +403,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
+ char *str;
+ char bin1[65], bin2[65];
+
++ /* We try to return the 'pending' information in xdata, but if this cannot
++ * be set, we will ignore it silently. We prefer to report the success or
++ * failure of the heal itself. */
++ if (xdata == NULL) {
++ xdata = dict_new();
++ } else {
++ dict_ref(xdata);
++ }
++ if (xdata != NULL) {
++ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
++ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
++ * enforces to check the result in this case. However we don't
++ * really care if it succeeded or not. We'll just do the same.
++ *
++ * This empty 'if' avoids the warning, and it will be removed by
++ * the optimizer. */
++ }
++ }
++
+ if (op_ret >= 0) {
+ dict = dict_new();
+ if (dict == NULL) {
+@@ -435,11 +455,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
+ }
+
+ out:
+- func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
++ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
+
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
++ if (xdata != NULL) {
++ dict_unref(xdata);
++ }
+
+ return 0;
+ }
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index 34a9768..f15429d 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -186,10 +186,10 @@ struct _ec_inode {
+
+ typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
+ int32_t, uintptr_t, uintptr_t, uintptr_t,
+- dict_t *);
++ uint32_t, dict_t *);
+ typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
+ int32_t, uintptr_t, uintptr_t, uintptr_t,
+- dict_t *);
++ uint32_t, dict_t *);
+
+ union _ec_cbk {
+ fop_access_cbk_t access;
+diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
+index 1b210d9..6f6de6d 100644
+--- a/xlators/cluster/ec/src/ec.h
++++ b/xlators/cluster/ec/src/ec.h
+@@ -18,6 +18,7 @@
+ #define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
+ #define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
+ #define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
++#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
+ #define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
+ #define EC_STRIPE_CACHE_MAX_SIZE 10
+ #define EC_VERSION_SIZE 2
+--
+1.8.3.1
+
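The control flow introduced by the patch above can be restated simply: ec_heal_name() now returns 1 when it created a new entry, the counts are accumulated into a `pending` total, the total travels back to the self-heal daemon under the EC_XATTR_HEAL_NEW key, and the index healer reruns only when it is non-zero. Below is a hedged, self-contained miniature of that loop; `heal_directory` and `struct healer` are illustrative stand-ins, not glusterfs APIs.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct healer {
    bool rerun; /* stand-in for subvol_healer->rerun */
};

/* Stand-in for a directory heal: returns how many NEW entries it created
 * (each needing its own heal), 0 when the directory was already healthy.
 * In the real patch this count travels back via EC_XATTR_HEAL_NEW. */
static uint32_t
heal_directory(const char *path)
{
    (void)path;
    return 0; /* healthy directory: nothing new was created */
}

static void
index_heal_iteration(struct healer *h, const char *dir)
{
    uint32_t pending = heal_directory(dir);

    /* Old behaviour: unconditionally set h->rerun after healing any
     * directory, which loops forever on a healthy directory whose index
     * entry was never removed. New behaviour: rerun only when there is
     * demonstrably new work. */
    if (pending > 0) {
        printf("%u more entries to heal, rescheduling\n", pending);
        h->rerun = true;
    }
}

int main(void)
{
    struct healer h = {.rerun = false};
    index_heal_iteration(&h, "/bricks/b1/.glusterfs/indices/xattrop");
    printf("rerun=%d\n", h.rerun);
    return 0;
}
```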
diff --git a/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch b/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch
new file mode 100644
index 0000000..b7b9f04
--- /dev/null
+++ b/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch
@@ -0,0 +1,182 @@
+From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
+From: Raghavendra Bhat <raghavendra@redhat.com>
+Date: Mon, 15 Apr 2019 14:09:34 -0400
+Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
+ cancelling the signer thread
+
+When the bit-rot feature is disabled, the signer thread from the bit-rot-stub
+xlator (the thread which performs the setxattr of the signature onto the
+disk) is cancelled. But, if the cancelled signer thread had already held
+the mutex (&priv->lock) which it uses to monitor the queue of files to
+be signed, then the mutex is never released. This creates problems in
+the future when the feature is enabled again. Both the new instance of the
+signer thread and the regular thread which enqueues the files to be
+signed will be blocked on this mutex.
+
+So, as part of cancelling the signer thread, unlock the mutex associated
+with it as well using pthread_cleanup_push and pthread_cleanup_pop.
+
+Upstream patch:
+ > patch: https://review.gluster.org/22572
+ > fixes: #bz1700078
+ > Change-Id: Ib761910caed90b268e69794ddeb108165487af40
+
+Change-Id: Ib761910caed90b268e69794ddeb108165487af40
+BUG: 1851424
+Signed-off-by: Raghavendra M <raghavendra@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bit-rot/src/stub/bit-rot-stub-messages.h | 4 +-
+ xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 +++++++++++++++++++---
+ 2 files changed, 59 insertions(+), 7 deletions(-)
+
+diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+index 7f07f29..155802b 100644
+--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
+ BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
+ BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
+- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
++ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
++ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
++ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
+
+ #endif /* !_BITROT_STUB_MESSAGES_H_ */
+diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+index 3f48a4b..c3f81bc 100644
+--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+@@ -26,6 +26,15 @@
+
+ #define BR_STUB_REQUEST_COOKIE 0x1
+
++void
++br_stub_lock_cleaner(void *arg)
++{
++ pthread_mutex_t *clean_mutex = arg;
++
++ pthread_mutex_unlock(clean_mutex);
++ return;
++}
++
+ void *
+ br_stub_signth(void *);
+
+@@ -166,8 +175,11 @@ init(xlator_t *this)
+
+ ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
+ "brssign");
+- if (ret != 0)
++ if (ret != 0) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
++ "failed to create the new thread for signer");
+ goto cleanup_lock;
++ }
+
+ ret = br_stub_bad_object_container_init(this, priv);
+ if (ret) {
+@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
+ priv = this->private;
+
+ GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
+- if (priv->do_versioning) {
++ if (priv->do_versioning && !priv->signth) {
+ ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
+ "brssign");
+- if (ret != 0)
++ if (ret != 0) {
++ gf_msg(this->name, GF_LOG_WARNING, 0,
++ BRS_MSG_SPAWN_SIGN_THRD_FAILED,
++ "failed to create the new thread for signer");
+ goto err;
++ }
+
+ ret = br_stub_bad_object_container_init(this, priv);
+ if (ret) {
+@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+ "Could not cancel sign serializer thread");
++ } else {
++ gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
++ "killed the signer thread");
++ priv->signth = 0;
+ }
+- priv->signth = 0;
+ }
+
+ if (priv->container.thread) {
+@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
+
+ THIS = this;
+ while (1) {
++ /*
++ * Disabling bit-rot feature leads to this particular thread
++ * getting cleaned up by reconfigure via a call to the function
++ * gf_thread_cleanup_xint (which in turn calls pthread_cancel
++ * and pthread_join). But, if this thread had held the mutex
++ * &priv->lock at the time of cancellation, then it leads to a
++ * deadlock in the future when the bit-rot feature is enabled (which
++ * again spawns this thread, which can't hold the lock as the
++ * mutex is still held by the previous instance of the thread
++ * which got killed). Also, the br_stub_handle_object_signature
++ * function which is called whenever file has to be signed
++ * also gets blocked as it too attempts to acquire &priv->lock.
++ *
++ * So, arrange for the lock to be unlocked as part of the
++ * cleanup of this thread using pthread_cleanup_push and
++ * pthread_cleanup_pop.
++ */
++ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
+ pthread_mutex_lock(&priv->lock);
+ {
+ while (list_empty(&priv->squeue))
+@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
+ list_del_init(&sigstub->list);
+ }
+ pthread_mutex_unlock(&priv->lock);
++ pthread_cleanup_pop(0);
+
+ call_resume(sigstub->stub);
+
+@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
+
+ priv = this->private;
+
+- if (frame->root->pid != GF_CLIENT_PID_BITD)
++ if (frame->root->pid != GF_CLIENT_PID_BITD) {
++ gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
++ "PID %d from where signature request"
++ "came, does not belong to bit-rot daemon."
++ "Unwinding the fop",
++ frame->root->pid);
+ goto dofop;
++ }
+
+ ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
+- if (ret)
++ if (ret) {
++ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
++ "failed to prepare the signature for %s. Unwinding the fop",
++ uuid_utoa(fd->inode->gfid));
+ goto dofop;
++ }
+ if (fakesuccess) {
+ op_ret = op_errno = 0;
+ goto dofop;
+@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ /* object signature request */
+ ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
+ if (!ret) {
++ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
++ uuid_utoa(fd->inode->gfid));
+ br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
+ goto done;
+ }
+--
+1.8.3.1
+
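The fix above applies the standard POSIX cancellation-cleanup idiom: a thread that may be cancelled while blocked in pthread_cond_wait() registers a handler with pthread_cleanup_push(), and since a cancelled waiter re-acquires the mutex before its handlers run, the handler can safely unlock it. A minimal self-contained illustration, independent of any glusterfs types:

```c
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void
lock_cleaner(void *arg)
{
    /* Runs if the thread is cancelled while it holds the mutex. */
    pthread_mutex_unlock(arg);
}

static void *
worker(void *arg)
{
    (void)arg;
    for (;;) {
        /* Push the handler before taking the lock so any cancellation
         * point inside the critical section releases the mutex. */
        pthread_cleanup_push(lock_cleaner, &lock);
        pthread_mutex_lock(&lock);
        pthread_cond_wait(&cond, &lock); /* cancellation point */
        pthread_mutex_unlock(&lock);
        pthread_cleanup_pop(0); /* 0: don't run the handler normally */
    }
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, worker, NULL);
    sleep(1);
    pthread_cancel(t); /* cleanup handler unlocks the mutex */
    pthread_join(t, NULL);
    pthread_mutex_lock(&lock); /* would deadlock without the handler */
    puts("mutex reacquired after cancellation");
    pthread_mutex_unlock(&lock);
    return 0;
}
```

Compile with -pthread. Without the cleanup handler, the final pthread_mutex_lock() in main() hangs forever, which is exactly the deadlock the commit message describes for the re-enabled signer thread.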
diff --git a/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch b/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
new file mode 100644
index 0000000..2c9b66e
--- /dev/null
+++ b/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
@@ -0,0 +1,181 @@
+From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001
+From: Raghavendra Bhat <raghavendra@redhat.com>
+Date: Mon, 11 Mar 2019 12:16:50 -0400
+Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files
+ during oneshot crawl
+
+Currently the bit-rot feature has an issue with disabling and re-enabling
+it on the same volume. Consider enabling bit-rot detection, which goes on
+to crawl and sign all the files present in the volume. Then some files are
+modified and the bit-rot daemon goes on to sign the modified files with
+the correct signature. Now, disable the bit-rot feature. While signing and
+scrubbing are not happening, previous checksums of the files continue to
+exist as extended attributes. Now, if some files with checksum xattrs get
+modified, they are not signed with a new signature as the feature is off.
+
+At this point, if the feature is enabled again, the bit-rot daemon will
+go and sign those files which do not have any bit-rot specific xattrs
+(i.e. those files which were created after bit-rot was disabled), whereas
+the files with bit-rot xattrs won't get signed with the proper new checksum.
+If the scrubber then runs, it finds the on-disk checksum and the actual
+checksum of the file to be different (because the file got modified) and
+marks the file as corrupted.
+
+FIX:
+
+The fix is to unconditionally sign the files when the bit-rot daemon
+comes up (instead of skipping the files with bit-rot xattrs).
+
+upstream fix:
+ > patch: https://review.gluster.org/#/c/glusterfs/+/22360/
+ > fixes: #bz1700078
+ > Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
+
+Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
+BUG: 1851424
+Signed-off-by: Raghavendra M <raghavendra@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/208305
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bitrot/bug-1700078.t | 87 +++++++++++++++++++++++++++++
+ xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++-
+ 2 files changed, 101 insertions(+), 1 deletion(-)
+ create mode 100644 tests/bitrot/bug-1700078.t
+
+diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
+new file mode 100644
+index 0000000..f273742
+--- /dev/null
++++ b/tests/bitrot/bug-1700078.t
+@@ -0,0 +1,87 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++
++cleanup;
++
++## Start glusterd
++TEST glusterd;
++TEST pidof glusterd;
++
++## Lets create and start the volume
++TEST $CLI volume create $V0 $H0:$B0/${V0}1
++TEST $CLI volume start $V0
++
++## Enable bitrot for volume $V0
++TEST $CLI volume bitrot $V0 enable
++
++## Turn off quick-read so that it won't cache the contents
++# of the file in lookup. For corrupted files, it might
++# end up in reads being served from the cache instead of
++# an error.
++TEST $CLI volume set $V0 performance.quick-read off
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
++
++## Set expiry-timeout to 1 sec
++TEST $CLI volume set $V0 features.expiry-time 1
++
++##Mount $V0
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
++
++## Turn off quick-read xlator so that, the contents are not served from the
++# quick-read cache.
++TEST $CLI volume set $V0 performance.quick-read off
++
++#Create sample file
++TEST `echo "1234" > $M0/FILE1`
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
++
++##disable bitrot
++TEST $CLI volume bitrot $V0 disable
++
++## modify the file
++TEST `echo "write" >> $M0/FILE1`
++
++# unmount and remount when the file has to be accessed.
++# This is to ensure that, when the remount happens,
++# and the file is read, its contents are served from the
++# brick instead of cache.
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++
++##enable bitrot
++TEST $CLI volume bitrot $V0 enable
++
++# expiry time is set to 1 second. Hence sleep for 2 seconds for the
++# oneshot crawler to finish its crawling and sign the file properly.
++sleep 2
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
++
++## Ondemand scrub
++TEST $CLI volume bitrot $V0 scrub ondemand
++
++# the scrub ondemand CLI command just ensures that
++# the scrubber has received the ondemand scrub directive
++# and started. sleep for 2 seconds for the scrubber to finish
++# crawling and marking file(s) as bad (if it finds that
++# corruption has happened), which are filesystem operations.
++sleep 2
++
++TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
++
++##Mount $V0
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
++
++TEST cat $M0/FILE1
++
++cleanup;
+diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
+index b8feef7..424c0d5 100644
+--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
+@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ int32_t ret = -1;
+ inode_t *linked_inode = NULL;
+ gf_boolean_t need_signing = _gf_false;
++ gf_boolean_t need_reopen = _gf_true;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", data, out);
+@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ uuid_utoa(linked_inode->gfid));
+ } else {
+ need_signing = br_check_object_need_sign(this, xattr, child);
++
++ /*
++ * If we are here, it means the bitrot daemon has started. Whether
++ * it is just a simple restart of the daemon or it was started
++ * because the feature was enabled is hard to determine. Hence,
++ * if need_signing is false (because bit-rot version and signature
++ * are present), still go ahead and sign the file.
++ */
++ if (!need_signing) {
++ need_signing = _gf_true;
++ need_reopen = _gf_true;
++ }
+ }
+
+ if (!need_signing)
+@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
+ "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
+ uuid_utoa(linked_inode->gfid), child->brick_path);
+- br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
++ br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
+
+ ret = 0;
+
+--
+1.8.3.1
+
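The behavioural change above is small and easy to miss in the diff: during the one-shot crawl, a file that already carries bit-rot xattrs is no longer skipped just because its version and signature look intact. A hedged distillation of the new decision (names are illustrative, not the actual glusterfs signatures):

```c
#include <stdbool.h>
#include <stdio.h>

/* Distilled from bitd_oneshot_crawl() after this patch. */
static void
decide_signing(bool has_bitrot_xattrs, bool xattrs_say_sign_needed,
               bool *need_signing, bool *need_reopen)
{
    *need_reopen = true;
    if (!has_bitrot_xattrs) {
        /* File created while the feature was off: sign it (old behaviour). */
        *need_signing = true;
    } else {
        /* File already has version/signature xattrs. It may have been
         * modified while bit-rot was disabled, leaving a stale signature
         * the scrubber would flag as corruption, so re-sign it even when
         * the xattrs claim no signing is due. */
        *need_signing = true;
        (void)xattrs_say_sign_needed;
    }
}

int main(void)
{
    bool sign, reopen;
    decide_signing(true, false, &sign, &reopen);
    printf("sign=%d reopen=%d\n", sign, reopen);
    return 0;
}
```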
diff --git a/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch b/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
new file mode 100644
index 0000000..e31349a
--- /dev/null
+++ b/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
@@ -0,0 +1,152 @@
+From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Thu, 23 Jul 2020 11:07:32 +0530
+Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
+ folder
+
+Problem:
+If a gfid is present in the indices/xattrop folder while
+the file/dir is actually healthy and all the xattrs are healthy,
+it causes a lot of lookups by shd on an entry which does not need
+to be healed.
+This whole process eats up a lot of CPU without doing meaningful
+work.
+
+Solution:
+Set the trusted.ec.dirty xattr of the entry so that the actual heal
+process happens and, at the end of it, during the unset of dirty, the
+gfid entry from indices/xattrop will be removed.
+
+>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
+>Fixes: #1385
+
+Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
+BUG: 1785714
+Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-heal.c | 73 ++++++++++++++++++++++++++++++++++++++-
+ xlators/cluster/ec/src/ec-types.h | 7 +++-
+ 2 files changed, 78 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index e2de879..7d25853 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2488,6 +2488,59 @@ out:
+ return ret;
+ }
+
++int
++ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
++{
++ int i = 0;
++ int ret = 0;
++ dict_t **xattr = NULL;
++ loc_t loc = {0};
++ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
++ unsigned char *on = NULL;
++ default_args_cbk_t *replies = NULL;
++ dict_t *dict = NULL;
++
++ /* Allocate the required memory */
++ loc.inode = inode_ref(inode);
++ gf_uuid_copy(loc.gfid, inode->gfid);
++ on = alloca0(ec->nodes);
++ EC_REPLIES_ALLOC(replies, ec->nodes);
++ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
++ if (!xattr) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ dict = dict_new();
++ if (!dict) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ for (i = 0; i < ec->nodes; i++) {
++ xattr[i] = dict;
++ on[i] = 1;
++ }
++ dirty_xattr[EC_METADATA_TXN] = hton64(1);
++ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
++ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
++ if (ret < 0) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
++ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
++ xattr, NULL);
++out:
++ if (dict) {
++ dict_unref(dict);
++ }
++ if (xattr) {
++ GF_FREE(xattr);
++ }
++ cluster_replies_wipe(replies, ec->nodes);
++ loc_wipe(&loc);
++ return ret;
++}
++
+ void
+ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ {
+@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+ &need_heal);
+
+- if (need_heal == EC_HEAL_NONEED) {
++ if (need_heal == EC_HEAL_PURGE_INDEX) {
++ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
++ "Index entry needs to be purged for: %s ",
++ uuid_utoa(loc->gfid));
++ /* We need to send xattrop to set dirty flag so that it can be
++ * healed and index entry could be removed. We need not to take lock
++ * on this entry to do so as we are just setting dirty flag which
++ * actually increases the trusted.ec.dirty count and does not set
++ * the new value.
++ * This will make sure that it is not interfering in other fops.*/
++ ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
++ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+ goto out;
+@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
+ goto out;
+ }
+ }
++ /* If the lock count is 0, all dirty flags are 0 and all the
++ * versions are matching, then why are we here? It looks
++ * like something went wrong while removing the index entries
++ * after completing a successful heal or fop. In this case
++ * we need to remove this index entry to avoid triggering heal
++ * in a loop and causing lookups again and again. */
++ *need_heal = EC_HEAL_PURGE_INDEX;
+ } else {
+ for (i = 0; i < ec->nodes; i++) {
+ /* Since each lock can only increment the dirty
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index f15429d..700dc39 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
+
+ enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
+
+-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
++enum _ec_heal_need {
++ EC_HEAL_NONEED,
++ EC_HEAL_MAYBE,
++ EC_HEAL_MUST,
++ EC_HEAL_PURGE_INDEX
++};
+
+ enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
+
+--
+1.8.3.1
+
diff --git a/0461-geo-replication-Fix-IPv6-parsing.patch b/0461-geo-replication-Fix-IPv6-parsing.patch
new file mode 100644
index 0000000..098be5f
--- /dev/null
+++ b/0461-geo-replication-Fix-IPv6-parsing.patch
@@ -0,0 +1,127 @@
+From d425ed54261d5bc19aa853854cc3b64647e3c897 Mon Sep 17 00:00:00 2001
+From: Aravinda Vishwanathapura <aravinda@kadalu.io>
+Date: Sun, 12 Jul 2020 12:42:36 +0530
+Subject: [PATCH 461/465] geo-replication: Fix IPv6 parsing
+
+Brick paths in Volinfo use `:` as a delimiter, and Geo-rep splits
+on the `:` character. This goes wrong with IPv6 addresses, which
+contain `:` characters themselves.
+
+This patch handles the IPv6 case and performs the split properly.
+Backport of:
+ >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24706
+ >Fixes: #1366
+ >Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d
+ >Signed-off-by: Aravinda Vishwanathapura <aravinda@kadalu.io>
+
+BUG: 1855966
+Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/208610
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/master.py | 5 ++--
+ geo-replication/syncdaemon/syncdutils.py | 43 +++++++++++++++++++++++++++++---
+ 2 files changed, 43 insertions(+), 5 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
+index 3f98337..08e98f8 100644
+--- a/geo-replication/syncdaemon/master.py
++++ b/geo-replication/syncdaemon/master.py
+@@ -26,7 +26,8 @@ from rconf import rconf
+ from syncdutils import Thread, GsyncdError, escape_space_newline
+ from syncdutils import unescape_space_newline, gauxpfx, escape
+ from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid
+-from syncdutils import NoStimeAvailable, PartialHistoryAvailable
++from syncdutils import NoStimeAvailable, PartialHistoryAvailable, host_brick_split
++
+
+ URXTIME = (-1, 0)
+
+@@ -1466,7 +1467,7 @@ class GMasterChangelogMixin(GMasterCommon):
+ node = rconf.args.resource_remote
+ node_data = node.split("@")
+ node = node_data[-1]
+- remote_node_ip = node.split(":")[0]
++ remote_node_ip, _ = host_brick_split(node)
+ self.status.set_slave_node(remote_node_ip)
+
+ def changelogs_batch_process(self, changes):
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index 7560fa1..f43e13b 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -883,6 +883,19 @@ class Popen(subprocess.Popen):
+ self.errfail()
+
+
++def host_brick_split(value):
++ """
++ IPv6 compatible way to split and get the host
++ and brick information. Example inputs:
++ node1.example.com:/exports/bricks/brick1/brick
++ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick
++ """
++ parts = value.split(":")
++ brick = parts[-1]
++ hostparts = parts[0:-1]
++ return (":".join(hostparts), brick)
++
++
+ class Volinfo(object):
+
+ def __init__(self, vol, host='localhost', prelude=[], master=True):
+@@ -925,7 +938,7 @@ class Volinfo(object):
+ @memoize
+ def bricks(self):
+ def bparse(b):
+- host, dirp = b.find("name").text.split(':', 2)
++ host, dirp = host_brick_split(b.find("name").text)
+ return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text}
+ return [bparse(b) for b in self.get('brick')]
+
+@@ -1001,6 +1014,16 @@ class VolinfoFromGconf(object):
+ def is_hot(self, brickpath):
+ return False
+
++ def is_uuid(self, value):
++ try:
++ uuid.UUID(value)
++ return True
++ except ValueError:
++ return False
++
++ def possible_path(self, value):
++ return "/" in value
++
+ @property
+ @memoize
+ def bricks(self):
+@@ -1014,8 +1037,22 @@ class VolinfoFromGconf(object):
+ out = []
+ for b in bricks_data:
+ parts = b.split(":")
+- bpath = parts[2] if len(parts) == 3 else ""
+- out.append({"host": parts[1], "dir": bpath, "uuid": parts[0]})
++ b_uuid = None
++ if self.is_uuid(parts[0]):
++ b_uuid = parts[0]
++ # Set all parts except first
++ parts = parts[1:]
++
++ if self.possible_path(parts[-1]):
++ bpath = parts[-1]
++ # Set all parts except last
++ parts = parts[0:-1]
++
++ out.append({
++ "host": ":".join(parts), # if remaining parts are IPv6 name
++ "dir": bpath,
++ "uuid": b_uuid
++ })
+
+ return out
+
+--
+1.8.3.1
+
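The helper added above solves a generic parsing problem: in a `host:brick` string, only the final `:` separates host from brick, because IPv6 hosts contain colons while brick paths do not. The Python host_brick_split() in the patch is authoritative; the following hedged C rendering just mirrors the idea:

```c
#include <stdio.h>
#include <string.h>

/* Split "host:brick" on the LAST ':' so IPv6 addresses survive intact. */
static int
host_brick_split(const char *value, char *host, size_t hlen,
                 char *brick, size_t blen)
{
    const char *sep = strrchr(value, ':');
    size_t n;

    if (sep == NULL)
        return -1;
    n = (size_t)(sep - value);
    if (n >= hlen || strlen(sep + 1) >= blen)
        return -1; /* output buffers too small */
    memcpy(host, value, n);
    host[n] = '\0';
    strcpy(brick, sep + 1);
    return 0;
}

int main(void)
{
    char host[128], brick[256];
    const char *b =
        "fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick";

    if (host_brick_split(b, host, sizeof(host), brick, sizeof(brick)) == 0)
        printf("host=%s brick=%s\n", host, brick);
    return 0;
}
```

A naive split on the first ':' would return "fe80" as the host, which is the failure mode the patch fixes.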
diff --git a/0462-Issue-with-gf_fill_iatt_for_dirent.patch b/0462-Issue-with-gf_fill_iatt_for_dirent.patch
new file mode 100644
index 0000000..aa5fd21
--- /dev/null
+++ b/0462-Issue-with-gf_fill_iatt_for_dirent.patch
@@ -0,0 +1,43 @@
+From f027734165374979bd0bff8ea059dfaadca85e07 Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Thu, 2 Jul 2020 02:07:56 +0530
+Subject: [PATCH 462/465] Issue with gf_fill_iatt_for_dirent
+
+In "gf_fill_iatt_for_dirent()", while calculating inode_path for loc,
+the inode should be of parent's. Instead it is loc.inode which results in error
+ and eventually lookup/readdirp fails.
+
+This patch fixes the same.
+
+This is backport of below mainstream fix :
+
+> Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5
+> Fixes: #1351
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24661/
+
+Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5
+BUG: 1853189
+Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/208691
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/gf-dirent.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
+index f289723..3fa67f2 100644
+--- a/libglusterfs/src/gf-dirent.c
++++ b/libglusterfs/src/gf-dirent.c
+@@ -277,7 +277,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol)
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = entry->d_name;
+ loc.parent = inode_ref(parent);
+- ret = inode_path(loc.inode, entry->d_name, &path);
++ ret = inode_path(loc.parent, entry->d_name, &path);
+ loc.path = path;
+ if (ret < 0)
+ goto out;
+--
+1.8.3.1
+
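The one-line fix above hinges on what inode_path() needs: an entry's path is the resolved path of its parent plus the entry name, and at readdirp time loc.inode is a fresh inode that is not yet linked into the inode table, so it cannot be resolved. A toy model of the distinction, with stand-in types (the real inode_path() walks glusterfs's inode table):

```c
#include <stddef.h>
#include <stdio.h>

/* Toy stand-in: each node just knows its own resolved path
 * (NULL = not yet linked, so resolution fails). */
struct toy_inode {
    const char *path;
};

static int
toy_inode_path(const struct toy_inode *base, const char *name,
               char *out, size_t len)
{
    if (base == NULL || base->path == NULL)
        return -1; /* unlinked inode: cannot resolve */
    snprintf(out, len, "%s/%s", base->path, name);
    return 0;
}

int main(void)
{
    struct toy_inode parent = {"/dir"};
    struct toy_inode entry = {NULL}; /* freshly read entry, not linked */
    char path[256];

    /* Buggy form: resolve from the entry itself -> fails. */
    if (toy_inode_path(&entry, "file", path, sizeof(path)) < 0)
        puts("loc.inode: resolution fails");

    /* Fixed form: resolve from the parent and append the name. */
    if (toy_inode_path(&parent, "file", path, sizeof(path)) == 0)
        printf("loc.parent: %s\n", path);
    return 0;
}
```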
diff --git a/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch b/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch
new file mode 100644
index 0000000..b47cdd1
--- /dev/null
+++ b/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch
@@ -0,0 +1,87 @@
+From 7d87933f648092ae55d57a96fd06e3df975d764c Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Tue, 18 Aug 2020 10:33:48 +0530
+Subject: [PATCH 463/465] cluster/ec: Change handling of heal failure to avoid
+ crash
+
+Problem:
+ec_getxattr_heal_cbk was called with NULL as its second argument
+in case the heal was failing.
+This function dereferenced the "cookie" argument, which caused a crash.
+
+Solution:
+The cookie is changed to carry the value that was supposed to be
+stored in fop->data, so even when fop is NULL in the error case,
+there won't be any NULL dereference.
+
+Thanks to Xavi for suggesting the fix.
+
+>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23050/
+>fixes: bz#1729085
+
+Change-Id: I0798000d5cadb17c3c2fbfa1baf77033ffc2bb8c
+BUG: 1852736
+Reviewed-on: https://code.engineering.redhat.com/gerrit/209012
+Tested-by: Ashish Pandey <aspandey@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ xlators/cluster/ec/src/ec-heal.c | 11 ++++++-----
+ xlators/cluster/ec/src/ec-inode-read.c | 4 ++--
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 7d25853..6e6948b 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -1966,7 +1966,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
+
+ case EC_STATE_REPORT:
+ if (fop->cbks.heal) {
+- fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
+ (heal->good | heal->bad), heal->good, heal->bad,
+ 0, NULL);
+ }
+@@ -2022,10 +2022,11 @@ ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
+ {
+- ec_fop_data_t *fop = cookie;
+- ec_heal_t *heal = fop->data;
++ ec_heal_t *heal = cookie;
+
+- fop->heal = NULL;
++ if (heal->fop) {
++ heal->fop->heal = NULL;
++ }
+ heal->fop = NULL;
+ heal->error = op_ret < 0 ? op_errno : 0;
+ syncbarrier_wake(heal->data);
+@@ -2669,7 +2670,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ out:
+ ec_reset_entry_healing(fop);
+ if (fop->cbks.heal) {
+- fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
+ ec_char_array_to_mask(participants, ec->nodes),
+ mgood & good, mbad & bad, pending, NULL);
+ }
+diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
+index e82e8f6..c50d0ad 100644
+--- a/xlators/cluster/ec/src/ec-inode-read.c
++++ b/xlators/cluster/ec/src/ec-inode-read.c
+@@ -396,8 +396,8 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
+ {
+- ec_fop_data_t *fop = cookie;
+- fop_getxattr_cbk_t func = fop->data;
++ fop_getxattr_cbk_t func = cookie;
++
+ ec_t *ec = xl->private;
+ dict_t *dict = NULL;
+ char *str;
+--
+1.8.3.1
+
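The patch above is an instance of a general callback rule: anything the callback must dereference has to travel through a parameter that is valid on every path, including the early-error path where the fop was never allocated. A hedged, self-contained sketch of the before/after shape (the struct is a stand-in for ec_fop_data_t):

```c
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

typedef void (*heal_cbk_t)(void *cookie, int op_ret, int op_errno);

struct fop {    /* stand-in for ec_fop_data_t */
    void *data; /* value the callback ultimately needs */
};

static void
heal_done(void *cookie, int op_ret, int op_errno)
{
    /* After the fix: the needed value arrives in the cookie itself,
     * so this runs safely even when no fop object exists. */
    const char *target = cookie;
    printf("%s: ret=%d errno=%d\n", target, op_ret, op_errno);
}

static void
start_heal(struct fop *fop, void *value, heal_cbk_t cbk)
{
    if (fop == NULL) {
        /* Error path. The buggy version called cbk(fop, ...) and the
         * callback then read fop->data: a NULL dereference. Passing the
         * value as the cookie removes the dependency on fop. */
        cbk(value, -1, ENOMEM);
        return;
    }
    fop->data = value;
    cbk(fop->data, 0, 0);
}

int main(void)
{
    start_heal(NULL, (void *)"getxattr-heal", heal_done); /* no crash */
    return 0;
}
```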
diff --git a/0464-storage-posix-Remove-nr_files-usage.patch b/0464-storage-posix-Remove-nr_files-usage.patch
new file mode 100644
index 0000000..d98e33d
--- /dev/null
+++ b/0464-storage-posix-Remove-nr_files-usage.patch
@@ -0,0 +1,102 @@
+From 7c51addf7912a94320e6b148bd66f2dbf274c533 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 11 Mar 2019 14:04:39 +0530
+Subject: [PATCH 464/465] storage/posix: Remove nr_files usage
+
+nr_files is supposed to represent the number of files opened in posix.
+The present logic doesn't handle anon-fds, because of which the
+counts would always be wrong.
+
+I don't remember anyone using this value to debug any problem, probably
+because we always have 'ls -l /proc/<pid>/fd', which not only prints the
+fds that are active but also prints their paths. It also handles directories
+and anon-fds which actually opened the file. So this code is removed
+instead of fixing the buggy logic behind nr_files.
+
+> fixes bz#1688106
+> Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+> (Cherry pick from commit f5987d38f216a3142dfe45f03bf66ff4827d9b55)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22333/)
+
+Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040
+BUG: 1851989
+Signed-off-by: Mohit Agrawal<moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/209468
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-common.c | 2 --
+ xlators/storage/posix/src/posix-entry-ops.c | 2 --
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 2 --
+ xlators/storage/posix/src/posix.h | 1 -
+ 4 files changed, 7 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index ac53796..b317627 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -128,7 +128,6 @@ posix_priv(xlator_t *this)
+ gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value));
+ gf_proc_dump_write("max_write", "%" PRId64,
+ GF_ATOMIC_GET(priv->write_value));
+- gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files));
+
+ return 0;
+ }
+@@ -815,7 +814,6 @@ posix_init(xlator_t *this)
+ }
+
+ LOCK_INIT(&_private->lock);
+- GF_ATOMIC_INIT(_private->nr_files, 0);
+ GF_ATOMIC_INIT(_private->read_value, 0);
+ GF_ATOMIC_INIT(_private->write_value, 0);
+
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index 65650b3..b3a5381 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -2243,8 +2243,6 @@ fill_stat:
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
+ "failed to set the fd context path=%s fd=%p", real_path, fd);
+
+- GF_ATOMIC_INC(priv->nr_files);
+-
+ op_ret = 0;
+
+ out:
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index d135d8b..81f4a6b 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -1605,7 +1605,6 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
+ "failed to set the fd context path=%s fd=%p", real_path, fd);
+
+- GF_ATOMIC_INC(priv->nr_files);
+ op_ret = 0;
+
+ out:
+@@ -2526,7 +2525,6 @@ posix_release(xlator_t *this, fd_t *fd)
+ if (!priv)
+ goto out;
+
+- GF_ATOMIC_DEC(priv->nr_files);
+ out:
+ return 0;
+ }
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 61495a7..124dbb4 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -154,7 +154,6 @@ struct posix_private {
+
+ gf_atomic_t read_value; /* Total read, from init */
+ gf_atomic_t write_value; /* Total write, from init */
+- gf_atomic_t nr_files;
+ /*
+ In some cases, two exported volumes may reside on the same
+ partition on the server. Sending statvfs info for both
+--
+1.8.3.1
+
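The commit message above leans on `ls -l /proc/<pid>/fd` as the better debugging tool, since the kernel already tracks every open fd together with its target path. For reference, the same information is available programmatically; a small self-contained sketch (Linux-specific, since /proc is assumed):

```c
#include <dirent.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

/* Enumerate the open fds of the current process, printing each fd and
 * the path it points to, just like "ls -l /proc/self/fd". */
int main(void)
{
    DIR *dir = opendir("/proc/self/fd");
    struct dirent *e;
    char link[PATH_MAX], target[PATH_MAX];
    ssize_t n;

    if (dir == NULL)
        return 1;
    while ((e = readdir(dir)) != NULL) {
        if (e->d_name[0] == '.')
            continue;
        snprintf(link, sizeof(link), "/proc/self/fd/%s", e->d_name);
        n = readlink(link, target, sizeof(target) - 1);
        if (n < 0)
            continue; /* fd may have been closed meanwhile */
        target[n] = '\0';
        printf("fd %s -> %s\n", e->d_name, target);
    }
    closedir(dir);
    return 0;
}
```

Unlike the removed nr_files counter, this view includes directories and anon fds, which is the commit's argument for dropping the counter rather than fixing it.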
diff --git a/0465-posix-Implement-a-janitor-thread-to-close-fd.patch b/0465-posix-Implement-a-janitor-thread-to-close-fd.patch
new file mode 100644
index 0000000..fc22456
--- /dev/null
+++ b/0465-posix-Implement-a-janitor-thread-to-close-fd.patch
@@ -0,0 +1,384 @@
+From 143b93b230b429cc712353243ed794b68494c040 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Mon, 27 Jul 2020 18:08:00 +0530
+Subject: [PATCH 465/465] posix: Implement a janitor thread to close fd
+
+Problem: In commit fb20713b380e1df8d7f9e9df96563be2f9144fd6 we used a
+ synctask to close fds, but we have found that the patch reduces
+ performance.
+
+Solution: Use a janitor thread to close fds. Save the pfd ctx in the
+ ctx janitor list, and also save the posix xlator in the pfd object to
+ avoid a race condition during cleanup in a brick-mux environment.
+
+> Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092
+> Fixes: #1396
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24755/)
+> (Cherry pick from commit 41b9616435cbdf671805856e487e373060c9455b)
+
+Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092
+BUG: 1851989
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/209448
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfsd/src/glusterfsd.c | 4 ++
+ libglusterfs/src/glusterfs/glusterfs.h | 7 ++
+ rpc/rpc-lib/src/rpcsvc.c | 6 --
+ xlators/storage/posix/src/posix-common.c | 34 +++++++++-
+ xlators/storage/posix/src/posix-helpers.c | 93 ++++++++++++++++++++++++++
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 33 ++++-----
+ xlators/storage/posix/src/posix.h | 7 ++
+ 7 files changed, 161 insertions(+), 23 deletions(-)
+
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 9821180..955bf1d 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -1839,6 +1839,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
+
+ INIT_LIST_HEAD(&cmd_args->xlator_options);
+ INIT_LIST_HEAD(&cmd_args->volfile_servers);
++ ctx->pxl_count = 0;
++ pthread_mutex_init(&ctx->fd_lock, NULL);
++ pthread_cond_init(&ctx->fd_cond, NULL);
++ INIT_LIST_HEAD(&ctx->janitor_fds);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 495a4d7..bf6a987 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -733,6 +733,13 @@ struct _glusterfs_ctx {
+ } stats;
+
+ struct list_head volfile_list;
++ /* Members to manage the janitor thread for fd cleanup */
++ struct list_head janitor_fds;
++ pthread_cond_t fd_cond;
++ pthread_mutex_t fd_lock;
++ pthread_t janitor;
++ /* This variable is used to save the total posix xlator count */
++ uint32_t pxl_count;
+
+ char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
+ };
+diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
+index 23ca1fd..3f184bf 100644
+--- a/rpc/rpc-lib/src/rpcsvc.c
++++ b/rpc/rpc-lib/src/rpcsvc.c
+@@ -375,12 +375,6 @@ rpcsvc_program_actor(rpcsvc_request_t *req)
+
+ req->ownthread = program->ownthread;
+ req->synctask = program->synctask;
+- if (((req->procnum == GFS3_OP_RELEASE) ||
+- (req->procnum == GFS3_OP_RELEASEDIR)) &&
+- (program->prognum == GLUSTER_FOP_PROGRAM)) {
+- req->ownthread = _gf_false;
+- req->synctask = _gf_true;
+- }
+
+ err = SUCCESS;
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s",
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index b317627..c5a43a1 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -150,6 +150,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
+ struct timespec sleep_till = {
+ 0,
+ };
++ glusterfs_ctx_t *ctx = this->ctx;
+
+ switch (event) {
+ case GF_EVENT_PARENT_UP: {
+@@ -160,8 +161,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
+ case GF_EVENT_PARENT_DOWN: {
+ if (!victim->cleanup_starting)
+ break;
+- gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+- victim->name);
+
+ if (priv->janitor) {
+ pthread_mutex_lock(&priv->janitor_mutex);
+@@ -187,6 +186,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
+ GF_FREE(priv->janitor);
+ }
+ priv->janitor = NULL;
++ pthread_mutex_lock(&ctx->fd_lock);
++ {
++ while (priv->rel_fdcount > 0) {
++ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock);
++ }
++ }
++ pthread_mutex_unlock(&ctx->fd_lock);
++
++ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
++ victim->name);
+ default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
+ } break;
+ default:
+@@ -1038,7 +1047,13 @@ posix_init(xlator_t *this)
+ pthread_cond_init(&_private->fsync_cond, NULL);
+ pthread_mutex_init(&_private->janitor_mutex, NULL);
+ pthread_cond_init(&_private->janitor_cond, NULL);
++ pthread_cond_init(&_private->fd_cond, NULL);
+ INIT_LIST_HEAD(&_private->fsyncs);
++ _private->rel_fdcount = 0;
++ ret = posix_spawn_ctx_janitor_thread(this);
++ if (ret)
++ goto out;
++
+ ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this,
+ "posixfsy");
+ if (ret) {
+@@ -1133,6 +1148,8 @@ posix_fini(xlator_t *this)
+ {
+ struct posix_private *priv = this->private;
+ gf_boolean_t health_check = _gf_false;
++ glusterfs_ctx_t *ctx = this->ctx;
++ uint32_t count;
+ int ret = 0;
+
+ if (!priv)
+@@ -1166,6 +1183,19 @@ posix_fini(xlator_t *this)
+ priv->janitor = NULL;
+ }
+
++ pthread_mutex_lock(&ctx->fd_lock);
++ {
++ count = --ctx->pxl_count;
++ if (count == 0) {
++ pthread_cond_signal(&ctx->fd_cond);
++ }
++ }
++ pthread_mutex_unlock(&ctx->fd_lock);
++
++ if (count == 0) {
++ pthread_join(ctx->janitor, NULL);
++ }
++
+ if (priv->fsyncer) {
+ (void)gf_thread_cleanup_xint(priv->fsyncer);
+ priv->fsyncer = 0;
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 39dbcce..73a44be 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1582,6 +1582,99 @@ unlock:
+ return;
+ }
+
++static struct posix_fd *
++janitor_get_next_fd(glusterfs_ctx_t *ctx)
++{
++ struct posix_fd *pfd = NULL;
++
++ while (list_empty(&ctx->janitor_fds)) {
++ if (ctx->pxl_count == 0) {
++ return NULL;
++ }
++
++ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock);
++ }
++
++ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list);
++ list_del_init(&pfd->list);
++
++ return pfd;
++}
++
++static void
++posix_close_pfd(xlator_t *xl, struct posix_fd *pfd)
++{
++ THIS = xl;
++
++ if (pfd->dir == NULL) {
++ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd);
++ sys_close(pfd->fd);
++ } else {
++ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir);
++ sys_closedir(pfd->dir);
++ }
++
++ GF_FREE(pfd);
++}
++
++static void *
++posix_ctx_janitor_thread_proc(void *data)
++{
++ xlator_t *xl;
++ struct posix_fd *pfd;
++ glusterfs_ctx_t *ctx = NULL;
++ struct posix_private *priv_fd;
++
++ ctx = data;
++
++ pthread_mutex_lock(&ctx->fd_lock);
++
++ while ((pfd = janitor_get_next_fd(ctx)) != NULL) {
++ pthread_mutex_unlock(&ctx->fd_lock);
++
++ xl = pfd->xl;
++ posix_close_pfd(xl, pfd);
++
++ pthread_mutex_lock(&ctx->fd_lock);
++
++ priv_fd = xl->private;
++ priv_fd->rel_fdcount--;
++ if (!priv_fd->rel_fdcount)
++ pthread_cond_signal(&priv_fd->fd_cond);
++ }
++
++ pthread_mutex_unlock(&ctx->fd_lock);
++
++ return NULL;
++}
++
++int
++posix_spawn_ctx_janitor_thread(xlator_t *this)
++{
++ int ret = 0;
++ glusterfs_ctx_t *ctx = NULL;
++
++ ctx = this->ctx;
++
++ pthread_mutex_lock(&ctx->fd_lock);
++ {
++ if (ctx->pxl_count++ == 0) {
++ ret = gf_thread_create(&ctx->janitor, NULL,
++ posix_ctx_janitor_thread_proc, ctx,
++ "posixctxjan");
++
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
++ "spawning janitor thread failed");
++ ctx->pxl_count--;
++ }
++ }
++ }
++ pthread_mutex_unlock(&ctx->fd_lock);
++
++ return ret;
++}
++
+ static int
+ is_fresh_file(int64_t ctime_sec)
+ {
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 81f4a6b..21119ea 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -1352,6 +1352,22 @@ out:
+ return 0;
+ }
+
++static void
++posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd)
++{
++ glusterfs_ctx_t *ctx = this->ctx;
++ struct posix_private *priv = this->private;
++
++ pfd->xl = this;
++ pthread_mutex_lock(&ctx->fd_lock);
++ {
++ list_add_tail(&pfd->list, &ctx->janitor_fds);
++ priv->rel_fdcount++;
++ pthread_cond_signal(&ctx->fd_cond);
++ }
++ pthread_mutex_unlock(&ctx->fd_lock);
++}
++
+ int32_t
+ posix_releasedir(xlator_t *this, fd_t *fd)
+ {
+@@ -1374,11 +1390,7 @@ posix_releasedir(xlator_t *this, fd_t *fd)
+ "pfd->dir is NULL for fd=%p", fd);
+ goto out;
+ }
+-
+- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
+-
+- sys_closedir(pfd->dir);
+- GF_FREE(pfd);
++ posix_add_fd_to_cleanup(this, pfd);
+
+ out:
+ return 0;
+@@ -2494,7 +2506,6 @@ out:
+ int32_t
+ posix_release(xlator_t *this, fd_t *fd)
+ {
+- struct posix_private *priv = NULL;
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ uint64_t tmp_pfd = 0;
+@@ -2502,8 +2513,6 @@ posix_release(xlator_t *this, fd_t *fd)
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+- priv = this->private;
+-
+ ret = fd_ctx_del(fd, this, &tmp_pfd);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
+@@ -2517,13 +2526,7 @@ posix_release(xlator_t *this, fd_t *fd)
+ "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
+ }
+
+- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
+-
+- sys_close(pfd->fd);
+- GF_FREE(pfd);
+-
+- if (!priv)
+- goto out;
++ posix_add_fd_to_cleanup(this, pfd);
+
+ out:
+ return 0;
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 124dbb4..07f367b 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -134,6 +134,8 @@ struct posix_fd {
+ off_t dir_eof; /* offset at dir EOF */
+ int odirect;
+ struct list_head list; /* to add to the janitor list */
++ xlator_t *xl;
++ char _pad[4]; /* manual padding */
+ };
+
+ struct posix_private {
+@@ -204,6 +206,7 @@ struct posix_private {
+ pthread_cond_t fsync_cond;
+ pthread_mutex_t janitor_mutex;
+ pthread_cond_t janitor_cond;
++ pthread_cond_t fd_cond;
+ int fsync_queue_count;
+
+ enum {
+@@ -259,6 +262,7 @@ struct posix_private {
+ gf_boolean_t fips_mode_rchecksum;
+ gf_boolean_t ctime;
+ gf_boolean_t janitor_task_stop;
++ uint32_t rel_fdcount;
+ };
+
+ typedef struct {
+@@ -665,6 +669,9 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
+ int
+ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
+
++int
++posix_spawn_ctx_janitor_thread(xlator_t *this);
++
+ void
+ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
+
+--
+1.8.3.1
+
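The patch above is a producer/consumer handoff: release paths enqueue fds under a ctx-wide mutex and signal a single janitor thread, which drains the list and performs the actual close() calls off the hot path. A hedged, self-contained miniature of that scheme (one process-wide queue, stand-in types, no per-xlator brick-mux accounting):

```c
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct pfd { /* stand-in for struct posix_fd */
    int fd;
    struct pfd *next;
};

static pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t fd_cond = PTHREAD_COND_INITIALIZER;
static struct pfd *janitor_fds = NULL;
static int shutting_down = 0;

/* Called from the fop path (cf. posix_add_fd_to_cleanup): O(1), no syscall. */
static void
add_fd_to_cleanup(int fd)
{
    struct pfd *p = malloc(sizeof(*p));

    p->fd = fd;
    pthread_mutex_lock(&fd_lock);
    p->next = janitor_fds;
    janitor_fds = p;
    pthread_cond_signal(&fd_cond);
    pthread_mutex_unlock(&fd_lock);
}

static void *
janitor(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&fd_lock);
    for (;;) {
        while (janitor_fds == NULL && !shutting_down)
            pthread_cond_wait(&fd_cond, &fd_lock);
        if (janitor_fds == NULL)
            break; /* shutting down and the queue is drained */
        struct pfd *p = janitor_fds;
        janitor_fds = p->next;
        pthread_mutex_unlock(&fd_lock);
        close(p->fd); /* the slow work happens outside the lock */
        free(p);
        pthread_mutex_lock(&fd_lock);
    }
    pthread_mutex_unlock(&fd_lock);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, janitor, NULL);
    add_fd_to_cleanup(open("/dev/null", O_RDONLY));
    pthread_mutex_lock(&fd_lock);
    shutting_down = 1;
    pthread_cond_signal(&fd_cond);
    pthread_mutex_unlock(&fd_lock);
    pthread_join(t, NULL);
    puts("janitor drained the queue");
    return 0;
}
```

The real patch additionally tracks a per-xlator rel_fdcount so that a brick detaching in a brick-mux process can wait until its own fds are closed before it sends CHILD_DOWN.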
diff --git a/0466-cluster-ec-Change-stale-index-handling.patch b/0466-cluster-ec-Change-stale-index-handling.patch
new file mode 100644
index 0000000..1dc9f57
--- /dev/null
+++ b/0466-cluster-ec-Change-stale-index-handling.patch
@@ -0,0 +1,68 @@
+From b603170ae5f583037b8177a9d19e56c7821edf0b Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Tue, 25 Aug 2020 04:19:54 +0530
+Subject: [PATCH 466/466] cluster/ec: Change stale index handling
+
+Problem:
+The earlier approach set the dirty bit, which requires an extra heal.
+
+Fix:
+Send a zero-xattrop, which deletes the stale index entry without any
+need for an extra heal.
+
+ > Fixes: #1385
+ > Upstream-patch: https://review.gluster.org/c/glusterfs/+/24911/
+
+BUG: 1785714
+Change-Id: I7e97a1d8b5516f7be47cae55d0e56b14332b6cae
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/209904
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Tested-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-heal.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 6e6948b..06bafa5 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2490,7 +2490,7 @@ out:
+ }
+
+ int
+-ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
++ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+ {
+ int i = 0;
+ int ret = 0;
+@@ -2520,7 +2520,6 @@ ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+- dirty_xattr[EC_METADATA_TXN] = hton64(1);
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+@@ -2621,13 +2620,10 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+- /* We need to send xattrop to set dirty flag so that it can be
+- * healed and index entry could be removed. We need not to take lock
+- * on this entry to do so as we are just setting dirty flag which
+- * actually increases the trusted.ec.dirty count and does not set
+- * the new value.
+- * This will make sure that it is not interfering in other fops.*/
+- ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
++ /* We need to send zero-xattrop so that stale index entry could be
++ * removed. We need not take lock on this entry to do so as
++ * xattrop on a brick is atomic. */
++ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+--
+1.8.3.1
+
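The mechanism relied on here is that an xattrop adds client-supplied deltas
to the on-disk counters, so an all-zero delta leaves trusted.ec.dirty
untouched while still letting the brick reap the now-clean stale index
entry, and no follow-up heal is needed. A toy illustration of that
arithmetic (plain C on glibc; be64toh() stands in for the network-byte-order
handling, and sizes and values are invented):

    /* xattrop_zero.c - a zero delta is a no-op on the dirty counters. */
    #include <endian.h>        /* be64toh(); glibc-specific header */
    #include <inttypes.h>
    #include <stdio.h>

    #define EC_VERSION_SIZE 2

    /* deltas travel in network byte order, like the hton64(1) the old
     * code used to set */
    static void apply_delta(uint64_t stored[], const uint64_t delta_be[])
    {
        for (int i = 0; i < EC_VERSION_SIZE; i++)
            stored[i] += be64toh(delta_be[i]);
    }

    int main(void)
    {
        uint64_t dirty[EC_VERSION_SIZE] = { 3, 1 };
        uint64_t zero_delta[EC_VERSION_SIZE] = { 0, 0 };  /* the new fix */

        apply_delta(dirty, zero_delta);
        printf("dirty after zero-xattrop: %" PRIu64 ", %" PRIu64 "\n",
               dirty[0], dirty[1]);       /* unchanged: 3, 1 */
        return 0;
    }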
diff --git a/0467-build-Added-dependency-for-glusterfs-selinux.patch b/0467-build-Added-dependency-for-glusterfs-selinux.patch
new file mode 100644
index 0000000..93bb140
--- /dev/null
+++ b/0467-build-Added-dependency-for-glusterfs-selinux.patch
@@ -0,0 +1,38 @@
+From 9176ee8f10c3c33f31d00261995ed27e8680934a Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Thu, 3 Sep 2020 11:46:38 +0000
+Subject: [PATCH 467/467] build: Added dependency for glusterfs-selinux
+
+> Fixes: #1442
+> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24876/
+> Change-Id: I7d79bceff329db4d525bc8a77ba7ffe41bf53c97
+> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+
+BUG: 1460657
+
+Change-Id: I7d79bceff329db4d525bc8a77ba7ffe41bf53c97
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/210637
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 9def416..ed6bdf3 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -650,6 +650,9 @@ Summary: Clustered file-system server
+ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Requires: %{name}-cli%{?_isa} = %{version}-%{release}
+ Requires: %{name}-libs%{?_isa} = %{version}-%{release}
++%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) )
++Requires: glusterfs-selinux >= 0.1.0-2
++%endif
+ # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse
+ Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+ # self-heal daemon, rebalance, nfs-server etc. are actually clients
+--
+1.8.3.1
+
diff --git a/0468-build-Update-the-glusterfs-selinux-version.patch b/0468-build-Update-the-glusterfs-selinux-version.patch
new file mode 100644
index 0000000..b4b5ead
--- /dev/null
+++ b/0468-build-Update-the-glusterfs-selinux-version.patch
@@ -0,0 +1,36 @@
+From 4b72f5e7704d480bac869f7a32ac891898bb994f Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Thu, 3 Sep 2020 14:56:27 +0000
+Subject: [PATCH 468/468] build: Update the glusterfs-selinux version
+
+Updated the glusterfs-selinux version according to
+the downstream official version.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1460657
+
+Change-Id: I7b8bbf53f71f6f56103042950d8910f0cb63a685
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/210685
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ glusterfs.spec.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index ed6bdf3..30d7162 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -651,7 +651,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Requires: %{name}-cli%{?_isa} = %{version}-%{release}
+ Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+ %if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) )
+-Requires: glusterfs-selinux >= 0.1.0-2
++Requires: glusterfs-selinux >= 1.0-1
+ %endif
+ # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse
+ Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+--
+1.8.3.1
+
diff --git a/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch b/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch
new file mode 100644
index 0000000..0fadfc9
--- /dev/null
+++ b/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch
@@ -0,0 +1,33 @@
+From 6fed6cfcb26e6ed3c9640c5f889629315bbd83c2 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 31 Aug 2020 12:22:05 +0530
+Subject: [PATCH 469/469] cluster/ec: Don't trigger heal for stale index
+
+ > Fixes: #1385
+ > Upstream-patch: https://review.gluster.org/c/glusterfs/+/24930
+
+BUG: 1785714
+Change-Id: I3609dd2e1f63c4bd6a19d528b935bf5b05443824
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/210731
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-heal.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 06bafa5..f6376cd 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2624,6 +2624,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
++ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+--
+1.8.3.1
+
diff --git a/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch b/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch
new file mode 100644
index 0000000..e26d46a
--- /dev/null
+++ b/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch
@@ -0,0 +1,63 @@
+From 8e427716f4e2855093b1a1a0e3a9ec79ebac7faf Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Thu, 10 Sep 2020 13:49:09 +0530
+Subject: [PATCH 470/473] extras/snap_scheduler: changes in
+ gluster-shared-storage mount path
+
+The patch https://review.gluster.org/#/c/glusterfs/+/24934/ changes the mount
+point of gluster_shared_storage from /var/run to /run to address the issue of a
+symlink in the mount path recorded in fstab.
+NOTE: the mount point /var/run is a symlink to /run
+
+The corresponding changes to the gluster_shared_storage mount path are
+introduced in snap_scheduler with this patch.
+
+>Fixes: #1476
+>Change-Id: I9ce88c2f624c6aa5982de04edfee2d0a9f160d62
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/24971/
+BUG: 1873469
+Change-Id: I9ce88c2f624c6aa5982de04edfee2d0a9f160d62
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/211391
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/snap_scheduler/gcron.py | 4 ++--
+ extras/snap_scheduler/snap_scheduler.py | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py
+index cc16310..0e4df77 100755
+--- a/extras/snap_scheduler/gcron.py
++++ b/extras/snap_scheduler/gcron.py
+@@ -19,10 +19,10 @@ import logging.handlers
+ import fcntl
+
+
+-GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks"
++GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks"
+ GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks"
+ GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag"
+-LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/"
++LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/"
+ log = logging.getLogger("gcron-logger")
+ start_time = 0.0
+
+diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py
+index 5a29d41..e8fcc44 100755
+--- a/extras/snap_scheduler/snap_scheduler.py
++++ b/extras/snap_scheduler/snap_scheduler.py
+@@ -67,7 +67,7 @@ except ImportError:
+ SCRIPT_NAME = "snap_scheduler"
+ scheduler_enabled = False
+ log = logging.getLogger(SCRIPT_NAME)
+-SHARED_STORAGE_DIR="/var/run/gluster/shared_storage"
++SHARED_STORAGE_DIR="/run/gluster/shared_storage"
+ GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled"
+ GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled"
+ GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks"
+--
+1.8.3.1
+
diff --git a/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch b/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch
new file mode 100644
index 0000000..0ebba37
--- /dev/null
+++ b/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch
@@ -0,0 +1,73 @@
+From d23ad767281af85cf07f5c3f63de482d40ee1953 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Thu, 10 Sep 2020 13:16:12 +0530
+Subject: [PATCH 471/473] nfs-ganesha: gluster_shared_storage fails to
+ automount on node reboot on rhel 8
+
+The patch https://review.gluster.org/#/c/glusterfs/+/24934/ changes the mount
+point of gluster_shared_storage from /var/run to /run to address the issue of a
+symlink in the mount path recorded in fstab.
+NOTE: the mount point /var/run is a symlink to /run
+
+The corresponding changes to the gluster_shared_storage mount path are
+introduced in nfs-ganesha with this patch.
+
+>Fixes: #1475
+>Change-Id: I9c7677a053e1291f71476d47ba6fa2e729f59625
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/24970/
+BUG: 1873469
+Change-Id: I9c7677a053e1291f71476d47ba6fa2e729f59625
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/211392
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/ocf/ganesha_nfsd | 2 +-
+ extras/ganesha/scripts/ganesha-ha.sh | 2 +-
+ extras/hook-scripts/start/post/S31ganesha-start.sh | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd
+index 93fc8be..f91e8b6 100644
+--- a/extras/ganesha/ocf/ganesha_nfsd
++++ b/extras/ganesha/ocf/ganesha_nfsd
+@@ -36,7 +36,7 @@ else
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+ fi
+
+-OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage"
++OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage"
+ : ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}}
+
+ ganesha_meta_data() {
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index a6814b1..9790a71 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -24,7 +24,7 @@ GANESHA_HA_SH=$(realpath $0)
+ HA_NUM_SERVERS=0
+ HA_SERVERS=""
+ HA_VOL_NAME="gluster_shared_storage"
+-HA_VOL_MNT="/var/run/gluster/shared_storage"
++HA_VOL_MNT="/run/gluster/shared_storage"
+ HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
+ SERVICE_MAN="DISTRO_NOT_FOUND"
+
+diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh
+index 90ba6bc..7ad6f23 100755
+--- a/extras/hook-scripts/start/post/S31ganesha-start.sh
++++ b/extras/hook-scripts/start/post/S31ganesha-start.sh
+@@ -4,7 +4,7 @@ OPTSPEC="volname:,gd-workdir:"
+ VOL=
+ declare -i EXPORT_ID
+ ganesha_key="ganesha.enable"
+-GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha"
++GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha"
+ CONF1="$GANESHA_DIR/ganesha.conf"
+ GLUSTERD_WORKDIR=
+
+--
+1.8.3.1
+
diff --git a/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch b/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch
new file mode 100644
index 0000000..79d4d0e
--- /dev/null
+++ b/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch
@@ -0,0 +1,98 @@
+From ccd45222c46b91b4d0cd57db9ea8b1515c97ada0 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Mon, 31 Aug 2020 20:08:39 +0530
+Subject: [PATCH 472/473] geo-rep: gluster_shared_storage fails to automount on
+ node reboot on rhel 8.
+
+Issue: On reboot, all the mounts get wiped out.
+       Only the mounts mentioned in /etc/fstab get mounted automatically
+       during boot/reboot.
+
+       But /etc/fstab complains about not getting a canonical path
+       (it gets a path containing a symlink).
+       This is because gluster_shared_storage is mounted under
+       /var/run, which is a symlink to /run. This is a general practice
+       followed by most operating systems.
+
+       [root@ ~]# ls -lsah /var/run
+       0 lrwxrwxrwx. 1 root root 6 Jul 22 19:39 /var/run -> ../run
+
+Fix: Mount gluster_shared_storage on /run.
+     (Also, /var/run is mostly used by old or legacy systems,
+     so it is good practice to update /var/run to /run.)
+
+>fixes: #1459
+>Change-Id: I8c16653be8cd746c84f01abf0eea19284fb97c77
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+backport of https://review.gluster.org/#/c/glusterfs/+/24934/
+BUG: 1873469
+Change-Id: I8c16653be8cd746c84f01abf0eea19284fb97c77
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/211387
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../set/post/S32gluster_enable_shared_storage.sh | 18 +++++++++---------
+ geo-replication/gsyncd.conf.in | 2 +-
+ 2 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+index 885ed03..3bae37c 100755
+--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+@@ -79,9 +79,9 @@ done
+
+ if [ "$option" == "disable" ]; then
+ # Unmount the volume on all the nodes
+- umount /var/run/gluster/shared_storage
+- cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp
+- mv /var/run/gluster/fstab.tmp /etc/fstab
++ umount /run/gluster/shared_storage
++ cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp
++ mv /run/gluster/fstab.tmp /etc/fstab
+ fi
+
+ if [ "$is_originator" == 1 ]; then
+@@ -105,7 +105,7 @@ function check_volume_status()
+ }
+
+ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
+- /var/run/gluster/shared_storage"
++ /run/gluster/shared_storage"
+
+ if [ "$option" == "enable" ]; then
+ retry=0;
+@@ -120,10 +120,10 @@ if [ "$option" == "enable" ]; then
+ status=$(check_volume_status)
+ done
+ # Mount the volume on all the nodes
+- umount /var/run/gluster/shared_storage
+- mkdir -p /var/run/gluster/shared_storage
++ umount /run/gluster/shared_storage
++ mkdir -p /run/gluster/shared_storage
+ $mount_cmd
+- cp /etc/fstab /var/run/gluster/fstab.tmp
+- echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp
+- mv /var/run/gluster/fstab.tmp /etc/fstab
++ cp /etc/fstab /run/gluster/fstab.tmp
++ echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp
++ mv /run/gluster/fstab.tmp /etc/fstab
+ fi
+diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
+index 11e57fd..9688c79 100644
+--- a/geo-replication/gsyncd.conf.in
++++ b/geo-replication/gsyncd.conf.in
+@@ -123,7 +123,7 @@ type=bool
+ help=Use this to set Active Passive mode to meta-volume.
+
+ [meta-volume-mnt]
+-value=/var/run/gluster/shared_storage
++value=/run/gluster/shared_storage
+ help=Meta Volume or Shared Volume mount path
+
+ [allow-network]
+--
+1.8.3.1
+
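The underlying problem is canonicalization: tools that validate fstab
resolve symlinks, so an entry under /var/run is reported in its canonical
/run form. A minimal sketch showing this with realpath(3) (the printed
result assumes a system where /var/run -> ../run):

    /* canon.c - canonicalize /var/run; fstab wants the resolved form. */
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        char resolved[PATH_MAX];

        /* realpath() follows symlinks and returns the canonical path */
        if (realpath("/var/run", resolved))
            printf("/var/run -> %s\n", resolved);   /* typically /run */
        else
            perror("realpath");
        return 0;
    }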
diff --git a/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch b/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch
new file mode 100644
index 0000000..0629fa7
--- /dev/null
+++ b/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch
@@ -0,0 +1,75 @@
+From 80f1b3aedcde02ae25b341519857ba9a5b2fa722 Mon Sep 17 00:00:00 2001
+From: Sheetal Pamecha <spamecha@redhat.com>
+Date: Thu, 24 Sep 2020 19:43:29 +0530
+Subject: [PATCH 473/473] glusterd: Fix Add-brick with increasing replica count
+ failure
+
+Problem: the add-brick operation fails with a "multiple bricks on the
+same server" error when the replica count is increased.
+
+This happened because of extra iterations in the loop that compares
+hostnames: if fewer bricks than the "replica" count were supplied, a
+brick would get compared to itself, resulting in the above error.
+
+>Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/25029
+>Fixes: #1508
+
+BUG: 1881823
+Change-Id: I8668e964340b7bf59728bb838525d2db062197ed
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/213064
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/brick-order-check-add-brick.t | 21 +++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 4 ++++
+ 2 files changed, 25 insertions(+)
+
+diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t
+index 29f0ed1..0be31da 100644
+--- a/tests/bugs/glusterd/brick-order-check-add-brick.t
++++ b/tests/bugs/glusterd/brick-order-check-add-brick.t
+@@ -37,4 +37,25 @@ EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
+ TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force
+ EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks'
+
++TEST $CLI_1 volume stop $V0
++TEST $CLI_1 volume delete $V0
++
++TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1
++EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks'
++EXPECT 'Created' volinfo_field $V0 'Status'
++
++TEST $CLI_1 volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status'
++
++#Add-brick with Increasing replica count
++TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1
++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
++
++#Add-brick with Increasing replica count from same host should fail
++TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3
++
++#adding multiple bricks from same host should fail the brick order check
++TEST ! $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9}
++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
++
+ cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 545e688..d25fc8a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -14908,6 +14908,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
+ i = 0;
+ ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list);
+
++ if (brick_count < sub_count) {
++ sub_count = brick_count;
++ }
++
+ /* Check for bad brick order */
+ while (i < brick_count) {
+ ++i;
+--
+1.8.3.1
+
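The one-line clamp of sub_count to brick_count is what stops a short brick
list from being compared against itself. A simplified standalone sketch of
the brick-order check it protects (host lists and the helper name are
invented for illustration):

    /* brick_order.c - within each replica set, no two bricks may share a
     * host; clamp the set size to the number of bricks supplied. */
    #include <stdio.h>
    #include <string.h>

    /* returns 0 if the order is fine, -1 if a replica set repeats a host */
    static int check_brick_order(const char *hosts[], int brick_count,
                                 int sub_count)
    {
        /* the fix: with fewer new bricks than the replica count, only
         * compare the bricks actually supplied */
        if (brick_count < sub_count)
            sub_count = brick_count;

        for (int base = 0; base < brick_count; base += sub_count)
            for (int i = base; i < base + sub_count && i < brick_count; i++)
                for (int j = i + 1; j < base + sub_count && j < brick_count; j++)
                    if (strcmp(hosts[i], hosts[j]) == 0)
                        return -1;
        return 0;
    }

    int main(void)
    {
        const char *ok[] = { "h3" };         /* replica 3, one new brick */
        const char *bad[] = { "h1", "h1" };  /* two bricks on one host */

        printf("%d\n", check_brick_order(ok, 1, 3));   /* 0: accepted */
        printf("%d\n", check_brick_order(bad, 2, 3));  /* -1: rejected */
        return 0;
    }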
diff --git a/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch b/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch
new file mode 100644
index 0000000..034a2a2
--- /dev/null
+++ b/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch
@@ -0,0 +1,49 @@
+From 3612b3a46c33d19bb7d4aee6eb6625d8d903d459 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Wed, 17 Jun 2020 10:44:37 +0530
+Subject: [PATCH 474/478] features/locks: posixlk-clear-lock should set error
+ as EINTR
+
+Problem:
+On receiving an interrupt for setlk, fuse sends a clear-lock "fop"
+using a virtual getxattr. At the moment, blocked locks that are
+cleared return the EAGAIN errno as opposed to EINTR.
+
+Fix:
+Return EINTR errno.
+
+Upstream:
+> Reviewed-on: https://review.gluster.org/24587
+> Updates: #1310
+> Change-Id: I47de0fcaec370b267f2f5f89deeb37e1b9c0ee9b
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1821743
+Change-Id: Id8301ce6e21c009949e88db5904d8b6ecc278f66
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/216157
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/locks/src/clear.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
+index 116aed6..ab1eac6 100644
+--- a/xlators/features/locks/src/clear.c
++++ b/xlators/features/locks/src/clear.c
+@@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ if (plock->blocked) {
+ bcount++;
+ pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW,
+- &plock->user_flock, -1, EAGAIN, NULL);
++ &plock->user_flock, -1, EINTR, NULL);
+
+- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN,
++ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR,
+ &plock->user_flock, NULL);
+
+ } else {
+--
+1.8.3.1
+
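The distinction matters to callers: POSIX specifies that a blocking
F_SETLKW interrupted by a signal fails with EINTR, so a lock request that
is cleared because of an interrupt should report the same errno rather
than EAGAIN. A standalone demonstration of that behaviour (a child process
holds the lock while an alarm interrupts the parent's blocked request):

    /* eintr_lock.c - a blocked F_SETLKW interrupted by SIGALRM fails
     * with EINTR, the errno the fix above propagates. */
    #include <errno.h>
    #include <fcntl.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static void on_alarm(int sig) { (void)sig; /* just interrupt fcntl */ }

    int main(void)
    {
        int fd = open("/tmp/lockfile", O_CREAT | O_RDWR, 0600);
        struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
        pid_t child = fork();

        if (child == 0) {        /* child: take the lock and sit on it */
            fcntl(fd, F_SETLKW, &fl);
            pause();
            _exit(0);
        }

        sleep(1);                /* let the child acquire the lock */

        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_alarm;    /* no SA_RESTART: fcntl must fail */
        sigemptyset(&sa.sa_mask);
        sigaction(SIGALRM, &sa, NULL);
        alarm(2);

        if (fcntl(fd, F_SETLKW, &fl) == -1)
            printf("F_SETLKW failed: %s\n", strerror(errno)); /* EINTR */

        kill(child, SIGKILL);
        return 0;
    }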
diff --git a/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch b/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch
new file mode 100644
index 0000000..24a62b3
--- /dev/null
+++ b/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch
@@ -0,0 +1,46 @@
+From 47d8c316f622850d060af90d1d939528ace5607a Mon Sep 17 00:00:00 2001
+From: Csaba Henk <csaba@redhat.com>
+Date: Thu, 14 Feb 2019 02:01:38 +0100
+Subject: [PATCH 475/478] fuse lock interrupt: fix flock_interrupt.t
+
+Upstream:
+> Reviewed-on: https://review.gluster.org/22213
+> updates: bz#1193929
+> Change-Id: I347de62755100cd69e3cf341434767ae23fd1ba4
+> Signed-off-by: Csaba Henk <csaba@redhat.com>
+
+BUG: 1821743
+Change-Id: I0088f804bca215152e7ca2c490402c11f7b5333a
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/216158
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/features/flock_interrupt.t | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t
+index 8603b65..964a4bc 100644
+--- a/tests/features/flock_interrupt.t
++++ b/tests/features/flock_interrupt.t
+@@ -22,12 +22,12 @@ EXPECT 'Started' volinfo_field $V0 'Status';
+ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+ TEST touch $M0/testfile;
+
+-function flock_interrupt {
+- flock $MO/testfile sleep 3 & flock -w 1 $M0/testfile true;
+- echo ok;
+-}
++echo > got_lock
++flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok > got_lock; } &
+
+-EXPECT_WITHIN 2 ok flock_interrupt;
++EXPECT_WITHIN 4 ok cat got_lock;
+
+ ## Finish up
++sleep 7;
++rm -f got_lock;
+ cleanup;
+--
+1.8.3.1
+
diff --git a/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch b/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch
new file mode 100644
index 0000000..6c9d736
--- /dev/null
+++ b/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch
@@ -0,0 +1,114 @@
+From 40519185067d891f06818c574301ea1af4b36479 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Wed, 17 Jun 2020 10:45:19 +0530
+Subject: [PATCH 476/478] mount/fuse: use cookies to get fuse-interrupt-record
+ instead of xdata
+
+Problem:
+On executing tests/features/flock_interrupt.t the following error log
+appears
+[2020-06-16 11:51:54.631072 +0000] E
+[fuse-bridge.c:4791:fuse_setlk_interrupt_handler_cbk] 0-glusterfs-fuse:
+interrupt record not found
+
+This happens because the fuse-interrupt-record is never sent on the wire
+by the getxattr fop, so there is no guarantee that it will be available
+in the cbk in case of failures.
+
+Fix:
+Wind the getxattr fop with the fuse-interrupt-record as a cookie and
+recover it in the cbk.
+
+Upstream:
+> Reviewed-on: https://review.gluster.org/24588
+> Fixes: #1310
+> Change-Id: I4cfff154321a449114fc26e9440db0f08e5c7daa
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1821743
+Change-Id: If9576801654d4d743bd66ae90ca259c4d34746a7
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/216159
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/features/flock_interrupt.t | 1 -
+ xlators/mount/fuse/src/fuse-bridge.c | 28 +++++++---------------------
+ 2 files changed, 7 insertions(+), 22 deletions(-)
+
+diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t
+index 964a4bc..b8717e3 100644
+--- a/tests/features/flock_interrupt.t
++++ b/tests/features/flock_interrupt.t
+@@ -28,6 +28,5 @@ flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok
+ EXPECT_WITHIN 4 ok cat got_lock;
+
+ ## Finish up
+-sleep 7;
+ rm -f got_lock;
+ cleanup;
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index f61fa39..1bddac2 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -4768,16 +4768,8 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+ {
+ fuse_interrupt_state_t intstat = INTERRUPT_NONE;
+- fuse_interrupt_record_t *fir;
++ fuse_interrupt_record_t *fir = cookie;
+ fuse_state_t *state = NULL;
+- int ret = 0;
+-
+- ret = dict_get_bin(xdata, "fuse-interrupt-record", (void **)&fir);
+- if (ret < 0) {
+- gf_log("glusterfs-fuse", GF_LOG_ERROR, "interrupt record not found");
+-
+- goto out;
+- }
+
+ intstat = op_ret >= 0 ? INTERRUPT_HANDLED : INTERRUPT_SQUELCHED;
+
+@@ -4789,7 +4781,6 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie,
+ GF_FREE(state);
+ }
+
+-out:
+ STACK_DESTROY(frame->root);
+
+ return 0;
+@@ -4827,9 +4818,10 @@ fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir)
+ frame->op = GF_FOP_GETXATTR;
+ state->name = xattr_name;
+
+- STACK_WIND(frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol,
+- state->active_subvol->fops->fgetxattr, state->fd, xattr_name,
+- state->xdata);
++ STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir,
++ state->active_subvol,
++ state->active_subvol->fops->fgetxattr, state->fd,
++ xattr_name, state->xdata);
+
+ return;
+
+@@ -4852,15 +4844,9 @@ fuse_setlk_resume(fuse_state_t *state)
+ fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler);
+ state_clone = gf_memdup(state, sizeof(*state));
+ if (state_clone) {
+- /*
+- * Calling this allocator with fir casted to (char *) seems like
+- * an abuse of this API, but in fact the API is stupid to assume
+- * a (char *) argument (in the funcion it's casted to (void *)
+- * anyway).
+- */
+- state_clone->xdata = dict_for_key_value(
+- "fuse-interrupt-record", (char *)fir, sizeof(*fir), _gf_true);
++ state_clone->xdata = dict_new();
+ }
++
+ if (!fir || !state_clone || !state_clone->xdata) {
+ if (fir) {
+ GF_FREE(fir);
+--
+1.8.3.1
+
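The switch to STACK_WIND_COOKIE reflects a general callback idiom: per-call
state that the reply path cannot be trusted to carry back (xdata is not
guaranteed on failures) is captured as an opaque cookie at wind time and
handed straight to the callback. A plain-C sketch of the idiom (every name
is invented for illustration):

    /* cookie_cb.c - pass per-call state to a callback as an opaque
     * cookie instead of relying on the reply to carry it back. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef void (*cbk_t)(void *cookie, int op_ret);

    struct interrupt_record {
        int id;
    };

    /* the "wind": kicks off an operation, remembering the cookie */
    static void wind(cbk_t cbk, void *cookie)
    {
        int op_ret = -1;         /* pretend the remote op failed */
        cbk(cookie, op_ret);     /* the cookie survives regardless */
    }

    static void handler_cbk(void *cookie, int op_ret)
    {
        struct interrupt_record *fir = cookie;   /* always available */
        printf("record %d, op_ret=%d\n", fir->id, op_ret);
        free(fir);
    }

    int main(void)
    {
        struct interrupt_record *fir = malloc(sizeof(*fir));
        fir->id = 42;
        wind(handler_cbk, fir);  /* no dict round-trip needed */
        return 0;
    }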
diff --git a/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch b/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch
new file mode 100644
index 0000000..c604ccd
--- /dev/null
+++ b/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch
@@ -0,0 +1,51 @@
+From 3d50207b346cb5d95af94aa010ebd1ec3e795554 Mon Sep 17 00:00:00 2001
+From: srijan-sivakumar <ssivakum@redhat.com>
+Date: Wed, 4 Nov 2020 11:44:51 +0530
+Subject: [PATCH 477/478] glusterd/snapshot: Snapshot prevalidation failure not
+ failing.
+
+The value of `ret` must be set to `-1` to indicate failure;
+otherwise the prevalidation, which is supposed to fail because
+the snapshot is not even activated for cloning, will move on
+to the next stage.
+
+Label: DOWNSTREAM ONLY
+BUG: 1837926
+
+Change-Id: I95122c3a261332630efa00033a1892a8f95fc00b
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/216920
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Shwetha Acharya <sacharya@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-snapshot.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+index 5b8ae97..ee3cea0 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+@@ -2298,8 +2298,8 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
+ goto out;
+ }
+
+-
+ if (!glusterd_is_volume_started(snap_vol)) {
++ ret = -1;
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot %s is "
+ "not activated",
+@@ -9361,7 +9361,8 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req)
+ "for a snapshot");
+ op_errno = EG_OPNOTSUP;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+- "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_RHS_3_0);
++ "%s (%d < %d)", err_str, conf->op_version,
++ GD_OP_VERSION_RHS_3_0);
+ ret = -1;
+ goto out;
+ }
+--
+1.8.3.1
+
diff --git a/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch b/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch
new file mode 100644
index 0000000..596fe2b
--- /dev/null
+++ b/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch
@@ -0,0 +1,119 @@
+From e772bef5631017145cd0270d72a9ada1378e022a Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Fri, 30 Oct 2020 08:27:47 +0200
+Subject: [PATCH 478/478] DHT - Fixing rebalance failure on issuing stop
+ command
+
+Issuing a stop command for an ongoing rebalance process results in an error.
+This issue was brought up in https://bugzilla.redhat.com/1286171 and a patch
+(https://review.gluster.org/24103/) was submitted to resolve the issue.
+
+However, the submitted patch resolved only part of the
+problem by reducing the number of log messages that were printed (since
+rebalance is currently a recursive process, an error message was printed
+for every directory) but didn't fully resolve the root cause of the
+failure.
+
+This patch fixes the issue by modifying the code-path which handles the
+termination of the rebalance process by issuing a stop command.
+
+Upstream:
+> Reviewed-on: https://github.com/gluster/glusterfs/pull/1628
+> fixes: #1627
+> Change-Id: I604f2b0f8b1ccb1026b8425a14200bbd1dc5bd03
+> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com
+
+BUG: 1286171
+Change-Id: I604f2b0f8b1ccb1026b8425a14200bbd1dc5bd03
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/216896
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index abc10fc..d49a719 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -3113,12 +3113,10 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+ struct dht_container *tmp_container = NULL;
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+- ret = -1;
+ goto out;
+ }
+
+ if (dir_dfmeta->offset_var[i].readdir_done == 1) {
+- ret = 0;
+ goto out;
+ }
+
+@@ -3135,7 +3133,6 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+ &(dir_dfmeta->equeue[i]), xattr_req, NULL);
+ if (ret == 0) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+- ret = 0;
+ goto out;
+ }
+
+@@ -3161,7 +3158,6 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+
+ while (1) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+- ret = -1;
+ goto out;
+ }
+
+@@ -3273,12 +3269,14 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+ }
+
+ out:
+- if (ret == 0) {
+- *container = tmp_container;
+- } else {
+- if (tmp_container) {
++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
++ if (ret == 0) {
++ *container = tmp_container;
++ } else {
+ gf_defrag_free_container(tmp_container);
+ }
++ } else {
++ gf_defrag_free_container(tmp_container);
+ }
+
+ return ret;
+@@ -3487,7 +3485,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ migrate_data, dir_dfmeta, xattr_req,
+ &should_commit_hash, perrno);
+
+- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ goto out;
+ }
+
+@@ -3947,7 +3945,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
+ migrate_data);
+
+- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ goto out;
+ }
+
+@@ -4015,6 +4013,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
+
++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
++ goto out;
++ }
++
+ if (ret && (ret != 2)) {
+ if (perrno == ENOENT || perrno == ESTALE) {
+ ret = 0;
+--
+1.8.3.1
+
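The shape of the fix is a stop check (defrag_status != GF_DEFRAG_STATUS_STARTED)
repeated at every level of the recursive walk, so a stop request unwinds as
a clean success instead of bubbling up as an error. A reduced sketch of
that structure (an atomic flag checked before each descent; the walk itself
is invented):

    /* stop_walk.c - recursive worker that unwinds quietly when a shared
     * stop flag flips, instead of treating the stop as an error. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int running = 1;   /* cleared by a "stop" command */

    /* returns 0 on success or a clean stop, -1 only on a real error */
    static int process_dir(int depth)
    {
        if (!atomic_load(&running))
            return 0;                /* stopped: not a failure */

        for (int i = 0; i < 3; i++) {
            if (!atomic_load(&running))
                return 0;            /* re-check before each descent */
            if (depth < 2 && process_dir(depth + 1) != 0)
                return -1;
        }
        return 0;
    }

    int main(void)
    {
        atomic_store(&running, 0);           /* simulate "rebalance stop" */
        printf("ret=%d\n", process_dir(0));  /* ret=0: clean shutdown */
        return 0;
    }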
diff --git a/0479-ganesha-ha-revised-regex-exprs-for-status.patch b/0479-ganesha-ha-revised-regex-exprs-for-status.patch
new file mode 100644
index 0000000..8bbdf9d
--- /dev/null
+++ b/0479-ganesha-ha-revised-regex-exprs-for-status.patch
@@ -0,0 +1,53 @@
+From 9036c9f0fd081c83c5c4fcd1ecba858421442777 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Tue, 10 Nov 2020 07:39:14 -0500
+Subject: [PATCH 479/479] ganesha-ha: revised regex exprs for --status
+
+better whitespace in regex
+
+This has worked for years, but somehow no longer works on rhel8
+
+> Updates: #1000
+> Change-Id: I2c1a3537573d125608334772ba1a263c55407dd4
+> Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+> https://github.com/gluster/glusterfs/commit/4026fe9a956238d8e4785cf39c3b7290eae90f03
+
+BUG: 1895301
+Change-Id: I2c1a3537573d125608334772ba1a263c55407dd4
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/217480
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 9790a71..491c61d 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -948,18 +948,18 @@ status()
+ # check if the VIP and port block/unblock RAs are on the expected nodes
+ for n in ${nodes[*]}; do
+
+- grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ grep -E -x "${n}-nfs_block +\(ocf::heartbeat:portblock\): +Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+- grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ grep -E -x "${n}-cluster_ip-1 +\(ocf::heartbeat:IPaddr\): +Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+- grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ grep -E -x "${n}-nfs_unblock +\(ocf::heartbeat:portblock\): +Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ done
+
+- grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch}
++ grep -E "\): +Stopped|FAILED" > /dev/null 2>&1 ${scratch}
+ result=$?
+
+ if [ ${result} -eq 0 ]; then
+--
+1.8.3.1
+
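The change replaces runs of literal spaces in the patterns with " +",
which under grep -E (POSIX ERE) matches one or more spaces and therefore
tolerates whatever column alignment the cluster status output uses on
rhel8. The same pattern can be exercised standalone with regcomp(3) (the
node name and sample status line are invented):

    /* regex_status.c - the relaxed pattern matches however many spaces
     * separate the status columns. */
    #include <regex.h>
    #include <stdio.h>

    int main(void)
    {
        /* same shape as the ganesha-ha.sh pattern, for a node "host1" */
        const char *pat =
            "^host1-cluster_ip-1 +\\(ocf::heartbeat:IPaddr\\): +Started host1$";
        const char *line =
            "host1-cluster_ip-1     (ocf::heartbeat:IPaddr):    Started host1";
        regex_t re;

        if (regcomp(&re, pat, REG_EXTENDED | REG_NOSUB) != 0)
            return 1;
        printf("%s\n", regexec(&re, line, 0, NULL, 0) == 0 ? "match"
                                                           : "no match");
        regfree(&re);
        return 0;
    }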
diff --git a/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch b/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch
new file mode 100644
index 0000000..31c404f
--- /dev/null
+++ b/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch
@@ -0,0 +1,255 @@
+From 759c12fc016a6399bb179aa0f930602c87d1e0f8 Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Tue, 24 Nov 2020 12:56:10 +0200
+Subject: [PATCH 480/480] DHT/Rebalance - Ensure Rebalance reports status only
+ once upon stopping
+
+Upon issuing the rebalance stop command, the status of rebalance is
+logged twice to the log file, which can sometimes result in
+inconsistent reports (one report states status stopped, while the other
+may report something else).
+
+This fix ensures rebalance reports its status only once and that the
+correct status is reported.
+
+Upstream:
+> Reviewed-on: https://github.com/gluster/glusterfs/pull/1783
+> fixes: #1782
+> Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
+> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com
+
+BUG: 1286171
+Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/218953
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Csaba Henk <chenk@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/distribute/bug-1286171.t | 75 +++++++++++++++++++++++++++++++++
+ xlators/cluster/dht/src/dht-common.c | 2 +-
+ xlators/cluster/dht/src/dht-common.h | 2 +-
+ xlators/cluster/dht/src/dht-rebalance.c | 63 ++++++++++++++-------------
+ 4 files changed, 108 insertions(+), 34 deletions(-)
+ create mode 100644 tests/bugs/distribute/bug-1286171.t
+
+diff --git a/tests/bugs/distribute/bug-1286171.t b/tests/bugs/distribute/bug-1286171.t
+new file mode 100644
+index 0000000..a2ca36f
+--- /dev/null
++++ b/tests/bugs/distribute/bug-1286171.t
+@@ -0,0 +1,75 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../cluster.rc
++. $(dirname $0)/../../volume.rc
++
++# Initialize
++#------------------------------------------------------------
++cleanup;
++
++volname=bug-1286171
++
++# Start glusterd
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++# Create a volume
++TEST $CLI volume create $volname $H0:$B0/${volname}{1,2}
++
++# Verify volume creation
++EXPECT "$volname" volinfo_field $volname 'Volume Name';
++EXPECT 'Created' volinfo_field $volname 'Status';
++
++# Start volume and verify successful start
++TEST $CLI volume start $volname;
++EXPECT 'Started' volinfo_field $volname 'Status';
++TEST glusterfs --volfile-id=$volname --volfile-server=$H0 --entry-timeout=0 $M0;
++#------------------------------------------------------------
++
++# Create a nested dir structure and some file under MP
++cd $M0;
++for i in {1..5}
++do
++ mkdir dir$i
++ cd dir$i
++ for j in {1..5}
++ do
++ mkdir dir$i$j
++ cd dir$i$j
++ for k in {1..5}
++ do
++ mkdir dir$i$j$k
++ cd dir$i$j$k
++ touch {1..300}
++ cd ..
++ done
++ touch {1..300}
++ cd ..
++ done
++ touch {1..300}
++ cd ..
++done
++touch {1..300}
++
++# Add-brick and start rebalance
++TEST $CLI volume add-brick $volname $H0:$B0/${volname}4;
++TEST $CLI volume rebalance $volname start;
++
++# Let rebalance run for a while
++sleep 5
++
++# Stop rebalance
++TEST $CLI volume rebalance $volname stop;
++
++# Allow rebalance to stop
++sleep 5
++
++# Examine the logfile for errors
++cd /var/log/glusterfs;
++failures=`grep "failures:" ${volname}-rebalance.log | tail -1 | sed 's/.*failures: //; s/,.*//'`;
++
++TEST [ $failures == 0 ];
++
++cleanup;
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 23cc80c..4db89df 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -10969,7 +10969,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
+ (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
+- gf_defrag_status_get(conf, output);
++ gf_defrag_status_get(conf, output, _gf_false);
+ else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
+ gf_defrag_start_detach_tier(defrag);
+ else if (cmd == GF_DEFRAG_CMD_DETACH_START)
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index 9ec5b51..92f1b89 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -1252,7 +1252,7 @@ dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+ int
+-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
++gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status);
+
+ void
+ gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index d49a719..16ac16c 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -2720,7 +2720,6 @@ gf_defrag_migrate_single_file(void *opaque)
+ iatt_ptr = &entry->d_stat;
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+- ret = -1;
+ goto out;
+ }
+
+@@ -3833,7 +3832,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ list_for_each_entry_safe(entry, tmp, &entries.list, list)
+ {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+- ret = 1;
+ goto out;
+ }
+
+@@ -4863,7 +4861,7 @@ out:
+ LOCK(&defrag->lock);
+ {
+ status = dict_new();
+- gf_defrag_status_get(conf, status);
++ gf_defrag_status_get(conf, status, _gf_true);
+ if (ctx && ctx->notify)
+ ctx->notify(GF_EN_DEFRAG_STATUS, status);
+ if (status)
+@@ -4998,7 +4996,7 @@ out:
+ }
+
+ int
+-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
++gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status)
+ {
+ int ret = 0;
+ uint64_t files = 0;
+@@ -5095,34 +5093,35 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left");
+
+ log:
+- switch (defrag->defrag_status) {
+- case GF_DEFRAG_STATUS_NOT_STARTED:
+- status = "not started";
+- break;
+- case GF_DEFRAG_STATUS_STARTED:
+- status = "in progress";
+- break;
+- case GF_DEFRAG_STATUS_STOPPED:
+- status = "stopped";
+- break;
+- case GF_DEFRAG_STATUS_COMPLETE:
+- status = "completed";
+- break;
+- case GF_DEFRAG_STATUS_FAILED:
+- status = "failed";
+- break;
+- default:
+- break;
+- }
++ if (log_status) {
++ switch (defrag->defrag_status) {
++ case GF_DEFRAG_STATUS_NOT_STARTED:
++ status = "not started";
++ break;
++ case GF_DEFRAG_STATUS_STARTED:
++ status = "in progress";
++ break;
++ case GF_DEFRAG_STATUS_STOPPED:
++ status = "stopped";
++ break;
++ case GF_DEFRAG_STATUS_COMPLETE:
++ status = "completed";
++ break;
++ case GF_DEFRAG_STATUS_FAILED:
++ status = "failed";
++ break;
++ default:
++ break;
++ }
+
+- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+- "Rebalance is %s. Time taken is %.2f secs", status, elapsed);
+- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+- "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64
+- ", failures: %" PRIu64
+- ", skipped: "
+- "%" PRIu64,
+- files, size, lookup, failures, skipped);
++ gf_msg("DHT", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
++ "Rebalance is %s. Time taken is %.2f secs "
++ "Files migrated: %" PRIu64 ", size: %" PRIu64
++ ", lookups: %" PRIu64 ", failures: %" PRIu64
++ ", skipped: "
++ "%" PRIu64,
++ status, elapsed, files, size, lookup, failures, skipped);
++ }
+ out:
+ return 0;
+ }
+@@ -5299,7 +5298,7 @@ gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
+ defrag->defrag_status = status;
+
+ if (output)
+- gf_defrag_status_get(conf, output);
++ gf_defrag_status_get(conf, output, _gf_false);
+ ret = 0;
+ out:
+ gf_msg_debug("", 0, "Returning %d", ret);
+--
+1.8.3.1
+
diff --git a/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch b/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch
new file mode 100644
index 0000000..dd9b0ab
--- /dev/null
+++ b/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch
@@ -0,0 +1,33 @@
+From 346aa7cbc34b9bbbaca45180215a4d9ffd5055df Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Fri, 19 Feb 2021 06:19:07 +0000
+Subject: [PATCH 481/481] RHGS-3.5.3 rebuild to ship with RHEL.
+
+Label: DOWNSTREAM ONLY
+BUG: 1930561
+
+Change-Id: I9c7f30cc6bc616344b27072bfde056c7bba1e143
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/228413
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 30d7162..52f9b40 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1983,6 +1983,8 @@ fi
+ %endif
+
+ %changelog
++* Fri Feb 19 2021 Rinku Kothiya <rkothiya@redhat.com>
++- Build RHGS clients for RHEL (#1930561)
+
+ * Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
+ - added requires policycoreutils-python-utils on rhel8 for geo-replication
+--
+1.8.3.1
+
diff --git a/0482-logger-Always-print-errors-in-english.patch b/0482-logger-Always-print-errors-in-english.patch
new file mode 100644
index 0000000..e454bec
--- /dev/null
+++ b/0482-logger-Always-print-errors-in-english.patch
@@ -0,0 +1,49 @@
+From e43af5b15d14e43c3201fd0fb7bf02663e3e0127 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Sat, 7 Nov 2020 12:09:36 +0530
+Subject: [PATCH 482/511] logger: Always print errors in english
+
+Upstream:
+> Reviewed-on: https://github.com/gluster/glusterfs/pull/1657
+> fixes: #1302
+> Change-Id: If0e21f016155276a953c64a8dd13ff3eb281d09d
+> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+
+BUG: 1896425
+
+Change-Id: If0e21f016155276a953c64a8dd13ff3eb281d09d
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/219999
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/logging.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c
+index 7f0eff6..5874c34 100644
+--- a/libglusterfs/src/logging.c
++++ b/libglusterfs/src/logging.c
+@@ -513,6 +513,7 @@ gf_openlog(const char *ident, int option, int facility)
+ {
+ int _option = option;
+ int _facility = facility;
++ char *language = NULL;
+
+ if (-1 == _option) {
+ _option = LOG_PID | LOG_NDELAY;
+@@ -522,7 +523,10 @@ gf_openlog(const char *ident, int option, int facility)
+ }
+
+ /* TODO: Should check for errors here and return appropriately */
+- setlocale(LC_ALL, "");
++ language = setlocale(LC_ALL, "en_US.UTF-8");
++ if (!language)
++ setlocale(LC_ALL, "");
++
+ setlocale(LC_NUMERIC, "C"); /* C-locale for strtod, ... */
+ /* close the previous syslog if open as we are changing settings */
+ closelog();
+--
+1.8.3.1
+
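Note the two-step locale setup the hunk preserves: pick a message locale,
falling back to the environment when en_US.UTF-8 is unavailable, then pin
LC_NUMERIC to "C" so numeric conversions stay stable. A standalone
illustration (locale availability varies by system, so treat the locale
name as an assumption):

    /* locale_demo.c - English messages where possible, but always a
     * "C" numeric locale so strtod() expects '.' as the decimal point. */
    #include <locale.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        /* prefer en_US.UTF-8; fall back to the environment's locale */
        if (!setlocale(LC_ALL, "en_US.UTF-8"))
            setlocale(LC_ALL, "");

        /* without this, strtod("3.14", ...) can stop at the '.' in
         * locales that use ',' as the decimal separator */
        setlocale(LC_NUMERIC, "C");

        printf("%f\n", strtod("3.14", NULL));   /* always 3.140000 */
        return 0;
    }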
diff --git a/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch b/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch
new file mode 100644
index 0000000..c0f2118
--- /dev/null
+++ b/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch
@@ -0,0 +1,150 @@
+From 8c366f34a279a5ab2a6301bfd93534fe746a23e8 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Mon, 7 Dec 2020 09:53:27 +0530
+Subject: [PATCH 483/511] afr: more quorum checks in lookup and new entry
+ marking
+
+Problem: See upstream github issue for details.
+
+Fix:
+- In lookup, if the entry exists in 2 out of 3 bricks, don't fail the
+lookup with ENOENT just because there is an entrylk on the parent.
+Consider quorum before deciding.
+
+- If an entry FOP does not succeed on a quorum number of bricks, do not
+perform the new-entry marking.
+
+Upstream patch details:
+> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24499/
+> Fixes: #1303
+> Change-Id: I56df8c89ad53b29fa450c7930a7b7ccec9f4a6c5
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+BUG: 1821599
+Change-Id: If513e8a7d6088a676288927630d8e616269bf5d5
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220363
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 2 --
+ xlators/cluster/afr/src/afr-common.c | 24 ++++++++++++----------
+ xlators/cluster/afr/src/afr-dir-write.c | 8 ++++++++
+ xlators/cluster/afr/src/afr.h | 4 ++++
+ 4 files changed, 25 insertions(+), 13 deletions(-)
+
+diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
+index 26f9049..49c4dea 100644
+--- a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
++++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
+@@ -53,8 +53,6 @@ TEST ! ls $B0/${V0}1/file$i
+ TEST ls $B0/${V0}2/file$i
+ dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2)
+ TEST [ "$dirty" != "000000000000000000000000" ]
+-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i
+-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i
+
+ TEST $CLI volume set $V0 self-heal-daemon on
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 89e2483..851ccad 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -1236,7 +1236,7 @@ refresh_done:
+ return 0;
+ }
+
+-static void
++void
+ afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
+ {
+@@ -2290,6 +2290,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
+ 0,
+ };
+ gf_boolean_t locked_entry = _gf_false;
++ gf_boolean_t in_flight_create = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
+ inode_t *parent = NULL;
+ ia_type_t ia_type = IA_INVAL;
+@@ -2333,17 +2334,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
+ if (!replies[i].valid)
+ continue;
+
+- if (locked_entry && replies[i].op_ret == -1 &&
+- replies[i].op_errno == ENOENT) {
+- /* Second, check entry is still
+- "underway" in creation */
+- local->op_ret = -1;
+- local->op_errno = ENOENT;
+- goto error;
+- }
+-
+- if (replies[i].op_ret == -1)
++ if (replies[i].op_ret == -1) {
++ if (locked_entry && replies[i].op_errno == ENOENT) {
++ in_flight_create = _gf_true;
++ }
+ continue;
++ }
+
+ if (read_subvol == -1 || !readable[read_subvol]) {
+ read_subvol = i;
+@@ -2353,6 +2349,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
+ }
+ }
+
++ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
++ local->op_ret = -1;
++ local->op_errno = ENOENT;
++ goto error;
++ }
++
+ if (read_subvol == -1)
+ goto error;
+ /* We now have a read_subvol, which is readable[] (if there
+diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
+index 84e2a34..416c19d 100644
+--- a/xlators/cluster/afr/src/afr-dir-write.c
++++ b/xlators/cluster/afr/src/afr-dir-write.c
+@@ -349,6 +349,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
+ afr_private_t *priv = NULL;
+ int pre_op_count = 0;
+ int failed_count = 0;
++ unsigned char *success_replies = NULL;
+
+ local = frame->local;
+ priv = this->private;
+@@ -364,9 +365,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
+ failed_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
+
++ /* FOP succeeded on all bricks. */
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
+
++ /* FOP did not succeed on quorum no. of bricks. */
++ success_replies = alloca0(priv->child_count);
++ afr_fill_success_replies(local, priv, success_replies);
++ if (!afr_has_quorum(success_replies, this, NULL))
++ return;
++
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index ff96246..ed5096e 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -1334,4 +1334,8 @@ afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+
+ void
+ afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
++
++void
++afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
++ unsigned char *replies);
+ #endif /* __AFR_H__ */
+--
+1.8.3.1
+
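Both hunks converge on the same two-step shape: fill a per-brick success
bitmap from the replies, then require quorum before acting on it. A
simplified sketch using a plain majority (the real afr_has_quorum() also
honours configured quorum options):

    /* quorum.c - count per-brick successes into a bitmap and require a
     * majority before treating the entry as present. */
    #include <stdio.h>

    struct reply {
        int valid;
        int op_ret;
    };

    static int has_quorum(const unsigned char *success, int child_count)
    {
        int n = 0;
        for (int i = 0; i < child_count; i++)
            n += success[i];
        return n >= child_count / 2 + 1;    /* simple majority */
    }

    int main(void)
    {
        /* 2 of 3 bricks replied successfully */
        struct reply replies[3] = { { 1, 0 }, { 1, 0 }, { 1, -1 } };
        unsigned char success[3] = { 0 };

        for (int i = 0; i < 3; i++)
            if (replies[i].valid && replies[i].op_ret == 0)
                success[i] = 1;

        if (has_quorum(success, 3))
            printf("quorum met: treat the entry as present\n");
        else
            printf("no quorum: fail with ENOENT\n");
        return 0;
    }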
diff --git a/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch b/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch
new file mode 100644
index 0000000..56d4feb
--- /dev/null
+++ b/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch
@@ -0,0 +1,90 @@
+From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Fri, 26 Jun 2020 12:10:31 +0530
+Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
+ reboot
+
+Problem: while rebalance is in progress, if a node is
+rebooted, rebalance v status shows the stats of this node as
+0 once the node is back.
+
+Reason: when the node is rebooted, once it is back
+glusterd_volume_defrag_restart() starts the rebalance and
+creates the rpc. But due to some race, the rebalance process
+sends a disconnect event, so the rpc object gets destroyed. As
+the rpc object is null, the request for fetching the latest stats
+is not sent to the rebalance process, and the stats are shown as
+the default values, which are 0.
+
+Solution: when the rpc object is null, we should create the rpc if the
+rebalance process is up, so that the request can be sent to the
+rebalance process using the rpc.
+
+>fixes: #1339
+>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
+>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641
+
+BUG: 1832306
+Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
+Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
+ 1 file changed, 20 insertions(+), 9 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index c78983a..df78fef 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ rpc_clnt_t *rpc = NULL;
+ dict_t *rsp_dict = NULL;
+ int32_t cmd = GF_OP_CMD_NONE;
++ glusterd_volinfo_t *volinfo = NULL;
+
+ this = THIS;
+ rsp_dict = dict_new();
+@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
+ {
+ rpc = glusterd_pending_node_get_rpc(pending_node);
++ /* In the case of rebalance if the rpc object is null, we try to
++ * create the rpc object. if the rebalance daemon is down, it returns
++ * -1. otherwise, rpc object will be created and referenced.
++ */
+ if (!rpc) {
+- if (pending_node->type == GD_NODE_REBALANCE) {
+- ret = 0;
+- glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
++ if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
++ volinfo = pending_node->node;
++ ret = glusterd_rebalance_rpc_create(volinfo);
++ if (ret) {
++ ret = 0;
++ glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
++ goto out;
++ } else {
++ rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
++ }
++ } else {
++ ret = -1;
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
++ "Brick Op failed "
++ "due to rpc failure.");
+ goto out;
+ }
+-
+- ret = -1;
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
+- "Brick Op failed "
+- "due to rpc failure.");
+- goto out;
+ }
+
+ /* Redirect operation to be detach tier via rebalance flow. */
+--
+1.8.3.1
+
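The race described above is subtle, so a minimal, runnable sketch of the recovery flow may help; all names and types below are simplified stand-ins for glusterd's internals (rpc_clnt_t, glusterd_rebalance_rpc_create, glusterd_defrag_rpc_get), not the actual API:

    #include <stdio.h>

    struct rpc_handle { int connected; };

    static struct rpc_handle live_rpc = { 1 };
    static int daemon_up = 1;  /* flip to 0 to simulate the daemon being down */

    /* The cached handle was destroyed by the stray disconnect after reboot. */
    static struct rpc_handle *get_cached_rpc(void) { return NULL; }
    static int rebalance_rpc_create(void) { return daemon_up ? 0 : -1; }
    static struct rpc_handle *defrag_rpc_get(void) { return &live_rpc; }

    static struct rpc_handle *rebalance_rpc_lookup(void)
    {
        struct rpc_handle *rpc = get_cached_rpc();
        if (rpc)
            return rpc;
        /* Recreate the rpc if the rebalance daemon is up, instead of
         * giving up and reporting zeroed default stats. */
        if (rebalance_rpc_create() == 0)
            return defrag_rpc_get();
        return NULL; /* daemon really is down: caller reports default stats */
    }

    int main(void)
    {
        printf("rpc %s\n", rebalance_rpc_lookup() ? "recovered" : "unavailable");
        return 0;
    }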
diff --git a/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch b/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch
new file mode 100644
index 0000000..6ed4f1c
--- /dev/null
+++ b/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch
@@ -0,0 +1,87 @@
+From 2e6a5e504e66bc95208420e4882e453a53ac9ea2 Mon Sep 17 00:00:00 2001
+From: schaffung <ssivakum@redhat.com>
+Date: Mon, 2 Nov 2020 11:18:01 +0530
+Subject: [PATCH 485/511] cli-rpc: conditional init of global quota rpc (#1578)
+
+Issue: It is seen that the initialization of the rpc to
+connect with quotad is done in every glusterfs cli command,
+irrespective of whether the quota feature is enabled or disabled.
+This seems to be overkill.
+
+Code change: The presence of the file
+/var/run/gluster/quotad/quotad.pid signals that quotad is
+enabled. Hence we can put a conditional check for whether this
+file exists, and if it doesn't, skip the initialization of the
+global quotad rpc.
+
+This reduces the extra rpc calls and operations being performed.
+
+>Fixes: #1577
+>Change-Id: Icb69d35330f76ce95626f59af75a12726eb620ff
+>Signed-off-by: srijan-sivakumar <ssivakumar@redhat.com>
+Upstream Patch : https://github.com/gluster/glusterfs/pull/1578
+
+BUG: 1885966
+Change-Id: Icb69d35330f76ce95626f59af75a12726eb620ff
+Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220371
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli.c | 18 +++++++++++++-----
+ cli/src/cli.h | 3 +++
+ 2 files changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index 99a16a0..a76c5a2 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -64,8 +64,7 @@
+ extern int connected;
+ /* using argp for command line parsing */
+
+-const char *argp_program_version =
+- PACKAGE_NAME" "PACKAGE_VERSION;
++const char *argp_program_version = PACKAGE_NAME " " PACKAGE_VERSION;
+ const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
+
+ struct rpc_clnt *global_quotad_rpc;
+@@ -840,9 +839,18 @@ main(int argc, char *argv[])
+ if (!global_rpc)
+ goto out;
+
+- global_quotad_rpc = cli_quotad_clnt_rpc_init();
+- if (!global_quotad_rpc)
+- goto out;
++ /*
++ * The global rpc for quota does not need to be
++ * initialized unless quota is enabled. Check for
++ * the quotad pid file here and skip the rpc setup
++ * when quota is not in use.
++ */
++ ret = sys_access(QUOTAD_PID_PATH, F_OK);
++ if (!ret) {
++ global_quotad_rpc = cli_quotad_clnt_rpc_init();
++ if (!global_quotad_rpc)
++ goto out;
++ }
+
+ ret = cli_cmds_register(&state);
+ if (ret)
+diff --git a/cli/src/cli.h b/cli/src/cli.h
+index 37e4d9d..c30ae9c 100644
+--- a/cli/src/cli.h
++++ b/cli/src/cli.h
+@@ -30,6 +30,9 @@
+ #define CLI_TAB_LENGTH 8
+ #define CLI_BRICK_STATUS_LINE_LEN 78
+
++// Quotad pid path.
++#define QUOTAD_PID_PATH "/var/run/gluster/quotad/quotad.pid"
++
+ /* Geo-rep command positional arguments' index */
+ #define GEO_REP_CMD_INDEX 1
+ #define GEO_REP_CMD_CONFIG_INDEX 4
+--
+1.8.3.1
+
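The pid-file gate itself is worth seeing in isolation. A self-contained sketch, assuming only that QUOTAD_PID_PATH (defined in cli.h above) exists while quotad runs; quotad_rpc_init() is a hypothetical stand-in for cli_quotad_clnt_rpc_init():

    #include <stdio.h>
    #include <unistd.h>

    #define QUOTAD_PID_PATH "/var/run/gluster/quotad/quotad.pid"

    /* Hypothetical stand-in for cli_quotad_clnt_rpc_init(). */
    static void *quotad_rpc_init(void) { return (void *)1; }

    int main(void)
    {
        void *quotad_rpc = NULL;

        /* Initialize the quotad rpc only when quotad's pid file exists,
         * i.e. when the quota feature is actually in use. */
        if (access(QUOTAD_PID_PATH, F_OK) == 0)
            quotad_rpc = quotad_rpc_init();

        printf("quotad rpc %s\n", quotad_rpc ? "initialized" : "skipped");
        return 0;
    }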
diff --git a/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch b/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch
new file mode 100644
index 0000000..60750db
--- /dev/null
+++ b/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch
@@ -0,0 +1,87 @@
+From 9b19d4841fc3002d30ec3e44c85ec37682c11bfb Mon Sep 17 00:00:00 2001
+From: schaffung <ssivakum@redhat.com>
+Date: Thu, 22 Oct 2020 13:07:09 +0530
+Subject: [PATCH 486/511] glusterd: brick sock file deleted, log error (#1560)
+
+Issue: The status of the brick as tracked by glusterd is
+"stopped" if the socket file corresponding to a running
+brick process is absent in /var/run/gluster. glusterd
+keeps trying to reconnect (rpc layer) but it fails.
+
+Code change: Rather than registering the rpc connection
+with a sockfilepath that is not even present and then
+reconnecting endlessly, log this as an error and do not
+try to reconnect using the non-existent sock file
+path.
+
+>Fixes: #1526
+>Change-Id: I6c81691ab1624c66dec74f5ffcc6c383201ac757
+>Signed-off-by: srijan-sivakumar <ssivakumar@redhat.com>
+Upstream Patch : https://github.com/gluster/glusterfs/pull/1560
+
+BUG: 1882923
+Change-Id: I6c81691ab1624c66dec74f5ffcc6c383201ac757
+Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220376
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 27 +++++++++++++++++++++++++--
+ 1 file changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index d25fc8a..a72c494 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -6310,7 +6310,7 @@ find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo,
+ check if passed pid is match with running glusterfs process
+ */
+
+-int
++static int
+ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
+ {
+ char fname[128] = "";
+@@ -6383,7 +6383,17 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
+
+ if (tmpsockpath[0]) {
+ strncpy(sockpath, tmpsockpath, i);
+- ret = 0;
++ /*
++ * Check whether the brick socket file is present at
++ * the stated path. This prevents the constant
++ * re-connect attempts triggered in the RPC layer, and
++ * the log message helps the user spot the problem.
++ */
++ ret = sys_access(sockpath, F_OK);
++ if (ret) {
++ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND,
++ "%s not found", sockpath, NULL);
++ }
+ }
+
+ return ret;
+@@ -6581,7 +6591,20 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo,
+ if (!is_brick_mx_enabled()) {
+ glusterd_set_brick_socket_filepath(
+ volinfo, brickinfo, socketpath, sizeof(socketpath));
++ /*
++ * Check whether the brick socket file is present at
++ * the stated path. This prevents the constant
++ * re-connect attempts triggered in the RPC layer, and
++ * the log message helps the user spot the problem.
++ */
++ ret = sys_access(socketpath, F_OK);
++ if (ret) {
++ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND,
++ "%s not found", socketpath, NULL);
++ goto out;
++ }
+ }
++
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Using %s as sockfile for brick %s of volume %s ",
+ socketpath, brickinfo->path, volinfo->volname);
+--
+1.8.3.1
+
diff --git a/0487-Events-Log-file-not-re-opened-after-logrotate.patch b/0487-Events-Log-file-not-re-opened-after-logrotate.patch
new file mode 100644
index 0000000..ac0d1cc
--- /dev/null
+++ b/0487-Events-Log-file-not-re-opened-after-logrotate.patch
@@ -0,0 +1,56 @@
+From c961ee1d7c1abb2552b79ed39ed7fd1bd1b3962f Mon Sep 17 00:00:00 2001
+From: srijan-sivakumar <ssivakum@redhat.com>
+Date: Fri, 7 Aug 2020 15:02:07 +0530
+Subject: [PATCH 487/511] Events: Log file not re-opened after logrotate.
+
+Issue: Logging continues into the same file even after
+the logrotate utility has rotated it, so the rotated
+file keeps growing indefinitely.
+
+Code Changes: Use the WatchedFileHandler class instead of
+the FileHandler class. It watches the file it is logging
+into, and if the file changes, it is closed and reopened
+using the file name. Hence, after a rotation, a new file
+will be used for logging instead of continuing with
+the same old file.
+
+>Fixes: #1289
+>Change-Id: I773d04f17613a03709cb682692efb39fd8e664e2
+>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Upstream Patch : https://review.gluster.org/c/glusterfs/+/24820
+
+BUG: 1814744
+Change-Id: I773d04f17613a03709cb682692efb39fd8e664e2
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220370
+Reviewed-by: Shwetha Acharya <sacharya@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ events/src/utils.py | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/events/src/utils.py b/events/src/utils.py
+index 38b707a..6d4e079 100644
+--- a/events/src/utils.py
++++ b/events/src/utils.py
+@@ -13,6 +13,7 @@ import sys
+ import json
+ import os
+ import logging
++import logging.handlers
+ import fcntl
+ from errno import EBADF
+ from threading import Thread
+@@ -98,7 +99,7 @@ def setup_logger():
+ logger.setLevel(logging.INFO)
+
+ # create the logging file handler
+- fh = logging.FileHandler(LOG_FILE)
++ fh = logging.handlers.WatchedFileHandler(LOG_FILE)
+
+ formatter = logging.Formatter("[%(asctime)s] %(levelname)s "
+ "[%(module)s - %(lineno)s:%(funcName)s] "
+--
+1.8.3.1
+
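The behavioural difference between the two handler classes is easy to demonstrate outside gluster; this standalone Python snippet (the log path is illustrative, not the events daemon's) shows WatchedFileHandler reopening the file after a simulated rotation:

    import logging
    import logging.handlers
    import os

    LOG_FILE = "/tmp/demo-events.log"  # illustrative path

    logger = logging.getLogger("demo")
    logger.setLevel(logging.INFO)
    # WatchedFileHandler re-stats the file on every emit; if the
    # dev/inode pair changed (logrotate renamed it), it reopens by name.
    fh = logging.handlers.WatchedFileHandler(LOG_FILE)
    fh.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s %(message)s"))
    logger.addHandler(fh)

    logger.info("written before rotation")
    os.rename(LOG_FILE, LOG_FILE + ".1")   # what logrotate effectively does
    logger.info("written after rotation")  # lands in a freshly created file

    print("rotated copy exists:", os.path.exists(LOG_FILE + ".1"))
    print("new log recreated:", os.path.exists(LOG_FILE))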
diff --git a/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch b/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch
new file mode 100644
index 0000000..310bc53
--- /dev/null
+++ b/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch
@@ -0,0 +1,864 @@
+From 0502383024cbf7e4776816e0a992dccc484a3cf2 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Tue, 8 Dec 2020 17:23:22 +0530
+Subject: [PATCH 488/511] glusterd/afr: enable granular-entry-heal by default
+
+XXXXXXXXXXXXXXXXXXX
+ IMPORTANT:
+XXXXXXXXXXXXXXXXXXXX
+I see that for rhgs-3.5.3, GD_OP_VERSION_MAX is GD_OP_VERSION_7_0. Since
+this patch should only act on new volumes in rhgs-3.5.4, I am bumping
+the op-version to GD_OP_VERSION_7_1. In glusterfs upstream, the patch
+acts only if op-version >= GD_OP_VERSION_9_0 as seen in the commit
+message below.
+
+Upstream patch details:
+/------------------------------------------------------------------------------/
+1. The option has been enabled and tested for quite some time now in RHHI-V
+downstream and I think it is safe to make it 'on' by default. Since it
+is not possible to simply change it from 'off' to 'on' without breaking
+rolling upgrades, old clients etc., I have made it default only for new volumes
+starting from op-version GD_OP_VERSION_9_0.
+
+Note: If you do a volume reset, the option will be turned back off.
+This is okay as the dir's gfid will be captured in 'xattrop' folder and heals
+will proceed. There might be stale entries inside entry-changes' folder,
+which will be removed when we enable the option again.
+
+2. I encountered a customer issue where entry heal was pending on a dir with
+236436 files in it and the glustershd.log output was just stuck at
+"performing entry selfheal", so I have added logs to give us
+more info at DEBUG level about whether entry heal and data heal are
+progressing (metadata heal doesn't take much time). That way, we have a
+quick visual indication that things are not 'stuck' if we briefly
+enable debug logs, instead of taking statedumps or checking profile info
+etc.
+
+>Fixes: #1483
+>Change-Id: I4f116f8c92f8cd33f209b758ff14f3c7e1981422
+>Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Upstream Patch: https://github.com/gluster/glusterfs/pull/1621
+/------------------------------------------------------------------------------/
+
+BUG: 1890506
+Change-Id: If449a1e873633616cfc508d74b5c22eb434b55ae
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220555
+Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h | 4 +-
+ libglusterfs/src/syncop-utils.c | 4 +-
+ tests/basic/afr/add-brick-self-heal-non-granular.t | 75 +++++++++++++
+ tests/basic/afr/add-brick-self-heal.t | 4 +-
+ tests/basic/afr/bug-1130892-non-granular.t | 77 ++++++++++++++
+ .../basic/afr/bug-1493415-gfid-heal-non-granular.t | 79 ++++++++++++++
+ ...507-type-mismatch-error-handling-non-granular.t | 117 +++++++++++++++++++++
+ ...1749322-entry-heal-not-happening-non-granular.t | 90 ++++++++++++++++
+ .../afr/replace-brick-self-heal-non-granular.t | 65 ++++++++++++
+ tests/basic/afr/replace-brick-self-heal.t | 2 +-
+ tests/bugs/replicate/bug-1130892.t | 2 +-
+ tests/bugs/replicate/bug-1493415-gfid-heal.t | 2 +-
+ .../bug-1722507-type-mismatch-error-handling.t | 26 +++--
+ .../bug-1749322-entry-heal-not-happening.t | 7 +-
+ xlators/cluster/afr/src/afr-self-heal-common.c | 5 +
+ xlators/cluster/afr/src/afr-self-heal-data.c | 3 +
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 7 +-
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 13 +++
+ 18 files changed, 558 insertions(+), 24 deletions(-)
+ create mode 100644 tests/basic/afr/add-brick-self-heal-non-granular.t
+ create mode 100644 tests/basic/afr/bug-1130892-non-granular.t
+ create mode 100644 tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
+ create mode 100644 tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
+ create mode 100644 tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
+ create mode 100644 tests/basic/afr/replace-brick-self-heal-non-granular.t
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 31717ed..cc145cd 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -50,7 +50,7 @@
+ 1 /* MIN is the fresh start op-version, mostly \
+ should not change */
+ #define GD_OP_VERSION_MAX \
+- GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \
++ GD_OP_VERSION_7_1 /* MAX VERSION is the maximum \
+ count in VME table, should \
+ keep changing with \
+ introduction of newer \
+@@ -138,6 +138,8 @@
+
+ #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
+
++#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
++
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+
+diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
+index be03527..2269c76 100644
+--- a/libglusterfs/src/syncop-utils.c
++++ b/libglusterfs/src/syncop-utils.c
+@@ -495,9 +495,7 @@ syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+- ret = fn(subvol, entry, loc, data);
+- if (ret)
+- break;
++ ret |= fn(subvol, entry, loc, data);
+ }
+ gf_dirent_free(&entries);
+ if (ret)
+diff --git a/tests/basic/afr/add-brick-self-heal-non-granular.t b/tests/basic/afr/add-brick-self-heal-non-granular.t
+new file mode 100644
+index 0000000..19caf24
+--- /dev/null
++++ b/tests/basic/afr/add-brick-self-heal-non-granular.t
+@@ -0,0 +1,75 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
++EXPECT 'Created' volinfo_field $V0 'Status';
++TEST $CLI volume set $V0 cluster.granular-entry-heal off
++TEST $CLI volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status';
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
++
++TEST $CLI volume set $V0 cluster.data-self-heal off
++TEST $CLI volume set $V0 cluster.metadata-self-heal off
++TEST $CLI volume set $V0 cluster.entry-self-heal off
++TEST $CLI volume set $V0 cluster.heal-timeout 5
++
++TEST $CLI volume set $V0 self-heal-daemon off
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
++
++# Create files
++for i in {1..5}
++do
++ echo $i > $M0/file$i.txt
++done
++
++# Metadata changes
++TEST setfattr -n user.test -v qwerty $M0/file5.txt
++
++# Add brick1
++TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
++
++# New-brick should accuse the old-bricks (Simulating case for data-loss)
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
++
++# Check if pending xattr and dirty-xattr are set for newly-added-brick
++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
++EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++
++TEST $CLI volume set $V0 self-heal-daemon on
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++
++# Wait for heal to complete
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++# Check if entry-heal has happened
++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
++TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
++
++# Test if data was healed
++TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt
++
++# Test if metadata was healed and exists on both the bricks
++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt
++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
++
++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2
++
++cleanup;
+diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t
+index c847e22..7ebf4f6 100644
+--- a/tests/basic/afr/add-brick-self-heal.t
++++ b/tests/basic/afr/add-brick-self-heal.t
+@@ -38,8 +38,8 @@ TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0
+ TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
+
+ # Check if pending xattr and dirty-xattr are set for newly-added-brick
+-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+ EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+diff --git a/tests/basic/afr/bug-1130892-non-granular.t b/tests/basic/afr/bug-1130892-non-granular.t
+new file mode 100644
+index 0000000..3cdbc7d
+--- /dev/null
++++ b/tests/basic/afr/bug-1130892-non-granular.t
+@@ -0,0 +1,77 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume info;
++
++# Create a 1X2 replica
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
++EXPECT 'Created' volinfo_field $V0 'Status';
++TEST $CLI volume set $V0 cluster.granular-entry-heal off
++
++# Disable self-heal daemon
++TEST gluster volume set $V0 self-heal-daemon off
++
++# Enable Client side heal
++TEST $CLI volume set $V0 cluster.data-self-heal off
++TEST $CLI volume set $V0 cluster.metadata-self-heal off
++TEST $CLI volume set $V0 cluster.entry-self-heal off
++
++# Disable all perf-xlators
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume set $V0 performance.read-ahead off
++
++# Volume start
++TEST $CLI volume start $V0;
++EXPECT 'Started' volinfo_field $V0 'Status';
++
++# FUSE Mount
++TEST ${GFS} -s $H0 --volfile-id $V0 $M0
++
++# Create files and dirs
++TEST mkdir -p $M0/one/two/
++TEST `echo "Carpe diem" > $M0/one/two/three`
++
++# Simulate disk-replacement
++TEST kill_brick $V0 $H0 $B0/${V0}-1
++EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
++TEST rm -rf $B0/${V0}-1/one
++TEST rm -rf $B0/${V0}-1/.glusterfs
++
++#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
++#which will create .glusterfs folder.
++mkdir $B0/${V0}-1/.glusterfs && chmod 600 $B0/${V0}-1/.glusterfs
++
++# Start force
++TEST $CLI volume start $V0 force
++
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++
++TEST stat $M0/one
++
++sleep 1
++
++# Check pending xattrs
++EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
++EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
++EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
++
++TEST gluster volume set $V0 self-heal-daemon on
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one
++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two
++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two/three
++
++cleanup;
+diff --git a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
+new file mode 100644
+index 0000000..aff001c
+--- /dev/null
++++ b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
+@@ -0,0 +1,79 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
++TEST $CLI volume set $V0 cluster.granular-entry-heal off
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++TEST $CLI volume set $V0 self-heal-daemon off
++
++# Create base entry in indices/xattrop
++echo "Data" > $M0/FILE
++
++#------------------------------------------------------------------------------#
++TEST touch $M0/f1
++gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1)
++gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
++
++# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
++# brick crash at the point where file got created but no xattrs were set.
++TEST setfattr -x trusted.gfid $B0/${V0}1/f1
++TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
++
++# storage/posix considers that a file without gfid changed less than a second
++# before doesn't exist, so we need to wait for a second to force posix to
++# consider that this is a valid file but without gfid.
++sleep 2
++
++# Assume there were no pending xattrs on parent dir due to 1st brick crashing
++# too. Then name heal from client must heal the gfid.
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++TEST stat $M0/f1
++EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1
++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
++
++#------------------------------------------------------------------------------#
++TEST mkdir $M0/dir
++TEST touch $M0/dir/f2
++gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2)
++gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
++
++# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
++# brick crash at the point where file got created but no xattrs were set.
++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
++TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
++
++#Now simulate setting of pending entry xattr on parent dir of 1st brick.
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
++create_brick_xattrop_entry $B0/${V0}0 dir
++
++# storage/posix considers that a file without gfid changed less than a second
++# before doesn't exist, so we need to wait for a second to force posix to
++# consider that this is a valid file but without gfid.
++sleep 2
++
++#Trigger entry-heal via shd
++TEST $CLI volume set $V0 self-heal-daemon on
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2
++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
++
++#------------------------------------------------------------------------------#
++cleanup;
+diff --git a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
+new file mode 100644
+index 0000000..9079c93
+--- /dev/null
++++ b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
+@@ -0,0 +1,117 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++## Start and create a volume
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume set $V0 cluster.granular-entry-heal off
++TEST $CLI volume start $V0;
++TEST $CLI volume set $V0 cluster.heal-timeout 5
++TEST $CLI volume heal $V0 disable
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST mkdir $M0/dir
++
++##########################################################################################
++# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
++
++TEST touch $M0/dir/file
++TEST `echo append>> $M0/dir/file`
++
++#B0 and B2 must blame B1
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++
++# Add entry to xattrop dir to trigger index heal.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++EXPECT "^1$" get_pending_heal_count $V0
++
++# Remove the gfid xattr and the link file on one brick.
++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++
++# Launch heal
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++
++# Wait for 2 second to force posix to consider that this is a valid file but
++# without gfid.
++sleep 2
++TEST $CLI volume heal $V0
++
++# Heal should not fail as the file is missing gfid xattr and the link file,
++# which is not actually the gfid or type mismatch.
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++rm -f $M0/dir/file
++
++
++###########################################################################################
++# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
++
++TEST $CLI volume heal $V0 disable
++TEST touch $M0/dir/file
++#TEST kill_brick $V0 $H0 $B0/$V0"1"
++
++#B0 and B2 must blame B1
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++
++# Add entry to xattrop dir to trigger index heal.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++EXPECT "^1$" get_pending_heal_count $V0
++
++# Remove the gfid xattr and the link file on two bricks.
++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
++TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++
++# Launch heal
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++
++# Wait for 2 second to force posix to consider that this is a valid file but
++# without gfid.
++sleep 2
++TEST $CLI volume heal $V0
++
++# Heal should not fail as the file is missing gfid xattr and the link file,
++# which is not actually the gfid or type mismatch.
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
++
++cleanup
+diff --git a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
+new file mode 100644
+index 0000000..4f27da4
+--- /dev/null
++++ b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
+@@ -0,0 +1,90 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup
++
++function check_gfid_and_link_count
++{
++ local file=$1
++
++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
++ TEST [ ! -z $file_gfid_b0 ]
++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
++ EXPECT $file_gfid_b0 echo $file_gfid_b1
++ EXPECT $file_gfid_b0 echo $file_gfid_b2
++
++ EXPECT "2" stat -c %h $B0/${V0}0/$file
++ EXPECT "2" stat -c %h $B0/${V0}1/$file
++ EXPECT "2" stat -c %h $B0/${V0}2/$file
++}
++TESTS_EXPECTED_IN_LOOP=18
++
++################################################################################
++## Start and create a volume
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume set $V0 cluster.granular-entry-heal off
++TEST $CLI volume start $V0;
++TEST $CLI volume set $V0 cluster.heal-timeout 5
++TEST $CLI volume heal $V0 disable
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST mkdir $M0/dir
++TEST `echo "File 1 " > $M0/dir/file1`
++TEST touch $M0/dir/file{2..4}
++
++# Remove file2 from 1st & 3rd bricks
++TEST rm -f $B0/$V0"0"/dir/file2
++TEST rm -f $B0/$V0"2"/dir/file2
++
++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
++TEST rm -f $B0/$V0"0"/dir/file3
++TEST rm -f $B0/$V0"1"/dir/file3
++
++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
++
++# B0 and B2 blame each other
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++
++# Add entry to xattrop dir on first brick.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++
++EXPECT "^1$" get_pending_heal_count $V0
++
++# Launch heal
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++# All the files must be present on all the bricks after conservative merge and
++# should have the gfid xattr and the .glusterfs hardlink.
++check_gfid_and_link_count dir/file1
++check_gfid_and_link_count dir/file2
++check_gfid_and_link_count dir/file3
++check_gfid_and_link_count dir/file4
++
++cleanup
+diff --git a/tests/basic/afr/replace-brick-self-heal-non-granular.t b/tests/basic/afr/replace-brick-self-heal-non-granular.t
+new file mode 100644
+index 0000000..c86bff1
+--- /dev/null
++++ b/tests/basic/afr/replace-brick-self-heal-non-granular.t
+@@ -0,0 +1,65 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
++TEST $CLI volume set $V0 cluster.granular-entry-heal off
++TEST $CLI volume start $V0
++TEST $CLI volume set $V0 cluster.data-self-heal off
++TEST $CLI volume set $V0 cluster.metadata-self-heal off
++TEST $CLI volume set $V0 cluster.entry-self-heal off
++TEST $CLI volume set $V0 cluster.heal-timeout 5
++TEST $CLI volume set $V0 self-heal-daemon off
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
++
++# Create files
++for i in {1..5}
++do
++ echo $i > $M0/file$i.txt
++done
++
++# Metadata changes
++TEST setfattr -n user.test -v qwerty $M0/file5.txt
++
++# Replace brick1
++TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
++
++# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss)
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
++
++# Check if pending xattr and dirty-xattr are set for replaced-brick
++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
++EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++
++TEST $CLI volume set $V0 self-heal-daemon on
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++TEST $CLI volume heal $V0
++
++# Wait for heal to complete
++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
++
++# Check if entry-heal has happened
++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort)
++
++# To make sure that files were not lost from brick0
++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
++
++# Test if data was healed
++TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt
++# To make sure that data was not lost from brick0
++TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt
++
++# Test if metadata was healed and exists on both the bricks
++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt
++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
++
++cleanup;
+diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t
+index 0360db7..da31c87 100644
+--- a/tests/basic/afr/replace-brick-self-heal.t
++++ b/tests/basic/afr/replace-brick-self-heal.t
+@@ -30,7 +30,7 @@ TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit forc
+ TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
+
+ # Check if pending xattr and dirty-xattr are set for replaced-brick
+-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+ EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
+
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t
+index 0f57d66..e23eb26 100644
+--- a/tests/bugs/replicate/bug-1130892.t
++++ b/tests/bugs/replicate/bug-1130892.t
+@@ -56,7 +56,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+ TEST stat $M0/one
+
+ # Check pending xattrs
+-EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
+ EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
+ EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
+
+diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t
+index 125c35a..9714d5e 100644
+--- a/tests/bugs/replicate/bug-1493415-gfid-heal.t
++++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t
+@@ -49,7 +49,7 @@ TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
+ TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+ #Now simulate setting of pending entry xattr on parent dir of 1st brick.
+-TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/${V0}0/dir
+ create_brick_xattrop_entry $B0/${V0}0 dir
+
+ #Trigger entry-heal via shd
+diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
+index 0aeaaaf..1fdf7ea 100644
+--- a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
++++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
+@@ -23,19 +23,21 @@ TEST mkdir $M0/dir
+ ##########################################################################################
+ # GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
+
+-TEST touch $M0/dir/file
+-#TEST kill_brick $V0 $H0 $B0/$V0"1"
++TEST `echo append>> $M0/dir/file`
+
+ #B0 and B2 must blame B1
+-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
++# Another way is to create the needed entries inside indices/entry-changes
++# folder.
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
+
+ # Add entry to xattrop dir to trigger index heal.
+ xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+ base_entry_b0=`ls $xattrop_dir0`
+ gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+ EXPECT "^1$" get_pending_heal_count $V0
+
+ # Remove the gfid xattr and the link file on one brick.
+@@ -70,18 +72,20 @@ rm -f $M0/dir/file
+
+ TEST $CLI volume heal $V0 disable
+ TEST touch $M0/dir/file
+-#TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+ #B0 and B2 must blame B1
+-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
++# Another way is to create the needed entries inside indices/entry-changes
++# folder.
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
+
+ # Add entry to xattrop dir to trigger index heal.
+ xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+ base_entry_b0=`ls $xattrop_dir0`
+ gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+ EXPECT "^1$" get_pending_heal_count $V0
+
+ # Remove the gfid xattr and the link file on two bricks.
+diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+index 9627908..3da873a 100644
+--- a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+@@ -59,8 +59,11 @@ TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_
+ TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
+
+ # B0 and B2 blame each other
+-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
++# Another way is to create the needed entries inside indices/entry-changes
++# folder.
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
+
+ # Add entry to xattrop dir on first brick.
+ xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 1608f75..36fd3a9 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -2549,6 +2549,11 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
+ }
+ }
+
++ gf_msg_debug(
++ this->name, 0,
++ "heals needed for %s: [entry-heal=%d, metadata-heal=%d, data-heal=%d]",
++ uuid_utoa(gfid), entry_selfheal, metadata_selfheal, data_selfheal);
++
+ if (data_selfheal && priv->data_self_heal)
+ data_ret = afr_selfheal_data(frame, this, fd);
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
+index cdff4a5..b97c66b 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-data.c
++++ b/xlators/cluster/afr/src/afr-self-heal-data.c
+@@ -239,6 +239,9 @@ afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ sink_count = AFR_COUNT(healed_sinks, priv->child_count);
+ data_lock = alloca0(priv->child_count);
+
++ gf_msg_debug(this->name, 0, "gfid:%s, offset=%jd, size=%zu",
++ uuid_utoa(fd->inode->gfid), offset, size);
++
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
+ data_lock);
+ {
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index 40be898..00b5b2d 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -206,8 +206,11 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ replies);
+ } else {
+ if (!gf_uuid_compare(replies[i].poststat.ia_gfid,
+- replies[source].poststat.ia_gfid))
++ replies[source].poststat.ia_gfid)) {
++ gf_msg_debug(this->name, 0, "skipping %s, no heal needed.",
++ name);
+ continue;
++ }
+
+ ret = afr_selfheal_recreate_entry(frame, i, source, sources,
+ fd->inode, name, inode, replies);
+@@ -839,7 +842,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
+
+ out:
+ loc_wipe(&loc);
+- return 0;
++ return ret;
+ }
+
+ static int
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index a72c494..bd17a82 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -13181,6 +13181,19 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option)
+ goto out;
+ }
+ }
++
++ if ((conf->op_version >= GD_OP_VERSION_7_1) &&
++ (volinfo->status == GLUSTERD_STATUS_NONE)) {
++ ret = dict_set_dynstr_with_alloc(volinfo->dict,
++ "cluster.granular-entry-heal", "on");
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
++ "Failed to set option 'cluster.granular-entry-heal' "
++ "on volume %s",
++ volinfo->volname);
++ goto out;
++ }
++ }
+ out:
+ return ret;
+ }
+--
+1.8.3.1
+
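Apart from the op-version bump, this patch also changes syncop_dir_scan() from breaking on the first failing entry to aggregating results with ret |= fn(...), so one bad dirent no longer aborts the whole scan. A toy model of that aggregation pattern:

    #include <stdio.h>

    /* Toy per-entry callback that fails for one entry. */
    static int heal_entry(int entry) { return (entry == 2) ? -1 : 0; }

    int main(void)
    {
        int ret = 0;
        for (int entry = 0; entry < 5; entry++)
            ret |= heal_entry(entry); /* visit all entries, remember failures */
        /* Before this patch, the loop would 'break' on the first failure,
         * leaving the remaining entries unprocessed. */
        printf("entries visited: 5, final ret: %d\n", ret);
        return 0;
    }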
diff --git a/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch b/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch
new file mode 100644
index 0000000..dde2156
--- /dev/null
+++ b/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch
@@ -0,0 +1,141 @@
+From 2d172144810956225eac3599c943416c4a7e25d0 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Tue, 8 Dec 2020 20:30:23 +0530
+Subject: [PATCH 489/511] glusterd: fix bug in enabling granular-entry-heal
+
+Upstream patch details:
+/------------------------------------------------------------------------------/
+commit f5e1eb87d4af44be3b317b7f99ab88f89c2f0b1a meant to enable the
+volume option only for replica volumes but inadvertently enabled
+it for all volume types. Fixing it now.
+
+Also found a bug in glusterd where disabling the option on plain
+distribute was succeeding even though setting it in the first place
+fails. Fixed that too.
+
+>Fixes: #1483
+>Change-Id: Icb6c169a8eec44cc4fb4dd636405d3b3485e91b4
+>Reported-by: Sheetal Pamecha <spamecha@redhat.com>
+>Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Upstream Patch: https://github.com/gluster/glusterfs/pull/1752
+/------------------------------------------------------------------------------/
+
+BUG: 1890506
+Change-Id: Id63655dac08d2cfda4899d7ee0efe96e72cd6986
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220556
+Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/afr/granular-esh/cli.t | 30 ++++++++++++++++++++-----
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++-
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 12 +++++-----
+ 3 files changed, 34 insertions(+), 11 deletions(-)
+
+diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t
+index 995d93e..5ab2e39 100644
+--- a/tests/basic/afr/granular-esh/cli.t
++++ b/tests/basic/afr/granular-esh/cli.t
+@@ -11,25 +11,38 @@ TESTS_EXPECTED_IN_LOOP=4
+ TEST glusterd
+ TEST pidof glusterd
+
+-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+-# Test that enabling the option should work on a newly created volume
+-TEST $CLI volume set $V0 cluster.granular-entry-heal on
+-TEST $CLI volume set $V0 cluster.granular-entry-heal off
+-
+ #########################
+ ##### DISPERSE TEST #####
+ #########################
+ # Execute the same command on a disperse volume and make sure it fails.
+ TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2}
++EXPECT "no" volume_get_field $V1 cluster.granular-entry-heal
++TEST $CLI volume start $V1
++TEST ! $CLI volume heal $V1 granular-entry-heal enable
++TEST ! $CLI volume heal $V1 granular-entry-heal disable
++
++TEST $CLI volume stop $V1
++TEST $CLI volume delete $V1
++
++#########################
++##### PLAIN DISTRIBUTE TEST #####
++#########################
++# Execute the same command on a distribute volume and make sure it fails.
++TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1,2}
++EXPECT "no" volume_get_field $V1 cluster.granular-entry-heal
+ TEST $CLI volume start $V1
+ TEST ! $CLI volume heal $V1 granular-entry-heal enable
+ TEST ! $CLI volume heal $V1 granular-entry-heal disable
++TEST $CLI volume stop $V1
++TEST $CLI volume delete $V1
+
+ #######################
+ ###### TIER TEST ######
+ #######################
+ # Execute the same command on a disperse + replicate tiered volume and make
+ # sure the option is set on the replicate leg of the volume
++TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2}
++TEST $CLI volume start $V1
+ TEST $CLI volume tier $V1 attach replica 2 $H0:$B0/${V1}{3,4}
+ TEST $CLI volume heal $V1 granular-entry-heal enable
+ EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal
+@@ -52,10 +65,17 @@ TEST kill_brick $V1 $H0 $B0/${V1}3
+ # failed.
+ TEST ! $CLI volume heal $V1 granular-entry-heal enable
+ EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal
++TEST $CLI volume stop $V1
++TEST $CLI volume delete $V1
+
+ ######################
+ ### REPLICATE TEST ###
+ ######################
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++EXPECT "on" volume_get_field $V0 cluster.granular-entry-heal
++# Test that enabling the option should work on a newly created volume
++TEST $CLI volume set $V0 cluster.granular-entry-heal on
++TEST $CLI volume set $V0 cluster.granular-entry-heal off
+ TEST $CLI volume start $V0
+ TEST $CLI volume set $V0 cluster.data-self-heal off
+ TEST $CLI volume set $V0 cluster.metadata-self-heal off
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index bd17a82..ad3750e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -13183,7 +13183,8 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option)
+ }
+
+ if ((conf->op_version >= GD_OP_VERSION_7_1) &&
+- (volinfo->status == GLUSTERD_STATUS_NONE)) {
++ (volinfo->status == GLUSTERD_STATUS_NONE) &&
++ (volinfo->type == GF_CLUSTER_TYPE_REPLICATE)) {
+ ret = dict_set_dynstr_with_alloc(volinfo->dict,
+ "cluster.granular-entry-heal", "on");
+ if (ret) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 134b04c..09e6ead 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -621,11 +621,13 @@ glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict,
+ goto out;
+ }
+
+- if (((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) ||
+- (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) &&
+- (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) {
+- ret = -1;
+- goto out;
++ if ((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) ||
++ (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) {
++ if ((volinfo->type != GF_CLUSTER_TYPE_REPLICATE) &&
++ (volinfo->type != GF_CLUSTER_TYPE_TIER)) {
++ ret = -1;
++ goto out;
++ }
+ }
+
+ if ((heal_op == GF_SHD_OP_HEAL_ENABLE) ||
+--
+1.8.3.1
+
diff --git a/0490-Segmentation-fault-occurs-during-truncate.patch b/0490-Segmentation-fault-occurs-during-truncate.patch
new file mode 100644
index 0000000..bd3c777
--- /dev/null
+++ b/0490-Segmentation-fault-occurs-during-truncate.patch
@@ -0,0 +1,57 @@
+From 5a110946b41619577b365cdceddc4da551ff49f0 Mon Sep 17 00:00:00 2001
+From: kinsu <vpolakis@gmail.com>
+Date: Thu, 19 Sep 2019 08:34:32 +0000
+Subject: [PATCH 490/511] Segmentation fault occurs during truncate
+
+Problem:
+A segmentation fault occurs when bricks are nearly 100% full and a
+truncate of a file is attempted in parallel (No space left on device).
+The prerequisite is that performance xlators are activated
+(read-ahead, write-behind etc).
+During the stack unwind of the frames following an error response
+from the brick (No space left on device), frame->local includes a
+memory location that is allocated via calloc rather than mem_get.
+The destroyed frame is always ra_truncate_cbk wound from ra_ftruncate,
+and the inode ptr is copied to the frame local in wb_ftruncate.
+
+Fix:
+An extra check is added for the pool pointer.
+
+>Change-Id: Ic5d3bd0ab7011e40b2811c6dece063b256e4d9d1
+>Fixes: bz#1797882
+>Signed-off-by: kinsu <vpolakis@gmail.com>
+
+Upstream-patch: https://review.gluster.org/c/glusterfs/+/23445
+
+BUG: 1842449
+Change-Id: Ic5d3bd0ab7011e40b2811c6dece063b256e4d9d1
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220540
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/mem-pool.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index 73503e0..1390747 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -857,6 +857,14 @@ mem_put(void *ptr)
+ /* Not one of ours; don't touch it. */
+ return;
+ }
++
++ if (!hdr->pool_list) {
++ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL,
++ LG_MSG_INVALID_ARG,
++ "invalid argument hdr->pool_list NULL");
++ return;
++ }
++
+ pool_list = hdr->pool_list;
+ pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST];
+
+--
+1.8.3.1
+
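The crash mode guarded against here is a pointer that was never handed out by the mem-pool (e.g. a calloc'd frame->local) reaching mem_put(). A reduced model of the header check; struct obj_hdr and POOL_MAGIC are simplified stand-ins for gluster's actual pooled-object header, not its real layout:

    #include <stdio.h>

    struct obj_hdr {
        unsigned int magic;
        void *pool_list; /* NULL if the block never came from the pool */
    };
    #define POOL_MAGIC 0x506f6f4cU /* illustrative marker value */

    static void demo_mem_put(struct obj_hdr *hdr)
    {
        if (hdr->magic != POOL_MAGIC)
            return;                /* not one of ours; don't touch it */
        if (!hdr->pool_list) {     /* the extra check added by this patch */
            fprintf(stderr, "invalid argument: hdr->pool_list is NULL\n");
            return;                /* log and bail out instead of crashing */
        }
        /* ...normal path: return the block to hdr->pool_list... */
    }

    int main(void)
    {
        struct obj_hdr bogus = { POOL_MAGIC, NULL }; /* calloc-style block */
        demo_mem_put(&bogus);                        /* logs and survives */
        return 0;
    }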
diff --git a/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch b/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch
new file mode 100644
index 0000000..375cfd2
--- /dev/null
+++ b/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch
@@ -0,0 +1,56 @@
+From 0fed8ca9c6c9e3a9041951bc748c7936d0abc8cf Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Tue, 15 Sep 2020 16:20:19 +0530
+Subject: [PATCH 491/511] glusterd: mount directory getting truncated on
+ mounting shared_storage
+
+Issue:
+In case of a user-created volume the mount point
+is the brick path (e.g. /data/brick), but in case of
+shared_storage the mount point is '/'. The code
+increments the pointer by one to get the exact brick
+path without the leading '/'. This works fine for other
+volumes, as the brick_dir pointer is then at '/', but
+for shared_storage it is at 'v' (the first letter of
+the 'var' directory). So, on incrementing, the path we
+get in the case of shared_storage starts from
+'ar/lib/glusterd/...'
+
+Fix:
+Only increment the pointer if the current position is '/';
+otherwise the path will be wrong.
+
+>Fixes: #1480
+
+>Change-Id: Id31bb13f58134ae2099884fbc5984c4e055fb357
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream patch: https://review.gluster.org/c/glusterfs/+/24989
+
+BUG: 1878077
+Change-Id: Id31bb13f58134ae2099884fbc5984c4e055fb357
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220536
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index ad3750e..b343eee 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -1221,7 +1221,8 @@ glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir)
+ }
+
+ brick_dir = &brickpath[strlen(mnt_pt)];
+- brick_dir++;
++ if (brick_dir[0] == '/')
++ brick_dir++;
+
+ snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir);
+ }
+--
+1.8.3.1
+
diff --git a/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch b/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch
new file mode 100644
index 0000000..a983baa
--- /dev/null
+++ b/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch
@@ -0,0 +1,188 @@
+From bde1ad97f8739f8370a2bbb92229b1b397ecd82c Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Tue, 8 Dec 2020 19:06:03 +0530
+Subject: [PATCH 492/511] afr/lookup: Pass xattr_req in while doing a selfheal
+ in lookup
+
+We were not passing xattr_req when doing a name self-heal
+as well as a metadata heal. Because of this, some xdata
+was missing, which caused I/O errors.
+
+Upstream patch details:
+> Change-Id: Ibfb1205a7eb0195632dc3820116ffbbb8043545f
+> Fixes: bz#1728770
+> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Upstream Patch : https://review.gluster.org/#/c/glusterfs/+/23024/
+
+BUG: 1726673
+Change-Id: Ibfb1205a7eb0195632dc3820116ffbbb8043545f
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220538
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/replicate/bug-1728770-pass-xattrs.t | 52 ++++++++++++++++++++++++++
+ tests/include.rc | 1 +
+ xlators/cluster/afr/src/afr-common.c | 8 +++-
+ xlators/cluster/afr/src/afr-self-heal-common.c | 9 ++++-
+ xlators/cluster/afr/src/afr-self-heal.h | 2 +-
+ 5 files changed, 67 insertions(+), 5 deletions(-)
+ create mode 100644 tests/bugs/replicate/bug-1728770-pass-xattrs.t
+
+diff --git a/tests/bugs/replicate/bug-1728770-pass-xattrs.t b/tests/bugs/replicate/bug-1728770-pass-xattrs.t
+new file mode 100644
+index 0000000..159c4fc
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1728770-pass-xattrs.t
+@@ -0,0 +1,52 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../snapshot.rc
++
++cleanup;
++
++function fop_on_bad_disk {
++ local path=$1
++ mkdir $path/dir{1..1000} 2>/dev/null
++ mv $path/dir1 $path/newdir
++ touch $path/foo.txt
++ echo $?
++}
++
++function ls_fop_on_bad_disk {
++ local path=$1
++ ls $path
++ echo $?
++}
++
++TEST init_n_bricks 6;
++TEST setup_lvm 6;
++
++TEST glusterd;
++TEST pidof glusterd;
++
++TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3 $H0:$L4 $H0:$L5 $H0:$L6;
++TEST $CLI volume set $V0 health-check-interval 1000;
++
++TEST $CLI volume start $V0;
++
++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
++#corrupt last disk
++dd if=/dev/urandom of=/dev/mapper/patchy_snap_vg_6-brick_lvm bs=512K count=200 status=progress && sync
++
++
++# Test the disk is now returning EIO for touch and ls
++EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^1$" fop_on_bad_disk "$L6"
++EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^2$" ls_fop_on_bad_disk "$L6"
++
++TEST touch $M0/foo{1..100}
++TEST $CLI volume remove-brick $V0 replica 3 $H0:$L4 $H0:$L5 $H0:$L6 start
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$L4 $H0:$L5 $H0:$L6";
++
++#check that remove-brick status should not have any failed or skipped files
++var=`$CLI volume remove-brick $V0 $H0:$L4 $H0:$L5 $H0:$L6 status | grep completed`
++TEST [ `echo $var | awk '{print $5}'` = "0" ]
++TEST [ `echo $var | awk '{print $6}'` = "0" ]
++
++cleanup;
+diff --git a/tests/include.rc b/tests/include.rc
+index 762c5e2..c925941 100644
+--- a/tests/include.rc
++++ b/tests/include.rc
+@@ -89,6 +89,7 @@ GRAPH_SWITCH_TIMEOUT=10
+ UNLINK_TIMEOUT=5
+ MDC_TIMEOUT=5
+ IO_WAIT_TIMEOUT=5
++DISK_FAIL_TIMEOUT=80
+
+ LOGDIR=$(gluster --print-logdir)
+
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 851ccad..fca2cd5 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -2609,6 +2609,10 @@ afr_lookup_sh_metadata_wrap(void *opaque)
+ dict = dict_new();
+ if (!dict)
+ goto out;
++ if (local->xattr_req) {
++ dict_copy(local->xattr_req, dict);
++ }
++
+ ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+@@ -2617,7 +2621,7 @@ afr_lookup_sh_metadata_wrap(void *opaque)
+ if (loc_is_nameless(&local->loc)) {
+ ret = afr_selfheal_unlocked_discover_on(frame, local->inode,
+ local->loc.gfid, local->replies,
+- local->child_up);
++ local->child_up, dict);
+ } else {
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+@@ -2791,7 +2795,7 @@ afr_lookup_selfheal_wrap(void *opaque)
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+- local->child_up, NULL);
++ local->child_up, local->xattr_req);
+ if (inode)
+ inode_unref(inode);
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 36fd3a9..9b6575f 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -1861,7 +1861,7 @@ afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict)
+ int
+ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+- unsigned char *discover_on)
++ unsigned char *discover_on, dict_t *dict)
+ {
+ loc_t loc = {
+ 0,
+@@ -1876,6 +1876,8 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ xattr_req = dict_new();
+ if (!xattr_req)
+ return -ENOMEM;
++ if (dict)
++ dict_copy(dict, xattr_req);
+
+ if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
+ dict_unref(xattr_req);
+@@ -1906,11 +1908,14 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies)
+ {
+ afr_local_t *local = NULL;
++ dict_t *dict = NULL;
+
+ local = frame->local;
++ if (local && local->xattr_req)
++ dict = local->xattr_req;
+
+ return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
+- local->child_up);
++ local->child_up, dict);
+ }
+
+ unsigned int
+diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
+index b39af02..8f6fb00 100644
+--- a/xlators/cluster/afr/src/afr-self-heal.h
++++ b/xlators/cluster/afr/src/afr-self-heal.h
+@@ -188,7 +188,7 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ int
+ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+- unsigned char *discover_on);
++ unsigned char *discover_on, dict_t *dict);
+ inode_t *
+ afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+--
+1.8.3.1
+
diff --git a/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch b/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch
new file mode 100644
index 0000000..e712886
--- /dev/null
+++ b/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch
@@ -0,0 +1,283 @@
+From 03de45e5fb1c8aa5369848ed9e52abd1365e1d21 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Wed, 31 Jul 2019 11:34:19 +0530
+Subject: [PATCH 493/511] geo-rep: Note section is required for ignore_deletes
+
+There exists a window of 15 sec where deletes are picked up
+by the history crawl when ignore_deletes is set to true,
+and it eventually deletes file(s) from the slave which are not
+supposed to be deleted. Though it is working as per design, a
+note regarding this is needed.
+
+Added a warning message indicating the same.
+Also logged info when the worker restarts after the ignore-deletes
+option is set.
+
+>fixes: bz#1708603
+>Change-Id: I103be882fac18b4cef935efa355f5037a396f7c1
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Upstream patch: https://review.gluster.org/c/glusterfs/+/22702
+
+BUG: 1224906
+Change-Id: I103be882fac18b4cef935efa355f5037a396f7c1
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220757
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 45 ++++++++++++++++++++------
+ cli/src/cli-cmd-volume.c | 20 ++++++++----
+ cli/src/cli.h | 3 +-
+ geo-replication/syncdaemon/gsyncd.py | 2 +-
+ geo-replication/syncdaemon/master.py | 6 ++++
+ tests/00-geo-rep/bug-1708603.t | 63 ++++++++++++++++++++++++++++++++++++
+ 6 files changed, 120 insertions(+), 19 deletions(-)
+ create mode 100644 tests/00-geo-rep/bug-1708603.t
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 5fd05f4..34f17c9 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -2901,7 +2901,8 @@ out:
+ }
+
+ int32_t
+-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options)
++cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
++ int wordcount, dict_t **options, char **errstr)
+ {
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+@@ -2918,6 +2919,8 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options)
+ char *save_ptr = NULL;
+ char *slave_temp = NULL;
+ char *token = NULL;
++ gf_answer_t answer = GF_ANSWER_NO;
++ const char *question = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+@@ -2990,8 +2993,10 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options)
+
+ if (masteri && gsyncd_url_check(words[masteri]))
+ goto out;
+- if (slavei && !glob && !gsyncd_url_check(words[slavei]))
++ if (slavei && !glob && !gsyncd_url_check(words[slavei])) {
++ gf_asprintf(errstr, "Invalid slave url: %s", words[slavei]);
+ goto out;
++ }
+
+ w = str_getunamb(words[cmdi], opwords);
+ if (!w)
+@@ -3101,16 +3106,36 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options)
+ }
+ if (!ret)
+ ret = dict_set_int32(dict, "type", type);
+- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG)
++ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
++ if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") &&
++ !strcmp((char *)words[wordcount - 1], "true")) {
++ question =
++ "There exists ~15 seconds delay for the option to take"
++ " effect from stime of the corresponding brick. Please"
++ " check the log for the time, the option is effective."
++ " Proceed";
++
++ answer = cli_cmd_get_confirmation(state, question);
++
++ if (GF_ANSWER_NO == answer) {
++ gf_log("cli", GF_LOG_INFO,
++ "Operation "
++ "cancelled, exiting");
++ *errstr = gf_strdup("Aborted by user.");
++ ret = -1;
++ goto out;
++ }
++ }
++
+ ret = config_parse(words, wordcount, dict, cmdi, glob);
++ }
+
+ out:
+ if (slave_temp)
+ GF_FREE(slave_temp);
+- if (ret) {
+- if (dict)
+- dict_unref(dict);
+- } else
++ if (ret && dict)
++ dict_unref(dict);
++ else
+ *options = dict;
+
+ return ret;
+@@ -5659,9 +5684,9 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
+ int32_t ret = -1;
+ char *w = NULL;
+ char *volname = NULL;
+- char *opwords[] = {
+- "enable", "disable", "scrub-throttle", "scrub-frequency", "scrub",
+- "signing-time", "signer-threads", NULL};
++ char *opwords[] = {"enable", "disable", "scrub-throttle",
++ "scrub-frequency", "scrub", "signing-time",
++ "signer-threads", NULL};
+ char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL};
+ char *scrub_freq_values[] = {"hourly", "daily", "weekly", "biweekly",
+ "monthly", "minute", NULL};
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 72504ca..6f5bf8b 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -2457,6 +2457,7 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
++ char *errstr = NULL;
+ #if (USE_EVENTS)
+ int ret1 = -1;
+ int cmd_type = -1;
+@@ -2468,16 +2469,21 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GSYNC_SET];
+
+- frame = create_frame(THIS, THIS->ctx->pool);
+- if (frame == NULL) {
+- ret = -1;
++ ret = cli_cmd_gsync_set_parse(state, words, wordcount, &options, &errstr);
++ if (ret) {
++ if (errstr) {
++ cli_err("%s", errstr);
++ GF_FREE(errstr);
++ } else {
++ cli_usage_out(word->pattern);
++ }
++ parse_err = 1;
+ goto out;
+ }
+
+- ret = cli_cmd_gsync_set_parse(words, wordcount, &options);
+- if (ret) {
+- cli_usage_out(word->pattern);
+- parse_err = 1;
++ frame = create_frame(THIS, THIS->ctx->pool);
++ if (frame == NULL) {
++ ret = -1;
+ goto out;
+ }
+
+diff --git a/cli/src/cli.h b/cli/src/cli.h
+index c30ae9c..7b4f446 100644
+--- a/cli/src/cli.h
++++ b/cli/src/cli.h
+@@ -269,7 +269,8 @@ int32_t
+ cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **opt);
+
+ int32_t
+-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **opt);
++cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
++ int wordcount, dict_t **opt, char **errstr);
+
+ int32_t
+ cli_cmd_quota_parse(const char **words, int wordcount, dict_t **opt);
+diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
+index 8940384..215c62d 100644
+--- a/geo-replication/syncdaemon/gsyncd.py
++++ b/geo-replication/syncdaemon/gsyncd.py
+@@ -315,7 +315,7 @@ def main():
+
+ # Log message for loaded config file
+ if config_file is not None:
+- logging.info(lf("Using session config file", path=config_file))
++ logging.debug(lf("Using session config file", path=config_file))
+
+ set_term_handler()
+ excont = FreeObject(exval=0)
+diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
+index 08e98f8..98637e7 100644
+--- a/geo-replication/syncdaemon/master.py
++++ b/geo-replication/syncdaemon/master.py
+@@ -1549,6 +1549,12 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
+ data_stime = self.get_data_stime()
+
+ end_time = int(time.time())
++
++ #as start of historical crawl marks Geo-rep worker restart
++ if gconf.get("ignore-deletes"):
++ logging.info(lf('ignore-deletes config option is set',
++ stime=data_stime))
++
+ logging.info(lf('starting history crawl',
+ turns=self.history_turns,
+ stime=data_stime,
+diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t
+new file mode 100644
+index 0000000..26913f1
+--- /dev/null
++++ b/tests/00-geo-rep/bug-1708603.t
+@@ -0,0 +1,63 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../geo-rep.rc
++. $(dirname $0)/../env.rc
++
++SCRIPT_TIMEOUT=300
++
++##Cleanup and start glusterd
++cleanup;
++TEST glusterd;
++TEST pidof glusterd
++
++
++##Variables
++GEOREP_CLI="gluster volume geo-replication"
++master=$GMV0
++SH0="127.0.0.1"
++slave=${SH0}::${GSV0}
++num_active=2
++num_passive=2
++master_mnt=$M0
++slave_mnt=$M1
++
++############################################################
++#SETUP VOLUMES AND GEO-REPLICATION
++############################################################
++
++##create_and_start_master_volume
++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
++TEST $CLI volume start $GMV0
++
++##create_and_start_slave_volume
++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
++TEST $CLI volume start $GSV0
++
++##Mount master
++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
++
++##Mount slave
++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
++
++#Create geo-rep session
++TEST create_georep_session $master $slave
++
++echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1
++EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes)
++echo y | $GEOREP_CLI $master $slave config ignore-deletes true
++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes)
++
++#Stop Geo-rep
++TEST $GEOREP_CLI $master $slave stop
++
++#Delete Geo-rep
++TEST $GEOREP_CLI $master $slave delete
++
++#Cleanup authorized keys
++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
++
++cleanup;
++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+--
+1.8.3.1
+
diff --git a/0494-glusterd-start-the-brick-on-a-different-port.patch b/0494-glusterd-start-the-brick-on-a-different-port.patch
new file mode 100644
index 0000000..d11b138
--- /dev/null
+++ b/0494-glusterd-start-the-brick-on-a-different-port.patch
@@ -0,0 +1,54 @@
+From 1b24bc4319203128a9ff7f97fe14f4b3622c4eec Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Wed, 26 Aug 2020 20:05:35 +0530
+Subject: [PATCH 494/511] glusterd: start the brick on a different port
+
+Problem: brick fails to start when the port provided by
+glusterd is in use by any other process
+
+Solution: glusterd should check errno set by runner_run()
+and if it is set to EADDRINUSE, it should allocate a new
+port to the brick and try to start it again.
+
+Previously the ret value was checked instead of errno, so the
+retry part never executed. Now we initialize errno to 0
+before calling the runner framework, and afterwards store
+errno into ret so that subsequent function calls cannot
+modify it.
+
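+The save-errno-immediately pattern in isolation (illustrative only;
+runner_run is the glusterfs runner API used in the hunk below):
+
+    errno = 0;
+    ret = runner_run(&runner);  /* may fail because the port is taken */
+    if (errno != 0)
+        ret = errno;            /* capture before anything can clobber it */
+    /* ... */
+    if (ret == EADDRINUSE) {
+        /* allocate a fresh port and retry the brick start */
+    }
+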
+>fixes: #1101
+
+>Change-Id: I1aa048a77c5f8b035dece36976d60602d9753b1a
+>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream patch: https://review.gluster.org/c/glusterfs/+/24923/
+
+BUG: 1865796
+Change-Id: I1aa048a77c5f8b035dece36976d60602d9753b1a
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220541
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index b343eee..f7030fb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -2289,7 +2289,10 @@ retry:
+
+ if (wait) {
+ synclock_unlock(&priv->big_lock);
++ errno = 0;
+ ret = runner_run(&runner);
++ if (errno != 0)
++ ret = errno;
+ synclock_lock(&priv->big_lock);
+
+ if (ret == EADDRINUSE) {
+--
+1.8.3.1
+
diff --git a/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch b/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch
new file mode 100644
index 0000000..6b3f6f5
--- /dev/null
+++ b/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch
@@ -0,0 +1,60 @@
+From 17a2a880290d2038c913c23985df620e3c9741b3 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Mon, 16 Mar 2020 15:17:23 +0000
+Subject: [PATCH 495/511] geo-rep: descriptive message when worker crashes due
+ to EIO
+
+With this patch you can now tell from the log if it is due to EIO:
+
+[2020-03-16 16:24:48.293837] E [syncdutils(worker /bricks/brick1/mbr3):348:log_raise_exception] <top>: Getting "Input/Output error" is most likely due to a. Brick is down or b. Split brain issue.
+[2020-03-16 16:24:48.293915] E [syncdutils(worker /bricks/brick1/mbr3):352:log_raise_exception] <top>: This is expected as per design to keep the consistency of the file system. Once the above issue is resolved geo-rep would automatically proceed further.
+
+>Change-Id: Ie33f2440bc96089731ce12afa8dab91d9550a7ca
+>Fixes: #1104
+>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+>Upstream Patch : https://review.gluster.org/c/glusterfs/+/24228/
+
+BUG: 1412494
+Change-Id: Ie33f2440bc96089731ce12afa8dab91d9550a7ca
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220874
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/syncdutils.py | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index f43e13b..d5a94d4 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -22,7 +22,7 @@ import socket
+ from subprocess import PIPE
+ from threading import Lock, Thread as baseThread
+ from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED
+-from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode
++from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO
+ from signal import signal, SIGTERM
+ import select as oselect
+ from os import waitpid as owaitpid
+@@ -346,6 +346,17 @@ def log_raise_exception(excont):
+ ECONNABORTED):
+ logging.error(lf('Gluster Mount process exited',
+ error=errorcode[exc.errno]))
++ elif isinstance(exc, OSError) and exc.errno == EIO:
++ logging.error("Getting \"Input/Output error\" "
++ "is most likely due to "
++ "a. Brick is down or "
++ "b. Split brain issue.")
++ logging.error("This is expected as per design to "
++ "keep the consistency of the file system. "
++ "Once the above issue is resolved "
++ "geo-replication would automatically "
++ "proceed further.")
++ logtag = "FAIL"
+ else:
+ logtag = "FAIL"
+ if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
+--
+1.8.3.1
+
diff --git a/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch b/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch
new file mode 100644
index 0000000..590aea3
--- /dev/null
+++ b/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch
@@ -0,0 +1,139 @@
+From 5893e64ca8c147b7acfa12cd9824f254d53ee261 Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Wed, 4 Nov 2020 09:02:03 +0530
+Subject: [PATCH 496/511] posix: Use MALLOC instead of alloca to allocate
+ memory for xattrs list (#1730)
+
+If a file has huge xattrs on the backend, the brick process
+crashes once alloca(size) crosses the 256k limit, because the
+iot_worker stack size is 256k.
+
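+The safe pattern, sketched (names from the posix xlator; error paths
+abbreviated): query the size first, then allocate the list on the heap
+instead of the 256k worker stack:
+
+    char *list = NULL;
+    ssize_t size = sys_llistxattr(real_path, NULL, 0);  /* size query */
+    if (size <= 0)
+        goto out;
+    list = GF_MALLOC(size, gf_posix_mt_char);  /* heap, not alloca() */
+    if (!list)
+        goto out;                              /* ENOMEM */
+    size = sys_llistxattr(real_path, list, size);
+    /* ... walk the key list ... */
+out:
+    GF_FREE(list);                             /* NULL-safe */
+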
+> Fixes: #1699
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> Change-Id: I100468234f83329a7d65b43cbe4e10450c1ccecd
+> (Cherry pick from commit fd666caa35ac84dd1cba55399761982011b77112)
+> (Reviewed on upstream link https://github.com/gluster/glusterfs/pull/1828)
+
+Change-Id: I100468234f83329a7d65b43cbe4e10450c1ccecd
+Bug: 1903468
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220872
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-gfid-path.c | 5 ++++-
+ xlators/storage/posix/src/posix-helpers.c | 3 ++-
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 12 +++++++++---
+ 3 files changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-gfid-path.c b/xlators/storage/posix/src/posix-gfid-path.c
+index 64b5c6c..01315ac 100644
+--- a/xlators/storage/posix/src/posix-gfid-path.c
++++ b/xlators/storage/posix/src/posix-gfid-path.c
+@@ -195,7 +195,8 @@ posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path,
+ if (size == 0)
+ goto done;
+ }
+- list = alloca(size);
++
++ list = GF_MALLOC(size, gf_posix_mt_char);
+ if (!list) {
+ *op_errno = errno;
+ goto err;
+@@ -309,6 +310,7 @@ done:
+ GF_FREE(paths[j]);
+ }
+ ret = 0;
++ GF_FREE(list);
+ return ret;
+ err:
+ if (path)
+@@ -317,5 +319,6 @@ err:
+ if (paths[j])
+ GF_FREE(paths[j]);
+ }
++ GF_FREE(list);
+ return ret;
+ }
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 73a44be..ceac52a 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -349,7 +349,7 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler)
+ goto out;
+ }
+
+- list = alloca(size);
++ list = GF_MALLOC(size, gf_posix_mt_char);
+ if (!list) {
+ goto out;
+ }
+@@ -379,6 +379,7 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler)
+ ret = 0;
+
+ out:
++ GF_FREE(list);
+ return ret;
+ }
+
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 21119ea..1d37aed 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -3305,7 +3305,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode,
+ goto out;
+ }
+
+- list = alloca(size);
++ list = GF_MALLOC(size, gf_posix_mt_char);
+ if (!list) {
+ *op_errno = errno;
+ goto out;
+@@ -3385,6 +3385,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode,
+ op_ret = 0;
+
+ out:
++ GF_FREE(list);
+ return op_ret;
+ }
+
+@@ -3810,7 +3811,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ if (size == 0)
+ goto done;
+ }
+- list = alloca(size);
++
++ list = GF_MALLOC(size, gf_posix_mt_char);
+ if (!list) {
+ op_errno = errno;
+ goto out;
+@@ -3937,6 +3939,7 @@ out:
+ dict_unref(dict);
+ }
+
++ GF_FREE(list);
+ return 0;
+ }
+
+@@ -4136,7 +4139,8 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ if (size == 0)
+ goto done;
+ }
+- list = alloca(size + 1);
++
++ list = GF_MALLOC(size, gf_posix_mt_char);
+ if (!list) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+@@ -4240,6 +4244,8 @@ out:
+ if (dict)
+ dict_unref(dict);
+
++ GF_FREE(list);
++
+ return 0;
+ }
+
+--
+1.8.3.1
+
diff --git a/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch b/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch
new file mode 100644
index 0000000..9d477ae
--- /dev/null
+++ b/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch
@@ -0,0 +1,80 @@
+From 85a5cce40dba0393e636c0eb5af9d8f8746f2315 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Thu, 2 Jan 2020 10:23:52 +0530
+Subject: [PATCH 497/511] socket: Use AES128 cipher in SSL if AES is supported
+ by CPU
+
+SSL performance improves with the AES128 cipher, so use AES128
+as the default cipher on CPUs that have the AES bits enabled;
+otherwise SSL uses the AES256 cipher.
+
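+For context (OpenSSL cipher-list semantics; a sketch assuming the
+transport already holds an SSL_CTX): prepending a name gives it
+preference, so on AES-capable CPUs negotiation tries AES128 first:
+
+    if (!ssl_check_aes_bit())
+        cipher_list = "AES128:" DEFAULT_CIPHER_LIST; /* AES128 preferred */
+    SSL_CTX_set_cipher_list(priv->ssl_ctx, cipher_list);
+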
+> Change-Id: I91c50fe987cbb22ed76f8012094730c592c63506
+> Fixes: #1050
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry pick from commit 177cc09d24515596eb51739ce0a276c26e3c52f1)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23952/)
+
+Change-Id: I91c50fe987cbb22ed76f8012094730c592c63506
+Bug: 1612973
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220870
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ rpc/rpc-transport/socket/src/socket.c | 32 ++++++++++++++++++++++++++++++++
+ 1 file changed, 32 insertions(+)
+
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index 54cd5df..1ee7320 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -4238,6 +4238,34 @@ static void __attribute__((destructor)) fini_openssl_mt(void)
+ ERR_free_strings();
+ }
+
++/* The function returns 0 if AES bit is enabled on the CPU */
++static int
++ssl_check_aes_bit(void)
++{
++ FILE *fp = fopen("/proc/cpuinfo", "r");
++ int ret = 1;
++ size_t len = 0;
++ char *line = NULL;
++ char *match = NULL;
++
++ GF_ASSERT(fp != NULL);
++
++ while (getline(&line, &len, fp) > 0) {
++ if (!strncmp(line, "flags", 5)) {
++ match = strstr(line, " aes");
++ if ((match != NULL) && ((match[4] == ' ') || (match[4] == 0))) {
++ ret = 0;
++ break;
++ }
++ }
++ }
++
++ free(line);
++ fclose(fp);
++
++ return ret;
++}
++
+ static int
+ ssl_setup_connection_params(rpc_transport_t *this)
+ {
+@@ -4261,6 +4289,10 @@ ssl_setup_connection_params(rpc_transport_t *this)
+ return 0;
+ }
+
++ if (!ssl_check_aes_bit()) {
++ cipher_list = "AES128:" DEFAULT_CIPHER_LIST;
++ }
++
+ priv->ssl_own_cert = DEFAULT_CERT_PATH;
+ if (dict_get_str(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+--
+1.8.3.1
+
diff --git a/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch b/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch
new file mode 100644
index 0000000..078c390
--- /dev/null
+++ b/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch
@@ -0,0 +1,69 @@
+From 11d648660b8bd246756f87b2f40c72fbabf084d1 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Tue, 19 May 2020 16:13:01 +0100
+Subject: [PATCH 498/511] geo-rep: Fix corner case in rename on mkdir during
+ hybrid crawl
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Problem:
+The issue is hit during hybrid mode while handling a rename on the slave.
+In this special case the rename is recorded as a mkdir, and geo-rep
+processes it by resolving the path from the backend.
+
+While resolving the backend path during this special handling, one corner
+case is not considered.
+
+<snip>
+Traceback (most recent call last):
+  File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in worker
+    res = getattr(self.obj, rmeth)(*in_data[2:])
+  File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", line 588, in entry_ops
+    src_entry = get_slv_dir_path(slv_host, slv_volume, gfid)
+  File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 710, in get_slv_dir_path
+    dir_entry = os.path.join(pfx, pargfid, basename)
+  File "/usr/lib64/python2.7/posixpath.py", line 75, in join
+    if b.startswith('/'):
+AttributeError: 'int' object has no attribute 'startswith'
+
+In Python 3:
+Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/usr/lib64/python3.8/posixpath.py", line 90, in join
+ genericpath._check_arg_types('join', a, *p)
+ File "/usr/lib64/python3.8/genericpath.py", line 152, in _check_arg_types
+ raise TypeError(f'{funcname}() argument must be str, bytes, or '
+TypeError: join() argument must be str, bytes, or os.PathLike object, not 'int'
+</snip>
+
+>Change-Id: I8b926899c60ad8c4ffc886d57028ba70fd21e332
+>Fixes: #1250
+>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Upstream Patch: https://review.gluster.org/c/glusterfs/+/24468/
+
+BUG: 1835229
+Change-Id: I8b926899c60ad8c4ffc886d57028ba70fd21e332
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220867
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/syncdutils.py | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index d5a94d4..26c79d0 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -732,6 +732,8 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
+ else:
+ dirpath = dirpath.strip("/")
+ pargfid = get_gfid_from_mnt(dirpath)
++ if isinstance(pargfid, int):
++ return None
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
+
+--
+1.8.3.1
+
diff --git a/0499-gfapi-give-appropriate-error-when-size-exceeds.patch b/0499-gfapi-give-appropriate-error-when-size-exceeds.patch
new file mode 100644
index 0000000..edeca1a
--- /dev/null
+++ b/0499-gfapi-give-appropriate-error-when-size-exceeds.patch
@@ -0,0 +1,63 @@
+From f78a5d86c55149d80b6efdf60eae7221c238654e Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Thu, 24 Sep 2020 12:43:51 +0000
+Subject: [PATCH 499/511] gfapi: give appropriate error when size exceeds
+
+This patch helps generate an appropriate error message
+when gfapi tries to write data equal to or greater
+than 1 GB, due to the limitation at the socket layer.
+
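+Callers can stay under the limit by chunking large buffers; a
+hypothetical helper (not part of the patch), using the public
+glfs_write API:
+
+    ssize_t write_all(glfs_fd_t *fd, const char *buf, size_t len) {
+        const size_t CHUNK = 512UL * 1024 * 1024;   /* well below 1 GB */
+        ssize_t done = 0;
+        while (len > 0) {
+            size_t n = len < CHUNK ? len : CHUNK;
+            ssize_t r = glfs_write(fd, buf + done, n, 0);
+            if (r < 0)
+                return -1;   /* errno set by gfapi, e.g. EINVAL */
+            done += r;
+            len -= (size_t)r;
+        }
+        return done;
+    }
+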
+Upstream:
+> Reviewed-on: https://github.com/gluster/glusterfs/pull/1557
+> fixes: #1518
+> Change-Id: I1234a0b5a6e675a0b20c6b1afe0f4390fd721f6f
+> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+
+BUG: 1691320
+Change-Id: I1234a0b5a6e675a0b20c6b1afe0f4390fd721f6f
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/219998
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ api/src/gfapi-messages.h | 4 +++-
+ api/src/glfs-fops.c | 8 ++++++++
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/api/src/gfapi-messages.h b/api/src/gfapi-messages.h
+index 68d1242..2ffd5ac 100644
+--- a/api/src/gfapi-messages.h
++++ b/api/src/gfapi-messages.h
+@@ -49,6 +49,8 @@ GLFS_MSGID(API, API_MSG_MEM_ACCT_INIT_FAILED, API_MSG_MASTER_XLATOR_INIT_FAILED,
+ API_MSG_INODE_LINK_FAILED, API_MSG_STATEDUMP_FAILED,
+ API_MSG_XREADDIRP_R_FAILED, API_MSG_LOCK_INSERT_MERGE_FAILED,
+ API_MSG_SETTING_LOCK_TYPE_FAILED, API_MSG_INODE_FIND_FAILED,
+- API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED);
++ API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED,
++ API_MSG_INVALID_ARG);
+
++#define API_MSG_INVALID_ARG_STR "Invalid"
+ #endif /* !_GFAPI_MESSAGES_H__ */
+diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
+index e6adea5..051541f 100644
+--- a/api/src/glfs-fops.c
++++ b/api/src/glfs-fops.c
+@@ -1525,6 +1525,14 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+
+ GF_REF_GET(glfd);
+
++ if (iovec->iov_len >= GF_UNIT_GB) {
++ ret = -1;
++ errno = EINVAL;
++ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
++ "size >= %llu is not allowed", GF_UNIT_GB, NULL);
++ goto out;
++ }
++
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+--
+1.8.3.1
+
diff --git a/0500-features-shard-Convert-shard-block-indices-to-uint64.patch b/0500-features-shard-Convert-shard-block-indices-to-uint64.patch
new file mode 100644
index 0000000..4898422
--- /dev/null
+++ b/0500-features-shard-Convert-shard-block-indices-to-uint64.patch
@@ -0,0 +1,104 @@
+From 60789c658ea22063c26168cb4ce15ac5fd279e58 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Mon, 14 Dec 2020 10:57:03 +0530
+Subject: [PATCH 500/511] features/shard: Convert shard block indices to uint64
+
+This patch fixes a crash in FOPs that operate on really large sharded
+files where the number of participant shards could sometimes exceed
+the signed int32 max.
+
+The patch also adds GF_ASSERTs to ensure that the number of
+participating shards is always greater than 0 for files that do have
+more than one shard.
+
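+Back-of-the-envelope illustration (the 4 MiB block size is chosen for
+the example, not taken from the patch):
+
+    /* a signed 32-bit block index wraps once the file offset passes
+     * 2^31 * 4 MiB = 8 PiB */
+    uint64_t block_size = 4ULL << 20;             /* 4 MiB */
+    uint64_t offset = 9ULL << 50;                 /* 9 PiB */
+    int32_t bad = (int32_t)(offset / block_size); /* wraps negative */
+    uint64_t good = offset / block_size;          /* 2415919104, fits */
+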
+Upstream:
+> https://review.gluster.org/#/c/glusterfs/+/23407/
+> Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6
+> Fixes: #1348
+> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+
+BUG: 1752739
+Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221061
+Tested-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ xlators/features/shard/src/shard.c | 14 ++++++++------
+ xlators/features/shard/src/shard.h | 6 +++---
+ 2 files changed, 11 insertions(+), 9 deletions(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 16d557b..a967f35 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -1855,10 +1855,9 @@ int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this,
+ */
+ if (!inode) {
+ gf_msg_debug(this->name, 0,
+- "Last shard to be truncated absent"
+- " in backend: %s. Directly proceeding to update "
+- "file size",
+- uuid_utoa(inode->gfid));
++ "Last shard to be truncated absent in backend: " PRIu64
++ " of gfid: %s. Directly proceeding to update file size",
++ local->first_block, uuid_utoa(local->loc.inode->gfid));
+ shard_update_file_size(frame, this, NULL, &local->loc,
+ shard_post_update_size_truncate_handler);
+ return 0;
+@@ -2389,6 +2388,7 @@ int shard_truncate_begin(call_frame_t *frame, xlator_t *this) {
+ get_highest_block(0, local->prebuf.ia_size, local->block_size);
+
+ local->num_blocks = local->last_block - local->first_block + 1;
++ GF_ASSERT(local->num_blocks > 0);
+ local->resolver_base_inode =
+ (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode;
+
+@@ -4809,6 +4809,7 @@ int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) {
+ get_highest_block(local->offset, local->total_size, local->block_size);
+
+ local->num_blocks = local->last_block - local->first_block + 1;
++ GF_ASSERT(local->num_blocks > 0);
+ local->resolver_base_inode = local->loc.inode;
+
+ local->inode_list =
+@@ -5266,6 +5267,7 @@ int shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
+ local->last_block =
+ get_highest_block(local->offset, local->total_size, local->block_size);
+ local->num_blocks = local->last_block - local->first_block + 1;
++ GF_ASSERT(local->num_blocks > 0);
+ local->inode_list =
+ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
+ if (!local->inode_list) {
+@@ -5274,8 +5276,8 @@ int shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
+ }
+
+ gf_msg_trace(
+- this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " "
+- "last_block=%" PRIu32 " num_blocks=%" PRIu32
++ this->name, 0, "%s: gfid=%s first_block=%" PRIu64 " "
++ "last_block=%" PRIu64 " num_blocks=%" PRIu64
+ " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "",
+ gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid),
+ local->first_block, local->last_block, local->num_blocks, local->offset,
+diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
+index 1721417..4fe181b 100644
+--- a/xlators/features/shard/src/shard.h
++++ b/xlators/features/shard/src/shard.h
+@@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame,
+ typedef struct shard_local {
+ int op_ret;
+ int op_errno;
+- int first_block;
+- int last_block;
+- int num_blocks;
++ uint64_t first_block;
++ uint64_t last_block;
++ uint64_t num_blocks;
+ int call_count;
+ int eexist_count;
+ int create_count;
+--
+1.8.3.1
+
diff --git a/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch b/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch
new file mode 100644
index 0000000..5152df8
--- /dev/null
+++ b/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch
@@ -0,0 +1,48 @@
+From 070698ede9c3765c95364e8207c8311dbf895499 Mon Sep 17 00:00:00 2001
+From: kiyer <kiyer@redhat.com>
+Date: Tue, 8 Dec 2020 15:18:49 +0530
+Subject: [PATCH 501/511] Cli: Removing old syntax of tier cmds from help menu
+
+Remove old syntax of attach-tier and detach-tier
+commands from help menu.
+
+Label: DOWNSTREAM ONLY
+BUG: 1813866
+
+Change-Id: If86e4828b475fb593a5105ca8deac96374f9542d
+Signed-off-by: kiyer <kiyer@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220510
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-volume.c | 13 -------------
+ 1 file changed, 13 deletions(-)
+
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 6f5bf8b..b6bef80 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -3331,19 +3331,6 @@ struct cli_cmd tier_cmds[] = {
+ {"volume tier <VOLNAME> detach <start|stop|status|commit|[force]>",
+ cli_cmd_volume_tier_cbk, "Detach the hot tier from <VOLNAME>"},
+
+- {"volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...",
+- cli_cmd_volume_tier_cbk,
+- "NOTE: this is old syntax, will be deprecated in next release. "
+- "Please use gluster volume tier <vol> attach "
+- "[<replica COUNT>] <NEW-BRICK>..."},
+-
+- {"volume detach-tier <VOLNAME> "
+- "<start|stop|status|commit|force>",
+- cli_cmd_volume_tier_cbk,
+- "NOTE: this is old syntax, will be deprecated in next release. "
+- "Please use gluster volume tier <vol> detach "
+- "{start|stop|commit} [force]"},
+-
+ {"volume tier <VOLNAME> status\n"
+ "volume tier <VOLNAME> start [force]\n"
+ "volume tier <VOLNAME> stop\n"
+--
+1.8.3.1
+
diff --git a/0502-dht-fixing-a-permission-update-issue.patch b/0502-dht-fixing-a-permission-update-issue.patch
new file mode 100644
index 0000000..7c136d0
--- /dev/null
+++ b/0502-dht-fixing-a-permission-update-issue.patch
@@ -0,0 +1,225 @@
+From 3f1eee125a35c33ecb078e5d3bfd80d80e63881d Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Wed, 15 Jan 2020 12:02:05 +0200
+Subject: [PATCH 502/511] dht - fixing a permission update issue
+
+When bringing back a downed brick and performing a lookup from the
+client side, the permissions on said brick aren't updated on the first
+lookup, but only on the second.
+
+This patch modifies permission update logic so the first lookup will
+trigger a permission update on the downed brick.
+
+LIMITATIONS OF THE PATCH:
+The choice of source depends on whether the directory has a layout or not.
+Even the directories on the newly added brick will have the layout xattr
+[zeroed], but the same is not true for the root directory.
+Hence, if in the entire cluster only the newly added bricks are up [and
+the others are down], any change in permissions during this time will be
+overwritten by the older permissions when the cluster is restarted.
+
+Upstream:
+> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24020/
+> fixes: #999
+> Change-Id: Ieb70246d41e59f9cae9f70bc203627a433dfbd33
+> Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+
+BUG: 1663821
+Change-Id: Ieb70246d41e59f9cae9f70bc203627a433dfbd33
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221116
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/bug-1064147.t | 71 ++++++++++++++++++++++++++++++++
+ xlators/cluster/dht/src/dht-common.c | 28 ++++++++++---
+ xlators/cluster/dht/src/dht-selfheal.c | 15 +++++--
+ xlators/storage/posix/src/posix-common.c | 16 +++----
+ 4 files changed, 111 insertions(+), 19 deletions(-)
+ create mode 100755 tests/bugs/bug-1064147.t
+
+diff --git a/tests/bugs/bug-1064147.t b/tests/bugs/bug-1064147.t
+new file mode 100755
+index 0000000..617a1aa
+--- /dev/null
++++ b/tests/bugs/bug-1064147.t
+@@ -0,0 +1,71 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++
++# Initialize
++#------------------------------------------------------------
++cleanup;
++
++# Start glusterd
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++# Create a volume
++TEST $CLI volume create $V0 $H0:/${V0}{1,2};
++
++# Verify volume creation
++ EXPECT "$V0" volinfo_field $V0 'Volume Name';
++ EXPECT 'Created' volinfo_field $V0 'Status';
++
++# Start volume and verify successful start
++ TEST $CLI volume start $V0;
++ EXPECT 'Started' volinfo_field $V0 'Status';
++ TEST glusterfs -s $H0 --volfile-id=$V0 $M0
++#------------------------------------------------------------
++
++# Test case 1 - Subvolume down + Healing
++#------------------------------------------------------------
++# Kill 2nd brick process
++TEST kill -9 `ps aux | grep glusterfsd | grep ${V0}2 | grep -v grep | awk '{print $2}'`;
++
++# Change root permissions
++TEST chmod 444 $M0
++
++# Store permission for comparison
++TEST permission_new=`stat -c "%A" $M0`
++
++# Bring up the killed brick process
++TEST $CLI volume start $V0 force
++
++# Perform lookup
++sleep 5
++TEST ls $M0
++
++# Check brick permissions
++TEST brick_perm=`stat -c "%A" /${V0}2`
++TEST [ ${brick_perm} = ${permission_new} ]
++#------------------------------------------------------------
++
++# Test case 2 - Add-brick + Healing
++#------------------------------------------------------------
++# Change root permissions
++TEST chmod 777 $M0
++
++# Store permission for comparison
++TEST permission_new_2=`stat -c "%A" $M0`
++
++# Add a 3rd brick
++TEST $CLI volume add-brick $V0 $H0:/${V0}3
++
++# Perform lookup
++sleep 5
++TEST ls $M0
++
++# Check permissions on the new brick
++TEST brick_perm2=`stat -c "%A" /${V0}3`
++
++TEST [ ${brick_perm2} = ${permission_new_2} ]
++
++cleanup;
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 4db89df..fe1d0ee 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -1363,13 +1363,29 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ dht_aggregate_xattr(local->xattr, xattr);
+ }
+
++ if (__is_root_gfid(stbuf->ia_gfid)) {
++ ret = dht_dir_has_layout(xattr, conf->xattr_name);
++ if (ret >= 0) {
++ if (is_greater_time(local->prebuf.ia_ctime,
++ local->prebuf.ia_ctime_nsec,
++ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
++ /* Choose source */
++ local->prebuf.ia_gid = stbuf->ia_gid;
++ local->prebuf.ia_uid = stbuf->ia_uid;
++
++ local->prebuf.ia_ctime = stbuf->ia_ctime;
++ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
++ local->prebuf.ia_prot = stbuf->ia_prot;
++ }
++ }
++ }
++
+ if (local->stbuf.ia_type != IA_INVAL) {
+ /* This is not the first subvol to respond */
+- if (!__is_root_gfid(stbuf->ia_gfid) &&
+- ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+- (local->stbuf.ia_uid != stbuf->ia_uid) ||
+- (is_permission_different(&local->stbuf.ia_prot,
+- &stbuf->ia_prot)))) {
++ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
++ (local->stbuf.ia_uid != stbuf->ia_uid) ||
++ (is_permission_different(&local->stbuf.ia_prot,
++ &stbuf->ia_prot))) {
+ local->need_attrheal = 1;
+ }
+ }
+@@ -10969,7 +10985,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
+ (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
+- gf_defrag_status_get(conf, output, _gf_false);
++ gf_defrag_status_get(conf, output, _gf_false);
+ else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
+ gf_defrag_start_detach_tier(defrag);
+ else if (cmd == GF_DEFRAG_CMD_DETACH_START)
+diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
+index f5dfff9..f4e17d1 100644
+--- a/xlators/cluster/dht/src/dht-selfheal.c
++++ b/xlators/cluster/dht/src/dht-selfheal.c
+@@ -2097,9 +2097,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(this, layout);
+
+- if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) {
+- /*Use the one in the mds_stbuf*/
+- local->stbuf = local->mds_stbuf;
++ if (local->need_attrheal) {
++ if (__is_root_gfid(local->stbuf.ia_gfid)) {
++ local->stbuf.ia_gid = local->prebuf.ia_gid;
++ local->stbuf.ia_uid = local->prebuf.ia_uid;
++
++ local->stbuf.ia_ctime = local->prebuf.ia_ctime;
++ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec;
++ local->stbuf.ia_prot = local->prebuf.ia_prot;
++
++ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) {
++ local->stbuf = local->mds_stbuf;
++ }
+ }
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index c5a43a1..e5c6e62 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -598,6 +598,7 @@ posix_init(xlator_t *this)
+ int force_directory = -1;
+ int create_mask = -1;
+ int create_directory_mask = -1;
++ char value;
+
+ dir_data = dict_get(this->options, "directory");
+
+@@ -654,16 +655,11 @@ posix_init(xlator_t *this)
+ }
+
+ /* Check for Extended attribute support, if not present, log it */
+- op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working",
+- 8, 0);
+- if (op_ret != -1) {
+- ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test");
+- if (ret) {
+- gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION,
+- "failed to remove xattr: "
+- "trusted.glusterfs.test");
+- }
+- } else {
++ size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value));
++
++ if ((size == -1) && (errno == EOPNOTSUPP)) {
++ gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR,
++ "getxattr returned %zd", size);
+ tmp_data = dict_get(this->options, "mandate-attribute");
+ if (tmp_data) {
+ if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) {
+--
+1.8.3.1
+
diff --git a/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch b/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch
new file mode 100644
index 0000000..466bf4e
--- /dev/null
+++ b/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch
@@ -0,0 +1,179 @@
+From 5946a6ec18976c0f52162fe0f47e9b5171af87ec Mon Sep 17 00:00:00 2001
+From: Soumya Koduri <skoduri@redhat.com>
+Date: Mon, 6 Apr 2020 12:36:44 +0530
+Subject: [PATCH 503/511] gfapi: Suspend synctasks instead of blocking them
+
+There are certain conditions which block the current
+execution thread (like waiting on a mutex lock, a condition
+variable, or an I/O response). In such cases, if it is a
+synctask thread, we should suspend the task instead
+of blocking it (as done in SYNCOP using synctask_yield).
+
+This is to avoid deadlock like the one mentioned below -
+
+1) synctaskA sets fs->migration_in_progress to 1 and
+ does I/O (LOOKUP)
+2) Other synctask threads wait for fs->migration_in_progress
+ to be reset to 0 by synctaskA and hence blocked
+3) but synctaskA cannot resume as all synctask threads are blocked
+ on (2).
+
+Note: this same approach is already used by a few other components,
+like syncbarrier etc.
+
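+The shape of the fix, condensed (illustrative; the macro is introduced
+by this patch in glfs-internal.h):
+
+    /* synctask workers are a fixed thread pool: if every worker blocks
+     * in pthread_cond_wait(), no worker is left to run the task that
+     * would broadcast the condition.  Yield the task instead: */
+    while (wait_for_migration && fs->migration_in_progress)
+        __GLFS_LOCK_WAIT(fs); /* synctask: yield; plain thread: cond_wait */
+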
+>Change-Id: If90f870d663bb242c702a5b86ac52eeda67c6f0d
+>Fixes: #1146
+>Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+Upstream patch: https://review.gluster.org/c/glusterfs/+/24276
+
+BUG: 1779238
+Change-Id: If90f870d663bb242c702a5b86ac52eeda67c6f0d
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221081
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+---
+ api/src/glfs-internal.h | 34 ++++++++++++++++++++++++++++++++--
+ api/src/glfs-resolve.c | 9 +++++++++
+ api/src/glfs.c | 9 +++++++++
+ 3 files changed, 50 insertions(+), 2 deletions(-)
+
+diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
+index 55401b2..15cf0ee 100644
+--- a/api/src/glfs-internal.h
++++ b/api/src/glfs-internal.h
+@@ -16,6 +16,7 @@
+ #include <glusterfs/upcall-utils.h>
+ #include "glfs-handles.h"
+ #include <glusterfs/refcount.h>
++#include <glusterfs/syncop.h>
+
+ #define GLFS_SYMLINK_MAX_FOLLOW 2048
+
+@@ -207,6 +208,7 @@ struct glfs {
+ glfs_upcall_cbk up_cbk; /* upcall cbk function to be registered */
+ void *up_data; /* Opaque data provided by application
+ * during upcall registration */
++ struct list_head waitq; /* waiting synctasks */
+ };
+
+ /* This enum is used to maintain the state of glfd. In case of async fops
+@@ -442,6 +444,34 @@ glfs_process_upcall_event(struct glfs *fs, void *data)
+ THIS = glfd->fd->inode->table->xl->ctx->master; \
+ } while (0)
+
++#define __GLFS_LOCK_WAIT(fs) \
++ do { \
++ struct synctask *task = NULL; \
++ \
++ task = synctask_get(); \
++ \
++ if (task) { \
++ list_add_tail(&task->waitq, &fs->waitq); \
++ pthread_mutex_unlock(&fs->mutex); \
++ synctask_yield(task, NULL); \
++ pthread_mutex_lock(&fs->mutex); \
++ } else { \
++ /* non-synctask */ \
++ pthread_cond_wait(&fs->cond, &fs->mutex); \
++ } \
++ } while (0)
++
++#define __GLFS_SYNCTASK_WAKE(fs) \
++ do { \
++ struct synctask *waittask = NULL; \
++ \
++ while (!list_empty(&fs->waitq)) { \
++ waittask = list_entry(fs->waitq.next, struct synctask, waitq); \
++ list_del_init(&waittask->waitq); \
++ synctask_wake(waittask); \
++ } \
++ } while (0)
++
+ /*
+ By default all lock attempts from user context must
+ use glfs_lock() and glfs_unlock(). This allows
+@@ -466,10 +496,10 @@ glfs_lock(struct glfs *fs, gf_boolean_t wait_for_migration)
+ pthread_mutex_lock(&fs->mutex);
+
+ while (!fs->init)
+- pthread_cond_wait(&fs->cond, &fs->mutex);
++ __GLFS_LOCK_WAIT(fs);
+
+ while (wait_for_migration && fs->migration_in_progress)
+- pthread_cond_wait(&fs->cond, &fs->mutex);
++ __GLFS_LOCK_WAIT(fs);
+
+ return 0;
+ }
+diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
+index 062b7dc..58b6ace 100644
+--- a/api/src/glfs-resolve.c
++++ b/api/src/glfs-resolve.c
+@@ -65,6 +65,9 @@ __glfs_first_lookup(struct glfs *fs, xlator_t *subvol)
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
++ /* wake up other waiting tasks */
++ __GLFS_SYNCTASK_WAKE(fs);
++
+ return ret;
+ }
+
+@@ -154,6 +157,9 @@ __glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
++ /* wake up other waiting tasks */
++ __GLFS_SYNCTASK_WAKE(fs);
++
+ return newinode;
+ }
+
+@@ -841,6 +847,9 @@ __glfs_migrate_fd(struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd)
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
++ /* wake up other waiting tasks */
++ __GLFS_SYNCTASK_WAKE(fs);
++
+ return newfd;
+ }
+
+diff --git a/api/src/glfs.c b/api/src/glfs.c
+index f36616d..ae994fa 100644
+--- a/api/src/glfs.c
++++ b/api/src/glfs.c
+@@ -740,6 +740,7 @@ glfs_new_fs(const char *volname)
+
+ INIT_LIST_HEAD(&fs->openfds);
+ INIT_LIST_HEAD(&fs->upcall_list);
++ INIT_LIST_HEAD(&fs->waitq);
+
+ PTHREAD_MUTEX_INIT(&fs->mutex, NULL, fs->pthread_flags, GLFS_INIT_MUTEX,
+ err);
+@@ -1228,6 +1229,7 @@ pub_glfs_fini(struct glfs *fs)
+ call_pool_t *call_pool = NULL;
+ int fs_init = 0;
+ int err = -1;
++ struct synctask *waittask = NULL;
+
+ DECLARE_OLD_THIS;
+
+@@ -1249,6 +1251,13 @@ pub_glfs_fini(struct glfs *fs)
+
+ call_pool = fs->ctx->pool;
+
++ /* Wake up any suspended synctasks */
++ while (!list_empty(&fs->waitq)) {
++ waittask = list_entry(fs->waitq.next, struct synctask, waitq);
++ list_del_init(&waittask->waitq);
++ synctask_wake(waittask);
++ }
++
+ while (countdown--) {
+ /* give some time for background frames to finish */
+ pthread_mutex_lock(&fs->mutex);
+--
+1.8.3.1
+
diff --git a/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch b/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch
new file mode 100644
index 0000000..21d7f7f
--- /dev/null
+++ b/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch
@@ -0,0 +1,109 @@
+From baa566be8832a56fdea7068d84844ec1ec84d8d9 Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Thu, 15 Oct 2020 16:28:58 +0530
+Subject: [PATCH 504/511] io-stats: Configure ios_sample_buf_size based on
+ sample_interval value (#1574)
+
+The io-stats xlator declares an ios_sample_buf_size of 64k objects
+(~10M) per xlator, but when sample_interval is 0 this big buffer is
+not required, so declare the default value only while sample_interval
+is not 0. The new change helps reduce the RSS size of brick and shd
+processes while the number of volumes is huge.
+
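+Rough sizing behind the 64k/10M figures above (the per-sample size is
+an assumption for illustration, not taken from the patch):
+
+    /* 65536 sample slots * ~160 bytes per slot ~= 10 MiB per xlator */
+    size_t sample_buf_bytes = 65536UL * 160;   /* 10485760 bytes */
+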
+> Change-Id: I3e82cca92e40549355edfac32580169f3ce51af8
+> Fixes: #1542
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry picked from commit f71660eb879a9cd5761e5adbf10c783e959a990a)
+> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1542)
+
+Change-Id: I3e82cca92e40549355edfac32580169f3ce51af8
+BUG: 1898778
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221183
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/daemon-log-level-option.t | 8 ++++----
+ xlators/debug/io-stats/src/io-stats.c | 26 ++++++++++++++++++++++----
+ 2 files changed, 26 insertions(+), 8 deletions(-)
+
+diff --git a/tests/bugs/glusterd/daemon-log-level-option.t b/tests/bugs/glusterd/daemon-log-level-option.t
+index 66e55e3..5352a63 100644
+--- a/tests/bugs/glusterd/daemon-log-level-option.t
++++ b/tests/bugs/glusterd/daemon-log-level-option.t
+@@ -61,8 +61,8 @@ rm -f /var/log/glusterfs/glustershd.log
+ TEST $CLI volume set all cluster.daemon-log-level WARNING
+ TEST $CLI volume start $V0
+
+-# log should not have any info messages
+-EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log"
++# log should have exactly 1 info message (ios_sample_buf_size configuration in io-stats)
++EXPECT 1 Info_messages_count "/var/log/glusterfs/glustershd.log"
+
+ # log should not have any debug messages
+ EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log"
+@@ -78,8 +78,8 @@ rm -f /var/log/glusterfs/glustershd.log
+ TEST $CLI volume set all cluster.daemon-log-level ERROR
+ TEST $CLI volume start $V0
+
+-# log should not have any info messages
+-EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log"
++# log should have exactly 1 info message (ios_sample_buf_size configuration in io-stats)
++EXPECT 1 Info_messages_count "/var/log/glusterfs/glustershd.log"
+
+ # log should not have any warning messages
+ EXPECT 0 Warning_messages_count "/var/log/glusterfs/glustershd.log"
+diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
+index aa91a0a..9b34895 100644
+--- a/xlators/debug/io-stats/src/io-stats.c
++++ b/xlators/debug/io-stats/src/io-stats.c
+@@ -3724,6 +3724,15 @@ xlator_set_loglevel(xlator_t *this, int log_level)
+ }
+ }
+
++void
++ios_sample_buf_size_configure(char *name, struct ios_conf *conf)
++{
++ conf->ios_sample_buf_size = 1024;
++ gf_log(name, GF_LOG_INFO,
++ "Configure ios_sample_buf "
++ " size is 1024 because ios_sample_interval is 0");
++}
++
+ int
+ reconfigure(xlator_t *this, dict_t *options)
+ {
+@@ -3779,8 +3788,13 @@ reconfigure(xlator_t *this, dict_t *options)
+ int32, out);
+ GF_OPTION_RECONF("ios-dump-format", dump_format_str, options, str, out);
+ ios_set_log_format_code(conf, dump_format_str);
+- GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, options,
+- int32, out);
++ if (conf->ios_sample_interval) {
++ GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size,
++ options, int32, out);
++ } else {
++ ios_sample_buf_size_configure(this->name, conf);
++ }
++
+ GF_OPTION_RECONF("sys-log-level", sys_log_str, options, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level(sys_log_str);
+@@ -3947,8 +3961,12 @@ init(xlator_t *this)
+ GF_OPTION_INIT("ios-dump-format", dump_format_str, str, out);
+ ios_set_log_format_code(conf, dump_format_str);
+
+- GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32,
+- out);
++ if (conf->ios_sample_interval) {
++ GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32,
++ out);
++ } else {
++ ios_sample_buf_size_configure(this->name, conf);
++ }
+
+ ret = ios_init_sample_buf(conf);
+ if (ret) {
+--
+1.8.3.1
+
diff --git a/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch b/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch
new file mode 100644
index 0000000..a0f6b62
--- /dev/null
+++ b/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch
@@ -0,0 +1,107 @@
+From 43a8e2c7441b14f5f238cb11d83f32f248b16abb Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Tue, 13 Oct 2020 18:56:20 +0530
+Subject: [PATCH 505/511] trash: Create inode_table only while feature is
+ enabled
+
+Currently the trash xlator creates an inode table (1M) even if the
+feature is not enabled. In a brick_mux environment, when 250 bricks
+are attached to a single brick process and the feature is not enabled,
+the brick process increases its RSS size unnecessarily.
+
+Solution: Create the inode_table only while the feature is enabled.
+The patch saves 250M of RSS per brick process
+if the trash feature is not enabled.
+
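+Roughly, the reconfigure path now only allocates the table on an
+off-to-on transition (a sketch; see the diff below for the full logic):
+
+    if (!active_earlier && active_now && !priv->trash_itable)
+        priv->trash_itable = inode_table_new(0, this);
+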
+> Change-Id: I11a6fd2b8419fe2988f398be6ec30fb4f3b99a5d
+> Fixes: #1543
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit 32f25e7b1b4b080ab2640e178b407c878e629376)
+> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1543)
+
+Change-Id: I11a6fd2b8419fe2988f398be6ec30fb4f3b99a5d
+BUG: 1898781
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221184
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/trash/src/trash.c | 47 +++++++++++++++++++++++++++++++++++---
+ 1 file changed, 44 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
+index f96ed73..93f020f 100644
+--- a/xlators/features/trash/src/trash.c
++++ b/xlators/features/trash/src/trash.c
+@@ -2235,16 +2235,47 @@ reconfigure(xlator_t *this, dict_t *options)
+ char trash_dir[PATH_MAX] = {
+ 0,
+ };
++ gf_boolean_t active_earlier = _gf_false;
++ gf_boolean_t active_now = _gf_false;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
++ active_earlier = priv->state;
++ GF_OPTION_RECONF("trash", active_now, options, bool, out);
++
++ /* Disabling the trash feature is not allowed at this point, until
++ we find an approach to clean up the resources gracefully. To
++ disable the feature the inode table would need to be destroyed,
++ and currently it is difficult to ensure the inodes are no longer
++ being used
++ */
++ if (active_earlier && !active_now) {
++ gf_log(this->name, GF_LOG_INFO,
++ "Disable of trash feature is not allowed "
++ "during graph reconfigure");
++ ret = 0;
++ goto out;
++ }
++
++ if (!active_earlier && active_now) {
++ if (!priv->trash_itable) {
++ priv->trash_itable = inode_table_new(0, this);
++ if (!priv->trash_itable) {
++ ret = -ENOMEM;
++ gf_log(this->name, GF_LOG_ERROR,
++ "failed to create trash inode_table"
++ " during graph reconfigure");
++ goto out;
++ }
++ }
++ priv->state = active_now;
++ }
++
+ GF_OPTION_RECONF("trash-internal-op", priv->internal, options, bool, out);
+ GF_OPTION_RECONF("trash-dir", tmp, options, str, out);
+
+- GF_OPTION_RECONF("trash", priv->state, options, bool, out);
+-
+ if (priv->state) {
+ ret = create_or_rename_trash_directory(this);
+
+@@ -2501,7 +2532,17 @@ init(xlator_t *this)
+ goto out;
+ }
+
+- priv->trash_itable = inode_table_new(0, this);
++ if (priv->state) {
++ priv->trash_itable = inode_table_new(0, this);
++ if (!priv->trash_itable) {
++ ret = -ENOMEM;
++ priv->state = _gf_false;
++ gf_log(this->name, GF_LOG_ERROR,
++ "failed to create trash inode_table disable trash");
++ goto out;
++ }
++ }
++
+ gf_log(this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path);
+
+ this->private = (void *)priv;
+--
+1.8.3.1
+
diff --git a/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch b/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch
new file mode 100644
index 0000000..cf978f5
--- /dev/null
+++ b/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch
@@ -0,0 +1,499 @@
+From 17a9ce965ef2fec9ee5c8e4b76981bb7cbcf1352 Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Mon, 9 Nov 2020 17:15:42 +0530
+Subject: [PATCH 506/511] posix: Attach a posix_spawn_disk_thread with
+ glusterfs_ctx (#1595)
+
+Currently the posix xlator spawns a posix_disk_space thread per brick, and
+in a brick_mux environment, when glusterd attaches bricks at the maximum
+level (250) to a single brick process, 250 threads are spawned for all
+the bricks and the brick process memory size increases accordingly.
+
+Solution: Attach a posix_disk_space thread to the glusterfs_ctx to
+ spawn one thread per process instead of one per brick.
+
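+Each posix xlator now registers a small handle on a per-process list that
+a single thread walks; this is the structure the diff below adds:
+
+    struct posix_diskxl {
+        pthread_cond_t cond;
+        struct list_head list;
+        xlator_t *xl;
+        gf_boolean_t detach_notify;
+        gf_boolean_t is_use;
+    };
+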
+> Fixes: #1482
+> Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7
+> Cherry-picked from commit 3f93be77e1acf5baacafa97a320e91e6879d1c0e
+> Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1482
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+
+Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7
+Bug: 1898776
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220366
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfsd/src/glusterfsd.c | 4 +
+ libglusterfs/src/glusterfs/glusterfs.h | 6 ++
+ xlators/storage/posix/src/posix-common.c | 68 +++++++++++--
+ xlators/storage/posix/src/posix-handle.h | 3 +-
+ xlators/storage/posix/src/posix-helpers.c | 131 ++++++++++++++-----------
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 3 +-
+ xlators/storage/posix/src/posix-mem-types.h | 1 +
+ xlators/storage/posix/src/posix.h | 12 ++-
+ 8 files changed, 160 insertions(+), 68 deletions(-)
+
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 955bf1d..ac25255 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -1840,9 +1840,13 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
+ INIT_LIST_HEAD(&cmd_args->xlator_options);
+ INIT_LIST_HEAD(&cmd_args->volfile_servers);
+ ctx->pxl_count = 0;
++ ctx->diskxl_count = 0;
+ pthread_mutex_init(&ctx->fd_lock, NULL);
+ pthread_cond_init(&ctx->fd_cond, NULL);
+ INIT_LIST_HEAD(&ctx->janitor_fds);
++ pthread_mutex_init(&ctx->xl_lock, NULL);
++ pthread_cond_init(&ctx->xl_cond, NULL);
++ INIT_LIST_HEAD(&ctx->diskth_xl);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index bf6a987..d3400bf 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -740,7 +740,13 @@ struct _glusterfs_ctx {
+ pthread_t janitor;
+ /* The variable is use to save total posix xlator count */
+ uint32_t pxl_count;
++ uint32_t diskxl_count;
+
++ /* List of posix xlators used by the disk thread */
++ struct list_head diskth_xl;
++ pthread_mutex_t xl_lock;
++ pthread_cond_t xl_cond;
++ pthread_t disk_space_check;
+ char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
+ };
+ typedef struct _glusterfs_ctx glusterfs_ctx_t;
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index e5c6e62..2c9030b 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -138,6 +138,36 @@ posix_inode(xlator_t *this)
+ return 0;
+ }
+
++static void
++delete_posix_diskxl(xlator_t *this)
++{
++ struct posix_private *priv = this->private;
++ struct posix_diskxl *pxl = priv->pxl;
++ glusterfs_ctx_t *ctx = this->ctx;
++ uint32_t count = 1;
++
++ if (pxl) {
++ pthread_mutex_lock(&ctx->xl_lock);
++ {
++ pxl->detach_notify = _gf_true;
++ while (pxl->is_use)
++ pthread_cond_wait(&pxl->cond, &ctx->xl_lock);
++ list_del_init(&pxl->list);
++ priv->pxl = NULL;
++ count = --ctx->diskxl_count;
++ if (count == 0)
++ pthread_cond_signal(&ctx->xl_cond);
++ }
++ pthread_mutex_unlock(&ctx->xl_lock);
++ pthread_cond_destroy(&pxl->cond);
++ GF_FREE(pxl);
++ if (count == 0) {
++ pthread_join(ctx->disk_space_check, NULL);
++ ctx->disk_space_check = 0;
++ }
++ }
++}
++
+ /**
+ * notify - when parent sends PARENT_UP, send CHILD_UP event from here
+ */
+@@ -194,6 +224,8 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
++ delete_posix_diskxl(this);
++
+ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+ victim->name);
+ default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
+@@ -318,6 +350,7 @@ posix_reconfigure(xlator_t *this, dict_t *options)
+ int32_t force_directory_mode = -1;
+ int32_t create_mask = -1;
+ int32_t create_directory_mask = -1;
++ double old_disk_reserve = 0.0;
+
+ priv = this->private;
+
+@@ -383,6 +416,7 @@ posix_reconfigure(xlator_t *this, dict_t *options)
+ " fallback to <hostname>:<export>");
+ }
+
++ old_disk_reserve = priv->disk_reserve;
+ GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size,
+ out);
+ /* option can be any one of percent or bytes */
+@@ -390,11 +424,19 @@ posix_reconfigure(xlator_t *this, dict_t *options)
+ if (priv->disk_reserve < 100.0)
+ priv->disk_unit = 'p';
+
+- if (priv->disk_reserve) {
++ /* Delete the pxl object from the disk_reserve list when the
++ reserve option changes during graph reconfigure
++ */
++ if (old_disk_reserve != priv->disk_reserve) {
++ delete_posix_diskxl(this);
++ old_disk_reserve = 0;
++ }
++
++ if (!old_disk_reserve && priv->disk_reserve) {
+ ret = posix_spawn_disk_space_check_thread(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
+- "Getting disk space check from thread failed");
++ "Getting disk space check from thread failed ");
+ goto out;
+ }
+ }
+@@ -1008,13 +1050,13 @@ posix_init(xlator_t *this)
+ " fallback to <hostname>:<export>");
+ }
+
+- _private->disk_space_check_active = _gf_false;
+ _private->disk_space_full = 0;
+
+ GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out);
+
+ /* option can be any one of percent or bytes */
+ _private->disk_unit = 0;
++ pthread_cond_init(&_private->fd_cond, NULL);
+ if (_private->disk_reserve < 100.0)
+ _private->disk_unit = 'p';
+
+@@ -1162,12 +1204,6 @@ posix_fini(xlator_t *this)
+ priv->health_check = 0;
+ }
+
+- if (priv->disk_space_check) {
+- priv->disk_space_check_active = _gf_false;
+- (void)gf_thread_cleanup_xint(priv->disk_space_check);
+- priv->disk_space_check = 0;
+- }
+-
+ if (priv->janitor) {
+ /*TODO: Make sure the synctask is also complete */
+ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
+@@ -1192,10 +1228,24 @@ posix_fini(xlator_t *this)
+ pthread_join(ctx->janitor, NULL);
+ }
+
++ pthread_mutex_lock(&ctx->xl_lock);
++ {
++ count = --ctx->diskxl_count;
++ if (count == 0)
++ pthread_cond_signal(&ctx->xl_cond);
++ }
++ pthread_mutex_unlock(&ctx->xl_lock);
++
++ if (count == 0) {
++ pthread_join(ctx->disk_space_check, NULL);
++ ctx->disk_space_check = 0;
++ }
++
+ if (priv->fsyncer) {
+ (void)gf_thread_cleanup_xint(priv->fsyncer);
+ priv->fsyncer = 0;
+ }
++
+ /*unlock brick dir*/
+ if (priv->mount_lock)
+ (void)sys_closedir(priv->mount_lock);
+diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h
+index c4d7cb1..8e4c719 100644
+--- a/xlators/storage/posix/src/posix-handle.h
++++ b/xlators/storage/posix/src/posix-handle.h
+@@ -206,5 +206,6 @@ int
+ posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata);
+
+ void
+-posix_disk_space_check(xlator_t *this);
++posix_disk_space_check(struct posix_private* priv);
++
+ #endif /* !_POSIX_HANDLE_H */
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index ceac52a..110d383 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2284,9 +2284,8 @@ unlock:
+ }
+
+ void
+-posix_disk_space_check(xlator_t *this)
++posix_disk_space_check(struct posix_private *priv)
+ {
+- struct posix_private *priv = NULL;
+ char *subvol_path = NULL;
+ int op_ret = 0;
+ double size = 0;
+@@ -2295,16 +2294,14 @@ posix_disk_space_check(xlator_t *this)
+ double totsz = 0;
+ double freesz = 0;
+
+- GF_VALIDATE_OR_GOTO(this->name, this, out);
+- priv = this->private;
+- GF_VALIDATE_OR_GOTO(this->name, priv, out);
++ GF_VALIDATE_OR_GOTO("posix-helpers", priv, out);
+
+ subvol_path = priv->base_path;
+
+ op_ret = sys_statvfs(subvol_path, &buf);
+
+ if (op_ret == -1) {
+- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
++ gf_msg("posix-disk", GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+ "statvfs failed on %s", subvol_path);
+ goto out;
+ }
+@@ -2328,78 +2325,102 @@ out:
+ }
+
+ static void *
+-posix_disk_space_check_thread_proc(void *data)
++posix_ctx_disk_thread_proc(void *data)
+ {
+- xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
++ glusterfs_ctx_t *ctx = NULL;
+ uint32_t interval = 0;
+- int ret = -1;
+-
+- this = data;
+- priv = this->private;
++ struct posix_diskxl *pthis = NULL;
++ xlator_t *this = NULL;
++ struct timespec sleep_till = {
++ 0,
++ };
+
++ ctx = data;
+ interval = 5;
+- gf_msg_debug(this->name, 0,
+- "disk-space thread started, "
++
++ gf_msg_debug("glusterfs_ctx", 0,
++ "Ctx disk-space thread started, "
+ "interval = %d seconds",
+ interval);
+- while (1) {
+- /* aborting sleep() is a request to exit this thread, sleep()
+- * will normally not return when cancelled */
+- ret = sleep(interval);
+- if (ret > 0)
+- break;
+- /* prevent thread errors while doing the health-check(s) */
+- pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+-
+- /* Do the disk-check.*/
+- posix_disk_space_check(this);
+- if (!priv->disk_space_check_active)
+- goto out;
+- pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+- }
+
+-out:
+- gf_msg_debug(this->name, 0, "disk space check thread exiting");
+- LOCK(&priv->lock);
++ pthread_mutex_lock(&ctx->xl_lock);
+ {
+- priv->disk_space_check_active = _gf_false;
++ while (ctx->diskxl_count > 0) {
++ list_for_each_entry(pthis, &ctx->diskth_xl, list)
++ {
++ pthis->is_use = _gf_true;
++ pthread_mutex_unlock(&ctx->xl_lock);
++
++ THIS = this = pthis->xl;
++ priv = this->private;
++
++ posix_disk_space_check(priv);
++
++ pthread_mutex_lock(&ctx->xl_lock);
++ pthis->is_use = _gf_false;
++ /* Send a signal to posix_notify function */
++ if (pthis->detach_notify)
++ pthread_cond_signal(&pthis->cond);
++ }
++
++ timespec_now_realtime(&sleep_till);
++ sleep_till.tv_sec += 5;
++ (void)pthread_cond_timedwait(&ctx->xl_cond, &ctx->xl_lock,
++ &sleep_till);
++ }
+ }
+- UNLOCK(&priv->lock);
++ pthread_mutex_unlock(&ctx->xl_lock);
+
+ return NULL;
+ }
+
+ int
+-posix_spawn_disk_space_check_thread(xlator_t *xl)
++posix_spawn_disk_space_check_thread(xlator_t *this)
+ {
+- struct posix_private *priv = NULL;
+- int ret = -1;
++ int ret = 0;
++ glusterfs_ctx_t *ctx = this->ctx;
++ struct posix_diskxl *pxl = NULL;
++ struct posix_private *priv = this->private;
+
+- priv = xl->private;
++ pxl = GF_CALLOC(1, sizeof(struct posix_diskxl), gf_posix_mt_diskxl_t);
++ if (!pxl) {
++ ret = -ENOMEM;
++ gf_log(this->name, GF_LOG_ERROR,
++ "Calloc is failed to allocate "
++ "memory for diskxl object");
++ goto out;
++ }
++ pthread_cond_init(&pxl->cond, NULL);
+
+- LOCK(&priv->lock);
++ pthread_mutex_lock(&ctx->xl_lock);
+ {
+- /* cancel the running thread */
+- if (priv->disk_space_check_active == _gf_true) {
+- pthread_cancel(priv->disk_space_check);
+- priv->disk_space_check_active = _gf_false;
+- }
++ if (ctx->diskxl_count++ == 0) {
++ ret = gf_thread_create(&ctx->disk_space_check, NULL,
++ posix_ctx_disk_thread_proc, ctx,
++ "posixctxres");
+
+- ret = gf_thread_create(&priv->disk_space_check, NULL,
+- posix_disk_space_check_thread_proc, xl,
+- "posix_reserve");
+- if (ret) {
+- priv->disk_space_check_active = _gf_false;
+- gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED,
+- "unable to setup disk space check thread");
+- goto unlock;
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
++ "spawning disk space check thread failed");
++ ctx->diskxl_count--;
++ pthread_mutex_unlock(&ctx->xl_lock);
++ goto out;
++ }
+ }
++ pxl->xl = this;
++ priv->pxl = (void *)pxl;
++ list_add_tail(&pxl->list, &ctx->diskth_xl);
++ }
++ pthread_mutex_unlock(&ctx->xl_lock);
+
+- priv->disk_space_check_active = _gf_true;
++out:
++ if (ret) {
++ if (pxl) {
++ pthread_cond_destroy(&pxl->cond);
++ GF_FREE(pxl);
++ }
+ }
+-unlock:
+- UNLOCK(&priv->lock);
+ return ret;
+ }
+
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 1d37aed..761e018 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -37,6 +37,7 @@
+ #include <fcntl.h>
+ #endif /* HAVE_LINKAT */
+
++#include "posix-handle.h"
+ #include <glusterfs/glusterfs.h>
+ #include <glusterfs/checksum.h>
+ #include <glusterfs/dict.h>
+@@ -713,7 +714,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ option behaviour
+ */
+ if (priv->disk_reserve)
+- posix_disk_space_check(this);
++ posix_disk_space_check(priv);
+
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock);
+
+diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
+index 2253f38..bb4c56d 100644
+--- a/xlators/storage/posix/src/posix-mem-types.h
++++ b/xlators/storage/posix/src/posix-mem-types.h
+@@ -20,6 +20,7 @@ enum gf_posix_mem_types_ {
+ gf_posix_mt_paiocb,
+ gf_posix_mt_inode_ctx_t,
+ gf_posix_mt_mdata_attr,
++ gf_posix_mt_diskxl_t,
+ gf_posix_mt_end
+ };
+ #endif
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 07f367b..4be979c 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -36,7 +36,6 @@
+ #include <glusterfs/compat.h>
+ #include <glusterfs/timer.h>
+ #include "posix-mem-types.h"
+-#include "posix-handle.h"
+ #include <glusterfs/call-stub.h>
+
+ #ifdef HAVE_LIBAIO
+@@ -138,6 +137,14 @@ struct posix_fd {
+ char _pad[4]; /* manual padding */
+ };
+
++struct posix_diskxl {
++ pthread_cond_t cond;
++ struct list_head list;
++ xlator_t *xl;
++ gf_boolean_t detach_notify;
++ gf_boolean_t is_use;
++};
++
+ struct posix_private {
+ char *base_path;
+ int32_t base_path_length;
+@@ -207,6 +214,7 @@ struct posix_private {
+ pthread_mutex_t janitor_mutex;
+ pthread_cond_t janitor_cond;
+ pthread_cond_t fd_cond;
++ pthread_cond_t disk_cond;
+ int fsync_queue_count;
+
+ enum {
+@@ -233,7 +241,6 @@ struct posix_private {
+ char disk_unit;
+ uint32_t disk_space_full;
+ pthread_t disk_space_check;
+- gf_boolean_t disk_space_check_active;
+
+ #ifdef GF_DARWIN_HOST_OS
+ enum {
+@@ -263,6 +270,7 @@ struct posix_private {
+ gf_boolean_t ctime;
+ gf_boolean_t janitor_task_stop;
+ uint32_t rel_fdcount;
++ void *pxl;
+ };
+
+ typedef struct {
+--
+1.8.3.1
+
diff --git a/0507-inode-make-critical-section-smaller.patch b/0507-inode-make-critical-section-smaller.patch
new file mode 100644
index 0000000..3b1dac5
--- /dev/null
+++ b/0507-inode-make-critical-section-smaller.patch
@@ -0,0 +1,764 @@
+From b3a17b67a69142eef1b4adde3409d5e54dda1e0b Mon Sep 17 00:00:00 2001
+From: Amar Tumballi <amarts@redhat.com>
+Date: Sat, 9 Feb 2019 13:23:06 +0530
+Subject: [PATCH 507/511] inode: make critical section smaller
+
+Do all the 'static' tasks outside of the locked region.
+
+* hash_dentry() and hash_gfid() are now called outside the locked region.
+* remove the extra __dentry_hash export from libglusterfs.sym
+* avoid checks in locked functions if the check is already done in the
+ calling function.
+* implement dentry_destroy(), which handles freeing of a dentry separately
+ from dentry_unset (which takes care of separating a dentry from its
+ inode and table)
+
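+For example, the dentry hash is now computed before taking the table lock
+(a sketch of the pattern used in the diff below):
+
+    int hash = hash_dentry(parent, name, table->hashsize);
+
+    pthread_mutex_lock(&table->lock);
+    {
+        dentry = __dentry_grep(table, parent, name, hash);
+        ...
+    }
+    pthread_mutex_unlock(&table->lock);
+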
+> Updates: bz#1670031
+> Change-Id: I584213e0748464bb427fbdef3c4ab6615d7d5eb0
+> Signed-off-by: Amar Tumballi <amarts@redhat.com>
+> (Cherry pick from commit 8a90d346b9d3f69ff11241feb0011c90a8e57e30)
+> (Review on upstream link https://review.gluster.org/#/c/glusterfs/+/22184/)
+
+Change-Id: I584213e0748464bb427fbdef3c4ab6615d7d5eb0
+BUG: 1898777
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221189
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/inode.h | 3 -
+ libglusterfs/src/inode.c | 323 +++++++++++++------------------------
+ libglusterfs/src/libglusterfs.sym | 1 -
+ 3 files changed, 111 insertions(+), 216 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
+index 4421c47..c875653 100644
+--- a/libglusterfs/src/glusterfs/inode.h
++++ b/libglusterfs/src/glusterfs/inode.h
+@@ -167,9 +167,6 @@ inode_rename(inode_table_t *table, inode_t *olddir, const char *oldname,
+ inode_t *newdir, const char *newname, inode_t *inode,
+ struct iatt *stbuf);
+
+-dentry_t *
+-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name);
+-
+ inode_t *
+ inode_grep(inode_table_t *table, inode_t *parent, const char *name);
+
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 4c3c546..71b2d2a 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -159,27 +159,15 @@ hash_dentry(inode_t *parent, const char *name, int mod)
+ static int
+ hash_gfid(uuid_t uuid, int mod)
+ {
+- int ret = 0;
+-
+- ret = uuid[15] + (uuid[14] << 8);
+-
+- return ret;
++ return ((uuid[15] + (uuid[14] << 8)) % mod);
+ }
+
+ static void
+-__dentry_hash(dentry_t *dentry)
++__dentry_hash(dentry_t *dentry, const int hash)
+ {
+ inode_table_t *table = NULL;
+- int hash = 0;
+-
+- if (!dentry) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+- "dentry not found");
+- return;
+- }
+
+ table = dentry->inode->table;
+- hash = hash_dentry(dentry->parent, dentry->name, table->hashsize);
+
+ list_del_init(&dentry->hash);
+ list_add(&dentry->hash, &table->name_hash[hash]);
+@@ -188,49 +176,44 @@ __dentry_hash(dentry_t *dentry)
+ static int
+ __is_dentry_hashed(dentry_t *dentry)
+ {
+- if (!dentry) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+- "dentry not found");
+- return 0;
+- }
+-
+ return !list_empty(&dentry->hash);
+ }
+
+ static void
+ __dentry_unhash(dentry_t *dentry)
+ {
+- if (!dentry) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+- "dentry not found");
+- return;
+- }
+-
+ list_del_init(&dentry->hash);
+ }
+
+ static void
+-__dentry_unset(dentry_t *dentry)
++dentry_destroy(dentry_t *dentry)
+ {
+- if (!dentry) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+- "dentry not found");
++ if (!dentry)
+ return;
+- }
++
++ GF_FREE(dentry->name);
++ dentry->name = NULL;
++ mem_put(dentry);
++
++ return;
++}
++
++static dentry_t *
++__dentry_unset(dentry_t *dentry)
++{
++ if (!dentry)
++ return NULL;
+
+ __dentry_unhash(dentry);
+
+ list_del_init(&dentry->inode_list);
+
+- GF_FREE(dentry->name);
+- dentry->name = NULL;
+-
+ if (dentry->parent) {
+ __inode_unref(dentry->parent, false);
+ dentry->parent = NULL;
+ }
+
+- mem_put(dentry);
++ return dentry;
+ }
+
+ static int
+@@ -289,22 +272,14 @@ static int
+ __is_dentry_cyclic(dentry_t *dentry)
+ {
+ int ret = 0;
+- inode_t *inode = NULL;
+- char *name = "<nul>";
+
+ ret = __foreach_ancestor_dentry(dentry, __check_cycle, dentry->inode);
+ if (ret) {
+- inode = dentry->inode;
+-
+- if (dentry->name)
+- name = dentry->name;
+-
+ gf_msg(dentry->inode->table->name, GF_LOG_CRITICAL, 0,
+ LG_MSG_DENTRY_CYCLIC_LOOP,
+- "detected cyclic loop "
+- "formation during inode linkage. inode (%s) linking "
+- "under itself as %s",
+- uuid_utoa(inode->gfid), name);
++ "detected cyclic loop formation during inode linkage. "
++ "inode (%s) linking under itself as %s",
++ uuid_utoa(dentry->inode->gfid), dentry->name);
+ }
+
+ return ret;
+@@ -313,41 +288,19 @@ __is_dentry_cyclic(dentry_t *dentry)
+ static void
+ __inode_unhash(inode_t *inode)
+ {
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
+- return;
+- }
+-
+ list_del_init(&inode->hash);
+ }
+
+ static int
+ __is_inode_hashed(inode_t *inode)
+ {
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
+- return 0;
+- }
+-
+ return !list_empty(&inode->hash);
+ }
+
+ static void
+-__inode_hash(inode_t *inode)
++__inode_hash(inode_t *inode, const int hash)
+ {
+- inode_table_t *table = NULL;
+- int hash = 0;
+-
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
+- return;
+- }
+-
+- table = inode->table;
+- hash = hash_gfid(inode->gfid, 65536);
++ inode_table_t *table = inode->table;
+
+ list_del_init(&inode->hash);
+ list_add(&inode->hash, &table->inode_hash[hash]);
+@@ -359,12 +312,6 @@ __dentry_search_for_inode(inode_t *inode, uuid_t pargfid, const char *name)
+ dentry_t *dentry = NULL;
+ dentry_t *tmp = NULL;
+
+- if (!inode || !name) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+- "inode || name not found");
+- return NULL;
+- }
+-
+ /* earlier, just the ino was sent, which could have been 0, now
+ we deal with gfid, and if sent gfid is null or 0, no need to
+ continue with the check */
+@@ -390,12 +337,6 @@ __inode_ctx_free(inode_t *inode)
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
+- return;
+- }
+-
+ if (!inode->_ctx) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_CTX_NULL,
+ "_ctx not found");
+@@ -424,12 +365,6 @@ noctx:
+ static void
+ __inode_destroy(inode_t *inode)
+ {
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
+- return;
+- }
+-
+ __inode_ctx_free(inode);
+
+ LOCK_DESTROY(&inode->lock);
+@@ -472,9 +407,6 @@ inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode)
+ static void
+ __inode_activate(inode_t *inode)
+ {
+- if (!inode)
+- return;
+-
+ list_move(&inode->list, &inode->table->active);
+ inode->table->active_size++;
+ }
+@@ -485,19 +417,13 @@ __inode_passivate(inode_t *inode)
+ dentry_t *dentry = NULL;
+ dentry_t *t = NULL;
+
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
+- return;
+- }
+-
+ list_move_tail(&inode->list, &inode->table->lru);
+ inode->table->lru_size++;
+
+ list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
+ {
+ if (!__is_dentry_hashed(dentry))
+- __dentry_unset(dentry);
++ dentry_destroy(__dentry_unset(dentry));
+ }
+ }
+
+@@ -507,12 +433,6 @@ __inode_retire(inode_t *inode)
+ dentry_t *dentry = NULL;
+ dentry_t *t = NULL;
+
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
+- return;
+- }
+-
+ list_move_tail(&inode->list, &inode->table->purge);
+ inode->table->purge_size++;
+
+@@ -520,7 +440,7 @@ __inode_retire(inode_t *inode)
+
+ list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
+ {
+- __dentry_unset(dentry);
++ dentry_destroy(__dentry_unset(dentry));
+ }
+ }
+
+@@ -547,9 +467,6 @@ __inode_unref(inode_t *inode, bool clear)
+ xlator_t *this = NULL;
+ uint64_t nlookup = 0;
+
+- if (!inode)
+- return NULL;
+-
+ /*
+ * Root inode should always be in active list of inode table. So unrefs
+ * on root inode are no-ops.
+@@ -677,16 +594,10 @@ inode_ref(inode_t *inode)
+ }
+
+ static dentry_t *
+-__dentry_create(inode_t *inode, inode_t *parent, const char *name)
++dentry_create(inode_t *inode, inode_t *parent, const char *name)
+ {
+ dentry_t *newd = NULL;
+
+- if (!inode || !parent || !name) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+- "inode || parent || name not found");
+- return NULL;
+- }
+-
+ newd = mem_get0(parent->table->dentry_pool);
+ if (newd == NULL) {
+ goto out;
+@@ -702,10 +613,6 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name)
+ goto out;
+ }
+
+- if (parent)
+- newd->parent = __inode_ref(parent, false);
+-
+- list_add(&newd->inode_list, &inode->dentry_list);
+ newd->inode = inode;
+
+ out:
+@@ -717,14 +624,6 @@ __inode_create(inode_table_t *table)
+ {
+ inode_t *newi = NULL;
+
+- if (!table) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+- LG_MSG_INODE_TABLE_NOT_FOUND,
+- "table not "
+- "found");
+- return NULL;
+- }
+-
+ newi = mem_get0(table->inode_pool);
+ if (!newi) {
+ goto out;
+@@ -795,9 +694,6 @@ __inode_ref_reduce_by_n(inode_t *inode, uint64_t nref)
+ {
+ uint64_t nlookup = 0;
+
+- if (!inode)
+- return NULL;
+-
+ GF_ASSERT(inode->ref >= nref);
+
+ inode->ref -= nref;
+@@ -837,17 +733,12 @@ inode_forget_atomic(inode_t *inode, uint64_t nlookup)
+ }
+
+ dentry_t *
+-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name)
++__dentry_grep(inode_table_t *table, inode_t *parent, const char *name,
++ const int hash)
+ {
+- int hash = 0;
+ dentry_t *dentry = NULL;
+ dentry_t *tmp = NULL;
+
+- if (!table || !name || !parent)
+- return NULL;
+-
+- hash = hash_dentry(parent, name, table->hashsize);
+-
+ list_for_each_entry(tmp, &table->name_hash[hash], hash)
+ {
+ if (tmp->parent == parent && !strcmp(tmp->name, name)) {
+@@ -872,15 +763,16 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
+ return NULL;
+ }
+
++ int hash = hash_dentry(parent, name, table->hashsize);
++
+ pthread_mutex_lock(&table->lock);
+ {
+- dentry = __dentry_grep(table, parent, name);
+-
+- if (dentry)
++ dentry = __dentry_grep(table, parent, name, hash);
++ if (dentry) {
+ inode = dentry->inode;
+-
+- if (inode)
+- __inode_ref(inode, false);
++ if (inode)
++ __inode_ref(inode, false);
++ }
+ }
+ pthread_mutex_unlock(&table->lock);
+
+@@ -947,17 +839,18 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
+ return ret;
+ }
+
++ int hash = hash_dentry(parent, name, table->hashsize);
++
+ pthread_mutex_lock(&table->lock);
+ {
+- dentry = __dentry_grep(table, parent, name);
+-
+- if (dentry)
++ dentry = __dentry_grep(table, parent, name, hash);
++ if (dentry) {
+ inode = dentry->inode;
+-
+- if (inode) {
+- gf_uuid_copy(gfid, inode->gfid);
+- *type = inode->ia_type;
+- ret = 0;
++ if (inode) {
++ gf_uuid_copy(gfid, inode->gfid);
++ *type = inode->ia_type;
++ ret = 0;
++ }
+ }
+ }
+ pthread_mutex_unlock(&table->lock);
+@@ -978,25 +871,14 @@ __is_root_gfid(uuid_t gfid)
+ }
+
+ inode_t *
+-__inode_find(inode_table_t *table, uuid_t gfid)
++__inode_find(inode_table_t *table, uuid_t gfid, const int hash)
+ {
+ inode_t *inode = NULL;
+ inode_t *tmp = NULL;
+- int hash = 0;
+-
+- if (!table) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+- LG_MSG_INODE_TABLE_NOT_FOUND,
+- "table not "
+- "found");
+- goto out;
+- }
+
+ if (__is_root_gfid(gfid))
+ return table->root;
+
+- hash = hash_gfid(gfid, 65536);
+-
+ list_for_each_entry(tmp, &table->inode_hash[hash], hash)
+ {
+ if (gf_uuid_compare(tmp->gfid, gfid) == 0) {
+@@ -1005,7 +887,6 @@ __inode_find(inode_table_t *table, uuid_t gfid)
+ }
+ }
+
+-out:
+ return inode;
+ }
+
+@@ -1022,9 +903,11 @@ inode_find(inode_table_t *table, uuid_t gfid)
+ return NULL;
+ }
+
++ int hash = hash_gfid(gfid, 65536);
++
+ pthread_mutex_lock(&table->lock);
+ {
+- inode = __inode_find(table, gfid);
++ inode = __inode_find(table, gfid, hash);
+ if (inode)
+ __inode_ref(inode, false);
+ }
+@@ -1035,7 +918,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
+
+ static inode_t *
+ __inode_link(inode_t *inode, inode_t *parent, const char *name,
+- struct iatt *iatt)
++ struct iatt *iatt, const int dhash)
+ {
+ dentry_t *dentry = NULL;
+ dentry_t *old_dentry = NULL;
+@@ -1043,16 +926,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
+ inode_table_t *table = NULL;
+ inode_t *link_inode = NULL;
+
+- if (!inode) {
+- errno = EINVAL;
+- return NULL;
+- }
+-
+ table = inode->table;
+- if (!table) {
+- errno = EINVAL;
+- return NULL;
+- }
+
+ if (parent) {
+ /* We should prevent inode linking between different
+@@ -1090,14 +964,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
+ return NULL;
+ }
+
+- old_inode = __inode_find(table, iatt->ia_gfid);
++ int ihash = hash_gfid(iatt->ia_gfid, 65536);
++
++ old_inode = __inode_find(table, iatt->ia_gfid, ihash);
+
+ if (old_inode) {
+ link_inode = old_inode;
+ } else {
+ gf_uuid_copy(inode->gfid, iatt->ia_gfid);
+ inode->ia_type = iatt->ia_type;
+- __inode_hash(inode);
++ __inode_hash(inode, ihash);
+ }
+ } else {
+ /* @old_inode serves another important purpose - it indicates
+@@ -1112,22 +988,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
+ old_inode = inode;
+ }
+
+- if (name) {
+- if (!strcmp(name, ".") || !strcmp(name, ".."))
+- return link_inode;
+-
+- if (strchr(name, '/')) {
+- GF_ASSERT(!"inode link attempted with '/' in name");
+- return NULL;
+- }
++ if (name && (!strcmp(name, ".") || !strcmp(name, ".."))) {
++ return link_inode;
+ }
+
+ /* use only link_inode beyond this point */
+ if (parent) {
+- old_dentry = __dentry_grep(table, parent, name);
++ old_dentry = __dentry_grep(table, parent, name, dhash);
+
+ if (!old_dentry || old_dentry->inode != link_inode) {
+- dentry = __dentry_create(link_inode, parent, name);
++ dentry = dentry_create(link_inode, parent, name);
+ if (!dentry) {
+ gf_msg_callingfn(
+ THIS->name, GF_LOG_ERROR, 0, LG_MSG_DENTRY_CREATE_FAILED,
+@@ -1137,15 +1007,20 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
+ errno = ENOMEM;
+ return NULL;
+ }
++
++ /* dentry linking needs to happen inside lock */
++ dentry->parent = __inode_ref(parent, false);
++ list_add(&dentry->inode_list, &link_inode->dentry_list);
++
+ if (old_inode && __is_dentry_cyclic(dentry)) {
+ errno = ELOOP;
+- __dentry_unset(dentry);
++ dentry_destroy(__dentry_unset(dentry));
+ return NULL;
+ }
+- __dentry_hash(dentry);
++ __dentry_hash(dentry, dhash);
+
+ if (old_dentry)
+- __dentry_unset(old_dentry);
++ dentry_destroy(__dentry_unset(old_dentry));
+ }
+ }
+
+@@ -1155,6 +1030,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
+ inode_t *
+ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
+ {
++ int hash = 0;
+ inode_table_t *table = NULL;
+ inode_t *linked_inode = NULL;
+
+@@ -1166,10 +1042,18 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
+
+ table = inode->table;
+
++ if (parent && name) {
++ hash = hash_dentry(parent, name, table->hashsize);
++ }
++
++ if (name && strchr(name, '/')) {
++ GF_ASSERT(!"inode link attempted with '/' in name");
++ return NULL;
++ }
++
+ pthread_mutex_lock(&table->lock);
+ {
+- linked_inode = __inode_link(inode, parent, name, iatt);
+-
++ linked_inode = __inode_link(inode, parent, name, iatt, hash);
+ if (linked_inode)
+ __inode_ref(linked_inode, false);
+ }
+@@ -1312,48 +1196,47 @@ inode_invalidate(inode_t *inode)
+ return ret;
+ }
+
+-static void
++static dentry_t *
+ __inode_unlink(inode_t *inode, inode_t *parent, const char *name)
+ {
+ dentry_t *dentry = NULL;
+ char pgfid[64] = {0};
+ char gfid[64] = {0};
+
+- if (!inode || !parent || !name)
+- return;
+-
+ dentry = __dentry_search_for_inode(inode, parent->gfid, name);
+
+ /* dentry NULL for corrupted backend */
+ if (dentry) {
+- __dentry_unset(dentry);
++ dentry = __dentry_unset(dentry);
+ } else {
+ gf_msg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+ "%s/%s: dentry not found in %s",
+ uuid_utoa_r(parent->gfid, pgfid), name,
+ uuid_utoa_r(inode->gfid, gfid));
+ }
++
++ return dentry;
+ }
+
+ void
+ inode_unlink(inode_t *inode, inode_t *parent, const char *name)
+ {
+- inode_table_t *table = NULL;
++ inode_table_t *table;
++ dentry_t *dentry;
+
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
++ if (!inode || !parent || !name)
+ return;
+- }
+
+ table = inode->table;
+
+ pthread_mutex_lock(&table->lock);
+ {
+- __inode_unlink(inode, parent, name);
++ dentry = __inode_unlink(inode, parent, name);
+ }
+ pthread_mutex_unlock(&table->lock);
+
++ dentry_destroy(dentry);
++
+ inode_table_prune(table);
+ }
+
+@@ -1362,6 +1245,9 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
+ inode_t *dstdir, const char *dstname, inode_t *inode,
+ struct iatt *iatt)
+ {
++ int hash = 0;
++ dentry_t *dentry = NULL;
++
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+@@ -1370,13 +1256,26 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
+
+ table = inode->table;
+
++ if (dstname && strchr(dstname, '/')) {
++ GF_ASSERT(!"inode link attempted with '/' in name");
++ return -1;
++ }
++
++ if (dstdir && dstname) {
++ hash = hash_dentry(dstdir, dstname, table->hashsize);
++ }
++
+ pthread_mutex_lock(&table->lock);
+ {
+- __inode_link(inode, dstdir, dstname, iatt);
+- __inode_unlink(inode, srcdir, srcname);
++ __inode_link(inode, dstdir, dstname, iatt, hash);
++ /* pick the old dentry */
++ dentry = __inode_unlink(inode, srcdir, srcname);
+ }
+ pthread_mutex_unlock(&table->lock);
+
++ /* free the old dentry */
++ dentry_destroy(dentry);
++
+ inode_table_prune(table);
+
+ return 0;
+@@ -1447,12 +1346,6 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
+ static int
+ __inode_has_dentry(inode_t *inode)
+ {
+- if (!inode) {
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+- "inode not found");
+- return 0;
+- }
+-
+ return !list_empty(&inode->dentry_list);
+ }
+
+@@ -1461,6 +1354,12 @@ inode_has_dentry(inode_t *inode)
+ {
+ int dentry_present = 0;
+
++ if (!inode) {
++ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
++ "inode not found");
++ return 0;
++ }
++
+ LOCK(&inode->lock);
+ {
+ dentry_present = __inode_has_dentry(inode);
+@@ -1720,7 +1619,7 @@ __inode_table_init_root(inode_table_t *table)
+ iatt.ia_ino = 1;
+ iatt.ia_type = IA_IFDIR;
+
+- __inode_link(root, NULL, NULL, &iatt);
++ __inode_link(root, NULL, NULL, &iatt, 0);
+ table->root = root;
+ }
+
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index 5a721e0..d060292 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -357,7 +357,6 @@ default_copy_file_range
+ default_copy_file_range_cbk
+ default_copy_file_range_failure_cbk
+ default_copy_file_range_resume
+-__dentry_grep
+ dht_is_linkfile
+ dict_add
+ dict_addn
+--
+1.8.3.1
+
diff --git a/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch b/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch
new file mode 100644
index 0000000..9ccc1b5
--- /dev/null
+++ b/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch
@@ -0,0 +1,232 @@
+From 87b7689f7727a542c5afa22bdebd3781dd650a2f Mon Sep 17 00:00:00 2001
+From: Csaba Henk <csaba@redhat.com>
+Date: Fri, 17 Jul 2020 11:33:36 +0200
+Subject: [PATCH 508/511] fuse: fetch arbitrary number of groups from
+ /proc/[pid]/status
+
+Glusterfs so far constrained itself with an arbitrary limit (32)
+for the number of groups read from /proc/[pid]/status (this was
+the number of groups shown there prior to Linux commit
+v3.7-9553-g8d238027b87e (v3.8-rc1~74^2~59); since this commit, all
+groups are shown).
+
+With this change we'll read groups up to the number Glusterfs
+supports in general (64k).
+
+Note: the actual number of groups that are made use of in a
+regular Glusterfs setup shall still be capped at ~93 due to limitations
+of the RPC transport. To be able to handle more groups than that,
+brick side gid resolution (server.manage-gids option) can be used along
+with NIS, LDAP or other such networked directory service (see
+https://github.com/gluster/glusterdocs/blob/5ba15a2/docs/Administrator%20Guide/Handling-of-users-with-many-groups.md#limit-in-the-glusterfs-protocol
+).
+
+Also adding some diagnostic messages to frame_fill_groups().
+
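+The read loop now retries with a larger buffer whenever the gid list
+overflows the current capacity, roughly (a sketch of the logic in the
+diff below; parsing of the "Groups:" line is elided):
+
+    for (;;) {
+        call_stack_alloc_groups(frame->root, ngroups);
+        idx = <number of gids parsed from /proc/[pid]/status>;
+        if (idx > call_stack_groups_capacity(frame->root)) {
+            ngroups = idx;    /* grow and re-read */
+            rewind(fp);
+        } else {
+            frame->root->ngrps = idx;
+            break;
+        }
+    }
+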
+Upstream:
+> Reviewed-on: https://review.gluster.org/c/glusterfs/+/24721
+> Change-Id: I271f3dc3e6d3c44d6d989c7a2073ea5f16c26ee0
+> fixes: #1075
+> Signed-off-by: Csaba Henk <csaba@redhat.com>
+
+BUG: 1749304
+Change-Id: I80bf99d34087fb95768bf2259d8c4774d9f5d0c5
+Signed-off-by: Csaba Henk <csaba@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220920
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/stack.h | 7 ++++
+ tests/bugs/fuse/many-groups-for-acl.t | 13 ++++++-
+ xlators/mount/fuse/src/fuse-helpers.c | 71 +++++++++++++++++++++++------------
+ 3 files changed, 65 insertions(+), 26 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h
+index 1758550..bd466d8 100644
+--- a/libglusterfs/src/glusterfs/stack.h
++++ b/libglusterfs/src/glusterfs/stack.h
+@@ -429,6 +429,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps)
+ if (ngrps <= SMALL_GROUP_COUNT) {
+ stack->groups = stack->groups_small;
+ } else {
++ GF_FREE(stack->groups_large);
+ stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t),
+ gf_common_mt_groups_t);
+ if (!stack->groups_large)
+@@ -442,6 +443,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps)
+ }
+
+ static inline int
++call_stack_groups_capacity(call_stack_t *stack)
++{
++ return max(stack->ngrps, SMALL_GROUP_COUNT);
++}
++
++static inline int
+ call_frames_count(call_stack_t *call_stack)
+ {
+ call_frame_t *pos;
+diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t
+index d959f75..a51b1bc 100755
+--- a/tests/bugs/fuse/many-groups-for-acl.t
++++ b/tests/bugs/fuse/many-groups-for-acl.t
+@@ -38,6 +38,13 @@ do
+ done
+ TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+
++# Linux < 3.8 exports only first 32 gids of pid to userspace
++kernel_exports_few_gids=0
++if [ "$OSTYPE" = Linux ] && \
++ su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then
++ kernel_exports_few_gids=1
++fi
++
+ # preparation done, start the tests
+
+ TEST glusterd
+@@ -48,6 +55,8 @@ TEST $CLI volume set $V0 nfs.disable off
+ TEST $CLI volume set ${V0} server.manage-gids off
+ TEST $CLI volume start ${V0}
+
++# This is just a synchronization hack to make sure the bricks are
++# up before going on.
+ EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available
+
+ # mount the volume with POSIX ACL support, without --resolve-gids
+@@ -69,8 +78,8 @@ TEST [ $? -eq 0 ]
+ su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null"
+ TEST [ $? -eq 0 ]
+
+-su -m ${NEW_USER} -c "touch ${M0}/gid-64/failure > /dev/null"
+-TEST [ $? -ne 0 ]
++su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null"
++TEST [ $? -eq $kernel_exports_few_gids ]
+
+ su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null"
+ TEST [ $? -ne 0 ]
+diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
+index 5bfc40c..6e04cd4 100644
+--- a/xlators/mount/fuse/src/fuse-helpers.c
++++ b/xlators/mount/fuse/src/fuse-helpers.c
+@@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh)
+ return state;
+ }
+
+-#define FUSE_MAX_AUX_GROUPS \
+- 32 /* We can get only up to 32 aux groups from /proc */
+ void
+ frame_fill_groups(call_frame_t *frame)
+ {
+@@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame)
+ char filename[32];
+ char line[4096];
+ char *ptr = NULL;
+- FILE *fp = NULL;
+- int idx = 0;
+ long int id = 0;
+ char *saveptr = NULL;
+ char *endptr = NULL;
+@@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame)
+
+ call_stack_set_groups(frame->root, ngroups, &mygroups);
+ } else {
++ FILE *fp = NULL;
++
+ ret = snprintf(filename, sizeof filename, "/proc/%d/status",
+ frame->root->pid);
+- if (ret >= sizeof filename)
++ if (ret >= sizeof filename) {
++ gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size");
+ goto out;
++ }
+
+ fp = fopen(filename, "r");
+- if (!fp)
++ if (!fp) {
++ gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename,
++ strerror(errno));
+ goto out;
++ }
+
+- if (call_stack_alloc_groups(frame->root, ngroups) != 0)
+- goto out;
++ for (;;) {
++ gf_boolean_t found_groups = _gf_false;
++ int idx = 0;
+
+- while ((ptr = fgets(line, sizeof line, fp))) {
+- if (strncmp(ptr, "Groups:", 7) != 0)
+- continue;
++ if (call_stack_alloc_groups(frame->root, ngroups) != 0) {
++ gf_log(this->name, GF_LOG_ERROR,
++ "failed to allocate gid buffer");
++ goto out;
++ }
+
++ while ((ptr = fgets(line, sizeof line, fp))) {
++ if (strncmp(ptr, "Groups:", 7) == 0) {
++ found_groups = _gf_true;
++ break;
++ }
++ }
++ if (!found_groups) {
++ gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s",
++ filename);
++ break;
++ }
+ ptr = line + 8;
+
+ for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr;
+ ptr = strtok_r(NULL, " \t\r\n", &saveptr)) {
+ errno = 0;
+ id = strtol(ptr, &endptr, 0);
+- if (errno == ERANGE)
+- break;
+- if (!endptr || *endptr)
++ if (errno == ERANGE || !endptr || *endptr) {
++ gf_log(this->name, GF_LOG_ERROR, "failed to parse %s",
++ filename);
+ break;
+- frame->root->groups[idx++] = id;
+- if (idx == FUSE_MAX_AUX_GROUPS)
++ }
++ if (idx < call_stack_groups_capacity(frame->root))
++ frame->root->groups[idx] = id;
++ idx++;
++ if (idx == GF_MAX_AUX_GROUPS)
+ break;
+ }
+-
+- frame->root->ngrps = idx;
+- break;
++ if (idx > call_stack_groups_capacity(frame->root)) {
++ ngroups = idx;
++ rewind(fp);
++ } else {
++ frame->root->ngrps = idx;
++ break;
++ }
+ }
++ out:
++ if (fp)
++ fclose(fp);
+ }
+-
+-out:
+- if (fp)
+- fclose(fp);
+ #elif defined(GF_SOLARIS_HOST_OS)
+ char filename[32];
+ char scratch[128];
+@@ -245,7 +268,7 @@ out:
+ fp = fopen(filename, "r");
+ if (fp != NULL) {
+ if (fgets(scratch, sizeof scratch, fp) != NULL) {
+- ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS);
++ ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS);
+ if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
+ fclose(fp);
+ return;
+--
+1.8.3.1
+
diff --git a/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch b/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch
new file mode 100644
index 0000000..fdfc9bb
--- /dev/null
+++ b/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch
@@ -0,0 +1,407 @@
+From a18f03cbf2b5652f8617cb4dd236bb4ca9838d96 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Tue, 6 Oct 2020 16:54:15 +0530
+Subject: [PATCH 509/511] core: configure optimum inode table hash_size for shd
+
+In a brick_mux environment a shd process consumes a lot of memory.
+After printing the statedump I found it allocates 1M per afr xlator
+for all bricks. With 4k volumes configured it consumes almost 6G of
+RSS in total, of which 4G is consumed by inode_tables:
+
+[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage]
+size=1273488
+num_allocs=2
+max_size=1273488
+max_num_allocs=2
+total_allocs=2
+
+The inode_table_new function allocates memory (1M) for the inode and dentry
+hash lists. For shd the lru_limit is 1, so we don't need a big hash table;
+to reduce shd RSS size, pass an optimum bucket count when creating it.
+
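+Callers can now pass the bucket counts explicitly; 0 keeps the old
+defaults (14057 dentry buckets, 65536 inode buckets). For example, from
+the afr change below:
+
+    /* shd: lru_limit is 1, so tiny tables suffice */
+    this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128);
+
+    /* everyone else keeps the defaults */
+    this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0);
+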
+> Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
+> Fixes: #1538
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit ca6bbc486e76fdb9a8e07119bb10d7fa45b2e93b)
+> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1538)
+
+Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
+BUG: 1898777
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221191
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/glfs-master.c | 2 +-
+ libglusterfs/src/glusterfs/inode.h | 17 +++++----
+ libglusterfs/src/inode.c | 53 +++++++++++++++++---------
+ xlators/cluster/afr/src/afr.c | 10 ++++-
+ xlators/cluster/dht/src/dht-rebalance.c | 3 +-
+ xlators/cluster/ec/src/ec.c | 2 +-
+ xlators/features/bit-rot/src/bitd/bit-rot.c | 2 +-
+ xlators/features/quota/src/quotad-helpers.c | 2 +-
+ xlators/features/trash/src/trash.c | 4 +-
+ xlators/mount/fuse/src/fuse-bridge.c | 6 +--
+ xlators/nfs/server/src/nfs.c | 2 +-
+ xlators/protocol/server/src/server-handshake.c | 3 +-
+ 12 files changed, 66 insertions(+), 40 deletions(-)
+
+diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
+index b4473b1..9e604d3 100644
+--- a/api/src/glfs-master.c
++++ b/api/src/glfs-master.c
+@@ -45,7 +45,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph)
+ }
+
+ if (!new_subvol->itable) {
+- itable = inode_table_new(131072, new_subvol);
++ itable = inode_table_new(131072, new_subvol, 0, 0);
+ if (!itable) {
+ errno = ENOMEM;
+ ret = -1;
+diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
+index c875653..62c093d 100644
+--- a/libglusterfs/src/glusterfs/inode.h
++++ b/libglusterfs/src/glusterfs/inode.h
+@@ -35,11 +35,12 @@ typedef struct _dentry dentry_t;
+
+ struct _inode_table {
+ pthread_mutex_t lock;
+- size_t hashsize; /* bucket size of inode hash and dentry hash */
+- char *name; /* name of the inode table, just for gf_log() */
+- inode_t *root; /* root directory inode, with number 1 */
+- xlator_t *xl; /* xlator to be called to do purge */
+- uint32_t lru_limit; /* maximum LRU cache size */
++ size_t dentry_hashsize; /* Number of buckets for dentry hash*/
++ size_t inode_hashsize; /* Size of inode hash table */
++ char *name; /* name of the inode table, just for gf_log() */
++ inode_t *root; /* root directory inode, with number 1 */
++ xlator_t *xl; /* xlator to be called to do purge */
++ uint32_t lru_limit; /* maximum LRU cache size */
+ struct list_head *inode_hash; /* buckets for inode hash table */
+ struct list_head *name_hash; /* buckets for dentry hash table */
+ struct list_head active; /* list of inodes currently active (in an fop) */
+@@ -116,12 +117,14 @@ struct _inode {
+ #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
+
+ inode_table_t *
+-inode_table_new(uint32_t lru_limit, xlator_t *xl);
++inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size,
++ uint32_t inodehash_size);
+
+ inode_table_t *
+ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+- xlator_t *invalidator_xl);
++ xlator_t *invalidator_xl, uint32_t dentry_hashsize,
++ uint32_t inode_hashsize);
+
+ void
+ inode_table_destroy_all(glusterfs_ctx_t *ctx);
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 71b2d2a..98f8ea6 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -763,7 +763,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
+ return NULL;
+ }
+
+- int hash = hash_dentry(parent, name, table->hashsize);
++ int hash = hash_dentry(parent, name, table->dentry_hashsize);
+
+ pthread_mutex_lock(&table->lock);
+ {
+@@ -839,7 +839,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
+ return ret;
+ }
+
+- int hash = hash_dentry(parent, name, table->hashsize);
++ int hash = hash_dentry(parent, name, table->dentry_hashsize);
+
+ pthread_mutex_lock(&table->lock);
+ {
+@@ -903,7 +903,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
+ return NULL;
+ }
+
+- int hash = hash_gfid(gfid, 65536);
++ int hash = hash_gfid(gfid, table->inode_hashsize);
+
+ pthread_mutex_lock(&table->lock);
+ {
+@@ -964,7 +964,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
+ return NULL;
+ }
+
+- int ihash = hash_gfid(iatt->ia_gfid, 65536);
++ int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize);
+
+ old_inode = __inode_find(table, iatt->ia_gfid, ihash);
+
+@@ -1043,7 +1043,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
+ table = inode->table;
+
+ if (parent && name) {
+- hash = hash_dentry(parent, name, table->hashsize);
++ hash = hash_dentry(parent, name, table->dentry_hashsize);
+ }
+
+ if (name && strchr(name, '/')) {
+@@ -1262,7 +1262,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
+ }
+
+ if (dstdir && dstname) {
+- hash = hash_dentry(dstdir, dstname, table->hashsize);
++ hash = hash_dentry(dstdir, dstname, table->dentry_hashsize);
+ }
+
+ pthread_mutex_lock(&table->lock);
+@@ -1626,7 +1626,8 @@ __inode_table_init_root(inode_table_t *table)
+ inode_table_t *
+ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+- xlator_t *invalidator_xl)
++ xlator_t *invalidator_xl, uint32_t dentry_hashsize,
++ uint32_t inode_hashsize)
+ {
+ inode_table_t *new = NULL;
+ uint32_t mem_pool_size = lru_limit;
+@@ -1644,7 +1645,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ new->invalidator_fn = invalidator_fn;
+ new->invalidator_xl = invalidator_xl;
+
+- new->hashsize = 14057; /* TODO: Random Number?? */
++ if (dentry_hashsize == 0) {
++ /* Prime number for uniform distribution */
++ new->dentry_hashsize = 14057;
++ } else {
++ new->dentry_hashsize = dentry_hashsize;
++ }
++
++ if (inode_hashsize == 0) {
++ /* The hash table size should always be a power of 2 */
++ new->inode_hashsize = 65536;
++ } else {
++ new->inode_hashsize = inode_hashsize;
++ }
+
+ /* In case FUSE is initing the inode table. */
+ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
+@@ -1658,13 +1671,13 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ if (!new->dentry_pool)
+ goto out;
+
+- new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head),
+- gf_common_mt_list_head);
++ new->inode_hash = (void *)GF_CALLOC(
++ new->inode_hashsize, sizeof(struct list_head), gf_common_mt_list_head);
+ if (!new->inode_hash)
+ goto out;
+
+- new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head),
+- gf_common_mt_list_head);
++ new->name_hash = (void *)GF_CALLOC(
++ new->dentry_hashsize, sizeof(struct list_head), gf_common_mt_list_head);
+ if (!new->name_hash)
+ goto out;
+
+@@ -1675,11 +1688,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ if (!new->fd_mem_pool)
+ goto out;
+
+- for (i = 0; i < 65536; i++) {
++ for (i = 0; i < new->inode_hashsize; i++) {
+ INIT_LIST_HEAD(&new->inode_hash[i]);
+ }
+
+- for (i = 0; i < new->hashsize; i++) {
++ for (i = 0; i < new->dentry_hashsize; i++) {
+ INIT_LIST_HEAD(&new->name_hash[i]);
+ }
+
+@@ -1717,10 +1730,12 @@ out:
+ }
+
+ inode_table_t *
+-inode_table_new(uint32_t lru_limit, xlator_t *xl)
++inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dentry_hashsize,
++ uint32_t inode_hashsize)
+ {
+ /* Only fuse for now requires the inode table with invalidator */
+- return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
++ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL,
++ dentry_hashsize, inode_hashsize);
+ }
+
+ int
+@@ -2439,8 +2454,10 @@ inode_table_dump(inode_table_t *itable, char *prefix)
+ return;
+ }
+
+- gf_proc_dump_build_key(key, prefix, "hashsize");
+- gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize);
++ gf_proc_dump_build_key(key, prefix, "dentry_hashsize");
++ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize);
++ gf_proc_dump_build_key(key, prefix, "inode_hashsize");
++ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize);
+ gf_proc_dump_build_key(key, prefix, "name");
+ gf_proc_dump_write(key, "%s", itable->name);
+
+diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
+index 8f9e71f..bfa464f 100644
+--- a/xlators/cluster/afr/src/afr.c
++++ b/xlators/cluster/afr/src/afr.c
+@@ -594,7 +594,15 @@ init(xlator_t *this)
+ goto out;
+ }
+
+- this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
++ if (priv->shd.iamshd) {
++ /* The number of hash buckets should be a prime number, so declare
++ 131 total dentry hash buckets
++ */
++ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128);
++ } else {
++ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0);
++ }
++
+ if (!this->itable) {
+ ret = -ENOMEM;
+ goto out;
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 16ac16c..072896d 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -1168,7 +1168,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+ break;
+ }
+
+-
+ offset += ret;
+ total += ret;
+
+@@ -2467,7 +2466,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode)
+ 0,
+ };
+
+- itable = inode_table_new(0, this);
++ itable = inode_table_new(0, this, 0, 0);
+ if (!itable)
+ return;
+
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 3f31c74..4118c3b 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -734,7 +734,7 @@ init(xlator_t *this)
+ GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
+ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
+
+- this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
++ this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0);
+ if (!this->itable)
+ goto failed;
+
+diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
+index 424c0d5..4e0e798 100644
+--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
+@@ -1658,7 +1658,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
+ child->child_up = 1;
+ child->xl = subvol;
+ if (!child->table)
+- child->table = inode_table_new(4096, subvol);
++ child->table = inode_table_new(4096, subvol, 0, 0);
+
+ _br_qchild_event(this, child, br_brick_connect);
+ pthread_cond_signal(&priv->cond);
+diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
+index d9f0351..46ac116 100644
+--- a/xlators/features/quota/src/quotad-helpers.c
++++ b/xlators/features/quota/src/quotad-helpers.c
+@@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req)
+ UNLOCK(&priv->lock);
+
+ if (active_subvol->itable == NULL)
+- active_subvol->itable = inode_table_new(4096, active_subvol);
++ active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0);
+
+ state->itable = active_subvol->itable;
+
+diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
+index 93f020f..099c887 100644
+--- a/xlators/features/trash/src/trash.c
++++ b/xlators/features/trash/src/trash.c
+@@ -2261,7 +2261,7 @@ reconfigure(xlator_t *this, dict_t *options)
+
+ if (!active_earlier && active_now) {
+ if (!priv->trash_itable) {
+- priv->trash_itable = inode_table_new(0, this);
++ priv->trash_itable = inode_table_new(0, this, 0, 0);
+ if (!priv->trash_itable) {
+ ret = -ENOMEM;
+ gf_log(this->name, GF_LOG_ERROR,
+@@ -2533,7 +2533,7 @@ init(xlator_t *this)
+ }
+
+ if (priv->state) {
+- priv->trash_itable = inode_table_new(0, this);
++ priv->trash_itable = inode_table_new(0, this, 0, 0);
+ if (!priv->trash_itable) {
+ ret = -ENOMEM;
+ priv->state = _gf_false;
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 1bddac2..919eea3 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -6298,10 +6298,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph)
+ }
+
+ #if FUSE_KERNEL_MINOR_VERSION >= 11
+- itable = inode_table_with_invalidator(priv->lru_limit, graph->top,
+- fuse_inode_invalidate_fn, this);
++ itable = inode_table_with_invalidator(
++ priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0);
+ #else
+- itable = inode_table_new(0, graph->top);
++ itable = inode_table_new(0, graph->top, 0, 0);
+ #endif
+ if (!itable) {
+ ret = -1;
+diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
+index ebded41..402be30 100644
+--- a/xlators/nfs/server/src/nfs.c
++++ b/xlators/nfs/server/src/nfs.c
+@@ -564,7 +564,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl)
+ return -1;
+
+ lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
+- xl->itable = inode_table_new(lrusize, xl);
++ xl->itable = inode_table_new(lrusize, xl, 0, 0);
+ if (!xl->itable) {
+ gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to allocate inode table");
+diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
+index 1d1177d..eeca73c 100644
+--- a/xlators/protocol/server/src/server-handshake.c
++++ b/xlators/protocol/server/src/server-handshake.c
+@@ -36,7 +36,6 @@ gf_compare_client_version(rpcsvc_request_t *req, int fop_prognum,
+ return ret;
+ }
+
+-
+ int
+ server_getspec(rpcsvc_request_t *req)
+ {
+@@ -629,7 +628,7 @@ server_setvolume(rpcsvc_request_t *req)
+
+ /* TODO: what is this ? */
+ client->bound_xl->itable = inode_table_new(conf->inode_lru_limit,
+- client->bound_xl);
++ client->bound_xl, 0, 0);
+ }
+ }
+ UNLOCK(&conf->itable_lock);
+--
+1.8.3.1
+
diff --git a/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch b/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch
new file mode 100644
index 0000000..e8a4906
--- /dev/null
+++ b/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch
@@ -0,0 +1,784 @@
+From 5294c82e0528059b10cbaab7805b20e76ffdd66b Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Mon, 30 Nov 2020 17:39:53 +0530
+Subject: [PATCH 510/511] glusterd[brick_mux]: Optimize friend handshake code
+ to avoid call_bail (#1614)
+
+During the glusterd handshake, glusterd receives a volume dictionary
+from the peer end to compare against its own volume dictionary data. If the
+options differ, it sets a key to flag that volume options have changed and
+calls the import synctask to delete/start the volume. In a brick_mux
+environment, when the number of volumes is high (5k), the dict API in the
+function glusterd_compare_friend_volume takes time because the function
+glusterd_handle_friend_req saves all peer volume data in a single dictionary.
+Due to the time taken by glusterd_handle_friend_req, RPC requests receive a
+call_bail from the peer end and gluster (CLI) cannot show volume status.
+
+Solution: The changes below optimize the code.
+1) Populate a new, specific dictionary to save only the peer end's
+ version-specific data, so that the function does not take much time to
+ decide whether the peer end has any volume updates.
+2) If a volume's version differs, set a bit in status_arr instead of
+ saving a key in a dictionary, which makes the operation faster.
+
+Note: The following procedure was used to validate the changes
+1) Set up 5100 distributed volumes 3x1
+2) Enable brick_mux
+3) Start all the volumes
+4) Kill all gluster processes on the 3rd node
+5) Run a loop to update a volume option on the 1st node
+ for i in {1..5100}; do gluster v set vol$i performance.open-behind off; done
+6) Start the glusterd process on the 3rd node
+7) Wait for the handshake to finish and check that there is no call_bail
+ message in the logs
+
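+As a rough illustration (a sketch only; the field name status_arr and the
+bit arithmetic follow the patch below, while the helper functions are
+hypothetical), the bit array packs one volume per bit, so even 5100
+volumes fit in about 80 64-bit words:
+
+    #include <stdint.h>
+
+    /* Mark volume 'count' as having updates (mirrors the XOR done in
+       glusterd_compare_friend_volume). */
+    static inline void vol_mark_update(uint64_t *bm, int count)
+    {
+        bm[count / 64] ^= 1UL << (count % 64);
+    }
+
+    /* Test the same bit (mirrors the check in
+       glusterd_import_friend_volume). */
+    static inline int vol_needs_update(const uint64_t *bm, int count)
+    {
+        return 1UL & (bm[count / 64] >> (count % 64));
+    }
+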
+> Change-Id: Ibad7c23988539cc369ecc39dea2ea6985470bee1
+> Fixes: #1613
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit 12545d91eed27ff9abb0505a12c7d4e75b45a53e)
+> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1613)
+
+Change-Id: Ibad7c23988539cc369ecc39dea2ea6985470bee1
+BUG: 1898784
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221193
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/ctx.c | 4 +
+ libglusterfs/src/dict.c | 166 ++++++++++++++++++++++++++-
+ libglusterfs/src/globals.c | 2 -
+ libglusterfs/src/glusterfs/dict.h | 5 +
+ libglusterfs/src/glusterfs/globals.h | 2 +
+ libglusterfs/src/libglusterfs.sym | 1 +
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 39 ++++---
+ xlators/mgmt/glusterd/src/glusterd-sm.c | 6 +-
+ xlators/mgmt/glusterd/src/glusterd-sm.h | 1 +
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 148 ++++++++++++++----------
+ xlators/mgmt/glusterd/src/glusterd-utils.h | 2 +-
+ xlators/mgmt/glusterd/src/glusterd.h | 8 +-
+ 12 files changed, 301 insertions(+), 83 deletions(-)
+
+diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c
+index 4a001c2..ae1a77a 100644
+--- a/libglusterfs/src/ctx.c
++++ b/libglusterfs/src/ctx.c
+@@ -14,6 +14,7 @@
+ #include "glusterfs/glusterfs.h"
+ #include "timer-wheel.h"
+
++glusterfs_ctx_t *global_ctx = NULL;
+ glusterfs_ctx_t *
+ glusterfs_ctx_new()
+ {
+@@ -51,6 +52,9 @@ glusterfs_ctx_new()
+ GF_ATOMIC_INIT(ctx->stats.max_dict_pairs, 0);
+ GF_ATOMIC_INIT(ctx->stats.total_pairs_used, 0);
+ GF_ATOMIC_INIT(ctx->stats.total_dicts_used, 0);
++
++ if (!global_ctx)
++ global_ctx = ctx;
+ out:
+ return ctx;
+ }
+diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
+index d8cdda4..e5f619c 100644
+--- a/libglusterfs/src/dict.c
++++ b/libglusterfs/src/dict.c
+@@ -56,7 +56,13 @@ struct dict_cmp {
+ static data_t *
+ get_new_data()
+ {
+- data_t *data = mem_get(THIS->ctx->dict_data_pool);
++ data_t *data = NULL;
++
++ if (global_ctx) {
++ data = mem_get(global_ctx->dict_data_pool);
++ } else {
++ data = mem_get(THIS->ctx->dict_data_pool);
++ }
+
+ if (!data)
+ return NULL;
+@@ -3503,3 +3509,161 @@ unlock:
+ UNLOCK(&dict->lock);
+ return 0;
+ }
++
++/* Populate a specific dictionary, based on the passed key array, at the
++ time of unserializing the buffer
++*/
++int32_t
++dict_unserialize_specific_keys(char *orig_buf, int32_t size, dict_t **fill,
++ char **suffix_key_arr, dict_t **specific_dict,
++ int totkeycount)
++{
++ char *buf = orig_buf;
++ int ret = -1;
++ int32_t count = 0;
++ int i = 0;
++ int j = 0;
++
++ data_t *value = NULL;
++ char *key = NULL;
++ int32_t keylen = 0;
++ int32_t vallen = 0;
++ int32_t hostord = 0;
++ xlator_t *this = NULL;
++ int32_t keylenarr[totkeycount];
++
++ this = THIS;
++ GF_ASSERT(this);
++
++ if (!buf) {
++ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
++ "buf is null!");
++ goto out;
++ }
++
++ if (size == 0) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
++ "size is 0!");
++ goto out;
++ }
++
++ if (!fill) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
++ "fill is null!");
++ goto out;
++ }
++
++ if (!*fill) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
++ "*fill is null!");
++ goto out;
++ }
++
++ if ((buf + DICT_HDR_LEN) > (orig_buf + size)) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
++ "undersized buffer "
++ "passed. available (%lu) < required (%lu)",
++ (long)(orig_buf + size), (long)(buf + DICT_HDR_LEN));
++ goto out;
++ }
++
++ memcpy(&hostord, buf, sizeof(hostord));
++ count = ntoh32(hostord);
++ buf += DICT_HDR_LEN;
++
++ if (count < 0) {
++ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
++ "count=%d", count, NULL);
++ goto out;
++ }
++
++ /* Compute specific key length and save in array */
++ for (i = 0; i < totkeycount; i++) {
++ keylenarr[i] = strlen(suffix_key_arr[i]);
++ }
++
++ for (i = 0; i < count; i++) {
++ if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
++ "undersized "
++ "buffer passed. available (%lu) < "
++ "required (%lu)",
++ (long)(orig_buf + size),
++ (long)(buf + DICT_DATA_HDR_KEY_LEN));
++ goto out;
++ }
++ memcpy(&hostord, buf, sizeof(hostord));
++ keylen = ntoh32(hostord);
++ buf += DICT_DATA_HDR_KEY_LEN;
++
++ if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
++ "undersized "
++ "buffer passed. available (%lu) < "
++ "required (%lu)",
++ (long)(orig_buf + size),
++ (long)(buf + DICT_DATA_HDR_VAL_LEN));
++ goto out;
++ }
++ memcpy(&hostord, buf, sizeof(hostord));
++ vallen = ntoh32(hostord);
++ buf += DICT_DATA_HDR_VAL_LEN;
++
++ if ((keylen < 0) || (vallen < 0)) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
++ "undersized length passed "
++ "key:%d val:%d",
++ keylen, vallen);
++ goto out;
++ }
++ if ((buf + keylen) > (orig_buf + size)) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
++ "undersized buffer passed. "
++ "available (%lu) < required (%lu)",
++ (long)(orig_buf + size), (long)(buf + keylen));
++ goto out;
++ }
++ key = buf;
++ buf += keylen + 1; /* for '\0' */
++
++ if ((buf + vallen) > (orig_buf + size)) {
++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
++ "undersized buffer passed. "
++ "available (%lu) < required (%lu)",
++ (long)(orig_buf + size), (long)(buf + vallen));
++ goto out;
++ }
++ value = get_new_data();
++
++ if (!value) {
++ ret = -1;
++ goto out;
++ }
++ value->len = vallen;
++ value->data = gf_memdup(buf, vallen);
++ value->data_type = GF_DATA_TYPE_STR_OLD;
++ value->is_static = _gf_false;
++ buf += vallen;
++
++ ret = dict_addn(*fill, key, keylen, value);
++ if (ret < 0) {
++ data_destroy(value);
++ goto out;
++ }
++ for (j = 0; j < totkeycount; j++) {
++ if (keylen > keylenarr[j]) {
++ if (!strcmp(key + keylen - keylenarr[j], suffix_key_arr[j])) {
++ ret = dict_addn(*specific_dict, key, keylen, value);
++ break;
++ }
++ }
++ }
++
++ if (ret < 0)
++ goto out;
++ }
++
++ ret = 0;
++out:
++ return ret;
++}
+diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
+index e433ee8..30c15b6 100644
+--- a/libglusterfs/src/globals.c
++++ b/libglusterfs/src/globals.c
+@@ -96,7 +96,6 @@ const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = {
+ /* This global ctx is a bad hack to prevent some of the libgfapi crashes.
+ * This should be removed once the patch on resource pool is accepted
+ */
+-glusterfs_ctx_t *global_ctx = NULL;
+ pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;
+ xlator_t global_xlator;
+ static int gf_global_mem_acct_enable = 1;
+@@ -236,7 +235,6 @@ __glusterfs_this_location()
+ if (*this_location == NULL) {
+ thread_xlator = &global_xlator;
+ }
+-
+ return this_location;
+ }
+
+diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h
+index 8239c7a..6e469c7 100644
+--- a/libglusterfs/src/glusterfs/dict.h
++++ b/libglusterfs/src/glusterfs/dict.h
+@@ -423,4 +423,9 @@ dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result);
+
+ int
+ dict_serialized_length_lk(dict_t *this);
++
++int32_t
++dict_unserialize_specific_keys(char *orig_buf, int32_t size, dict_t **fill,
++ char **specific_key_arr, dict_t **specific_dict,
++ int totkeycount);
+ #endif
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index cc145cd..33fb023 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -199,4 +199,6 @@ int
+ gf_global_mem_acct_enable_get(void);
+ int
+ gf_global_mem_acct_enable_set(int val);
++
++extern glusterfs_ctx_t *global_ctx;
+ #endif /* !_GLOBALS_H */
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index d060292..bc770e2 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -436,6 +436,7 @@ dict_clear_flag
+ dict_check_flag
+ dict_unref
+ dict_unserialize
++dict_unserialize_specific_keys
+ drop_token
+ eh_destroy
+ eh_dump
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index b8799ab..908361c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -86,6 +86,9 @@ glusterd_big_locked_handler(rpcsvc_request_t *req, rpcsvc_actor actor_fn)
+ return ret;
+ }
+
++static char *specific_key_suffix[] = {".quota-cksum", ".ckusm", ".version",
++ ".quota-version", ".name"};
++
+ static int
+ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
+ int port, gd1_mgmt_friend_req *friend_req)
+@@ -97,6 +100,8 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
+ char rhost[UNIX_PATH_MAX + 1] = {0};
+ uuid_t friend_uuid = {0};
+ dict_t *dict = NULL;
++ dict_t *peer_ver = NULL;
++ int totcount = sizeof(specific_key_suffix) / sizeof(specific_key_suffix[0]);
+
+ gf_uuid_parse(uuid_utoa(uuid), friend_uuid);
+ if (!port)
+@@ -104,8 +109,19 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
+
+ ret = glusterd_remote_hostname_get(req, rhost, sizeof(rhost));
+
++ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
++ dict = dict_new();
++ peer_ver = dict_new();
++
+ RCU_READ_LOCK;
+
++ if (!ctx || !dict || !peer_ver) {
++ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++ "Unable to allocate memory");
++ ret = -1;
++ goto out;
++ }
++
+ peerinfo = glusterd_peerinfo_find(uuid, rhost);
+
+ if (peerinfo == NULL) {
+@@ -130,28 +146,14 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
+ event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(event->peerid, peerinfo->uuid);
+
+- ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
+-
+- if (!ctx) {
+- gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+- "Unable to allocate memory");
+- ret = -1;
+- goto out;
+- }
+-
+ gf_uuid_copy(ctx->uuid, uuid);
+ if (hostname)
+ ctx->hostname = gf_strdup(hostname);
+ ctx->req = req;
+
+- dict = dict_new();
+- if (!dict) {
+- ret = -1;
+- goto out;
+- }
+-
+- ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len,
+- &dict);
++ ret = dict_unserialize_specific_keys(
++ friend_req->vols.vols_val, friend_req->vols.vols_len, &dict,
++ specific_key_suffix, &peer_ver, totcount);
+
+ if (ret)
+ goto out;
+@@ -159,6 +161,7 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
+ dict->extra_stdfree = friend_req->vols.vols_val;
+
+ ctx->vols = dict;
++ ctx->peer_ver = peer_ver;
+ event->ctx = ctx;
+
+ ret = glusterd_friend_sm_inject_event(event);
+@@ -188,6 +191,8 @@ out:
+ } else {
+ free(friend_req->vols.vols_val);
+ }
++ if (peer_ver)
++ dict_unref(peer_ver);
+ if (event)
+ GF_FREE(event->peername);
+ GF_FREE(event);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
+index 044da3d..d10a792 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
+@@ -106,6 +106,8 @@ glusterd_destroy_friend_req_ctx(glusterd_friend_req_ctx_t *ctx)
+
+ if (ctx->vols)
+ dict_unref(ctx->vols);
++ if (ctx->peer_ver)
++ dict_unref(ctx->peer_ver);
+ GF_FREE(ctx->hostname);
+ GF_FREE(ctx);
+ }
+@@ -936,8 +938,8 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx)
+ // Build comparison logic here.
+ pthread_mutex_lock(&conf->import_volumes);
+ {
+- ret = glusterd_compare_friend_data(ev_ctx->vols, &status,
+- event->peername);
++ ret = glusterd_compare_friend_data(ev_ctx->vols, ev_ctx->peer_ver,
++ &status, event->peername);
+ if (ret) {
+ pthread_mutex_unlock(&conf->import_volumes);
+ goto out;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h
+index ce008ac..efdf68e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-sm.h
++++ b/xlators/mgmt/glusterd/src/glusterd-sm.h
+@@ -174,6 +174,7 @@ typedef struct glusterd_friend_req_ctx_ {
+ rpcsvc_request_t *req;
+ int port;
+ dict_t *vols;
++ dict_t *peer_ver; // Dictionary to save peer ver data
+ } glusterd_friend_req_ctx_t;
+
+ typedef struct glusterd_friend_update_ctx_ {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index f7030fb..cf32bd9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -3709,12 +3709,14 @@ out:
+ return ret;
+ }
+
+-int32_t
+-glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+- int32_t *status, char *hostname)
++static int32_t
++glusterd_compare_friend_volume(dict_t *peer_data,
++ glusterd_friend_synctask_args_t *arg,
++ int32_t count, int32_t *status, char *hostname)
+ {
+ int32_t ret = -1;
+ char key[64] = "";
++ char key_prefix[32];
+ int keylen;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+@@ -3726,15 +3728,20 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+ xlator_t *this = NULL;
+
+ GF_ASSERT(peer_data);
++ GF_ASSERT(arg);
+ GF_ASSERT(status);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+- keylen = snprintf(key, sizeof(key), "volume%d.name", count);
+- ret = dict_get_strn(peer_data, key, keylen, &volname);
+- if (ret)
++ snprintf(key_prefix, sizeof(key_prefix), "volume%d", count);
++ keylen = snprintf(key, sizeof(key), "%s.name", key_prefix);
++ ret = dict_get_strn(arg->peer_ver_data, key, keylen, &volname);
++ if (ret) {
++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "Key=%s is NULL in peer_ver_data", key, NULL);
+ goto out;
++ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+@@ -3750,10 +3757,13 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+ goto out;
+ }
+
+- keylen = snprintf(key, sizeof(key), "volume%d.version", count);
+- ret = dict_get_int32n(peer_data, key, keylen, &version);
+- if (ret)
++ keylen = snprintf(key, sizeof(key), "%s.version", key_prefix);
++ ret = dict_get_int32n(arg->peer_ver_data, key, keylen, &version);
++ if (ret) {
++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "Key=%s is NULL in peer_ver_data", key, NULL);
+ goto out;
++ }
+
+ if (version > volinfo->version) {
+ // Mismatch detected
+@@ -3772,10 +3782,13 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+
+ // Now, versions are same, compare cksums.
+ //
+- snprintf(key, sizeof(key), "volume%d.ckusm", count);
+- ret = dict_get_uint32(peer_data, key, &cksum);
+- if (ret)
++ snprintf(key, sizeof(key), "%s.ckusm", key_prefix);
++ ret = dict_get_uint32(arg->peer_ver_data, key, &cksum);
++ if (ret) {
++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "Key=%s is NULL in peer_ver_data", key, NULL);
+ goto out;
++ }
+
+ if (cksum != volinfo->cksum) {
+ ret = 0;
+@@ -3790,8 +3803,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
+ goto skip_quota;
+
+- snprintf(key, sizeof(key), "volume%d.quota-version", count);
+- ret = dict_get_uint32(peer_data, key, &quota_version);
++ snprintf(key, sizeof(key), "%s.quota-version", key_prefix);
++ ret = dict_get_uint32(arg->peer_ver_data, key, &quota_version);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "quota-version key absent for"
+@@ -3809,6 +3822,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+ "%d on peer %s",
+ volinfo->volname, volinfo->quota_conf_version, quota_version,
+ hostname);
++ GF_ATOMIC_INIT(volinfo->volpeerupdate, 1);
+ *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
+ goto out;
+ } else if (quota_version < volinfo->quota_conf_version) {
+@@ -3819,8 +3833,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
+
+ // Now, versions are same, compare cksums.
+ //
+- snprintf(key, sizeof(key), "volume%d.quota-cksum", count);
+- ret = dict_get_uint32(peer_data, key, &quota_cksum);
++ snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix);
++ ret = dict_get_uint32(arg->peer_ver_data, key, &quota_cksum);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "quota checksum absent for "
+@@ -3846,13 +3860,12 @@ skip_quota:
+ *status = GLUSTERD_VOL_COMP_SCS;
+
+ out:
+- keylen = snprintf(key, sizeof(key), "volume%d.update", count);
+-
+ if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) {
+- ret = dict_set_int32n(peer_data, key, keylen, 1);
+- } else {
+- ret = dict_set_int32n(peer_data, key, keylen, 0);
++ /* Set the status to ensure the volume is updated on the peer
++ */
++ arg->status_arr[(count / 64)] ^= 1UL << (count % 64);
+ }
++
+ if (*status == GLUSTERD_VOL_COMP_RJT) {
+ gf_event(EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s",
+ volinfo->volname);
+@@ -4935,8 +4948,9 @@ out:
+ return ret;
+ }
+
+-int32_t
+-glusterd_import_friend_volume(dict_t *peer_data, int count)
++static int32_t
++glusterd_import_friend_volume(dict_t *peer_data, int count,
++ glusterd_friend_synctask_args_t *arg)
+ {
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+@@ -4954,10 +4968,27 @@ glusterd_import_friend_volume(dict_t *peer_data, int count)
+ priv = this->private;
+ GF_ASSERT(priv);
+
+- ret = snprintf(key, sizeof(key), "volume%d.update", count);
+- ret = dict_get_int32n(peer_data, key, ret, &update);
+- if (ret || !update) {
++ if (arg) {
++ /*Check if the volume options are updated on the other peers
++ */
++ update = (1UL & (arg->status_arr[(count / 64)] >> (count % 64)));
++ } else {
++ ret = snprintf(key, sizeof(key), "volume%d.update", count);
++ ret = dict_get_int32n(peer_data, key, ret, &update);
++ if (ret) {
++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
++ "Key=%s", key, NULL);
++ goto out;
++ }
++ }
++
++ if (!update) {
+ /* if update is 0 that means the volume is not imported */
++ gf_log(this->name, GF_LOG_DEBUG,
++ "The volume%d does"
++ " not have any peer change",
++ count);
++ ret = 0;
+ goto out;
+ }
+
+@@ -5045,6 +5076,8 @@ glusterd_import_friend_volumes_synctask(void *opaque)
+ glusterd_conf_t *conf = NULL;
+ dict_t *peer_data = NULL;
+ glusterd_friend_synctask_args_t *arg = NULL;
++ uint64_t bm = 0;
++ uint64_t mask = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -5056,17 +5089,7 @@ glusterd_import_friend_volumes_synctask(void *opaque)
+ if (!arg)
+ goto out;
+
+- peer_data = dict_new();
+- if (!peer_data) {
+- goto out;
+- }
+-
+- ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data);
+- if (ret) {
+- errno = ENOMEM;
+- goto out;
+- }
+-
++ peer_data = arg->peer_data;
+ ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count);
+ if (ret)
+ goto out;
+@@ -5083,11 +5106,18 @@ glusterd_import_friend_volumes_synctask(void *opaque)
+ conf->restart_bricks = _gf_true;
+
+ while (i <= count) {
+- ret = glusterd_import_friend_volume(peer_data, i);
+- if (ret) {
+- break;
++ bm = arg->status_arr[i / 64];
++ while (bm != 0) {
++ /* mask will contain the lowest bit set from bm. */
++ mask = bm & (-bm);
++ bm ^= mask;
++ ret = glusterd_import_friend_volume(peer_data, i + ffsll(mask) - 2,
++ arg);
++ if (ret < 0) {
++ break;
++ }
+ }
+- i++;
++ i += 64;
+ }
+ if (i > count) {
+ glusterd_svcs_manager(NULL);
+@@ -5095,11 +5125,9 @@ glusterd_import_friend_volumes_synctask(void *opaque)
+ conf->restart_bricks = _gf_false;
+ synccond_broadcast(&conf->cond_restart_bricks);
+ out:
+- if (peer_data)
+- dict_unref(peer_data);
+ if (arg) {
+- if (arg->dict_buf)
+- GF_FREE(arg->dict_buf);
++ dict_unref(arg->peer_data);
++ dict_unref(arg->peer_ver_data);
+ GF_FREE(arg);
+ }
+
+@@ -5121,7 +5149,7 @@ glusterd_import_friend_volumes(dict_t *peer_data)
+ goto out;
+
+ while (i <= count) {
+- ret = glusterd_import_friend_volume(peer_data, i);
++ ret = glusterd_import_friend_volume(peer_data, i, NULL);
+ if (ret)
+ goto out;
+ i++;
+@@ -5260,7 +5288,8 @@ out:
+ }
+
+ int32_t
+-glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname)
++glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status,
++ char *hostname)
+ {
+ int32_t ret = -1;
+ int32_t count = 0;
+@@ -5289,8 +5318,19 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname)
+ if (ret)
+ goto out;
+
++ arg = GF_CALLOC(1, sizeof(*arg) + sizeof(uint64_t) * (count / 64),
++ gf_common_mt_char);
++ if (!arg) {
++ ret = -1;
++ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++ "Out Of Memory");
++ goto out;
++ }
++ arg->peer_data = dict_ref(peer_data);
++ arg->peer_ver_data = dict_ref(cmp);
+ while (i <= count) {
+- ret = glusterd_compare_friend_volume(peer_data, i, status, hostname);
++ ret = glusterd_compare_friend_volume(peer_data, arg, i, status,
++ hostname);
+ if (ret)
+ goto out;
+
+@@ -5310,21 +5350,13 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname)
+ * first brick to come up before attaching the subsequent bricks
+ * in case brick multiplexing is enabled
+ */
+- arg = GF_CALLOC(1, sizeof(*arg), gf_common_mt_char);
+- ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf,
+- &arg->dictlen);
+- if (ret < 0) {
+- gf_log(this->name, GF_LOG_ERROR,
+- "dict_serialize failed while handling "
+- " import friend volume request");
+- goto out;
+- }
+-
+ glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg);
+ }
+
+ out:
+ if (ret && arg) {
++ dict_unref(arg->peer_data);
++ dict_unref(arg->peer_ver_data);
+ GF_FREE(arg);
+ }
+ gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index 5f5de82..02d85d2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -231,7 +231,7 @@ glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf,
+ u_int *length);
+
+ int32_t
+-glusterd_compare_friend_data(dict_t *peer_data, int32_t *status,
++glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status,
+ char *hostname);
+
+ int
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index f739b5d..efe4d0e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -234,8 +234,12 @@ typedef struct glusterd_add_dict_args {
+ } glusterd_add_dict_args_t;
+
+ typedef struct glusterd_friend_synctask_args {
+- char *dict_buf;
+- u_int dictlen;
++ dict_t *peer_data;
++ dict_t *peer_ver_data; // Dictionary to save peer version data
++ /* status_arr[1] is not the real size; the real size of the array
++ is dynamically allocated
++ */
++ uint64_t status_arr[1];
+ } glusterd_friend_synctask_args_t;
+
+ typedef enum gf_brick_status {
+--
+1.8.3.1
+
diff --git a/0511-features-shard-Missing-format-specifier.patch b/0511-features-shard-Missing-format-specifier.patch
new file mode 100644
index 0000000..baf6cf4
--- /dev/null
+++ b/0511-features-shard-Missing-format-specifier.patch
@@ -0,0 +1,39 @@
+From 868d346cc35c222d19b95bd9c367674c9ea859df Mon Sep 17 00:00:00 2001
+From: Vinayakswami Hariharmath <vharihar@redhat.com>
+Date: Tue, 15 Dec 2020 16:23:49 +0530
+Subject: [PATCH 511/511] features/shard: Missing format specifier
+
+The PRIu64 format specifier explicitly needs '%' (percent sign) as a
+prefix, and that prefix was missing in the downstream commit below:
+
+https://code.engineering.redhat.com/gerrit/#/c/221061/
+
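+For reference, a minimal standalone example (not part of this patch)
+showing why the '%' must be written explicitly:
+
+    #include <inttypes.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        uint64_t first_block = 42;
+        /* PRIu64 expands only to the length/conversion letters (e.g.
+           "llu"), so the format string must supply the leading '%'. */
+        printf("block: %" PRIu64 "\n", first_block);
+        return 0;
+    }
+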
+BUG: 1752739
+Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6
+
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Change-Id: I4598893e3fcca3a2b3e6e8ef9b64b3e5e98923e6
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221217
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ xlators/features/shard/src/shard.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index a967f35..099b062 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -1855,7 +1855,7 @@ int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this,
+ */
+ if (!inode) {
+ gf_msg_debug(this->name, 0,
+- "Last shard to be truncated absent in backend: " PRIu64
++ "Last shard to be truncated absent in backend:%" PRIu64
+ " of gfid: %s. Directly proceeding to update file size",
+ local->first_block, uuid_utoa(local->loc.inode->gfid));
+ shard_update_file_size(frame, this, NULL, &local->loc,
+--
+1.8.3.1
+
diff --git a/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch b/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch
new file mode 100644
index 0000000..37de503
--- /dev/null
+++ b/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch
@@ -0,0 +1,105 @@
+From c963653a89c3f6466af9a3e8f19246a7907f7f8c Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Thu, 30 Jul 2020 13:04:52 +0530
+Subject: [PATCH 512/517] glusterd: shared storage mount fails in ipv6
+ environment
+
+Issue:
+In an ipv6 environment, mounting the glusterd_shared_storage
+volume fails because the ipv6 environment is not recognised.
+
+Fix:
+In an ipv6 environment, the address-family is passed to the hook
+script when shared storage is created; depending on the
+address-family, the --xlator-option=transport.address-family=inet6
+option is added to the mount command, and the mount succeeds.
+
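+With this fix, when transport.address-family is set to inet6 the hook
+script builds the mount command shown in the hunk below, along the lines
+of: mount -t glusterfs -o xlator-option=transport.address-family=inet6
+<host>:/gluster_shared_storage /var/run/gluster/shared_storage
+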
+>Fixes: #1406
+>
+>Change-Id: Ib1888c34d85e6c01618b0ba214cbe1f57576908d
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream patch: https://review.gluster.org/c/glusterfs/+/24797
+BUG: 1856574
+
+Change-Id: Ib1888c34d85e6c01618b0ba214cbe1f57576908d
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221844
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Srijan Sivakumar <ssivakum@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../set/post/S32gluster_enable_shared_storage.sh | 11 +++++++++--
+ xlators/mgmt/glusterd/src/glusterd-hooks.c | 19 +++++++++++++++++++
+ 2 files changed, 28 insertions(+), 2 deletions(-)
+
+diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+index 3bae37c..9597503 100755
+--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+@@ -104,8 +104,15 @@ function check_volume_status()
+ echo $status
+ }
+
+-mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
+- /run/gluster/shared_storage"
++key=`echo $5 | cut -d '=' -f 1`
++val=`echo $5 | cut -d '=' -f 2`
++if [ "$key" == "transport.address-family" ]; then
++ mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \
++ $local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage"
++else
++ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
++ /var/run/gluster/shared_storage"
++fi
+
+ if [ "$option" == "enable" ]; then
+ retry=0;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c
+index 216cdf7..4f0d775 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c
++++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c
+@@ -200,11 +200,16 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
+ int i = 0;
+ int count = 0;
+ int ret = -1;
++ int flag = 0;
+ char query[1024] = {
+ 0,
+ };
+ char *key = NULL;
+ char *value = NULL;
++ char *inet_family = NULL;
++ xlator_t *this = NULL;
++ this = THIS;
++ GF_ASSERT(this);
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+@@ -228,9 +233,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
+ continue;
+
+ runner_argprintf(runner, "%s=%s", key, value);
++ if ((strncmp(key, "cluster.enable-shared-storage",
++ SLEN("cluster.enable-shared-storage")) == 0 ||
++ strncmp(key, "enable-shared-storage",
++ SLEN("enable-shared-storage")) == 0) &&
++ strncmp(value, "enable", SLEN("enable")) == 0)
++ flag = 1;
+ }
+
+ glusterd_hooks_add_custom_args(dict, runner);
++ if (flag == 1) {
++ ret = dict_get_str_sizen(this->options, "transport.address-family",
++ &inet_family);
++ if (!ret) {
++ runner_argprintf(runner, "transport.address-family=%s",
++ inet_family);
++ }
++ }
+
+ ret = 0;
+ out:
+--
+1.8.3.1
+
diff --git a/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch b/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch
new file mode 100644
index 0000000..ebd5609
--- /dev/null
+++ b/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch
@@ -0,0 +1,191 @@
+From 708c17a8a69b2657f384affaedfcf4ba0a123893 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Wed, 23 Dec 2020 14:45:07 +0530
+Subject: [PATCH 513/517] afr: mark pending xattrs as a part of metadata heal
+
+...if pending xattrs are zero for all children.
+
+Problem:
+If there are no pending xattrs and a metadata heal needs to be
+performed, it can be possible that we end up with xattrs inadvertendly
+deleted from all bricks, as explained in the BZ.
+
+Fix:
+After picking one among the sources as the good copy, mark pending xattrs on
+all sources to blame the sinks. Now even if this metadata heal fails midway,
+a subsequent heal will still choose one of the valid sources that it
+picked previously.
+
+Upstream patch details:
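+For example, in the added test below, brick 0 of a replica 3 volume is the
+source; after the heal it carries trusted.afr.<vol>-client-1 and
+trusted.afr.<vol>-client-2 xattrs blaming the two sinks (all zeroes once
+the heal completes), so a heal interrupted midway would still converge on
+brick 0 as the source.
+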
+> Fixes: #1067
+> Change-Id: If1b050b70b0ad911e162c04db4d89b263e2b8d7b
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/21922/
+
+BUG: 1640148
+Change-Id: If1b050b70b0ad911e162c04db4d89b263e2b8d7b
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/222073
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ tests/bugs/replicate/mdata-heal-no-xattrs.t | 59 ++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-self-heal-metadata.c | 62 +++++++++++++++++++++++-
+ 2 files changed, 120 insertions(+), 1 deletion(-)
+ create mode 100644 tests/bugs/replicate/mdata-heal-no-xattrs.t
+
+diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t
+new file mode 100644
+index 0000000..d3b0c50
+--- /dev/null
++++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t
+@@ -0,0 +1,59 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume set $V0 cluster.self-heal-daemon off
++TEST $CLI volume start $V0
++
++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++echo "Data">$M0/FILE
++ret=$?
++TEST [ $ret -eq 0 ]
++
++# Change permission on brick-0: simulates the case where there is metadata
++# mismatch but no pending xattrs. This brick will become the source for heal.
++TEST chmod +x $B0/$V0"0"/FILE
++
++# Add gfid to xattrop
++xattrop_b0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_b0`
++gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE))
++TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE
++EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
++
++TEST $CLI volume set $V0 cluster.self-heal-daemon on
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++# Brick-0 should contain xattrs blaming other 2 bricks.
++# The values will be zero because heal is over.
++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/FILE
++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}0/FILE
++
++# Brick-1 and Brick-2 must not contain any afr xattrs.
++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}1/FILE
++TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}1/FILE
++TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}1/FILE
++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}2/FILE
++TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}2/FILE
++TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE
++
++# check permission bits.
++EXPECT '755' stat -c %a $B0/${V0}0/FILE
++EXPECT '755' stat -c %a $B0/${V0}1/FILE
++EXPECT '755' stat -c %a $B0/${V0}2/FILE
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++cleanup;
+diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
+index f4e31b6..03f43ba 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
+@@ -190,6 +190,59 @@ out:
+ return ret;
+ }
+
++static int
++__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
++ inode_t *inode,
++ struct afr_reply *replies,
++ unsigned char *sources)
++{
++ int ret = 0;
++ int i = 0;
++ int m_idx = 0;
++ afr_private_t *priv = NULL;
++ int raw[AFR_NUM_CHANGE_LOGS] = {0};
++ dict_t *xattr = NULL;
++
++ priv = this->private;
++ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
++ raw[m_idx] = 1;
++
++ xattr = dict_new();
++ if (!xattr)
++ return -ENOMEM;
++
++ for (i = 0; i < priv->child_count; i++) {
++ if (sources[i])
++ continue;
++ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
++ sizeof(int) * AFR_NUM_CHANGE_LOGS);
++ if (ret) {
++ ret = -1;
++ goto out;
++ }
++ }
++
++ for (i = 0; i < priv->child_count; i++) {
++ if (!sources[i])
++ continue;
++ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
++ "Failed to set pending metadata xattr on child %d for %s", i,
++ uuid_utoa(inode->gfid));
++ goto out;
++ }
++ }
++
++ afr_replies_wipe(replies, priv->child_count);
++ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
++
++out:
++ if (xattr)
++ dict_unref(xattr);
++ return ret;
++}
++
+ /*
+ * Look for mismatching uid/gid or mode or user xattrs even if
+ * AFR xattrs don't say so, and pick one arbitrarily as winner. */
+@@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
+ };
+ int source = -1;
+ int sources_count = 0;
++ int ret = 0;
+
+ priv = this->private;
+
+@@ -300,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
+ healed_sinks[i] = 1;
+ }
+ }
+-
++ if ((sources_count == priv->child_count) && (source > -1) &&
++ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
++ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
++ replies, sources);
++ if (ret < 0)
++ return ret;
++ }
+ out:
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return source;
+--
+1.8.3.1
+
diff --git a/0514-afr-event-gen-changes.patch b/0514-afr-event-gen-changes.patch
new file mode 100644
index 0000000..9f9562e
--- /dev/null
+++ b/0514-afr-event-gen-changes.patch
@@ -0,0 +1,308 @@
+From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Wed, 23 Dec 2020 14:57:51 +0530
+Subject: [PATCH 514/517] afr: event gen changes
+
+The general idea of the changes is to prevent resetting event generation
+to zero in the inode ctx, since event gen is something that should
+follow 'causal order'.
+
+Change #1:
+For a read txn, in the inode refresh cbk, if event_generation is
+found to be zero, we fail the read fop. This is not needed
+because a change in event gen is only a marker for the next inode refresh
+to happen and should not be taken into account by the current read txn.
+
+Change #2:
+The event gen being zero above can happen if there is a racing lookup,
+which resets event gen (in afr_lookup_done) if there are non-zero afr
+xattrs. The resetting is done only to trigger an inode refresh and a
+possible client-side heal on the next lookup. That can be achieved by
+setting the need_refresh flag in the inode ctx. So all occurrences of
+resetting event gen to zero are replaced with a call to
+afr_inode_need_refresh_set().
+
+Change #3:
+In both the lookup and discover paths, we are doing an inode refresh which
+is not required since all 3 essentially do the same thing: update the inode
+ctx with the good/bad copies from the brick replies. Inode refresh also
+triggers background heals, but I think it is okay to do it when we call
+refresh during the read and write txns and not in the lookup path.
+
+The .t tests which relied on inode refresh in the lookup path to trigger
+heals are now changed to do a read txn so that the inode refresh and the
+heal happen.
+
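+(As seen in the .t changes below, the replacement trigger is a read txn:
+"ls $M0" for a readdir inode refresh and "cat $M0/f3" for a readv inode
+refresh, instead of relying on the lookup-path refresh.)
+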
+Upstream patch details:
+> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
+> Fixes: #1179
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+> Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/
+
+BUG: 1640148
+Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/222074
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ ...fid-mismatch-resolution-with-fav-child-policy.t | 8 +-
+ xlators/cluster/afr/src/afr-common.c | 92 +++++-----------------
+ xlators/cluster/afr/src/afr-dir-write.c | 6 +-
+ xlators/cluster/afr/src/afr.h | 5 +-
+ 4 files changed, 29 insertions(+), 82 deletions(-)
+
+diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
+index f4aa351..12af0c8 100644
+--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
++++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
+@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
+ #We know that second brick has the bigger size file
+ BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1)
+
+-TEST ls $M0/f3
+-TEST cat $M0/f3
++TEST ls $M0 #Trigger entry heal via readdir inode refresh
++TEST cat $M0/f3 #Trigger data heal via readv inode refresh
+ EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+ #gfid split-brain should be resolved
+@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+-TEST ls $M0/f4
+-TEST cat $M0/f4
++TEST ls $M0 #Trigger entry heal via readdir inode refresh
++TEST cat $M0/f4 #Trigger data heal via readv inode refresh
+ EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+ #gfid split-brain should be resolved
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index fca2cd5..90b4f14 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
+ metadatamap |= (1 << index);
+ }
+ if (metadatamap_old != metadatamap) {
+- event = 0;
++ __afr_inode_need_refresh_set(inode, this);
+ }
+ break;
+
+@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
+ datamap |= (1 << index);
+ }
+ if (datamap_old != datamap)
+- event = 0;
++ __afr_inode_need_refresh_set(inode, this);
+ break;
+
+ default:
+@@ -461,34 +461,6 @@ out:
+ }
+
+ int
+-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
+-{
+- int ret = -1;
+- uint16_t datamap = 0;
+- uint16_t metadatamap = 0;
+- uint32_t event = 0;
+- uint64_t val = 0;
+- afr_inode_ctx_t *ctx = NULL;
+-
+- ret = __afr_inode_ctx_get(this, inode, &ctx);
+- if (ret)
+- return ret;
+-
+- val = ctx->read_subvol;
+-
+- metadatamap = (val & 0x000000000000ffff) >> 0;
+- datamap = (val & 0x00000000ffff0000) >> 16;
+- event = 0;
+-
+- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+- (((uint64_t)event) << 32);
+-
+- ctx->read_subvol = val;
+-
+- return ret;
+-}
+-
+-int
+ __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
+ {
+@@ -559,22 +531,6 @@ out:
+ }
+
+ int
+-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+-{
+- afr_private_t *priv = NULL;
+- int ret = -1;
+-
+- priv = this->private;
+-
+- if (priv->child_count <= 16)
+- ret = __afr_inode_event_gen_reset_small(inode, this);
+- else
+- ret = -1;
+-
+- return ret;
+-}
+-
+-int
+ afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
+ {
+@@ -723,30 +679,22 @@ out:
+ return need_refresh;
+ }
+
+-static int
+-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
++int
++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+ {
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+
+- GF_VALIDATE_OR_GOTO(this->name, inode, out);
+-
+- LOCK(&inode->lock);
+- {
+- ret = __afr_inode_ctx_get(this, inode, &ctx);
+- if (ret)
+- goto unlock;
+-
++ ret = __afr_inode_ctx_get(this, inode, &ctx);
++ if (ret == 0) {
+ ctx->need_refresh = _gf_true;
+ }
+-unlock:
+- UNLOCK(&inode->lock);
+-out:
++
+ return ret;
+ }
+
+ int
+-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+ {
+ int ret = -1;
+
+@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+
+ LOCK(&inode->lock);
+ {
+- ret = __afr_inode_event_gen_reset(inode, this);
++ ret = __afr_inode_need_refresh_set(inode, this);
+ }
+ UNLOCK(&inode->lock);
+ out:
+@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
+ ret = afr_inode_get_readable(frame, inode, this, local->readable,
+ &event_generation, local->transaction.type);
+
+- if (ret == -EIO || (local->is_read_txn && !event_generation)) {
++ if (ret == -EIO) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = EIO;
+@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
+ if (read_subvol == -1)
+ goto cant_interpret;
+ if (ret) {
+- afr_inode_event_gen_reset(local->inode, this);
++ afr_inode_need_refresh_set(local->inode, this);
+ dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
+ }
+ } else {
+@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
++ int ret = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *success_replies = NULL;
+
+@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
+ if (!afr_has_quorum(success_replies, this, frame))
+ goto unwind;
+
+- afr_replies_interpret(frame, this, local->inode, NULL);
++ ret = afr_replies_interpret(frame, this, local->inode, NULL);
++ if (ret) {
++ afr_inode_need_refresh_set(local->inode, this);
++ }
+
+ read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
+ data_readable);
+@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+ afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+
+- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
+- local->event_generation))
+- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
+- else
+- afr_discover_do(frame, this, 0);
++ afr_discover_do(frame, this, 0);
+
+ return 0;
+ out:
+@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+ afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+
+- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
+- local->event_generation))
+- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
+- else
+- afr_lookup_do(frame, this, 0);
++ afr_lookup_do(frame, this, 0);
+
+ return 0;
+ out:
+diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
+index 416c19d..d419bfc 100644
+--- a/xlators/cluster/afr/src/afr-dir-write.c
++++ b/xlators/cluster/afr/src/afr-dir-write.c
+@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+- afr_inode_event_gen_reset(local->inode, this);
++ afr_inode_need_refresh_set(local->inode, this);
+ if (local->parent)
+- afr_inode_event_gen_reset(local->parent, this);
++ afr_inode_need_refresh_set(local->parent, this);
+ if (local->parent2)
+- afr_inode_event_gen_reset(local->parent2, this);
++ afr_inode_need_refresh_set(local->parent2, this);
+ continue;
+ }
+
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index ed5096e..3a2b26d 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
+ int event_generation);
+
+ int
+-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
++
++int
++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
+
+ int
+ afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+--
+1.8.3.1
+
diff --git a/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch b/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch
new file mode 100644
index 0000000..9c7693a
--- /dev/null
+++ b/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch
@@ -0,0 +1,2155 @@
+From aab8a587360214432c4a2ab59134411f1d38c509 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Wed, 9 Dec 2020 10:46:31 +0530
+Subject: [PATCH 515/517] cluster/afr: Heal directory rename without
+ rmdir/mkdir
+
+Problem1:
+When a directory is renamed while a brick
+is down entry-heal always did an rm -rf on that directory on
+the sink on old location and did mkdir and created the directory
+hierarchy again in the new location. This is inefficient.
+
+Problem2:
+Renamedir heal order may lead to a scenario where directory in
+the new location could be created before deleting it from old
+location leading to 2 directories with same gfid in posix.
+
+Fix:
+As part of heal, if the old location is healed first and the directory
+is not present on the source brick, always rename it into a hidden
+directory inside the sink brick, so that when heal is triggered on the
+new location shd can rename it from this hidden directory to the new
+location.
+
+If the new-location heal is triggered first and it detects that the
+directory already exists on the brick, it skips healing the directory
+until the directory appears in the hidden directory.
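+
+Sketched roughly (an illustration, not the exact shd call sequence),
+the sink-brick side of the heal becomes:
+
+  rename(<old-location>/dir, <anon-inode-dir>/<gfid-of-dir>)
+  rename(<anon-inode-dir>/<gfid-of-dir>, <new-location>/dir)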
+
+Credits: Ravi for rename-data-loss.t script
+
+Upstream patch details:
+> Fixes: #1211
+> Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24373/
+
+BUG: 1640148
+Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220660
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ tests/afr.rc | 16 +
+ tests/basic/afr/afr-anon-inode-no-quorum.t | 63 ++++
+ tests/basic/afr/afr-anon-inode.t | 114 ++++++
+ tests/basic/afr/entry-self-heal-anon-dir-off.t | 464 ++++++++++++++++++++++++
+ tests/basic/afr/rename-data-loss.t | 72 ++++
+ tests/bugs/replicate/bug-1744548-heal-timeout.t | 6 +-
+ tests/features/trash.t | 74 ++--
+ xlators/cluster/afr/src/afr-common.c | 46 ++-
+ xlators/cluster/afr/src/afr-dir-read.c | 12 +-
+ xlators/cluster/afr/src/afr-self-heal-common.c | 182 ++++++++++
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 206 +++++++++--
+ xlators/cluster/afr/src/afr-self-heal-name.c | 33 +-
+ xlators/cluster/afr/src/afr-self-heal.h | 5 +
+ xlators/cluster/afr/src/afr-self-heald.c | 178 ++++++++-
+ xlators/cluster/afr/src/afr-self-heald.h | 2 +-
+ xlators/cluster/afr/src/afr.c | 40 +-
+ xlators/cluster/afr/src/afr.h | 11 +
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 39 ++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 +
+ 19 files changed, 1442 insertions(+), 127 deletions(-)
+ create mode 100644 tests/basic/afr/afr-anon-inode-no-quorum.t
+ create mode 100644 tests/basic/afr/afr-anon-inode.t
+ create mode 100644 tests/basic/afr/entry-self-heal-anon-dir-off.t
+ create mode 100644 tests/basic/afr/rename-data-loss.t
+
+diff --git a/tests/afr.rc b/tests/afr.rc
+index 35f352d..2417899 100644
+--- a/tests/afr.rc
++++ b/tests/afr.rc
+@@ -105,3 +105,19 @@ function get_quorum_type()
+ local repl_id="$3"
+ cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}'
+ }
++
++function afr_private_key_value()
++{
++ local v=$1
++ local m=$2
++ local replica_id=$3
++ local key=$4
++#xargs at the end will strip leading spaces
++ grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
++}
++
++function afr_anon_entry_count()
++{
++ local b=$1
++ ls $b/.glusterfs-anonymous-inode* | wc -l
++}
+diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
+new file mode 100644
+index 0000000..896ba0c
+--- /dev/null
++++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
+@@ -0,0 +1,63 @@
++#!/bin/bash
++
++#Test that anon-inode entry is not cleaned up as long as there exists at least
++#one valid entry
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.readdir-ahead off
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 cluster.entry-self-heal off
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST touch $M0/a $M0/b
++
++gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
++gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST mv $M0/a $M0/a-new
++TEST mv $M0/b $M0/b-new
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++TEST ! ls $M0/a
++TEST ! ls $M0/b
++anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
++#Make sure index heal doesn't happen after enabling heal
++TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
++TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++TEST $CLI volume heal $V0
++#Allow time for a scan
++sleep 5
++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
++inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
++TEST rm -f $M0/a-new
++TEST stat $M0/b-new
++
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
++EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
++
++cleanup
+diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
+new file mode 100644
+index 0000000..f4cf37a
+--- /dev/null
++++ b/tests/basic/afr/afr-anon-inode.t
+@@ -0,0 +1,114 @@
++#!/bin/bash
++#Tests that afr-anon-inode test cases work as expected
++#These are cases where in entry-heal/name-heal we don't know the entry
++#for an inode, so these inodes are kept in a special directory
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
++EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
++TEST $CLI volume set $V0 cluster.use-anonymous-inode no
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
++TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
++TEST mkdir -p $M0/d1/b $M0/d2/a
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST mv $M0/d2/a $M0/d1
++TEST mv $M0/d1/b $M0/d2
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
++TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
++TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
++anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
++EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
++EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
++
++TEST ! ls $M0/$anon_inode_name
++EXPECT "^4$" echo $(ls -a $M0 | wc -l)
++
++#Test purging code path by shd
++TEST $CLI volume heal $V0 disable
++TEST mkdir $M0/l0 $M0/l1 $M0/l2
++TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
++TEST ln $M0/del-file $M0/del-file-link
++TEST ln $M0/l0/file $M0/l1/file-link1
++TEST ln $M0/l0/file $M0/l2/file-link2
++TEST mkdir -p $M0/del-recursive-dir/d1
++
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST rm -f $M0/del-file $M0/del-file-nolink
++TEST rm -rf $M0/del-recursive-dir
++TEST mv $M0/d1/a $M0/d2
++TEST mv $M0/l0/file $M0/l0/renamed-file
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
++
++nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
++link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
++dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
++rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
++rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
++TEST ! stat $M0/del-file
++TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
++TEST ! stat $M0/del-file-nolink
++TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
++TEST ! stat $M0/del-recursive-dir
++TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
++TEST ! stat $M0/d1/a
++TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
++TEST ! stat $M0/l0/file
++TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
++
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
++TEST ! stat $M0/l1/file-link1
++TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
++
++TEST kill_brick $V0 $H0 $B0/${V0}2
++TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
++TEST ! stat $M0/l2/file-link2
++TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
++
++#Simulate only anon-inodes present in all bricks
++TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
++
++#Test that shd doesn't clean up anon-inodes when some bricks are down
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST $CLI volume heal $V0 enable
++$CLI volume heal $V0
++sleep 5 #Allow time for completion of one scan
++TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
++TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
++TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
++rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
++
++#Test that rename indeed happened instead of rmdir/mkdir
++renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
++EXPECT "$rename_dir_inum" echo $renamed_dir_inum
++cleanup;
+diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
+new file mode 100644
+index 0000000..0803a08
+--- /dev/null
++++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
+@@ -0,0 +1,464 @@
++#!/bin/bash
++
++#This file checks if missing entry self-heal and entry self-heal are working
++#as expected.
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++function get_file_type {
++ stat -c "%a:%F:%g:%t:%T:%u" $1
++}
++
++function diff_dirs {
++ diff <(ls $1 | sort) <(ls $2 | sort)
++}
++
++function heal_status {
++ local f1_path="${1}/${3}"
++ local f2_path="${2}/${3}"
++ local insync=""
++ diff_dirs $f1_path $f2_path
++ if [ $? -eq 0 ];
++ then
++ insync="Y"
++ else
++ insync="N"
++ fi
++ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
++ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
++ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
++ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
++ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
++ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
++ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
++ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
++ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
++ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
++ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
++ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
++ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
++}
++
++function is_heal_done {
++ local zero_xattr="000000000000000000000000"
++ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
++ then
++ echo "Y"
++ else
++ echo "N"
++ fi
++}
++
++function print_pending_heals {
++ local result=":"
++ for i in "$@";
++ do
++ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
++ then
++ result="$result:$i"
++ fi
++ done
++#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
++ if [ $result == ":" ]; then result="~"; fi
++ echo $result
++}
++
++zero_xattr="000000000000000000000000"
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume set $V0 cluster.use-anonymous-inode off
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.readdir-ahead off
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 cluster.data-self-heal on
++TEST $CLI volume set $V0 cluster.metadata-self-heal on
++TEST $CLI volume set $V0 cluster.entry-self-heal on
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
++cd $M0
++#_me_ marks dirs on which missing-entry self-heal happens, _heal marks dirs where dir self-heal happens
++#spb means split-brain, fool means all bricks are fools
++
++#source_self_accusing means there exists a source and a sink which self-accuses.
++#This simulates failures where fops failed on the bricks without the brick going down.
++#Something like EACCES/EDQUOT etc
++
++TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
++TEST mkfifo source_deletions_heal/fifo
++TEST mknod source_deletions_heal/block b 4 5
++TEST mknod source_deletions_heal/char c 1 5
++TEST touch source_deletions_heal/file
++TEST ln -s source_deletions_heal/file source_deletions_heal/slink
++TEST mkdir source_deletions_heal/dir1
++TEST mkdir source_deletions_heal/dir1/dir2
++
++TEST mkfifo source_deletions_me/fifo
++TEST mknod source_deletions_me/block b 4 5
++TEST mknod source_deletions_me/char c 1 5
++TEST touch source_deletions_me/file
++TEST ln -s source_deletions_me/file source_deletions_me/slink
++TEST mkdir source_deletions_me/dir1
++TEST mkdir source_deletions_me/dir1/dir2
++
++TEST mkfifo source_self_accusing/fifo
++TEST mknod source_self_accusing/block b 4 5
++TEST mknod source_self_accusing/char c 1 5
++TEST touch source_self_accusing/file
++TEST ln -s source_self_accusing/file source_self_accusing/slink
++TEST mkdir source_self_accusing/dir1
++TEST mkdir source_self_accusing/dir1/dir2
++
++TEST kill_brick $V0 $H0 $B0/${V0}0
++
++TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
++TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
++TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
++TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
++
++#Test that the files are deleted
++TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
++TEST ! stat $B0/${V0}1/source_deletions_heal/block
++TEST ! stat $B0/${V0}1/source_deletions_heal/char
++TEST ! stat $B0/${V0}1/source_deletions_heal/file
++TEST ! stat $B0/${V0}1/source_deletions_heal/slink
++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
++TEST ! stat $B0/${V0}1/source_deletions_me/fifo
++TEST ! stat $B0/${V0}1/source_deletions_me/block
++TEST ! stat $B0/${V0}1/source_deletions_me/char
++TEST ! stat $B0/${V0}1/source_deletions_me/file
++TEST ! stat $B0/${V0}1/source_deletions_me/slink
++TEST ! stat $B0/${V0}1/source_deletions_me/dir1
++TEST ! stat $B0/${V0}1/source_self_accusing/fifo
++TEST ! stat $B0/${V0}1/source_self_accusing/block
++TEST ! stat $B0/${V0}1/source_self_accusing/char
++TEST ! stat $B0/${V0}1/source_self_accusing/file
++TEST ! stat $B0/${V0}1/source_self_accusing/slink
++TEST ! stat $B0/${V0}1/source_self_accusing/dir1
++
++
++TEST mkfifo source_creations_heal/fifo
++TEST mknod source_creations_heal/block b 4 5
++TEST mknod source_creations_heal/char c 1 5
++TEST touch source_creations_heal/file
++TEST ln -s source_creations_heal/file source_creations_heal/slink
++TEST mkdir source_creations_heal/dir1
++TEST mkdir source_creations_heal/dir1/dir2
++
++TEST mkfifo source_creations_me/fifo
++TEST mknod source_creations_me/block b 4 5
++TEST mknod source_creations_me/char c 1 5
++TEST touch source_creations_me/file
++TEST ln -s source_creations_me/file source_creations_me/slink
++TEST mkdir source_creations_me/dir1
++TEST mkdir source_creations_me/dir1/dir2
++
++$CLI volume stop $V0
++
++#simulate fool-fool scenario for fool_* dirs
++setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
++setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
++
++#Simulate v1-dirty (self-accusing but no pending ops on others) scenario for v1-dirty
++setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
++
++$CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++TEST kill_brick $V0 $H0 $B0/${V0}1
++
++TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
++
++$CLI volume stop $V0
++
++#simulate fool-fool scenario for fool_* dirs
++setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
++setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
++
++#simulate self-accusing for source_self_accusing
++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
++
++$CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++
++# Check if conservative merges happened correctly on _me_ dirs
++TEST stat spb_me_heal/1
++TEST stat $B0/${V0}0/spb_me_heal/1
++TEST stat $B0/${V0}1/spb_me_heal/1
++
++TEST stat spb_me_heal/0
++TEST stat $B0/${V0}0/spb_me_heal/0
++TEST stat $B0/${V0}1/spb_me_heal/0
++
++TEST stat fool_me/1
++TEST stat $B0/${V0}0/fool_me/1
++TEST stat $B0/${V0}1/fool_me/1
++
++TEST stat fool_me/0
++TEST stat $B0/${V0}0/fool_me/0
++TEST stat $B0/${V0}1/fool_me/0
++
++TEST stat v1_fool_me/0
++TEST stat $B0/${V0}0/v1_fool_me/0
++TEST stat $B0/${V0}1/v1_fool_me/0
++
++TEST stat v1_fool_me/1
++TEST stat $B0/${V0}0/v1_fool_me/1
++TEST stat $B0/${V0}1/v1_fool_me/1
++
++TEST stat v1_dirty_me/0
++TEST stat $B0/${V0}0/v1_dirty_me/0
++TEST stat $B0/${V0}1/v1_dirty_me/0
++
++#Check if files that have gfid-mismatches in _me_ are giving EIO
++TEST ! stat spb_me/0
++
++#Check if stale files are deleted on access
++TEST ! stat source_deletions_me/fifo
++TEST ! stat $B0/${V0}0/source_deletions_me/fifo
++TEST ! stat $B0/${V0}1/source_deletions_me/fifo
++TEST ! stat source_deletions_me/block
++TEST ! stat $B0/${V0}0/source_deletions_me/block
++TEST ! stat $B0/${V0}1/source_deletions_me/block
++TEST ! stat source_deletions_me/char
++TEST ! stat $B0/${V0}0/source_deletions_me/char
++TEST ! stat $B0/${V0}1/source_deletions_me/char
++TEST ! stat source_deletions_me/file
++TEST ! stat $B0/${V0}0/source_deletions_me/file
++TEST ! stat $B0/${V0}1/source_deletions_me/file
++TEST ! stat source_deletions_me/file
++TEST ! stat $B0/${V0}0/source_deletions_me/file
++TEST ! stat $B0/${V0}1/source_deletions_me/file
++TEST ! stat source_deletions_me/dir1/dir2
++TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
++TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
++TEST ! stat source_deletions_me/dir1
++TEST ! stat $B0/${V0}0/source_deletions_me/dir1
++TEST ! stat $B0/${V0}1/source_deletions_me/dir1
++
++#Test if the files created as part of access are healed correctly
++r=$(get_file_type source_creations_me/fifo)
++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
++TEST [ -p source_creations_me/fifo ]
++
++r=$(get_file_type source_creations_me/block)
++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
++TEST [ -b source_creations_me/block ]
++
++r=$(get_file_type source_creations_me/char)
++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
++TEST [ -c source_creations_me/char ]
++
++r=$(get_file_type source_creations_me/file)
++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
++TEST [ -f source_creations_me/file ]
++
++r=$(get_file_type source_creations_me/slink)
++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
++TEST [ -h source_creations_me/slink ]
++
++r=$(get_file_type source_creations_me/dir1/dir2)
++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
++TEST [ -d source_creations_me/dir1/dir2 ]
++
++r=$(get_file_type source_creations_me/dir1)
++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
++TEST [ -d source_creations_me/dir1 ]
++
++#Trigger heal and check _heal dirs are healed properly
++#Trigger change in event generation number. That way inodes would get refreshed during lookup
++TEST kill_brick $V0 $H0 $B0/${V0}1
++$CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++
++TEST stat spb_heal
++TEST stat spb_me_heal
++TEST stat fool_heal
++TEST stat fool_me
++TEST stat v1_fool_heal
++TEST stat v1_fool_me
++TEST stat source_deletions_heal
++TEST stat source_deletions_me
++TEST stat source_self_accusing
++TEST stat source_creations_heal
++TEST stat source_creations_me
++TEST stat v1_dirty_heal
++TEST stat v1_dirty_me
++TEST $CLI volume stop $V0
++TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
++
++$CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++
++#Create base entry in indices/xattrop
++echo "Data" > $M0/FILE
++rm -f $M0/FILE
++EXPECT "1" count_index_entries $B0/${V0}0
++EXPECT "1" count_index_entries $B0/${V0}1
++
++TEST $CLI volume stop $V0;
++
++#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
++create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
++
++$CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++
++$CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++
++TEST $CLI volume heal $V0;
++EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
++
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
++
++#Don't access the files/dirs from mount point as that may cause self-heals
++# Check if conservative merges happened correctly on heal dirs
++TEST stat $B0/${V0}0/spb_heal/1
++TEST stat $B0/${V0}1/spb_heal/1
++
++TEST stat $B0/${V0}0/spb_heal/0
++TEST stat $B0/${V0}1/spb_heal/0
++
++TEST stat $B0/${V0}0/fool_heal/1
++TEST stat $B0/${V0}1/fool_heal/1
++
++TEST stat $B0/${V0}0/fool_heal/0
++TEST stat $B0/${V0}1/fool_heal/0
++
++TEST stat $B0/${V0}0/v1_fool_heal/0
++TEST stat $B0/${V0}1/v1_fool_heal/0
++
++TEST stat $B0/${V0}0/v1_fool_heal/1
++TEST stat $B0/${V0}1/v1_fool_heal/1
++
++TEST stat $B0/${V0}0/v1_dirty_heal/0
++TEST stat $B0/${V0}1/v1_dirty_heal/0
++
++#Check if files that have gfid-mismatches in spb are giving EIO
++TEST ! stat spb/0
++
++#Check if stale files are deleted on access
++TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
++TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
++TEST ! stat $B0/${V0}0/source_deletions_heal/block
++TEST ! stat $B0/${V0}1/source_deletions_heal/block
++TEST ! stat $B0/${V0}0/source_deletions_heal/char
++TEST ! stat $B0/${V0}1/source_deletions_heal/char
++TEST ! stat $B0/${V0}0/source_deletions_heal/file
++TEST ! stat $B0/${V0}1/source_deletions_heal/file
++TEST ! stat $B0/${V0}0/source_deletions_heal/file
++TEST ! stat $B0/${V0}1/source_deletions_heal/file
++TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
++TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
++
++#Check if stale files are deleted on access
++TEST ! stat $B0/${V0}0/source_self_accusing/fifo
++TEST ! stat $B0/${V0}1/source_self_accusing/fifo
++TEST ! stat $B0/${V0}0/source_self_accusing/block
++TEST ! stat $B0/${V0}1/source_self_accusing/block
++TEST ! stat $B0/${V0}0/source_self_accusing/char
++TEST ! stat $B0/${V0}1/source_self_accusing/char
++TEST ! stat $B0/${V0}0/source_self_accusing/file
++TEST ! stat $B0/${V0}1/source_self_accusing/file
++TEST ! stat $B0/${V0}0/source_self_accusing/file
++TEST ! stat $B0/${V0}1/source_self_accusing/file
++TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
++TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
++TEST ! stat $B0/${V0}0/source_self_accusing/dir1
++TEST ! stat $B0/${V0}1/source_self_accusing/dir1
++
++#Test if the files created as part of full self-heal are healed correctly
++r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
++TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
++
++r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
++
++r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
++
++r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
++TEST [ -f $B0/${V0}0/source_creations_heal/file ]
++
++r=$(get_file_type $B0/${V0}0/source_creations_heal/slink)
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/slink
++TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
++
++r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
++TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
++
++r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
++TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
++
++cd -
++
++#Anonymous directory shouldn't be created
++TEST mkdir $M0/rename-dir
++before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST mv $M0/rename-dir $M0/new-name
++TEST $CLI volume start $V0 force
++#Since features.ctime is not enabled by default in downstream, the below test
++#would fail. If the ctime feature is enabled, a trusted.glusterfs.mdata xattr
++#is set which differs for the parent in the gfid split-brain scenario; when
++#lookup is triggered, the gfid gets added to indices/xattrop, making the below
++#test pass in upstream. Hence it is commented out here.
++#'spb' is in split-brain so pending-heal-count will be 2
++#EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++after_rename=$(STAT_INO $B0/${V0}1/new-name)
++EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
++EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
++EXPECT_NOT "$before_rename" echo $after_rename
++cleanup
+diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t
+new file mode 100644
+index 0000000..256ee2a
+--- /dev/null
++++ b/tests/basic/afr/rename-data-loss.t
+@@ -0,0 +1,72 @@
++#!/bin/bash
++#Self-heal tests
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
++TEST $CLI volume set $V0 write-behind off
++TEST $CLI volume set $V0 self-heal-daemon off
++TEST $CLI volume set $V0 data-self-heal off
++TEST $CLI volume set $V0 metadata-self-heal off
++TEST $CLI volume set $V0 entry-self-heal off
++TEST $CLI volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status'
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
++
++cd $M0
++TEST `echo "line1" >> file1`
++TEST mkdir dir1
++TEST mkdir dir2
++TEST mkdir -p dir1/dira/dirb
++TEST `echo "line1">>dir1/dira/dirb/file1`
++TEST mkdir delete_me
++TEST `echo "line1" >> delete_me/file1`
++
++#brick0 has witnessed the second write while brick1 is down.
++TEST kill_brick $V0 $H0 $B0/brick1
++TEST `echo "line2" >> file1`
++TEST `echo "line2" >> dir1/dira/dirb/file1`
++TEST `echo "line2" >> delete_me/file1`
++
++#Toggle the bricks that are up/down.
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++TEST kill_brick $V0 $H0 $B0/brick0
++
++#Rename when the 'source' brick0 for data self-heals is down.
++mv file1 file2
++mv dir1/dira dir2
++
++#Delete a dir when brick0 is down.
++rm -rf delete_me
++cd -
++
++#Bring everything up and trigger heal
++TEST $CLI volume set $V0 self-heal-daemon on
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1
++
++#Remount to avoid reading from caches
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
++EXPECT "line2" tail -1 $M0/file2
++EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1
++TEST ! stat $M0/delete_me/file1
++TEST ! stat $M0/delete_me
++
++anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode)
++TEST [[ -d $B0/brick0/$anon_inode_name ]]
++TEST [[ -d $B0/brick1/$anon_inode_name ]]
++cleanup
+diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
+index c208112..0115350 100644
+--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t
++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
+@@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0
+ TEST $CLI volume profile $V0 start
+ TEST $CLI volume profile $V0 info clear
+ TEST $CLI volume heal $V0 enable
+-# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes
+-EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count
++# Each brick does 4 opendirs, corresponding to dirty, xattrop, entry-changes and anonymous-inode
++EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count
+
+ # Check that a change in heal-timeout is honoured immediately.
+ TEST $CLI volume set $V0 cluster.heal-timeout 5
+ sleep 10
+ # Two crawls must have happened.
+-EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count
+
+ # shd must not heal if it is disabled and heal-timeout is changed.
+ TEST $CLI volume heal $V0 disable
+diff --git a/tests/features/trash.t b/tests/features/trash.t
+index 472e909..da5b50b 100755
+--- a/tests/features/trash.t
++++ b/tests/features/trash.t
+@@ -94,105 +94,105 @@ wildcard_not_exists() {
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+ }
+
+-# testing glusterd [1-3]
++# testing glusterd
+ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume info
+
+-# creating distributed volume [4]
++# creating distributed volume
+ TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+-# checking volume status [5-7]
++# checking volume status
+ EXPECT "$V0" volinfo_field $V0 'Volume Name'
+ EXPECT 'Created' volinfo_field $V0 'Status'
+ EXPECT '2' brick_count $V0
+
+-# test without enabling trash translator [8]
++# test without enabling trash translator
+ TEST start_vol $V0 $M0
+
+-# test on enabling trash translator [9-10]
++# test on enabling trash translator
+ TEST $CLI volume set $V0 features.trash on
+ EXPECT 'on' volinfo_field $V0 'features.trash'
+
+-# files directly under mount point [11]
++# files directly under mount point
+ create_files $M0/file1 $M0/file2
+ TEST file_exists $V0 file1 file2
+
+-# perform unlink [12]
++# perform unlink
+ TEST unlink_op file1
+
+-# perform truncate [13]
++# perform truncate
+ TEST truncate_op file2 4
+
+-# create files directory hierarchy and check [14]
++# create files directory hierarchy and check
+ mkdir -p $M0/1/2/3
+ create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2
+ TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2
+
+-# perform unlink [15]
++# perform unlink
+ TEST unlink_op 1/2/3/foo1
+
+-# perform truncate [16]
++# perform truncate
+ TEST truncate_op 1/2/3/foo2 4
+
+ # create a directory for eliminate pattern
+ mkdir $M0/a
+
+-# set the eliminate pattern [17-18]
++# set the eliminate pattern
+ TEST $CLI volume set $V0 features.trash-eliminate-path /a
+ EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path'
+
+-# create two files and check [19]
++# create two files and check
+ create_files $M0/a/test1 $M0/a/test2
+ TEST file_exists $V0 a/test1 a/test2
+
+-# remove from eliminate pattern [20]
++# remove from eliminate pattern
+ rm -f $M0/a/test1
+ EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1*
+
+-# truncate from eliminate path [21-23]
++# truncate from eliminate path
+ truncate -s 2 $M0/a/test2
+ TEST [ -e $M0/a/test2 ]
+ TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ]
+ EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2*
+
+-# set internal op on [24-25]
++# set internal op on
+ TEST $CLI volume set $V0 features.trash-internal-op on
+ EXPECT 'on' volinfo_field $V0 'features.trash-internal-op'
+
+-# again create two files and check [26]
++# again create two files and check
+ create_files $M0/inop1 $M0/inop2
+ TEST file_exists $V0 inop1 inop2
+
+-# perform unlink [27]
++# perform unlink
+ TEST unlink_op inop1
+
+-# perform truncate [28]
++# perform truncate
+ TEST truncate_op inop2 4
+
+-# remove one brick and restart the volume [28-31]
++# remove one brick and restart the volume
+ TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+ TEST $CLI volume stop $V0
+ TEST start_vol $V0 $M0 $M0/.trashcan
+
+-# again create two files and check [33]
++# again create two files and check
+ create_files $M0/rebal1 $M0/rebal2
+ TEST file_exists $V0 rebal1 rebal2
+
+-# add one brick [34-35]
++# add one brick
+ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+ TEST [ -d $B0/${V0}3 ]
+
+
+-# perform rebalance [36]
++# perform rebalance
+ TEST $CLI volume rebalance $V0 start force
+ EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+ #Find out which file was migrated to the new brick
+ file_name=$(ls $B0/${V0}3/rebal*| xargs basename)
+
+-# check whether rebalance was succesful [37-40]
++# check whether rebalance was successful
+ EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name*
+ EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name*
+
+@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+ # force required in case rebalance is not over
+ TEST $CLI volume stop $V0 force
+
+-# create a replicated volume [41]
++# create a replicated volume
+ TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2}
+
+-# checking volume status [42-45]
++# checking volume status
+ EXPECT "$V1" volinfo_field $V1 'Volume Name'
+ EXPECT 'Replicate' volinfo_field $V1 'Type'
+ EXPECT 'Created' volinfo_field $V1 'Status'
+ EXPECT '2' brick_count $V1
+
+-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50]
++# enable trash with options and start the replicate volume by disabling automatic self-heal
+ TEST $CLI volume set $V1 features.trash on
+ TEST $CLI volume set $V1 features.trash-internal-op on
+ EXPECT 'on' volinfo_field $V1 'features.trash'
+ EXPECT 'on' volinfo_field $V1 'features.trash-internal-op'
+ TEST start_vol $V1 $M1 $M1/.trashcan
+
+-# mount and check for trash directory [51]
++# mount and check for trash directory
+ TEST [ -d $M1/.trashcan/internal_op ]
+
+-# create a file and check [52]
++# create a file and check
+ touch $M1/self
+ TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ]
+
+-# kill one brick and delete the file from mount point [53-54]
++# kill one brick and delete the file from mount point
+ kill_brick $V1 $H0 $B0/${V1}1
+ EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+ rm -f $M1/self
+ EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self*
+
+-# force start the volume and trigger the self-heal manually [55-57]
+-TEST $CLI volume start $V1 force
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+-# Since we created the file under root of the volume, it will be
+-# healed automatically
+-
+-# check for the removed file in trashcan [58]
+-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self*
+-
+-# check renaming of trash directory through cli [59-62]
++# check renaming of trash directory through cli
+ TEST $CLI volume set $V0 trash-dir abc
+ TEST start_vol $V0 $M0 $M0/abc
+ TEST [ -e $M0/abc -a ! -e $M0/.trashcan ]
+ EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal*
+
+-# ensure that rename and delete operation on trash directory fails [63-65]
++# ensure that rename and delete operation on trash directory fails
+ rm -rf $M0/abc/internal_op
+ TEST [ -e $M0/abc/internal_op ]
+ rm -rf $M0/abc/
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 90b4f14..6f2da11 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -47,6 +47,41 @@ afr_quorum_errno(afr_private_t *priv)
+ return ENOTCONN;
+ }
+
++gf_boolean_t
++afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
++ pid_t pid)
++{
++ if (!__is_root_gfid(pargfid)) {
++ return _gf_false;
++ }
++
++ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
++ /*For backward compatibility /.landfill is private*/
++ return _gf_true;
++ }
++
++ if (pid == GF_CLIENT_PID_GSYNCD) {
++ /*geo-rep needs to create/sync private directory on slave because
++ * it appears in changelog*/
++ return _gf_false;
++ }
++
++ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
++ if (strcmp(name, priv->anon_inode_name) == 0) {
++ /* anonymous-inode dir is private*/
++ return _gf_true;
++ }
++ } else {
++ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
++ 0) {
++ /* anonymous-inode dir prefix is private for geo-rep to work*/
++ return _gf_true;
++ }
++ }
++
++ return _gf_false;
++}
++
+ int
+ afr_fav_child_reset_sink_xattrs(void *opaque);
+
+@@ -3301,11 +3336,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+ return 0;
+ }
+
+- if (__is_root_gfid(loc->parent->gfid)) {
+- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
+- op_errno = EPERM;
+- goto out;
+- }
++ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
++ frame->root->pid)) {
++ op_errno = EPERM;
++ goto out;
+ }
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+@@ -4832,6 +4866,7 @@ afr_priv_dump(xlator_t *this)
+ priv->background_self_heal_count);
+ gf_proc_dump_write("healers", "%d", priv->healers);
+ gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
++ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ gf_proc_dump_write("quorum-type", "auto");
+ } else if (priv->quorum_count == 0) {
+@@ -5792,6 +5827,7 @@ afr_priv_destroy(afr_private_t *priv)
+ GF_FREE(priv->local);
+ GF_FREE(priv->pending_key);
+ GF_FREE(priv->children);
++ GF_FREE(priv->anon_inode);
+ GF_FREE(priv->child_up);
+ GF_FREE(priv->child_latency);
+ LOCK_DESTROY(&priv->lock);
+diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
+index 6307b63..d64b6a9 100644
+--- a/xlators/cluster/afr/src/afr-dir-read.c
++++ b/xlators/cluster/afr/src/afr-dir-read.c
+@@ -158,8 +158,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
+ }
+
+ static void
+-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
+- gf_dirent_t *entries, fd_t *fd)
++afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
++ int subvol, gf_dirent_t *entries, fd_t *fd)
+ {
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+@@ -177,8 +177,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
+
+ list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
+ {
+- if (__is_root_gfid(fd->inode->gfid) &&
+- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
++ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
++ frame->root->pid)) {
+ continue;
+ }
+
+@@ -222,8 +222,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ }
+
+ if (op_ret >= 0)
+- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
+- local->fd);
++ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
++ &entries, local->fd);
+
+ AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 9b6575f..0a8a7fd 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -2753,3 +2753,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
+ out:
+ return source;
+ }
++
++static int
++afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, inode_t *inode,
++ struct iatt *buf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata)
++{
++ afr_local_t *local = frame->local;
++ int i = (long)cookie;
++
++ local->replies[i].valid = 1;
++ local->replies[i].op_ret = op_ret;
++ local->replies[i].op_errno = op_errno;
++ if (op_ret == 0) {
++ local->op_ret = 0;
++ local->replies[i].poststat = *buf;
++ local->replies[i].preparent = *preparent;
++ local->replies[i].postparent = *postparent;
++ }
++ if (xdata) {
++ local->replies[i].xdata = dict_ref(xdata);
++ }
++
++ syncbarrier_wake(&local->barrier);
++ return 0;
++}
++
++int
++afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
++{
++ call_frame_t *frame = NULL;
++ afr_local_t *local = NULL;
++ afr_private_t *priv = this->private;
++ unsigned char *mkdir_on = alloca0(priv->child_count);
++ unsigned char *lookup_on = alloca0(priv->child_count);
++ loc_t loc = {0};
++ int32_t op_errno = 0;
++ int32_t child_op_errno = 0;
++ struct iatt iatt = {0};
++ dict_t *xdata = NULL;
++ uuid_t anon_inode_gfid = {0};
++ int mkdir_count = 0;
++ int i = 0;
++
++ /*Try to mkdir everywhere and return success if the dir exists on 'child'
++ */
++
++ if (!priv->use_anon_inode) {
++ op_errno = EINVAL;
++ goto out;
++ }
++
++ frame = afr_frame_create(this, &op_errno);
++ if (op_errno) {
++ goto out;
++ }
++ local = frame->local;
++ if (!local->child_up[child]) {
++ /*Other bricks may need mkdir so don't error out yet*/
++ child_op_errno = ENOTCONN;
++ }
++ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
++ for (i = 0; i < priv->child_count; i++) {
++ if (!local->child_up[i])
++ continue;
++
++ if (priv->anon_inode[i]) {
++ mkdir_on[i] = 0;
++ } else {
++ mkdir_on[i] = 1;
++ mkdir_count++;
++ }
++ }
++
++ if (mkdir_count == 0) {
++ *linked_inode = inode_find(this->itable, anon_inode_gfid);
++ if (*linked_inode) {
++ op_errno = 0;
++ goto out;
++ }
++ }
++
++ loc.parent = inode_ref(this->itable->root);
++ loc.name = priv->anon_inode_name;
++ loc.inode = inode_new(this->itable);
++ if (!loc.inode) {
++ op_errno = ENOMEM;
++ goto out;
++ }
++
++ xdata = dict_new();
++ if (!xdata) {
++ op_errno = ENOMEM;
++ goto out;
++ }
++
++ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
++ if (op_errno) {
++ goto out;
++ }
++
++ if (mkdir_count == 0) {
++ memcpy(lookup_on, local->child_up, priv->child_count);
++ goto lookup;
++ }
++
++ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
++ xdata);
++
++ for (i = 0; i < priv->child_count; i++) {
++ if (!mkdir_on[i]) {
++ continue;
++ }
++
++ if (local->replies[i].op_ret == 0) {
++ priv->anon_inode[i] = 1;
++ iatt = local->replies[i].poststat;
++ } else if (local->replies[i].op_ret < 0 &&
++ local->replies[i].op_errno == EEXIST) {
++ lookup_on[i] = 1;
++ } else if (i == child) {
++ child_op_errno = local->replies[i].op_errno;
++ }
++ }
++
++ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
++ goto link;
++ }
++
++lookup:
++ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
++ xdata);
++ for (i = 0; i < priv->child_count; i++) {
++ if (!lookup_on[i]) {
++ continue;
++ }
++
++ if (local->replies[i].op_ret == 0) {
++ if (gf_uuid_compare(anon_inode_gfid,
++ local->replies[i].poststat.ia_gfid) == 0) {
++ priv->anon_inode[i] = 1;
++ iatt = local->replies[i].poststat;
++ } else {
++ if (i == child)
++ child_op_errno = EINVAL;
++ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
++ "%s has gfid: %s", priv->anon_inode_name,
++ uuid_utoa(local->replies[i].poststat.ia_gfid));
++ }
++ } else if (i == child) {
++ child_op_errno = local->replies[i].op_errno;
++ }
++ }
++link:
++ if (!gf_uuid_is_null(iatt.ia_gfid)) {
++ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
++ if (*linked_inode) {
++ op_errno = 0;
++ inode_lookup(*linked_inode);
++ } else {
++ op_errno = ENOMEM;
++ }
++ goto out;
++ }
++
++out:
++ if (xdata)
++ dict_unref(xdata);
++ loc_wipe(&loc);
++ /*child_op_errno takes precedence*/
++ if (child_op_errno == 0) {
++ child_op_errno = op_errno;
++ }
++
++ if (child_op_errno && *linked_inode) {
++ inode_unref(*linked_inode);
++ *linked_inode = NULL;
++ }
++ if (frame)
++ AFR_STACK_DESTROY(frame);
++ return -child_op_errno;
++}
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index 00b5b2d..20b07dd 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -16,54 +16,170 @@
+ #include <glusterfs/syncop-utils.h>
+ #include <glusterfs/events.h>
+
+-static int
+-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+- inode_t *inode, int child, struct afr_reply *replies)
++int
++afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
++ inode_t *inode, int child,
++ struct afr_reply *replies,
++ gf_boolean_t *anon_inode)
+ {
+ afr_private_t *priv = NULL;
++ afr_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
++ int i = 0;
++ char g[64] = {0};
++ unsigned char *lookup_success = NULL;
++ call_frame_t *frame = NULL;
++ loc_t loc2 = {
++ 0,
++ };
+ loc_t loc = {
+ 0,
+ };
+- char g[64];
+
+ priv = this->private;
+-
+ subvol = priv->children[child];
++ lookup_success = alloca0(priv->child_count);
++ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
++ loc.inode = inode_new(inode->table);
++ if (!loc.inode) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ if (replies[child].poststat.ia_type == IA_IFDIR) {
++ /* This directory may have a sub-directory hierarchy which may need to
++ * be preserved for subsequent heals. So unconditionally move the
++ * directory to the anonymous-inode directory*/
++ *anon_inode = _gf_true;
++ goto anon_inode;
++ }
++
++ frame = afr_frame_create(this, &ret);
++ if (!frame) {
++ ret = -ret;
++ goto out;
++ }
++ local = frame->local;
++ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
++ NULL);
++ for (i = 0; i < priv->child_count; i++) {
++ if (local->replies[i].op_ret == 0) {
++ lookup_success[i] = 1;
++ } else if (local->replies[i].op_errno != ENOENT &&
++ local->replies[i].op_errno != ESTALE) {
++ ret = -local->replies[i].op_errno;
++ }
++ }
++
++ if (priv->quorum_count) {
++ if (afr_has_quorum(lookup_success, this, NULL)) {
++ *anon_inode = _gf_true;
++ }
++ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
++ *anon_inode = _gf_true;
++ } else if (ret) {
++ goto out;
++ }
++
++anon_inode:
++ if (!*anon_inode) {
++ ret = 0;
++ goto out;
++ }
+
+ loc.parent = inode_ref(dir);
+ gf_uuid_copy(loc.pargfid, dir->gfid);
+ loc.name = name;
+- loc.inode = inode_ref(inode);
+
+- if (replies[child].valid && replies[child].op_ret == 0) {
+- switch (replies[child].poststat.ia_type) {
+- case IA_IFDIR:
+- gf_msg(this->name, GF_LOG_WARNING, 0,
+- AFR_MSG_EXPUNGING_FILE_OR_DIR,
+- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+- subvol->name);
+- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+- break;
+- default:
+- gf_msg(this->name, GF_LOG_WARNING, 0,
+- AFR_MSG_EXPUNGING_FILE_OR_DIR,
+- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+- subvol->name);
+- ret = syncop_unlink(subvol, &loc, NULL, NULL);
+- break;
+- }
++ ret = afr_anon_inode_create(this, child, &loc2.parent);
++ if (ret < 0)
++ goto out;
++
++ loc2.name = g;
++ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
++ if (ret < 0) {
++ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
++ "Rename to %s dir %s/%s (%s) on %s failed",
++ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
++ subvol->name);
++ } else {
++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
++ "Rename to %s dir %s/%s (%s) on %s successful",
++ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
++ subvol->name);
+ }
+
++out:
+ loc_wipe(&loc);
++ loc_wipe(&loc2);
++ if (frame) {
++ AFR_STACK_DESTROY(frame);
++ }
+
+ return ret;
+ }
+
+ int
++afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
++ inode_t *inode, int child, struct afr_reply *replies)
++{
++ char g[64] = {0};
++ afr_private_t *priv = NULL;
++ xlator_t *subvol = NULL;
++ int ret = 0;
++ loc_t loc = {
++ 0,
++ };
++ gf_boolean_t anon_inode = _gf_false;
++
++ priv = this->private;
++ subvol = priv->children[child];
++
++ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
++ /*Nothing to do*/
++ ret = 0;
++ goto out;
++ }
++
++ if (priv->use_anon_inode) {
++ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
++ replies, &anon_inode);
++ if (ret < 0 || anon_inode)
++ goto out;
++ }
++
++ loc.parent = inode_ref(dir);
++ loc.inode = inode_new(inode->table);
++ if (!loc.inode) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ loc.name = name;
++ switch (replies[child].poststat.ia_type) {
++ case IA_IFDIR:
++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
++ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
++ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
++ subvol->name);
++ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
++ break;
++ default:
++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
++ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
++ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
++ subvol->name);
++ ret = syncop_unlink(subvol, &loc, NULL, NULL);
++ break;
++ }
++
++out:
++ loc_wipe(&loc);
++ return ret;
++}
++
++int
+ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ unsigned char *sources, inode_t *dir,
+ const char *name, inode_t *inode,
+@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ loc_t srcloc = {
+ 0,
+ };
++ loc_t anonloc = {
++ 0,
++ };
+ xlator_t *this = frame->this;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ 0,
+ };
+ unsigned char *newentry = NULL;
++ char iatt_uuid_str[64] = {0};
++ char dir_uuid_str[64] = {0};
+
+ priv = this->private;
+ iatt = &replies[source].poststat;
++ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
+ if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Invalid ia_type (%d) or gfid(%s). source brick=%d, "
+ "pargfid=%s, name=%s",
+- iatt->ia_type, uuid_utoa(iatt->ia_gfid), source,
+- uuid_utoa(dir->gfid), name);
++ iatt->ia_type, iatt_uuid_str, source,
++ uuid_utoa_r(dir->gfid, dir_uuid_str), name);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+
+ srcloc.inode = inode_ref(inode);
+ gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
+- if (iatt->ia_type != IA_IFDIR)
+- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
++ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
++ if (ret == -ENOENT || ret == -ESTALE) {
+ newentry[dst] = 1;
+ ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
+ sources, newentry);
+ if (ret)
+ goto out;
++ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
++ // Try rename from hidden directory
++ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
++ if (ret < 0)
++ goto out;
++ anonloc.inode = inode_ref(inode);
++ anonloc.name = iatt_uuid_str;
++ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
++ if (ret == -ENOENT || ret == -ESTALE)
++ ret = -1; /*This sets 'mismatch' to true*/
++ goto out;
+ }
+
+ mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+@@ -165,6 +297,7 @@ out:
+ GF_FREE(linkname);
+ loc_wipe(&loc);
+ loc_wipe(&srcloc);
++ loc_wipe(&anonloc);
+ return ret;
+ }
+
+@@ -580,6 +713,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+
+ priv = this->private;
+
++ if (afr_is_private_directory(priv, fd->inode->gfid, name,
++ GF_CLIENT_PID_SELF_HEALD)) {
++ return 0;
++ }
++
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+@@ -628,7 +766,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ replies);
+
+ if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
+- ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
++ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
+ inode->ia_type);
+ /* Why is ret force-set to 0? We do not care about
+ * index purge failing for full heal as it is quite
+@@ -758,10 +896,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+- if (__is_root_gfid(fd->inode->gfid) &&
+- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
+- continue;
+-
+ ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
+ loc.inode, subvol,
+ local->need_full_crawl);
+@@ -824,7 +958,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
+ /* The name indices under the pgfid index dir are guaranteed
+ * to be regular files. Hence the hardcoding.
+ */
+- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
++ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ ret = 0;
+ goto out;
+ }
+diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
+index dace071..51e3d8c 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-name.c
++++ b/xlators/cluster/afr/src/afr-self-heal-name.c
+@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies)
+ {
+- loc_t loc = {
+- 0,
+- };
+ int i = 0;
+ afr_private_t *priv = NULL;
+- char g[64];
+ int ret = 0;
+
+ priv = this->private;
+
+- loc.parent = inode_ref(parent);
+- gf_uuid_copy(loc.pargfid, pargfid);
+- loc.name = bname;
+- loc.inode = inode_ref(inode);
+-
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ if (replies[i].op_ret)
+ continue;
+
+- switch (replies[i].poststat.ia_type) {
+- case IA_IFDIR:
+- gf_msg(this->name, GF_LOG_WARNING, 0,
+- AFR_MSG_EXPUNGING_FILE_OR_DIR,
+- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
+- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
+- priv->children[i]->name);
+-
+- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
+- break;
+- default:
+- gf_msg(this->name, GF_LOG_WARNING, 0,
+- AFR_MSG_EXPUNGING_FILE_OR_DIR,
+- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
+- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
+- priv->children[i]->name);
+-
+- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
+- break;
+- }
++ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
++ replies);
+ }
+
+- loc_wipe(&loc);
+-
+ return ret;
+ }
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
+index 8f6fb00..c8dc384 100644
+--- a/xlators/cluster/afr/src/afr-self-heal.h
++++ b/xlators/cluster/afr/src/afr-self-heal.h
+@@ -370,4 +370,9 @@ gf_boolean_t
+ afr_is_file_empty_on_all_children(afr_private_t *priv,
+ struct afr_reply *replies);
+
++int
++afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
++ inode_t *inode, int child, struct afr_reply *replies);
++int
++afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
+ #endif /* !_AFR_SELFHEAL_H */
+diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
+index 95ac5f2..939a135 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.c
++++ b/xlators/cluster/afr/src/afr-self-heald.c
+@@ -222,7 +222,7 @@ out:
+ }
+
+ int
+-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
++afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type)
+ {
+ int ret = 0;
+@@ -422,7 +422,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ ret = afr_shd_selfheal(healer, healer->subvol, gfid);
+
+ if (ret == -ENOENT || ret == -ESTALE)
+- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
++ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
+
+ if (ret == 2)
+ /* If bricks crashed in pre-op after creating indices/xattrop
+@@ -798,6 +798,176 @@ afr_bricks_available_for_heal(afr_private_t *priv)
+ return _gf_true;
+ }
+
++static int
++afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
++ void *data)
++{
++ struct subvol_healer *healer = data;
++ afr_private_t *priv = healer->this->private;
++ call_frame_t *frame = NULL;
++ afr_local_t *local = NULL;
++ int ret = 0;
++ loc_t loc = {0};
++ int count = 0;
++ int i = 0;
++ int op_errno = 0;
++ struct iatt *iatt = NULL;
++ gf_boolean_t multiple_links = _gf_false;
++ unsigned char *gfid_present = alloca0(priv->child_count);
++ unsigned char *entry_present = alloca0(priv->child_count);
++ char *type = "file";
++
++ frame = afr_frame_create(healer->this, &ret);
++ if (!frame) {
++ ret = -ret;
++ goto out;
++ }
++ local = frame->local;
++ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
++ gf_msg_debug(healer->this->name, 0,
++ "Not all bricks are up. Skipping "
++ "cleanup of %s on %s",
++ entry->d_name, subvol->name);
++ ret = 0;
++ goto out;
++ }
++
++ loc.inode = inode_new(parent->inode->table);
++ if (!loc.inode) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ ret = gf_uuid_parse(entry->d_name, loc.gfid);
++ if (ret) {
++ ret = 0;
++ goto out;
++ }
++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
++ NULL);
++ for (i = 0; i < priv->child_count; i++) {
++ if (local->replies[i].op_ret == 0) {
++ count++;
++ gfid_present[i] = 1;
++ iatt = &local->replies[i].poststat;
++ if (iatt->ia_type == IA_IFDIR) {
++ type = "dir";
++ }
++
++ if (i == healer->subvol) {
++ if (local->replies[i].poststat.ia_nlink > 1) {
++ multiple_links = _gf_true;
++ }
++ }
++ } else if (local->replies[i].op_errno != ENOENT &&
++ local->replies[i].op_errno != ESTALE) {
++ /*We don't have complete view. Skip the entry*/
++ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
++ "Skipping cleanup of %s on %s", entry->d_name,
++ subvol->name);
++ ret = 0;
++ goto out;
++ }
++ }
++
++ /*Inode is deleted from subvol*/
++ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
++ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
++ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
++ priv->anon_inode_name, entry->d_name, subvol->name);
++ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
++ iatt->ia_type);
++ if (ret == -ENOENT || ret == -ESTALE)
++ ret = 0;
++ } else if (count > 1) {
++ loc_wipe(&loc);
++ loc.parent = inode_ref(parent->inode);
++ loc.name = entry->d_name;
++ loc.inode = inode_new(parent->inode->table);
++ if (!loc.inode) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
++ &loc, NULL);
++ count = 0;
++ for (i = 0; i < priv->child_count; i++) {
++ if (local->replies[i].op_ret == 0) {
++ count++;
++ entry_present[i] = 1;
++ iatt = &local->replies[i].poststat;
++ } else if (local->replies[i].op_errno != ENOENT &&
++ local->replies[i].op_errno != ESTALE) {
++ /*We don't have complete view. Skip the entry*/
++ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
++ "Skipping cleanup of %s on %s", entry->d_name,
++ subvol->name);
++ ret = 0;
++ goto out;
++ }
++ }
++ for (i = 0; i < priv->child_count; i++) {
++ if (gfid_present[i] && !entry_present[i]) {
++ /*Entry is not anonymous on at least one subvol*/
++ gf_msg_debug(healer->this->name, 0,
++ "Valid entry present on %s "
++ "Skipping cleanup of %s on %s",
++ priv->children[i]->name, entry->d_name,
++ subvol->name);
++ ret = 0;
++ goto out;
++ }
++ }
++
++ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
++ AFR_MSG_EXPUNGING_FILE_OR_DIR,
++ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
++ entry->d_name);
++ ret = 0;
++ for (i = 0; i < priv->child_count; i++) {
++ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
++ entry->d_name, iatt->ia_type);
++ if (op_errno != ENOENT && op_errno != ESTALE) {
++ ret |= -op_errno;
++ }
++ }
++ }
++
++out:
++ if (frame)
++ AFR_STACK_DESTROY(frame);
++ loc_wipe(&loc);
++ return ret;
++}
++
++static void
++afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
++{
++ int ret = 0;
++ call_frame_t *frame = NULL;
++ afr_private_t *priv = healer->this->private;
++ loc_t loc = {0};
++
++ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
++ if (ret)
++ goto out;
++
++ frame = afr_frame_create(healer->this, &ret);
++ if (!frame) {
++ ret = -ret;
++ goto out;
++ }
++
++ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
++ GF_CLIENT_PID_SELF_HEALD, healer,
++ afr_shd_anon_inode_cleaner, NULL,
++ priv->shd.max_threads, priv->shd.wait_qlength);
++out:
++ if (frame)
++ AFR_STACK_DESTROY(frame);
++ loc_wipe(&loc);
++ return;
++}
++
+ void *
+ afr_shd_index_healer(void *data)
+ {
+@@ -854,6 +1024,10 @@ afr_shd_index_healer(void *data)
+ sleep(1);
+ } while (ret > 0);
+
++ if (ret == 0) {
++ afr_cleanup_anon_inode_dir(healer);
++ }
++
+ if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) {
+ afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
+ pre_crawl_xdata);
+diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
+index 1990539..acd567e 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.h
++++ b/xlators/cluster/afr/src/afr-self-heald.h
+@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ char **path_p);
+
+ int
+-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
++afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type);
+ #endif /* !_AFR_SELF_HEALD_H */
+diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
+index bfa464f..33fe4d8 100644
+--- a/xlators/cluster/afr/src/afr.c
++++ b/xlators/cluster/afr/src/afr.c
+@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
+ }
+ }
+
++void
++afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
++{
++ char *volfile_id_str = NULL;
++ uuid_t anon_inode_gfid = {0};
++
++ /*If volume id is not present don't enable anything*/
++ if (dict_get_str(options, "volume-id", &volfile_id_str))
++ return;
++ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
++ /*anon_inode_name is not supposed to change once assigned*/
++ if (!priv->anon_inode_name[0]) {
++ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
++ AFR_ANON_DIR_PREFIX, volfile_id_str);
++ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
++ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
++ anon_inode_gfid[0] ^= 1;
++ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
++ }
++}
++
+ int
+ reconfigure(xlator_t *this, dict_t *options)
+ {
+@@ -287,6 +308,10 @@ reconfigure(xlator_t *this, dict_t *options)
+ consistent_io = _gf_false;
+ priv->consistent_io = consistent_io;
+
++ afr_handle_anon_inode_options(priv, options);
++
++ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
++ out);
+ if (priv->shd.enabled) {
+ if ((priv->shd.enabled != enabled_old) ||
+ (timeout_old != priv->shd.timeout))
+@@ -535,7 +560,9 @@ init(xlator_t *this)
+
+ GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
+ GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
++ afr_handle_anon_inode_options(priv, this->options);
+
++ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
+ if (priv->quorum_count != 0)
+ priv->consistent_io = _gf_false;
+
+@@ -547,13 +574,16 @@ init(xlator_t *this)
+ goto out;
+ }
+
++ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
++ gf_afr_mt_char);
++
+ priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
+
+ priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
+ gf_afr_mt_child_latency_t);
+
+- if (!priv->child_up || !priv->child_latency) {
++ if (!priv->child_up || !priv->child_latency || !priv->anon_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+@@ -1218,6 +1248,14 @@ struct volume_options options[] = {
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
++ {.key = {"use-anonymous-inode"},
++ .type = GF_OPTION_TYPE_BOOL,
++ .default_value = "no",
++ .op_version = {GD_OP_VERSION_7_0},
++ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
++ .tags = {"replicate"},
++ .description = "Setting this option heals directory renames efficiently"},
++
+ {.key = {NULL}},
+ };
+
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index 3a2b26d..6a9a763 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -40,6 +40,8 @@
+ #define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
+
+ #define AFR_HALO_MAX_LATENCY 99999
++#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
++
+
+ #define PFLAG_PENDING (1 << 0)
+ #define PFLAG_SBRAIN (1 << 1)
+@@ -155,6 +157,7 @@ typedef struct _afr_private {
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
+
++ unsigned char *anon_inode;
+ unsigned char *child_up;
+ int64_t *child_latency;
+ unsigned char *local;
+@@ -240,6 +243,11 @@ typedef struct _afr_private {
+ gf_boolean_t esh_granular;
+ gf_boolean_t consistent_io;
+ gf_boolean_t data_self_heal; /* on/off */
++ gf_boolean_t use_anon_inode;
++
++ /*For anon-inode handling */
++ char anon_inode_name[NAME_MAX + 1];
++ char anon_gfid_str[UUID_SIZE + 1];
+ } afr_private_t;
+
+ typedef enum {
+@@ -1341,4 +1349,7 @@ afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+ void
+ afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
++gf_boolean_t
++afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
++ pid_t pid);
+ #endif /* __AFR_H__ */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 094a71f..1920284 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -3867,6 +3867,38 @@ out:
+ }
+
+ static int
++set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
++ int clusters)
++{
++ xlator_t *xlator = NULL;
++ int i = 0;
++ int ret = -1;
++ glusterd_conf_t *conf = NULL;
++ xlator_t *this = NULL;
++
++ this = THIS;
++ GF_VALIDATE_OR_GOTO("glusterd", this, out);
++ conf = this->private;
++ GF_VALIDATE_OR_GOTO(this->name, conf, out);
++
++ if (conf->op_version < GD_OP_VERSION_7_1)
++ return 0;
++ xlator = first_of(graph);
++
++ for (i = 0; i < clusters; i++) {
++ ret = xlator_set_fixed_option(xlator, "volume-id",
++ uuid_utoa(volinfo->volume_id));
++ if (ret)
++ goto out;
++
++ xlator = xlator->next;
++ }
++
++out:
++ return ret;
++}
++
++static int
+ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo)
+ {
+@@ -3906,6 +3938,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
+ clusters = -1;
+ goto out;
+ }
++
++ ret = set_volfile_id_option(graph, volinfo, clusters);
++ if (ret) {
++ clusters = -1;
++ goto out;
++ }
++
+ if (!volinfo->arbiter_count)
+ goto out;
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 62acadf..c1ca190 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3789,4 +3789,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+ .voltype = "features/cloudsync",
+ .op_version = GD_OP_VERSION_7_0,
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
++
++ {.key = "cluster.use-anonymous-inode",
++ .voltype = "cluster/replicate",
++ .op_version = GD_OP_VERSION_7_1,
++ .value = "yes",
++ .flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = NULL}};
+--
+1.8.3.1
+
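For reference, the naming scheme introduced by afr_handle_anon_inode_options() in the patch above can be reproduced in isolation: the hidden per-volume directory is named "<prefix>-<volume-id>", and its gfid is the volume id with one bit flipped so the two uuids can never collide. The following is a standalone sketch using libuuid, not GlusterFS code; the volume id is a made-up example.

#include <stdio.h>
#include <uuid/uuid.h>

#define ANON_DIR_PREFIX ".glusterfs-anonymous-inode" /* AFR_ANON_DIR_PREFIX */

int main(void)
{
    /* Hypothetical volume id, normally passed via the "volume-id" option. */
    const char *volume_id = "9dea72b1-04ae-4f2a-8a05-6e5d2a2c2b7a";
    char anon_inode_name[256];
    uuid_t anon_gfid;
    char anon_gfid_str[37];

    /* Directory name: "<prefix>-<volume-id>", one per volume. */
    snprintf(anon_inode_name, sizeof(anon_inode_name), "%s-%s",
             ANON_DIR_PREFIX, volume_id);

    /* Gfid: the volume id with the lowest bit of byte 0 flipped, which
     * guarantees it differs from the volume id itself. */
    if (uuid_parse(volume_id, anon_gfid) != 0)
        return 1;
    anon_gfid[0] ^= 1;
    uuid_unparse(anon_gfid, anon_gfid_str);

    printf("anon dir : %s\n", anon_inode_name);
    printf("anon gfid: %s\n", anon_gfid_str);
    return 0;   /* build with: cc sketch.c -luuid */
}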
diff --git a/0516-afr-return-EIO-for-gfid-split-brains.patch b/0516-afr-return-EIO-for-gfid-split-brains.patch
new file mode 100644
index 0000000..0f6249e
--- /dev/null
+++ b/0516-afr-return-EIO-for-gfid-split-brains.patch
@@ -0,0 +1,338 @@
+From 8d24d891aade910b0bb86b27c25a8d2382e19ba0 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Tue, 15 Dec 2020 15:04:19 +0530
+Subject: [PATCH 516/517] afr: return -EIO for gfid split-brains.
+
+Problem:
+entry-self-heal-anon-dir-off.t was failing occasionally because
+afr_gfid_split_brain_source() returned -1 instead of -EIO for
+split-brains, causing the code to proceed to afr_lookup_done(), which
+in turn succeeded the lookup if there was a parallel client-side heal
+going on.
+
+Fix:
+Return -EIO instead of -1 so that the lookup fails.
+
+Also, afr_selfheal_name() was using the same dict to get and set values.
+This could be problematic if the caller passed local->xdata_req, since
+setting a response in a request dict can lead to bugs. So it was changed
+to use separate request and response dicts.
+
+Upstream patch details:
+> Fixes: #1739
+> Credits Pranith Karampuri <pranith.karampuri@phonepe.com>
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+>Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443
+Upstream patch: https://github.com/gluster/glusterfs/pull/1819/
+
+BUG: 1640148
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221209
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ xlators/cluster/afr/src/afr-common.c | 12 ++++++++----
+ xlators/cluster/afr/src/afr-self-heal-common.c | 27 +++++++++++++-------------
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 8 ++++----
+ xlators/cluster/afr/src/afr-self-heal-name.c | 23 +++++++++++-----------
+ xlators/cluster/afr/src/afr-self-heal.h | 5 +++--
+ xlators/cluster/afr/src/afr-self-heald.c | 2 +-
+ 6 files changed, 42 insertions(+), 35 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 6f2da11..416012c 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -2366,7 +2366,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
+ /* If we were called from glfsheal and there is still a gfid
+ * mismatch, succeed the lookup and let glfsheal print the
+ * response via gfid-heal-msg.*/
+- if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg",
++ if (!dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg",
+ &gfid_heal_msg))
+ goto cant_interpret;
+
+@@ -2421,7 +2421,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
+ goto error;
+ }
+
+- ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
++ ret = dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", &gfid_heal_msg);
+ if (!ret) {
+ ret = dict_set_str_sizen(local->replies[read_subvol].xdata,
+ "gfid-heal-msg", gfid_heal_msg);
+@@ -2768,9 +2768,12 @@ afr_lookup_selfheal_wrap(void *opaque)
+ local = frame->local;
+ this = frame->this;
+ loc_pargfid(&local->loc, pargfid);
++ if (!local->xattr_rsp)
++ local->xattr_rsp = dict_new();
+
+ ret = afr_selfheal_name(frame->this, pargfid, local->loc.name,
+- &local->cont.lookup.gfid_req, local->xattr_req);
++ &local->cont.lookup.gfid_req, local->xattr_req,
++ local->xattr_rsp);
+ if (ret == -EIO)
+ goto unwind;
+
+@@ -2786,7 +2789,8 @@ afr_lookup_selfheal_wrap(void *opaque)
+ return 0;
+
+ unwind:
+- AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
++ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, local->xattr_rsp,
++ NULL);
+ return 0;
+ }
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 0a8a7fd..0954d2c 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -245,7 +245,8 @@ int
+ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, const char *bname,
+ int src_idx, int child_idx,
+- unsigned char *locked_on, int *src, dict_t *xdata)
++ unsigned char *locked_on, int *src, dict_t *req,
++ dict_t *rsp)
+ {
+ afr_private_t *priv = NULL;
+ char g1[64] = {
+@@ -266,8 +267,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "All the bricks should be up to resolve the gfid split "
+ "barin");
+- if (xdata) {
+- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
++ if (rsp) {
++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
+ SALL_BRICKS_UP_TO_RESOLVE);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+@@ -277,8 +278,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ goto out;
+ }
+
+- if (xdata) {
+- ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op);
++ if (req) {
++ ret = dict_get_int32_sizen(req, "heal-op", &heal_op);
+ if (ret)
+ goto fav_child;
+ } else {
+@@ -292,8 +293,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SNO_BIGGER_FILE);
+- if (xdata) {
+- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
++ if (rsp) {
++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
+ SNO_BIGGER_FILE);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+@@ -310,8 +311,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SNO_DIFF_IN_MTIME);
+- if (xdata) {
+- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
++ if (rsp) {
++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
+ SNO_DIFF_IN_MTIME);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+@@ -323,7 +324,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ break;
+
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+- ret = dict_get_str_sizen(xdata, "child-name", &src_brick);
++ ret = dict_get_str_sizen(req, "child-name", &src_brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Error getting the source "
+@@ -335,8 +336,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SERROR_GETTING_SRC_BRICK);
+- if (xdata) {
+- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
++ if (rsp) {
++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
+ SERROR_GETTING_SRC_BRICK);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+@@ -400,7 +401,7 @@ out:
+ uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2));
+- return -1;
++ return -EIO;
+ }
+ return 0;
+ }
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index 20b07dd..a17dd93 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -399,7 +399,7 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
+ (ia_type == replies[i].poststat.ia_type)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+ bname, src_idx, i, locked_on, src,
+- NULL);
++ NULL, NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Skipping conservative merge on the "
+@@ -474,7 +474,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ return ret;
+
+ /* In case of type mismatch / unable to resolve gfid mismatch on the
+- * entry, return -1.*/
++ * entry, return -EIO.*/
+ ret = afr_selfheal_detect_gfid_and_type_mismatch(
+ this, replies, inode, fd->inode->gfid, name, source, locked_on, &src);
+
+@@ -905,7 +905,7 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ break;
+ }
+
+- if (ret == -1) {
++ if (ret == -EIO) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
+@@ -1072,7 +1072,7 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+ else
+ ret = afr_selfheal_entry_do_subvol(frame, this, fd, i);
+
+- if (ret == -1) {
++ if (ret == -EIO) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
+diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
+index 51e3d8c..9ec2066 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-name.c
++++ b/xlators/cluster/afr/src/afr-self-heal-name.c
+@@ -217,7 +217,8 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ int *gfid_idx, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+- unsigned char *locked_on, dict_t *xdata)
++ unsigned char *locked_on, dict_t *req,
++ dict_t *rsp)
+ {
+ int i = 0;
+ int gfid_idx_iter = -1;
+@@ -245,11 +246,11 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ if (sources[i] || source == -1) {
+ if ((sources[gfid_idx_iter] || source == -1) &&
+ gf_uuid_compare(gfid, gfid1)) {
+- ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+- bname, gfid_idx_iter, i,
+- locked_on, gfid_idx, xdata);
++ ret = afr_gfid_split_brain_source(
++ this, replies, inode, pargfid, bname, gfid_idx_iter, i,
++ locked_on, gfid_idx, req, rsp);
+ if (!ret && *gfid_idx >= 0) {
+- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
+ "GFID split-brain resolved");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+@@ -303,7 +304,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, int source,
+ unsigned char *locked_on, struct afr_reply *replies,
+- void *gfid_req, dict_t *xdata)
++ void *gfid_req, dict_t *req, dict_t *rsp)
+ {
+ int gfid_idx = -1;
+ int ret = -1;
+@@ -333,7 +334,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+
+ ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources,
+ &gfid_idx, pargfid, bname,
+- inode, locked_on, xdata);
++ inode, locked_on, req, rsp);
+ if (ret)
+ return ret;
+
+@@ -450,7 +451,7 @@ out:
+ int
+ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, void *gfid_req,
+- dict_t *xdata)
++ dict_t *req, dict_t *rsp)
+ {
+ afr_private_t *priv = NULL;
+ unsigned char *sources = NULL;
+@@ -505,7 +506,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+
+ ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode,
+ sources, sinks, healed_sinks, source,
+- locked_on, replies, gfid_req, xdata);
++ locked_on, replies, gfid_req, req, rsp);
+ }
+ unlock:
+ afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on,
+@@ -578,7 +579,7 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
+
+ int
+ afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname,
+- void *gfid_req, dict_t *xdata)
++ void *gfid_req, dict_t *req, dict_t *rsp)
+ {
+ inode_t *parent = NULL;
+ call_frame_t *frame = NULL;
+@@ -600,7 +601,7 @@ afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname,
+
+ if (need_heal) {
+ ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname,
+- gfid_req, xdata);
++ gfid_req, req, rsp);
+ if (ret)
+ goto out;
+ }
+diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
+index c8dc384..6b0bf69 100644
+--- a/xlators/cluster/afr/src/afr-self-heal.h
++++ b/xlators/cluster/afr/src/afr-self-heal.h
+@@ -127,7 +127,7 @@ afr_throttled_selfheal(call_frame_t *frame, xlator_t *this);
+
+ int
+ afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req,
+- dict_t *xdata);
++ dict_t *req, dict_t *rsp);
+
+ int
+ afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd);
+@@ -357,7 +357,8 @@ int
+ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, const char *bname,
+ int src_idx, int child_idx,
+- unsigned char *locked_on, int *src, dict_t *xdata);
++ unsigned char *locked_on, int *src, dict_t *req,
++ dict_t *rsp);
+ int
+ afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
+index 939a135..18aed93 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.c
++++ b/xlators/cluster/afr/src/afr-self-heald.c
+@@ -295,7 +295,7 @@ afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent,
+ {
+ int ret = -1;
+
+- ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL);
++ ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL, NULL);
+
+ return ret;
+ }
+--
+1.8.3.1
+
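The essential change above is the move from a bare -1 to the negative-errno convention, letting callers such as afr_selfheal_entry_do() treat -EIO specifically as "gfid or type mismatch" rather than lumping it in with every other failure. A minimal sketch of that calling pattern in plain C, with none of the AFR types:

#include <errno.h>
#include <stdio.h>

/* Toy stand-in for afr_gfid_split_brain_source(): report an
 * unresolved split-brain as -EIO, success as 0. */
static int resolve_entry(int split_brain)
{
    return split_brain ? -EIO : 0;
}

int main(void)
{
    int mismatch = 0;
    int ret = resolve_entry(1);

    if (ret == -EIO) {
        /* gfid or type mismatch: remember it, but keep healing the
         * remaining entries, as the callers in the patch do. */
        mismatch = 1;
        ret = 0;
    }
    printf("ret=%d mismatch=%d\n", ret, mismatch);
    return 0;
}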
diff --git a/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch b/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch
new file mode 100644
index 0000000..bc1b263
--- /dev/null
+++ b/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch
@@ -0,0 +1,388 @@
+From da75c2857fd8b173d47fb7fc3b925ffd14105f64 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@rhel7x.kkeithle.usersys.redhat.com>
+Date: Wed, 23 Dec 2020 07:39:13 -0500
+Subject: [PATCH 517/517] gfapi: 'glfs_h_creat_open' - new API to create handle
+ and open fd
+
+Right now we have two separate APIs:
+- 'glfs_h_creat_handle' to create a handle, and
+- 'glfs_h_open' to create a glfd to return to the application.
+
+Having two separate routines can result in access errors
+while trying to create and write into a read-only file.
+
+Since an fd is opened even during file/directory creation,
+introduce a new API that makes these two operations atomic, i.e.,
+one that can create both the handle and the fd and pass them to
+the application.
+
+This is a backport of the below mainline patch -
+- https://review.gluster.org/#/c/glusterfs/+/23448/
+- bz#1753569
+
+> Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+> Change-Id: Ibf513fcfcdad175f4d7eb6fa7a61b8feec6d33b5
+> release-6: commit 5a2af2fd06356f6fc79d591c352caffd4c511c9e
+> master: commit 41a0f2aa755ec7162facd30209f2fa3f40308766
+
+BUG: 1910119
+Change-Id: Ib397dbe82a6928d8f24251809d30febddd007bfc
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/222083
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/gfapi.aliases | 1 +
+ api/src/gfapi.map | 5 ++
+ api/src/glfs-handleops.c | 135 ++++++++++++++++++++++++++++++++++
+ api/src/glfs-handles.h | 5 ++
+ tests/basic/gfapi/glfs_h_creat_open.c | 118 +++++++++++++++++++++++++++++
+ tests/basic/gfapi/glfs_h_creat_open.t | 27 +++++++
+ 6 files changed, 291 insertions(+)
+ create mode 100644 tests/basic/gfapi/glfs_h_creat_open.c
+ create mode 100755 tests/basic/gfapi/glfs_h_creat_open.t
+
+diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
+index 692ae13..3d3415c 100644
+--- a/api/src/gfapi.aliases
++++ b/api/src/gfapi.aliases
+@@ -197,3 +197,4 @@ _pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0
+ _pub_glfs_setattr _glfs_setattr$GFAPI_6.0
+
+ _pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_6.4
++_pub_glfs_h_creat_open _glfs_h_creat_open@GFAPI_6.6
+diff --git a/api/src/gfapi.map b/api/src/gfapi.map
+index df65837..614f3f6 100644
+--- a/api/src/gfapi.map
++++ b/api/src/gfapi.map
+@@ -276,3 +276,8 @@ GFAPI_6.4 {
+ global:
+ glfs_set_statedump_path;
+ } GFAPI_PRIVATE_6.1;
++
++GFAPI_6.6 {
++ global:
++ glfs_h_creat_open;
++} GFAPI_6.4;
+diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c
+index d4e1545..7b8ff14 100644
+--- a/api/src/glfs-handleops.c
++++ b/api/src/glfs-handleops.c
+@@ -843,6 +843,141 @@ invalid_fs:
+ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2);
+
+ struct glfs_object *
++pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent,
++ const char *path, int flags, mode_t mode,
++ struct stat *stat, struct glfs_fd **out_fd)
++{
++ int ret = -1;
++ struct glfs_fd *glfd = NULL;
++ xlator_t *subvol = NULL;
++ inode_t *inode = NULL;
++ loc_t loc = {
++ 0,
++ };
++ struct iatt iatt = {
++ 0,
++ };
++ uuid_t gfid;
++ dict_t *xattr_req = NULL;
++ struct glfs_object *object = NULL;
++ dict_t *fop_attr = NULL;
++
++ /* validate in args */
++ if ((fs == NULL) || (parent == NULL) || (path == NULL) ||
++ (out_fd == NULL)) {
++ errno = EINVAL;
++ return NULL;
++ }
++
++ DECLARE_OLD_THIS;
++ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
++
++ /* get the active volume */
++ subvol = glfs_active_subvol(fs);
++ if (!subvol) {
++ ret = -1;
++ goto out;
++ }
++
++ /* get/refresh the in arg objects inode in correlation to the xlator */
++ inode = glfs_resolve_inode(fs, subvol, parent);
++ if (!inode) {
++ ret = -1;
++ goto out;
++ }
++
++ xattr_req = dict_new();
++ if (!xattr_req) {
++ ret = -1;
++ errno = ENOMEM;
++ goto out;
++ }
++
++ gf_uuid_generate(gfid);
++ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
++ if (ret) {
++ ret = -1;
++ errno = ENOMEM;
++ goto out;
++ }
++
++ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
++
++ glfd = glfs_fd_new(fs);
++ if (!glfd) {
++ ret = -1;
++ errno = ENOMEM;
++ goto out;
++ }
++
++ glfd->fd = fd_create(loc.inode, getpid());
++ if (!glfd->fd) {
++ ret = -1;
++ errno = ENOMEM;
++ goto out;
++ }
++ glfd->fd->flags = flags;
++
++ ret = get_fop_attr_thrd_key(&fop_attr);
++ if (ret)
++ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
++
++ /* fop/op */
++ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt, xattr_req,
++ NULL);
++ DECODE_SYNCOP_ERR(ret);
++
++ /* populate out args */
++ if (ret == 0) {
++ glfd->fd->flags = flags;
++
++ ret = glfs_loc_link(&loc, &iatt);
++ if (ret != 0) {
++ goto out;
++ }
++
++ if (stat)
++ glfs_iatt_to_stat(fs, &iatt, stat);
++
++ ret = glfs_create_object(&loc, &object);
++ }
++
++out:
++ if (ret && object != NULL) {
++ /* Release the held reference */
++ glfs_h_close(object);
++ object = NULL;
++ }
++
++ loc_wipe(&loc);
++
++ if (inode)
++ inode_unref(inode);
++
++ if (fop_attr)
++ dict_unref(fop_attr);
++
++ if (xattr_req)
++ dict_unref(xattr_req);
++
++ if (ret && glfd) {
++ GF_REF_PUT(glfd);
++ } else if (glfd) {
++ glfd_set_state_bind(glfd);
++ *out_fd = glfd;
++ }
++
++ glfs_subvol_done(fs, subvol);
++
++ __GLFS_EXIT_FS;
++
++invalid_fs:
++ return object;
++}
++
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6);
++
++struct glfs_object *
+ pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, struct stat *stat)
+ {
+diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h
+index f7e6a06..4d039b9 100644
+--- a/api/src/glfs-handles.h
++++ b/api/src/glfs-handles.h
+@@ -250,6 +250,11 @@ int
+ glfs_h_access(glfs_t *fs, glfs_object_t *object, int mask) __THROW
+ GFAPI_PUBLIC(glfs_h_access, 3.6.0);
+
++struct glfs_object *
++glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, const char *path,
++ int flags, mode_t mode, struct stat *stat,
++ struct glfs_fd **out_fd) __THROW
++ GFAPI_PUBLIC(glfs_h_creat_open, 6.6);
+ /*
+ SYNOPSIS
+
+diff --git a/tests/basic/gfapi/glfs_h_creat_open.c b/tests/basic/gfapi/glfs_h_creat_open.c
+new file mode 100644
+index 0000000..7672561
+--- /dev/null
++++ b/tests/basic/gfapi/glfs_h_creat_open.c
+@@ -0,0 +1,118 @@
++#include <fcntl.h>
++#include <unistd.h>
++#include <time.h>
++#include <limits.h>
++#include <string.h>
++#include <stdio.h>
++#include <errno.h>
++#include <stdlib.h>
++#include <glusterfs/api/glfs.h>
++#include <glusterfs/api/glfs-handles.h>
++
++#define LOG_ERR(func, ret) \
++ do { \
++ if (ret != 0) { \
++ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \
++ ret, errno); \
++ exit(1); \
++ } else { \
++ fprintf(stderr, "%s : returned %d\n", func, ret); \
++ } \
++ } while (0)
++#define LOG_IF_NO_ERR(func, ret) \
++ do { \
++ if (ret == 0) { \
++ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \
++ exit(1); \
++ } else { \
++ fprintf(stderr, "%s : returned %d\n", func, ret); \
++ } \
++ } while (0)
++int
++main(int argc, char *argv[])
++{
++ glfs_t *fs = NULL;
++ int ret = 0;
++ struct glfs_object *root = NULL, *leaf = NULL;
++ glfs_fd_t *fd = NULL;
++ char *filename = "/ro-file";
++ struct stat sb = {
++ 0,
++ };
++ char *logfile = NULL;
++ char *volname = NULL;
++ char *hostname = NULL;
++ char buf[32] = "abcdefghijklmnopqrstuvwxyz012345";
++
++ fprintf(stderr, "Starting glfs_h_creat_open\n");
++
++ if (argc != 4) {
++ fprintf(stderr, "Invalid argument\n");
++ exit(1);
++ }
++
++ hostname = argv[1];
++ volname = argv[2];
++ logfile = argv[3];
++
++ fs = glfs_new(volname);
++ if (!fs) {
++ fprintf(stderr, "glfs_new: returned NULL\n");
++ return 1;
++ }
++
++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
++ LOG_ERR("glfs_set_volfile_server", ret);
++
++ ret = glfs_set_logging(fs, logfile, 7);
++ LOG_ERR("glfs_set_logging", ret);
++
++ ret = glfs_init(fs);
++ LOG_ERR("glfs_init", ret);
++
++ sleep(2);
++ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
++ if (!root) {
++ ret = -1;
++ LOG_ERR("glfs_h_lookupat root", ret);
++ }
++ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0);
++ if (!leaf) {
++ ret = -1;
++ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
++ }
++
++ leaf = glfs_h_creat_open(fs, root, filename, O_RDONLY, 00444, &sb, &fd);
++ if (!leaf || !fd) {
++ ret = -1;
++ LOG_ERR("glfs_h_creat leaf", ret);
++ }
++ fprintf(stderr, "glfs_h_create_open leaf - %p\n", leaf);
++
++ ret = glfs_write(fd, buf, 32, 0);
++ if (ret < 0) {
++ fprintf(stderr, "glfs_write: error writing to file %s, %s\n", filename,
++ strerror(errno));
++ goto out;
++ }
++
++ ret = glfs_h_getattrs(fs, leaf, &sb);
++ LOG_ERR("glfs_h_getattrs", ret);
++
++ if (sb.st_size != 32) {
++ fprintf(stderr, "glfs_write: post size mismatch\n");
++ goto out;
++ }
++
++ fprintf(stderr, "Successfully opened and written to a read-only file \n");
++out:
++ if (fd)
++ glfs_close(fd);
++
++ ret = glfs_fini(fs);
++ LOG_ERR("glfs_fini", ret);
++
++ fprintf(stderr, "End of libgfapi_fini\n");
++
++ exit(0);
++}
+diff --git a/tests/basic/gfapi/glfs_h_creat_open.t b/tests/basic/gfapi/glfs_h_creat_open.t
+new file mode 100755
+index 0000000..f24ae73
+--- /dev/null
++++ b/tests/basic/gfapi/glfs_h_creat_open.t
+@@ -0,0 +1,27 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup;
++
++TEST glusterd
++
++TEST $CLI volume create $V0 $H0:$B0/brick1;
++EXPECT 'Created' volinfo_field $V0 'Status';
++
++TEST $CLI volume start $V0;
++EXPECT 'Started' volinfo_field $V0 'Status';
++
++logdir=`gluster --print-logdir`
++
++TEST build_tester $(dirname $0)/glfs_h_creat_open.c -lgfapi
++
++TEST ./$(dirname $0)/glfs_h_creat_open $H0 $V0 $logdir/glfs.log
++
++cleanup_tester $(dirname $0)/glfs_h_creat_open
++
++TEST $CLI volume stop $V0
++TEST $CLI volume delete $V0
++
++cleanup;
+--
+1.8.3.1
+
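A usage sketch for the new call follows. It assumes an already-initialized glfs_t and a parent handle obtained with glfs_h_lookupat(), as in the bundled test above; the file name is arbitrary. The point of the combined call is that the initial write works even though the file is created read-only:

#include <fcntl.h>
#include <sys/stat.h>
#include <glusterfs/api/glfs.h>
#include <glusterfs/api/glfs-handles.h>

static int
create_readonly_and_write(glfs_t *fs, struct glfs_object *parent)
{
    struct stat sb;
    glfs_fd_t *fd = NULL;
    struct glfs_object *obj = NULL;

    /* Create the file, link its handle and open an fd in one shot. */
    obj = glfs_h_creat_open(fs, parent, "ro-file", O_RDONLY, 0444, &sb, &fd);
    if (!obj || !fd)
        return -1;

    /* The fd came from the creation itself, so writing the initial
     * contents succeeds despite the 0444 mode. */
    if (glfs_write(fd, "data", 4, 0) < 0) {
        glfs_h_close(obj);
        glfs_close(fd);
        return -1;
    }
    glfs_h_close(obj);
    return glfs_close(fd);
}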
diff --git a/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch b/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch
new file mode 100644
index 0000000..00d29b9
--- /dev/null
+++ b/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch
@@ -0,0 +1,41 @@
+From 818025e467ea98b32a855c92ba6aef6e172e029f Mon Sep 17 00:00:00 2001
+From: Nikhil Ladha <nladha@redhat.com>
+Date: Fri, 8 Jan 2021 13:12:46 +0530
+Subject: [PATCH 518/526] glusterd: Fix for shared storage in ipv6 env
+
+Issue:
+Mounting the shared storage volume was failing in an IPv6 environment when
+the hostnames were FQDNs. The brick name for the volume was being cut off;
+as a result, volume creation failed.
+
+>Change-Id: Ib38993724c709b35b603f9ac666630c50c932c3e
+>Fixes: #1406
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+Upstream patch: https://github.com/gluster/glusterfs/pull/1972
+
+BUG: 1856574
+
+Change-Id: Ib38993724c709b35b603f9ac666630c50c932c3e
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/223248
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+index 9597503..e9261af 100755
+--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+@@ -46,7 +46,7 @@ do
+
+ key=`echo $line | cut -d ':' -f 1`
+ if [ "$key" == "Hostname" ]; then
+- hostname=`echo $line | cut -d ':' -f 2 | xargs`
++ hostname=`echo $line | cut -d ' ' -f 2 | xargs`
+ fi
+
+ if [ "$key" == "State" ]; then
+--
+1.8.3.1
+
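The one-character change above is easy to misread, so here is a toy reproduction of the parsing bug in C: splitting the "Hostname: <value>" line on ':' truncates any value that itself contains colons, such as an IPv6 address, while taking everything after the first space keeps it intact. The sample address is invented:

#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *line = "Hostname: 2001:db8::10"; /* sample peer-status line */
    char old_buf[64];
    char *saveptr = NULL;

    /* Old behaviour (`cut -d ':' -f 2`): only up to the next colon. */
    snprintf(old_buf, sizeof(old_buf), "%s", line);
    strtok_r(old_buf, ":", &saveptr);                /* "Hostname" */
    char *old_field = strtok_r(NULL, ":", &saveptr); /* " 2001" only */

    /* New behaviour (`cut -d ' ' -f 2`): everything after the space. */
    const char *new_field = strchr(line, ' ') + 1;

    printf("old: '%s'\nnew: '%s'\n", old_field, new_field);
    return 0;
}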
diff --git a/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch b/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch
new file mode 100644
index 0000000..f37acfd
--- /dev/null
+++ b/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch
@@ -0,0 +1,58 @@
+From 6ed227367b6eb7d6d7afde3859ad0a711a3adf36 Mon Sep 17 00:00:00 2001
+From: Leela Venkaiah G <gleelavenkaiah@gmail.com>
+Date: Wed, 13 Jan 2021 16:02:25 +0530
+Subject: [PATCH 519/526] glusterfs-events: Fix incorrect attribute access
+ (#2002)
+
+Issue: When GlusterCmdException is raised, the current code tries to
+access the `message` attribute, which doesn't exist, resulting in a
+malformed error string on failed operations.
+
+Code Change: Replace `message` with `args[0]`
+
+>Fixes: #2001
+>Change-Id: I65c9f0ee79310937a384025b8d454acda154e4bb
+>Signed-off-by: Leela Venkaiah G <lgangava@redhat.com>
+Upstream patch: https://github.com/gluster/glusterfs/pull/2002
+
+BUG: 1600459
+Change-Id: I65c9f0ee79310937a384025b8d454acda154e4bb
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/223584
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ events/src/peer_eventsapi.py | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py
+index 26b77a0..c388da4 100644
+--- a/events/src/peer_eventsapi.py
++++ b/events/src/peer_eventsapi.py
+@@ -174,9 +174,9 @@ def sync_to_peers(args):
+ sync_file_to_peers(WEBHOOKS_FILE_TO_SYNC)
+ except GlusterCmdException as e:
+ # Print stdout if stderr is empty
+- errmsg = e.message[2] if e.message[2] else e.message[1]
++ errmsg = e.args[0][2] if e.args[0][2] else e.args[0][1]
+ handle_output_error("Failed to sync Webhooks file: [Error: {0}]"
+- "{1}".format(e.message[0], errmsg),
++ "{1}".format(e.args[0][0], errmsg),
+ errcode=ERROR_WEBHOOK_SYNC_FAILED,
+ json_output=args.json)
+
+@@ -185,9 +185,9 @@ def sync_to_peers(args):
+ sync_file_to_peers(CUSTOM_CONFIG_FILE_TO_SYNC)
+ except GlusterCmdException as e:
+ # Print stdout if stderr is empty
+- errmsg = e.message[2] if e.message[2] else e.message[1]
++ errmsg = e.args[0][2] if e.args[0][2] else e.args[0][1]
+ handle_output_error("Failed to sync Config file: [Error: {0}]"
+- "{1}".format(e.message[0], errmsg),
++ "{1}".format(e.args[0][0], errmsg),
+ errcode=ERROR_CONFIG_SYNC_FAILED,
+ json_output=args.json)
+
+--
+1.8.3.1
+
diff --git a/0520-performance-open-behind-seek-fop-should-open_and_res.patch b/0520-performance-open-behind-seek-fop-should-open_and_res.patch
new file mode 100644
index 0000000..c46a9ca
--- /dev/null
+++ b/0520-performance-open-behind-seek-fop-should-open_and_res.patch
@@ -0,0 +1,70 @@
+From a3fd2c9d85bbd23131c985599d9c9d74f66f32d2 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Thu, 10 Oct 2019 10:50:59 +0530
+Subject: [PATCH 520/526] performance/open-behind: seek fop should
+ open_and_resume
+
+Upstream patch:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/23530
+> fixes: bz#1760187
+> Change-Id: I4c6ad13194d4fc5c7705e35bf9a27fce504b51f9
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1830713
+Change-Id: I4c6ad13194d4fc5c7705e35bf9a27fce504b51f9
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224484
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/open-behind/src/open-behind.c | 27 +++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index 268c717..3ee3c40 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -711,6 +711,32 @@ err:
+ }
+
+ int
++ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++ gf_seek_what_t what, dict_t *xdata)
++{
++ call_stub_t *stub = NULL;
++ fd_t *wind_fd = NULL;
++
++ wind_fd = ob_get_wind_fd(this, fd, NULL);
++
++ stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what,
++ xdata);
++
++ fd_unref(wind_fd);
++
++ if (!stub)
++ goto err;
++
++ open_and_resume(this, wind_fd, stub);
++
++ return 0;
++err:
++ STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
++
++ return 0;
++}
++
++int
+ ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ {
+ call_stub_t *stub = NULL;
+@@ -1276,6 +1302,7 @@ struct xlator_fops fops = {
+ .flush = ob_flush,
+ .fsync = ob_fsync,
+ .fstat = ob_fstat,
++ .seek = ob_seek,
+ .ftruncate = ob_ftruncate,
+ .fsetxattr = ob_fsetxattr,
+ .setxattr = ob_setxattr,
+--
+1.8.3.1
+
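The fix follows the open-behind stub pattern: the seek request is packaged as a stub and either resumed immediately, if the fd is already open, or parked until the background open completes. Below is a self-contained toy model of that control flow; it is not the GlusterFS call_stub_t/fd_t API:

#include <stdio.h>
#include <stdlib.h>

typedef struct stub {
    void (*resume)(struct stub *s);
    long offset;
    struct stub *next;
} stub_t;

static stub_t *pending = NULL;  /* fops waiting for the open */
static int fd_is_open = 0;      /* toy stand-in for open-behind state */

static void seek_resume(stub_t *s)
{
    printf("seek resumed at offset %ld\n", s->offset);
}

/* Run the stub now if possible, otherwise queue it (the moral
 * equivalent of open_and_resume() in the patch above). */
static void open_and_resume(stub_t *s)
{
    if (fd_is_open) {
        s->resume(s);
        free(s);
    } else {
        s->next = pending;
        pending = s;
    }
}

/* Background open finished: drain the queue (cf. ob_wake_cbk()). */
static void background_open_done(void)
{
    fd_is_open = 1;
    while (pending) {
        stub_t *s = pending;
        pending = s->next;
        s->resume(s);
        free(s);
    }
}

int main(void)
{
    stub_t *s = calloc(1, sizeof(*s));
    if (!s)
        return 1;
    s->resume = seek_resume;
    s->offset = 4096;
    open_and_resume(s);     /* queued: the open has not happened yet */
    background_open_done(); /* open completes, the seek resumes */
    return 0;
}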
diff --git a/0521-open-behind-fix-missing-fd-reference.patch b/0521-open-behind-fix-missing-fd-reference.patch
new file mode 100644
index 0000000..8e18af8
--- /dev/null
+++ b/0521-open-behind-fix-missing-fd-reference.patch
@@ -0,0 +1,121 @@
+From 211d0f7dbb4991b2191925973222ebc79f010e84 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Sun, 8 Mar 2020 18:36:45 +0100
+Subject: [PATCH 521/526] open-behind: fix missing fd reference
+
+Open-behind was not keeping any reference on fds pending to be
+opened. This made it possible for a concurrent close and an entry
+fop (unlink, rename, ...) to destroy the fd while it was still
+being used.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24204
+> Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9
+> Fixes: bz#1810934
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9
+BUG: 1830713
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224485
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/open-behind/src/open-behind.c | 27 ++++++++++++++---------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index 3ee3c40..dd2f2fd 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -206,8 +206,13 @@ ob_fd_free(ob_fd_t *ob_fd)
+ if (ob_fd->xdata)
+ dict_unref(ob_fd->xdata);
+
+- if (ob_fd->open_frame)
++ if (ob_fd->open_frame) {
++        /* If we still have a frame it means that background open has never
++ * been triggered. We need to release the pending reference. */
++ fd_unref(ob_fd->fd);
++
+ STACK_DESTROY(ob_fd->open_frame->root);
++ }
+
+ GF_FREE(ob_fd);
+ }
+@@ -297,6 +302,7 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ call_resume(stub);
+ }
+
++ /* The background open is completed. We can release the 'fd' reference. */
+ fd_unref(fd);
+
+ STACK_DESTROY(frame->root);
+@@ -331,7 +337,9 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+ }
+
+ if (frame) {
+- frame->local = fd_ref(fd);
++ /* We don't need to take a reference here. We already have a reference
++ * while the open is pending. */
++ frame->local = fd;
+
+ STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd,
+@@ -345,15 +353,12 @@ void
+ ob_inode_wake(xlator_t *this, struct list_head *ob_fds)
+ {
+ ob_fd_t *ob_fd = NULL, *tmp = NULL;
+- fd_t *fd = NULL;
+
+ if (!list_empty(ob_fds)) {
+ list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode)
+ {
+ ob_fd_wake(this, ob_fd->fd, ob_fd);
+- fd = ob_fd->fd;
+ ob_fd_free(ob_fd);
+- fd_unref(fd);
+ }
+ }
+ }
+@@ -365,7 +370,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst)
+ if (!src || !dst)
+ goto out;
+
+- dst->fd = __fd_ref(src->fd);
++ dst->fd = src->fd;
+ dst->loc.inode = inode_ref(src->loc.inode);
+ gf_uuid_copy(dst->loc.gfid, src->loc.gfid);
+ dst->flags = src->flags;
+@@ -509,7 +514,6 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+
+ ob_fd->ob_inode = ob_inode;
+
+- /* don't do fd_ref, it'll cause leaks */
+ ob_fd->fd = fd;
+
+ ob_fd->open_frame = copy_frame(frame);
+@@ -539,15 +543,16 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ }
+ UNLOCK(&fd->inode->lock);
+
+- if (!open_in_progress && !unlinked) {
+- fd_ref(fd);
++ /* We take a reference while the background open is pending or being
++ * processed. If we finally wind the request in the foreground, then
++ * ob_fd_free() will take care of this additional reference. */
++ fd_ref(fd);
+
++ if (!open_in_progress && !unlinked) {
+ STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata);
+
+ if (!conf->lazy_open)
+ ob_fd_wake(this, fd, NULL);
+-
+- fd_unref(fd);
+ } else {
+ ob_fd_free(ob_fd);
+ STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+--
+1.8.3.1
+
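The invariant the patch restores is the standard one for asynchronous work: every pending use of an object must hold its own reference. A minimal generic refcounting sketch, not the GlusterFS fd_t API, showing why the fd now survives a close that races with the background open:

#include <stdio.h>
#include <stdlib.h>

typedef struct {
    int refs;
} obj_t;

static obj_t *obj_ref(obj_t *o)
{
    o->refs++;
    return o;
}

static void obj_unref(obj_t *o)
{
    if (--o->refs == 0) {
        printf("object destroyed\n");
        free(o);
    }
}

int main(void)
{
    obj_t *fd = calloc(1, sizeof(*fd));
    if (!fd)
        return 1;
    fd->refs = 1;    /* the application's reference */

    obj_ref(fd);     /* taken when the open is deferred (the fix)   */
    obj_unref(fd);   /* application closes; fd survives because     */
                     /* the pending open still holds its reference  */

    /* ... background open runs and can use fd safely here ... */

    obj_unref(fd);   /* pending reference dropped: now destroyed */
    return 0;
}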
diff --git a/0522-lcov-improve-line-coverage.patch b/0522-lcov-improve-line-coverage.patch
new file mode 100644
index 0000000..13ece12
--- /dev/null
+++ b/0522-lcov-improve-line-coverage.patch
@@ -0,0 +1,746 @@
+From 46e2bbd52d4427c1348fa38dcb5d2b5f125555f1 Mon Sep 17 00:00:00 2001
+From: Amar Tumballi <amarts@redhat.com>
+Date: Thu, 30 May 2019 15:25:01 +0530
+Subject: [PATCH 522/526] lcov: improve line coverage
+
+upcall: remove extra variable assignment and use just one
+ initialization.
+open-behind: reduce the overall number of lines in functions
+ not frequently called
+selinux: reduce some lines in init failure cases
+
+Upstream patch:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/22789
+> updates: bz#1693692
+> Change-Id: I7c1de94f2ec76a5bfe1f48a9632879b18e5fbb95
+> Signed-off-by: Amar Tumballi <amarts@redhat.com>
+
+BUG: 1830713
+Change-Id: I7c1de94f2ec76a5bfe1f48a9632879b18e5fbb95
+Signed-off-by: Amar Tumballi <amarts@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224486
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/selinux/src/selinux.c | 6 +-
+ xlators/features/upcall/src/upcall.c | 108 +++++++---------------
+ xlators/performance/open-behind/src/open-behind.c | 58 ++++--------
+ 3 files changed, 55 insertions(+), 117 deletions(-)
+
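The upcall portion of this patch is mechanical but worth seeing side by side: initializing op_errno to ENOMEM at its declaration removes a separately counted assignment from every failure branch, which is exactly what improves lcov's line numbers. A minimal before/after sketch in plain C:

#include <errno.h>
#include <stdio.h>

/* Before: each error branch carries its own assignment. */
static int before(const void *p)
{
    int op_errno = -1;
    if (!p) {
        op_errno = ENOMEM; /* extra line for lcov to cover */
        goto err;
    }
    return 0;
err:
    return -op_errno;
}

/* After: one initialization at declaration covers every branch. */
static int after(const void *p)
{
    int op_errno = ENOMEM;
    if (!p)
        goto err;
    return 0;
err:
    return -op_errno;
}

int main(void)
{
    printf("before: %d, after: %d\n", before(NULL), after(NULL));
    return 0;
}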
+diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c
+index 58b4c5d..e8e16cd 100644
+--- a/xlators/features/selinux/src/selinux.c
++++ b/xlators/features/selinux/src/selinux.c
+@@ -234,7 +234,6 @@ init(xlator_t *this)
+ priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t);
+ if (!priv) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+- ret = ENOMEM;
+ goto out;
+ }
+
+@@ -242,7 +241,6 @@ init(xlator_t *this)
+
+ this->local_pool = mem_pool_new(selinux_priv_t, 64);
+ if (!this->local_pool) {
+- ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM,
+ "Failed to create local_t's memory pool");
+ goto out;
+@@ -252,9 +250,7 @@ init(xlator_t *this)
+ ret = 0;
+ out:
+ if (ret) {
+- if (priv) {
+- GF_FREE(priv);
+- }
++ GF_FREE(priv);
+ mem_pool_destroy(this->local_pool);
+ }
+ return ret;
+diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c
+index 2583c50..0795f58 100644
+--- a/xlators/features/upcall/src/upcall.c
++++ b/xlators/features/upcall/src/upcall.c
+@@ -57,14 +57,13 @@ static int32_t
+ up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -111,14 +110,13 @@ up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -167,14 +165,13 @@ static int32_t
+ up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -220,14 +217,13 @@ static int32_t
+ up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -274,14 +270,13 @@ static int32_t
+ up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -343,14 +338,13 @@ static int32_t
+ up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -410,14 +404,13 @@ static int32_t
+ up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -472,14 +465,13 @@ static int32_t
+ up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -531,14 +523,13 @@ static int32_t
+ up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -592,14 +583,13 @@ static int32_t
+ up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -653,14 +643,13 @@ static int32_t
+ up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *params)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -717,15 +706,13 @@ static int32_t
+ up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
+-
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -774,14 +761,13 @@ out:
+ static int32_t
+ up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -826,14 +812,13 @@ out:
+ static int32_t
+ up_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -852,14 +837,13 @@ err:
+ static int32_t
+ up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -879,14 +863,13 @@ static int32_t
+ up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -932,14 +915,13 @@ static int32_t
+ up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -986,14 +968,13 @@ static int32_t
+ up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1047,14 +1028,13 @@ static int32_t
+ up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1110,14 +1090,13 @@ static int32_t
+ up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1164,14 +1143,13 @@ static int32_t
+ up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1216,14 +1194,13 @@ out:
+ static int32_t
+ up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1270,14 +1247,13 @@ static int32_t
+ up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1334,14 +1310,13 @@ static int32_t
+ up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1361,14 +1336,13 @@ static int32_t
+ up_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1415,14 +1389,13 @@ static int32_t
+ up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1470,14 +1443,13 @@ static int32_t
+ up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1524,14 +1496,13 @@ static int
+ up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1577,14 +1548,13 @@ static int32_t
+ up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1652,14 +1622,13 @@ static int32_t
+ up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1727,14 +1696,13 @@ static int32_t
+ up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, fd, fd->inode, dict);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1800,7 +1768,7 @@ static int32_t
+ up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+ dict_t *xattr = NULL;
+
+@@ -1808,13 +1776,11 @@ up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+
+ xattr = dict_for_key_value(name, "", 1, _gf_true);
+ if (!xattr) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1885,7 +1851,7 @@ static int32_t
+ up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+ dict_t *xattr = NULL;
+
+@@ -1893,13 +1859,11 @@ up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+
+ xattr = dict_for_key_value(name, "", 1, _gf_true);
+ if (!xattr) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -1950,14 +1914,13 @@ static int32_t
+ up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -2000,14 +1963,13 @@ static int32_t
+ up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+ {
+- int32_t op_errno = -1;
++ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index dd2f2fd..cbe89ec 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -581,7 +581,7 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ {
+ fd_t *old_fd = NULL;
+ int ret = -1;
+- int op_errno = 0;
++ int op_errno = ENOMEM;
+ call_stub_t *stub = NULL;
+
+ old_fd = fd_lookup(fd->inode, 0);
+@@ -589,7 +589,6 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ /* open-behind only when this is the first FD */
+ stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata);
+ if (!stub) {
+- op_errno = ENOMEM;
+ fd_unref(old_fd);
+ goto err;
+ }
+@@ -603,7 +602,6 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+
+ ret = ob_open_behind(frame, this, loc, flags, fd, xdata);
+ if (ret) {
+- op_errno = ENOMEM;
+ goto err;
+ }
+
+@@ -900,18 +898,12 @@ int
+ ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int cmd, struct gf_flock *flock, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_finodelk_stub(frame, default_finodelk_resume, volume, fd, cmd,
+- flock, xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0);
++ call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume,
++ volume, fd, cmd, flock, xdata);
++ if (stub)
++ open_and_resume(this, fd, stub);
++ else
++ STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0);
+
+ return 0;
+ }
+@@ -921,18 +913,12 @@ ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_fentrylk_stub(frame, default_fentrylk_resume, volume, fd,
+- basename, cmd, type, xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0);
++ call_stub_t *stub = fop_fentrylk_stub(
++ frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata);
++ if (stub)
++ open_and_resume(this, fd, stub);
++ else
++ STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0);
+
+ return 0;
+ }
+@@ -941,18 +927,12 @@ int
+ ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, optype, xattr,
+- xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0);
++ call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd,
++ optype, xattr, xdata);
++ if (stub)
++ open_and_resume(this, fd, stub);
++ else
++ STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0);
+
+ return 0;
+ }
+--
+1.8.3.1
+
diff --git a/0523-open-behind-rewrite-of-internal-logic.patch b/0523-open-behind-rewrite-of-internal-logic.patch
new file mode 100644
index 0000000..621d5ae
--- /dev/null
+++ b/0523-open-behind-rewrite-of-internal-logic.patch
@@ -0,0 +1,2720 @@
+From b924c8ca8a133fc9413c8ed1407e63f1658c7e79 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Tue, 12 May 2020 23:54:54 +0200
+Subject: [PATCH 523/526] open-behind: rewrite of internal logic
+
+There was a critical flaw in the previous implementation of open-behind.
+
+When an open is done in the background, it's necessary to take a
+reference on the fd_t object because once we "fake" the open answer,
+the fd could be destroyed. However as long as there's a reference,
+the release function won't be called. So, if the application closes
+the file descriptor without having actually opened it, there will
+always remain at least 1 reference, causing a leak.
+
+To avoid this problem, the previous implementation didn't take a
+reference on the fd_t, so there were races where the fd could be
+destroyed while it was still in use.
+
+To fix this, I've implemented a new xlator cbk that gets called from
+fuse when the application closes a file descriptor.
+
+The whole logic of handling background opens has been simplified and
+is more efficient now. A stub is created only if the fop needs to be
+delayed until an open completes; otherwise no memory allocations are
+needed.
+
+Correctly handling the close request while the open is still pending
+has added a bit of complexity, but overall normal operation is simpler.
+
+Upstream patch:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24451
+> Change-Id: I6376a5491368e0e1c283cc452849032636261592
+> Fixes: #1225
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1830713
+Change-Id: I6376a5491368e0e1c283cc452849032636261592
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224487
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/fd.c | 26 +
+ libglusterfs/src/glusterfs/fd.h | 3 +
+ libglusterfs/src/glusterfs/xlator.h | 4 +
+ libglusterfs/src/libglusterfs.sym | 1 +
+ tests/basic/open-behind/open-behind.t | 183 +++
+ tests/basic/open-behind/tester-fd.c | 99 ++
+ tests/basic/open-behind/tester.c | 444 +++++++
+ tests/basic/open-behind/tester.h | 145 +++
+ tests/bugs/glusterfs/bug-873962-spb.t | 1 +
+ xlators/mount/fuse/src/fuse-bridge.c | 2 +
+ .../open-behind/src/open-behind-messages.h | 6 +-
+ xlators/performance/open-behind/src/open-behind.c | 1302 ++++++++------------
+ 12 files changed, 1393 insertions(+), 823 deletions(-)
+ create mode 100644 tests/basic/open-behind/open-behind.t
+ create mode 100644 tests/basic/open-behind/tester-fd.c
+ create mode 100644 tests/basic/open-behind/tester.c
+ create mode 100644 tests/basic/open-behind/tester.h
+
+diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
+index 314546a..e4ec401 100644
+--- a/libglusterfs/src/fd.c
++++ b/libglusterfs/src/fd.c
+@@ -501,6 +501,32 @@ out:
+ }
+
+ void
++fd_close(fd_t *fd)
++{
++ xlator_t *xl, *old_THIS;
++
++ old_THIS = THIS;
++
++ for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) {
++ if (!xl->call_cleanup) {
++ THIS = xl;
++
++ if (IA_ISDIR(fd->inode->ia_type)) {
++ if (xl->cbks->fdclosedir != NULL) {
++ xl->cbks->fdclosedir(xl, fd);
++ }
++ } else {
++ if (xl->cbks->fdclose != NULL) {
++ xl->cbks->fdclose(xl, fd);
++ }
++ }
++ }
++ }
++
++ THIS = old_THIS;
++}
++
++void
+ fd_unref(fd_t *fd)
+ {
+ int32_t refcount = 0;
+diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h
+index cdbe289..4d157c4 100644
+--- a/libglusterfs/src/glusterfs/fd.h
++++ b/libglusterfs/src/glusterfs/fd.h
+@@ -107,6 +107,9 @@ fd_ref(fd_t *fd);
+ void
+ fd_unref(fd_t *fd);
+
++void
++fd_close(fd_t *fd);
++
+ fd_t *
+ fd_create(struct _inode *inode, pid_t pid);
+
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index 8650ccc..273039a 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -705,6 +705,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode);
+
+ typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd);
+
++typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd);
++
+ struct xlator_cbks {
+ cbk_forget_t forget;
+ cbk_release_t release;
+@@ -715,6 +717,8 @@ struct xlator_cbks {
+ cbk_ictxmerge_t ictxmerge;
+ cbk_inodectx_size_t ictxsize;
+ cbk_fdctx_size_t fdctxsize;
++ cbk_fdclose_t fdclose;
++ cbk_fdclose_t fdclosedir;
+ };
+
+ typedef int32_t (*dumpop_priv_t)(xlator_t *this);
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index bc770e2..0a0862e 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -456,6 +456,7 @@ event_unregister_close
+ fd_anonymous
+ fd_anonymous_with_flags
+ fd_bind
++fd_close
+ fd_create
+ fd_create_uint64
+ __fd_ctx_del
+diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t
+new file mode 100644
+index 0000000..5e865d6
+--- /dev/null
++++ b/tests/basic/open-behind/open-behind.t
+@@ -0,0 +1,183 @@
++#!/bin/bash
++
++WD="$(dirname "${0}")"
++
++. ${WD}/../../include.rc
++. ${WD}/../../volume.rc
++
++function assign() {
++ local _assign_var="${1}"
++ local _assign_value="${2}"
++
++ printf -v "${_assign_var}" "%s" "${_assign_value}"
++}
++
++function pipe_create() {
++ local _pipe_create_var="${1}"
++ local _pipe_create_name
++ local _pipe_create_fd
++
++ _pipe_create_name="$(mktemp -u)"
++ mkfifo "${_pipe_create_name}"
++ exec {_pipe_create_fd}<>"${_pipe_create_name}"
++ rm "${_pipe_create_name}"
++
++ assign "${_pipe_create_var}" "${_pipe_create_fd}"
++}
++
++function pipe_close() {
++ local _pipe_close_fd="${!1}"
++
++ exec {_pipe_close_fd}>&-
++}
++
++function tester_start() {
++ declare -ag tester
++ local tester_in
++ local tester_out
++
++ pipe_create tester_in
++ pipe_create tester_out
++
++ ${WD}/tester <&${tester_in} >&${tester_out} &
++
++ tester=("$!" "${tester_in}" "${tester_out}")
++}
++
++function tester_send() {
++ declare -ag tester
++ local tester_res
++ local tester_extra
++
++ echo "${*}" >&${tester[1]}
++
++ read -t 3 -u ${tester[2]} tester_res tester_extra
++ echo "${tester_res} ${tester_extra}"
++ if [[ "${tester_res}" == "OK" ]]; then
++ return 0
++ fi
++
++ return 1
++}
++
++function tester_stop() {
++ declare -ag tester
++ local tester_res
++
++ tester_send "quit"
++
++ tester_res=0
++ if ! wait ${tester[0]}; then
++ tester_res=$?
++ fi
++
++ unset tester
++
++ return ${tester_res}
++}
++
++function count_open() {
++ local file="$(realpath "${B0}/${V0}/${1}")"
++ local count="0"
++ local inode
++ local ref
++
++ inode="$(stat -c %i "${file}")"
++
++ for fd in /proc/${BRICK_PID}/fd/*; do
++ ref="$(readlink "${fd}")"
++ if [[ "${ref}" == "${B0}/${V0}/"* ]]; then
++ if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then
++ count="$((${count} + 1))"
++ fi
++ fi
++ done
++
++ echo "${count}"
++}
++
++cleanup
++
++TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c
++
++TEST glusterd
++TEST pidof glusterd
++TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0}
++TEST ${CLI} volume set ${V0} flush-behind off
++TEST ${CLI} volume set ${V0} write-behind off
++TEST ${CLI} volume set ${V0} quick-read off
++TEST ${CLI} volume set ${V0} stat-prefetch on
++TEST ${CLI} volume set ${V0} io-cache off
++TEST ${CLI} volume set ${V0} open-behind on
++TEST ${CLI} volume set ${V0} lazy-open off
++TEST ${CLI} volume set ${V0} read-after-open off
++TEST ${CLI} volume start ${V0}
++
++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
++
++BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})"
++
++TEST touch "${M0}/test"
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
++
++TEST tester_start
++
++TEST tester_send fd open 0 "${M0}/test"
++EXPECT_WITHIN 5 "1" count_open "/test"
++TEST tester_send fd close 0
++EXPECT_WITHIN 5 "0" count_open "/test"
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST ${CLI} volume set ${V0} lazy-open on
++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
++
++TEST tester_send fd open 0 "${M0}/test"
++sleep 2
++EXPECT "0" count_open "/test"
++TEST tester_send fd write 0 "test"
++EXPECT "1" count_open "/test"
++TEST tester_send fd close 0
++EXPECT_WITHIN 5 "0" count_open "/test"
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
++
++TEST tester_send fd open 0 "${M0}/test"
++EXPECT "0" count_open "/test"
++EXPECT "test" tester_send fd read 0 64
++# Even though read-after-open is disabled, use-anonymous-fd is also disabled,
++# so reads need to open the file first.
++EXPECT "1" count_open "/test"
++TEST tester_send fd close 0
++EXPECT "0" count_open "/test"
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
++
++TEST tester_send fd open 0 "${M0}/test"
++EXPECT "0" count_open "/test"
++TEST tester_send fd open 1 "${M0}/test"
++EXPECT "2" count_open "/test"
++TEST tester_send fd close 0
++EXPECT_WITHIN 5 "1" count_open "/test"
++TEST tester_send fd close 1
++EXPECT_WITHIN 5 "0" count_open "/test"
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++TEST ${CLI} volume set ${V0} read-after-open on
++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
++
++TEST tester_send fd open 0 "${M0}/test"
++EXPECT "0" count_open "/test"
++EXPECT "test" tester_send fd read 0 64
++EXPECT "1" count_open "/test"
++TEST tester_send fd close 0
++EXPECT_WITHIN 5 "0" count_open "/test"
++
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++
++TEST tester_stop
++
++cleanup
+diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c
+new file mode 100644
+index 0000000..00f02bc
+--- /dev/null
++++ b/tests/basic/open-behind/tester-fd.c
+@@ -0,0 +1,99 @@
++/*
++ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include "tester.h"
++
++#include <stdlib.h>
++#include <unistd.h>
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <fcntl.h>
++#include <string.h>
++#include <ctype.h>
++#include <errno.h>
++
++static int32_t
++fd_open(context_t *ctx, command_t *cmd)
++{
++ obj_t *obj;
++ int32_t fd;
++
++ obj = cmd->args[0].obj.ref;
++
++ fd = open(cmd->args[1].str.data, O_RDWR);
++ if (fd < 0) {
++ return error(errno, "open() failed");
++ }
++
++ obj->type = OBJ_TYPE_FD;
++ obj->fd = fd;
++
++ out_ok("%d", fd);
++
++ return 0;
++}
++
++static int32_t
++fd_close(context_t *ctx, command_t *cmd)
++{
++ obj_t *obj;
++
++ obj = cmd->args[0].obj.ref;
++ obj->type = OBJ_TYPE_NONE;
++
++ if (close(obj->fd) != 0) {
++ return error(errno, "close() failed");
++ }
++
++ out_ok();
++
++ return 0;
++}
++
++static int32_t
++fd_write(context_t *ctx, command_t *cmd)
++{
++ ssize_t len, ret;
++
++ len = strlen(cmd->args[1].str.data);
++ ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len);
++ if (ret < 0) {
++ return error(errno, "write() failed");
++ }
++
++ out_ok("%zd", ret);
++
++ return 0;
++}
++
++static int32_t
++fd_read(context_t *ctx, command_t *cmd)
++{
++ char data[cmd->args[1].num.value + 1];
++ ssize_t ret;
++
++ ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value);
++ if (ret < 0) {
++ return error(errno, "read() failed");
++ }
++
++ data[ret] = 0;
++
++ out_ok("%zd %s", ret, data);
++
++ return 0;
++}
++
++command_t fd_commands[] = {
++ {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))},
++ {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))},
++ {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))},
++ {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))},
++ CMD_END};
+diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c
+new file mode 100644
+index 0000000..b2da71c
+--- /dev/null
++++ b/tests/basic/open-behind/tester.c
+@@ -0,0 +1,444 @@
++/*
++ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#include "tester.h"
++
++#include <stdlib.h>
++#include <unistd.h>
++#include <string.h>
++#include <ctype.h>
++#include <errno.h>
++
++static void *
++mem_alloc(size_t size)
++{
++ void *ptr;
++
++ ptr = malloc(size);
++ if (ptr == NULL) {
++ error(ENOMEM, "Failed to allocate memory (%zu bytes)", size);
++ }
++
++ return ptr;
++}
++
++static void
++mem_free(void *ptr)
++{
++ free(ptr);
++}
++
++static bool
++buffer_create(context_t *ctx, size_t size)
++{
++ ctx->buffer.base = mem_alloc(size);
++ if (ctx->buffer.base == NULL) {
++ return false;
++ }
++
++ ctx->buffer.size = size;
++ ctx->buffer.len = 0;
++ ctx->buffer.pos = 0;
++
++ return true;
++}
++
++static void
++buffer_destroy(context_t *ctx)
++{
++ mem_free(ctx->buffer.base);
++ ctx->buffer.size = 0;
++ ctx->buffer.len = 0;
++}
++
++static int32_t
++buffer_get(context_t *ctx)
++{
++ ssize_t len;
++
++ if (ctx->buffer.pos >= ctx->buffer.len) {
++ len = read(0, ctx->buffer.base, ctx->buffer.size);
++ if (len < 0) {
++ return error(errno, "read() failed");
++ }
++ if (len == 0) {
++ return 0;
++ }
++
++ ctx->buffer.len = len;
++ ctx->buffer.pos = 0;
++ }
++
++ return ctx->buffer.base[ctx->buffer.pos++];
++}
++
++static int32_t
++str_skip_spaces(context_t *ctx, int32_t current)
++{
++ while ((current > 0) && (current != '\n') && isspace(current)) {
++ current = buffer_get(ctx);
++ }
++
++ return current;
++}
++
++static int32_t
++str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current)
++{
++ uint32_t len;
++
++ current = str_skip_spaces(ctx, current);
++
++ len = 0;
++ while ((size > 0) && (current > 0) && (current != '\n') &&
++ !isspace(current)) {
++ len++;
++ *buffer++ = current;
++ size--;
++ current = buffer_get(ctx);
++ }
++
++ if (len == 0) {
++ return error(ENODATA, "Expecting a token");
++ }
++
++ if (size == 0) {
++ return error(ENOBUFS, "Token too long");
++ }
++
++ *buffer = 0;
++
++ return current;
++}
++
++static int32_t
++str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value,
++ int32_t current)
++{
++ char text[32], *ptr;
++ uint64_t num;
++
++ current = str_token(ctx, text, sizeof(text), current);
++ if (current > 0) {
++ num = strtoul(text, &ptr, 0);
++ if ((*ptr != 0) || (num < min) || (num > max)) {
++ return error(ERANGE, "Invalid number");
++ }
++ *value = num;
++ }
++
++ return current;
++}
++
++static int32_t
++str_eol(context_t *ctx, int32_t current)
++{
++ current = str_skip_spaces(ctx, current);
++ if (current != '\n') {
++ return error(EINVAL, "Expecting end of command");
++ }
++
++ return current;
++}
++
++static void
++str_skip(context_t *ctx, int32_t current)
++{
++ while ((current > 0) && (current != '\n')) {
++ current = buffer_get(ctx);
++ }
++}
++
++static int32_t
++cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current)
++{
++ obj_t *obj;
++ uint64_t id;
++
++ current = str_number(ctx, 0, ctx->obj_count, &id, current);
++ if (current <= 0) {
++ return current;
++ }
++
++ obj = &ctx->objs[id];
++ if (obj->type != arg->obj.type) {
++ if (obj->type != OBJ_TYPE_NONE) {
++ return error(EBUSY, "Object is in use");
++ }
++ return error(ENOENT, "Object is not defined");
++ }
++
++ arg->obj.ref = obj;
++
++ return current;
++}
++
++static int32_t
++cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current)
++{
++ return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value,
++ current);
++}
++
++static int32_t
++cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current)
++{
++ return str_token(ctx, arg->str.data, arg->str.size, current);
++}
++
++static int32_t
++cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current)
++{
++ arg_t *arg;
++
++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
++ switch (arg->type) {
++ case ARG_TYPE_OBJ:
++ current = cmd_parse_obj(ctx, arg, current);
++ break;
++ case ARG_TYPE_NUM:
++ current = cmd_parse_num(ctx, arg, current);
++ break;
++ case ARG_TYPE_STR:
++ current = cmd_parse_str(ctx, arg, current);
++ break;
++ default:
++ return error(EINVAL, "Unknown argument type");
++ }
++ }
++
++ if (current < 0) {
++ return current;
++ }
++
++ current = str_eol(ctx, current);
++ if (current <= 0) {
++ return error(EINVAL, "Syntax error");
++ }
++
++ return cmd->handler(ctx, cmd);
++}
++
++static int32_t
++cmd_parse(context_t *ctx, command_t *cmds)
++{
++ char text[32];
++ command_t *cmd;
++ int32_t current;
++
++ cmd = cmds;
++ do {
++ current = str_token(ctx, text, sizeof(text), buffer_get(ctx));
++ if (current <= 0) {
++ return current;
++ }
++
++ while (cmd->name != NULL) {
++ if (strcmp(cmd->name, text) == 0) {
++ if (cmd->handler != NULL) {
++ return cmd_parse_args(ctx, cmd, current);
++ }
++ cmd = cmd->cmds;
++ break;
++ }
++ cmd++;
++ }
++ } while (cmd->name != NULL);
++
++ str_skip(ctx, current);
++
++ return error(ENOTSUP, "Unknown command");
++}
++
++static void
++cmd_fini(context_t *ctx, command_t *cmds)
++{
++ command_t *cmd;
++ arg_t *arg;
++
++ for (cmd = cmds; cmd->name != NULL; cmd++) {
++ if (cmd->handler == NULL) {
++ cmd_fini(ctx, cmd->cmds);
++ } else {
++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
++ switch (arg->type) {
++ case ARG_TYPE_STR:
++ mem_free(arg->str.data);
++ arg->str.data = NULL;
++ break;
++ default:
++ break;
++ }
++ }
++ }
++ }
++}
++
++static bool
++cmd_init(context_t *ctx, command_t *cmds)
++{
++ command_t *cmd;
++ arg_t *arg;
++
++ for (cmd = cmds; cmd->name != NULL; cmd++) {
++ if (cmd->handler == NULL) {
++ if (!cmd_init(ctx, cmd->cmds)) {
++ return false;
++ }
++ } else {
++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
++ switch (arg->type) {
++ case ARG_TYPE_STR:
++ arg->str.data = mem_alloc(arg->str.size);
++ if (arg->str.data == NULL) {
++ return false;
++ }
++ break;
++ default:
++ break;
++ }
++ }
++ }
++ }
++
++ return true;
++}
++
++static bool
++objs_create(context_t *ctx, uint32_t count)
++{
++ uint32_t i;
++
++ ctx->objs = mem_alloc(sizeof(obj_t) * count);
++ if (ctx->objs == NULL) {
++ return false;
++ }
++ ctx->obj_count = count;
++
++ for (i = 0; i < count; i++) {
++ ctx->objs[i].type = OBJ_TYPE_NONE;
++ }
++
++ return true;
++}
++
++static int32_t
++objs_destroy(context_t *ctx)
++{
++ uint32_t i;
++ int32_t err;
++
++ err = 0;
++ for (i = 0; i < ctx->obj_count; i++) {
++ if (ctx->objs[i].type != OBJ_TYPE_NONE) {
++ err = error(ENOTEMPTY, "Objects not destroyed");
++ break;
++ }
++ }
++
++ mem_free(ctx->objs);
++ ctx->objs = NULL;
++ ctx->obj_count = 0;
++
++ return err;
++}
++
++static context_t *
++init(size_t size, uint32_t objs, command_t *cmds)
++{
++ context_t *ctx;
++
++ ctx = mem_alloc(sizeof(context_t));
++ if (ctx == NULL) {
++ goto failed;
++ }
++
++ if (!buffer_create(ctx, size)) {
++ goto failed_ctx;
++ }
++
++ if (!objs_create(ctx, objs)) {
++ goto failed_buffer;
++ }
++
++ if (!cmd_init(ctx, cmds)) {
++ goto failed_objs;
++ }
++
++ ctx->active = true;
++
++ return ctx;
++
++failed_objs:
++ cmd_fini(ctx, cmds);
++ objs_destroy(ctx);
++failed_buffer:
++ buffer_destroy(ctx);
++failed_ctx:
++ mem_free(ctx);
++failed:
++ return NULL;
++}
++
++static int32_t
++fini(context_t *ctx, command_t *cmds)
++{
++ int32_t ret;
++
++ cmd_fini(ctx, cmds);
++ buffer_destroy(ctx);
++
++ ret = objs_destroy(ctx);
++
++ ctx->active = false;
++
++ return ret;
++}
++
++static int32_t
++exec_quit(context_t *ctx, command_t *cmd)
++{
++ ctx->active = false;
++
++ return 0;
++}
++
++static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)},
++ {"quit", exec_quit, CMD_ARGS()},
++ CMD_END};
++
++int32_t
++main(int32_t argc, char *argv[])
++{
++ context_t *ctx;
++ int32_t res;
++
++ ctx = init(1024, 16, commands);
++ if (ctx == NULL) {
++ return 1;
++ }
++
++ do {
++ res = cmd_parse(ctx, commands);
++ if (res < 0) {
++ out_err(-res);
++ }
++ } while (ctx->active);
++
++ res = fini(ctx, commands);
++ if (res >= 0) {
++ out_ok();
++ return 0;
++ }
++
++ out_err(-res);
++
++ return 1;
++}
+diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h
+new file mode 100644
+index 0000000..64e940c
+--- /dev/null
++++ b/tests/basic/open-behind/tester.h
+@@ -0,0 +1,145 @@
++/*
++ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
++ This file is part of GlusterFS.
++
++ This file is licensed to you under your choice of the GNU Lesser
++ General Public License, version 3 or any later version (LGPLv3 or
++ later), or the GNU General Public License, version 2 (GPLv2), in all
++ cases as published by the Free Software Foundation.
++*/
++
++#ifndef __TESTER_H__
++#define __TESTER_H__
++
++#include <stdio.h>
++#include <inttypes.h>
++#include <stdbool.h>
++
++enum _obj_type;
++typedef enum _obj_type obj_type_t;
++
++enum _arg_type;
++typedef enum _arg_type arg_type_t;
++
++struct _buffer;
++typedef struct _buffer buffer_t;
++
++struct _obj;
++typedef struct _obj obj_t;
++
++struct _context;
++typedef struct _context context_t;
++
++struct _arg;
++typedef struct _arg arg_t;
++
++struct _command;
++typedef struct _command command_t;
++
++enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD };
++
++enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR };
++
++struct _buffer {
++ char *base;
++ uint32_t size;
++ uint32_t len;
++ uint32_t pos;
++};
++
++struct _obj {
++ obj_type_t type;
++ union {
++ int32_t fd;
++ };
++};
++
++struct _context {
++ obj_t *objs;
++ buffer_t buffer;
++ uint32_t obj_count;
++ bool active;
++};
++
++struct _arg {
++ arg_type_t type;
++ union {
++ struct {
++ obj_type_t type;
++ obj_t *ref;
++ } obj;
++ struct {
++ uint64_t value;
++ uint64_t min;
++ uint64_t max;
++ } num;
++ struct {
++ uint32_t size;
++ char *data;
++ } str;
++ };
++};
++
++struct _command {
++ const char *name;
++ int32_t (*handler)(context_t *ctx, command_t *cmd);
++ union {
++ arg_t *args;
++ command_t *cmds;
++ };
++};
++
++#define msg(_stream, _fmt, _args...) \
++ do { \
++ fprintf(_stream, _fmt "\n", ##_args); \
++ fflush(_stream); \
++ } while (0)
++
++#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args)
++#define msg_err(_err, _fmt, _args...) \
++ ({ \
++ int32_t __msg_err = (_err); \
++ msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err, \
++ ##_args); \
++ -__msg_err; \
++ })
++
++#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args)
++#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args)
++#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args)
++
++#define out_ok(_args...) msg_out("OK " _args)
++#define out_err(_err) msg_out("ERR %d", _err)
++
++#define ARG_END \
++ { \
++ ARG_TYPE_NONE \
++ }
++
++#define CMD_ARGS1(_x, _args...) \
++ .args = (arg_t[]) { _args }
++#define CMD_ARGS(_args...) CMD_ARGS1(, ##_args, ARG_END)
++
++#define CMD_SUB(_cmds) .cmds = _cmds
++
++#define CMD_END \
++ { \
++ NULL, NULL, CMD_SUB(NULL) \
++ }
++
++#define ARG_VAL(_type) \
++ { \
++ ARG_TYPE_OBJ, .obj = {.type = _type } \
++ }
++#define ARG_NUM(_min, _max) \
++ { \
++ ARG_TYPE_NUM, .num = {.min = _min, .max = _max } \
++ }
++#define ARG_STR(_size) \
++ { \
++ ARG_TYPE_STR, .str = {.size = _size } \
++ }
++
++extern command_t fd_commands[];
++
++#endif /* __TESTER_H__ */
+\ No newline at end of file
+diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t
+index db84a22..db71cc0 100644
+--- a/tests/bugs/glusterfs/bug-873962-spb.t
++++ b/tests/bugs/glusterfs/bug-873962-spb.t
+@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.io-cache off
+ TEST $CLI volume set $V0 performance.write-behind off
+ TEST $CLI volume set $V0 performance.stat-prefetch off
+ TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.open-behind off
+ TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+ TEST $CLI volume start $V0
+ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 919eea3..76b5809 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -3398,6 +3398,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg,
+ gf_log("glusterfs-fuse", GF_LOG_TRACE,
+ "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd);
+
++ fd_close(state->fd);
++
+ fuse_fd_ctx_destroy(this, state->fd);
+ fd_unref(fd);
+
+diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h
+index f250824..0e78917 100644
+--- a/xlators/performance/open-behind/src/open-behind-messages.h
++++ b/xlators/performance/open-behind/src/open-behind-messages.h
+@@ -23,6 +23,10 @@
+ */
+
+ GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
+- OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY);
++ OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY,
++ OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE);
++
++#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop"
++#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state"
+
+ #endif /* _OPEN_BEHIND_MESSAGES_H_ */
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index cbe89ec..e43fe73 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -16,6 +16,18 @@
+ #include "open-behind-messages.h"
+ #include <glusterfs/glusterfs-acl.h>
+
++/* Note: The initial design of open-behind was made to cover the simple case
++ * of open, read, close for small files. This pattern combined with
++ * quick-read can do the whole operation without a single request to the
++ * bricks (except the initial lookup).
++ *
++ * The way to do this has been improved, but the logic remains the same.
++ * Basically, this means that any operation sent to the fd or the inode
++ * that is not a read causes the open request to be sent to the
++ * bricks, and all future operations will be executed synchronously,
++ * including opens (this is reset once all fd's are closed).
++ */
++
+ typedef struct ob_conf {
+ gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
+ e.g - fstat() readv()
+@@ -32,1096 +44,754 @@ typedef struct ob_conf {
+ */
+ } ob_conf_t;
+
+-typedef struct ob_inode {
+- inode_t *inode;
+- struct list_head resume_fops;
+- struct list_head ob_fds;
+- int count;
+- int op_ret;
+- int op_errno;
+- gf_boolean_t open_in_progress;
+- int unlinked;
+-} ob_inode_t;
++/* A negative state represents an errno value negated. In this case the
++ * current operation cannot be processed. */
++typedef enum _ob_state {
++ /* There are no opens on the inode or the first open is already
++ * completed. The current operation can be sent directly. */
++ OB_STATE_READY = 0,
+
+-typedef struct ob_fd {
+- call_frame_t *open_frame;
+- loc_t loc;
+- dict_t *xdata;
+- int flags;
+- int op_errno;
+- ob_inode_t *ob_inode;
+- fd_t *fd;
+- gf_boolean_t opened;
+- gf_boolean_t ob_inode_fops_waiting;
+- struct list_head list;
+- struct list_head ob_fds_on_inode;
+-} ob_fd_t;
++ /* There's an open pending and it has been triggered. The current
++ * operation should be "stubbified" and processed with
++ * ob_stub_dispatch(). */
++ OB_STATE_OPEN_TRIGGERED,
+
+-ob_inode_t *
+-ob_inode_alloc(inode_t *inode)
+-{
+- ob_inode_t *ob_inode = NULL;
++ /* There's an open pending but it has not been triggered. The current
++ * operation can be processed directly but using an anonymous fd. */
++ OB_STATE_OPEN_PENDING,
+
+- ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
+- if (ob_inode == NULL)
+- goto out;
++ /* The current operation is the first open on the inode. */
++ OB_STATE_FIRST_OPEN
++} ob_state_t;
+
+- ob_inode->inode = inode;
+- INIT_LIST_HEAD(&ob_inode->resume_fops);
+- INIT_LIST_HEAD(&ob_inode->ob_fds);
+-out:
+- return ob_inode;
+-}
+-
+-void
+-ob_inode_free(ob_inode_t *ob_inode)
+-{
+- if (ob_inode == NULL)
+- goto out;
++typedef struct ob_inode {
++ /* List of stubs pending on the first open. Once the first open is
++ * complete, all these stubs will be resubmitted, and dependencies
++ * will be checked again. */
++ struct list_head resume_fops;
+
+- list_del_init(&ob_inode->resume_fops);
+- list_del_init(&ob_inode->ob_fds);
++ /* The inode this object references. */
++ inode_t *inode;
+
+- GF_FREE(ob_inode);
+-out:
+- return;
+-}
++ /* The fd from the first open sent to this inode. It will be set
++ * from the moment the open is processed until the open is fully
++ * executed or the fd is closed before actually being opened. It's
++ * NULL in all other cases. */
++ fd_t *first_fd;
++
++ /* The stub from the first open operation. When the open fop starts
++ * being processed, it's assigned the OB_OPEN_PREPARING value
++ * until the actual stub is created. This is necessary to avoid
++ * creating the stub inside a locked region. Once the stub is
++ * successfully created, it's assigned here. This value is set
++ * to NULL once the stub is resumed. */
++ call_stub_t *first_open;
++
++ /* The total number of currently open fd's on this inode. */
++ int32_t open_count;
++
++ /* This flag is set as soon as we know that the open will be
++ * sent to the bricks, even before the stub is ready. */
++ bool triggered;
++} ob_inode_t;
+
+-ob_inode_t *
+-ob_inode_get(xlator_t *this, inode_t *inode)
++/* Dummy pointer used temporarily while the actual open stub is being created */
++#define OB_OPEN_PREPARING ((call_stub_t *)-1)
++
++#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) \
++ case OB_STATE_FIRST_OPEN: \
++ gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \
++ "fop=%s", #_fop, "state=%d", __ob_state, NULL); \
++ default_##_fop##_failure_cbk(_frame, EINVAL); \
++ break; \
++ case OB_STATE_READY: \
++ default_##_fop(_frame, _xl, ##_args); \
++ break; \
++ case OB_STATE_OPEN_TRIGGERED: { \
++ call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \
++ ##_args); \
++ if (__ob_stub != NULL) { \
++ ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \
++ break; \
++ } \
++ __ob_state = -ENOMEM; \
++ } \
++ default: \
++ gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \
++ OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \
++ default_##_fop##_failure_cbk(_frame, -__ob_state)
++
++#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \
++ do { \
++ ob_inode_t *__ob_inode; \
++ fd_t *__first_fd; \
++ ob_state_t __ob_state = ob_open_and_resume_fd( \
++ _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \
++ switch (__ob_state) { \
++ case OB_STATE_OPEN_PENDING: \
++ if (!(_trigger)) { \
++ fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \
++ (_fd)->flags); \
++ if (__ob_fd != NULL) { \
++ default_##_fop(_frame, _xl, ##_args); \
++ fd_unref(__ob_fd); \
++ break; \
++ } \
++ __ob_state = -ENOMEM; \
++ } \
++ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \
++ } \
++ } while (0)
++
++#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \
++ do { \
++ ob_inode_t *__ob_inode; \
++ fd_t *__first_fd; \
++ ob_state_t __ob_state = ob_open_and_resume_fd( \
++ _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \
++ switch (__ob_state) { \
++ case OB_STATE_OPEN_PENDING: \
++ default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \
++ break; \
++ OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \
++ } \
++ } while (0)
++
++#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) \
++ do { \
++ ob_inode_t *__ob_inode; \
++ fd_t *__first_fd; \
++ ob_state_t __ob_state = ob_open_and_resume_inode( \
++ _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \
++ switch (__ob_state) { \
++ case OB_STATE_OPEN_PENDING: \
++ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \
++ } \
++ } while (0)
++
++static ob_inode_t *
++ob_inode_get_locked(xlator_t *this, inode_t *inode)
+ {
+ ob_inode_t *ob_inode = NULL;
+ uint64_t value = 0;
+- int ret = 0;
+
+- if (!inode)
+- goto out;
++ if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) {
++ return (ob_inode_t *)(uintptr_t)value;
++ }
+
+- LOCK(&inode->lock);
+- {
+- __inode_ctx_get(inode, this, &value);
+- if (value == 0) {
+- ob_inode = ob_inode_alloc(inode);
+- if (ob_inode == NULL)
+- goto unlock;
+-
+- value = (uint64_t)(uintptr_t)ob_inode;
+- ret = __inode_ctx_set(inode, this, &value);
+- if (ret < 0) {
+- ob_inode_free(ob_inode);
+- ob_inode = NULL;
+- }
+- } else {
+- ob_inode = (ob_inode_t *)(uintptr_t)value;
++ ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
++ if (ob_inode != NULL) {
++ ob_inode->inode = inode;
++ INIT_LIST_HEAD(&ob_inode->resume_fops);
++
++ value = (uint64_t)(uintptr_t)ob_inode;
++ if (__inode_ctx_set(inode, this, &value) < 0) {
++ GF_FREE(ob_inode);
++ ob_inode = NULL;
+ }
+ }
+-unlock:
+- UNLOCK(&inode->lock);
+
+-out:
+ return ob_inode;
+ }
+
+-ob_fd_t *
+-__ob_fd_ctx_get(xlator_t *this, fd_t *fd)
++static ob_state_t
++ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd,
++ int32_t open_count, bool synchronous, bool trigger,
++ ob_inode_t **pob_inode, fd_t **pfd)
+ {
+- uint64_t value = 0;
+- int ret = -1;
+- ob_fd_t *ob_fd = NULL;
++ ob_conf_t *conf;
++ ob_inode_t *ob_inode;
++ call_stub_t *open_stub;
+
+- ret = __fd_ctx_get(fd, this, &value);
+- if (ret)
+- return NULL;
++ if (inode == NULL) {
++ return OB_STATE_READY;
++ }
+
+- ob_fd = (void *)((long)value);
++ conf = xl->private;
+
+- return ob_fd;
+-}
++ *pfd = NULL;
+
+-ob_fd_t *
+-ob_fd_ctx_get(xlator_t *this, fd_t *fd)
+-{
+- ob_fd_t *ob_fd = NULL;
+-
+- LOCK(&fd->lock);
++ LOCK(&inode->lock);
+ {
+- ob_fd = __ob_fd_ctx_get(this, fd);
+- }
+- UNLOCK(&fd->lock);
+-
+- return ob_fd;
+-}
++ ob_inode = ob_inode_get_locked(xl, inode);
++ if (ob_inode == NULL) {
++ UNLOCK(&inode->lock);
+
+-int
+-__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+-{
+- uint64_t value = 0;
+- int ret = -1;
++ return -ENOMEM;
++ }
++ *pob_inode = ob_inode;
++
++ ob_inode->open_count += open_count;
++
++ /* If first_fd is not NULL, it means that there's a previous open not
++ * yet completed. */
++ if (ob_inode->first_fd != NULL) {
++ *pfd = ob_inode->first_fd;
++ /* If the current request doesn't trigger the open and it hasn't
++ * been triggered yet, we can continue without issuing the open
++ * only if the current request belongs to the same fd as the
++ * first one. */
++ if (!trigger && !ob_inode->triggered &&
++ (ob_inode->first_fd == fd)) {
++ UNLOCK(&inode->lock);
++
++ return OB_STATE_OPEN_PENDING;
++ }
+
+- value = (long)((void *)ob_fd);
++ /* We need to issue the open. It could have already been triggered
++ * before. In this case open_stub will be NULL. Or the initial open
++ * may not be completely ready yet. In this case open_stub will be
++ * OB_OPEN_PREPARING. */
++ open_stub = ob_inode->first_open;
++ ob_inode->first_open = NULL;
++ ob_inode->triggered = true;
+
+- ret = __fd_ctx_set(fd, this, value);
++ UNLOCK(&inode->lock);
+
+- return ret;
+-}
++ if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) {
++ call_resume(open_stub);
++ }
+
+-int
+-ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+-{
+- int ret = -1;
++ return OB_STATE_OPEN_TRIGGERED;
++ }
+
+- LOCK(&fd->lock);
+- {
+- ret = __ob_fd_ctx_set(this, fd, ob_fd);
+- }
+- UNLOCK(&fd->lock);
++ /* There's no pending open. Only opens can be non-synchronous, so all
++ * regular fops will be processed directly. For non-synchronous opens,
++ * we'll still process them normally (i.e. synchronously) if there are
++ * more file descriptors open. */
++ if (synchronous || (ob_inode->open_count > open_count)) {
++ UNLOCK(&inode->lock);
+
+- return ret;
+-}
++ return OB_STATE_READY;
++ }
+
+-ob_fd_t *
+-ob_fd_new(void)
+-{
+- ob_fd_t *ob_fd = NULL;
++ *pfd = fd;
+
+- ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t);
++ /* This is the first open. We keep a reference on the fd and set
++ * first_open stub to OB_OPEN_PREPARING until the actual stub can
++ * be assigned (we don't create the stub here to avoid doing memory
++ * allocations inside the mutex). */
++ ob_inode->first_fd = __fd_ref(fd);
++ ob_inode->first_open = OB_OPEN_PREPARING;
+
+- INIT_LIST_HEAD(&ob_fd->list);
+- INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode);
++ /* If lazy_open is not set, we'll need to immediately send the open,
++ * so we set triggered right now. */
++ ob_inode->triggered = !conf->lazy_open;
++ }
++ UNLOCK(&inode->lock);
+
+- return ob_fd;
++ return OB_STATE_FIRST_OPEN;
+ }
+
+-void
+-ob_fd_free(ob_fd_t *ob_fd)
++static ob_state_t
++ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count,
++ bool synchronous, bool trigger, ob_inode_t **pob_inode,
++ fd_t **pfd)
+ {
+- LOCK(&ob_fd->fd->inode->lock);
+- {
+- list_del_init(&ob_fd->ob_fds_on_inode);
+- }
+- UNLOCK(&ob_fd->fd->inode->lock);
+-
+- loc_wipe(&ob_fd->loc);
+-
+- if (ob_fd->xdata)
+- dict_unref(ob_fd->xdata);
++ uint64_t err;
+
+- if (ob_fd->open_frame) {
+- /* If we sill have a frame it means that background open has never
+- * been triggered. We need to release the pending reference. */
+- fd_unref(ob_fd->fd);
+-
+- STACK_DESTROY(ob_fd->open_frame->root);
++ if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) {
++ return (ob_state_t)-err;
+ }
+
+- GF_FREE(ob_fd);
++ return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous,
++ trigger, pob_inode, pfd);
+ }
+
+-int
+-ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+- int op_errno, fd_t *fd_ret, dict_t *xdata)
++static ob_state_t
++ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode,
++ fd_t **pfd)
+ {
+- fd_t *fd = NULL;
+- int count = 0;
+- int ob_inode_op_ret = 0;
+- int ob_inode_op_errno = 0;
+- ob_fd_t *ob_fd = NULL;
+- call_stub_t *stub = NULL, *tmp = NULL;
+- ob_inode_t *ob_inode = NULL;
+- gf_boolean_t ob_inode_fops_waiting = _gf_false;
+- struct list_head fops_waiting_on_fd, fops_waiting_on_inode;
++ bool synchronous;
+
+- fd = frame->local;
+- frame->local = NULL;
+-
+- INIT_LIST_HEAD(&fops_waiting_on_fd);
+- INIT_LIST_HEAD(&fops_waiting_on_inode);
++ /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't
++ * we also execute this open synchronously? */
++ synchronous = (flags & O_TRUNC) != 0;
+
+- ob_inode = ob_inode_get(this, fd->inode);
++ return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd);
++}
+
+- LOCK(&fd->lock);
++static int32_t
++ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
++ call_stub_t *stub)
++{
++ LOCK(&ob_inode->inode->lock);
+ {
+- ob_fd = __ob_fd_ctx_get(this, fd);
+- ob_fd->opened = _gf_true;
+-
+- ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting;
+-
+- list_splice_init(&ob_fd->list, &fops_waiting_on_fd);
+-
+- if (op_ret < 0) {
+- /* mark fd BAD for ever */
+- ob_fd->op_errno = op_errno;
+- ob_fd = NULL; /*shouldn't be freed*/
+- } else {
+- __fd_ctx_del(fd, this, NULL);
+- }
+- }
+- UNLOCK(&fd->lock);
+-
+- if (ob_inode_fops_waiting) {
+- LOCK(&fd->inode->lock);
+- {
+- count = --ob_inode->count;
+- if (op_ret < 0) {
+- /* TODO: when to reset the error? */
+- ob_inode->op_ret = -1;
+- ob_inode->op_errno = op_errno;
+- }
+-
+- if (count == 0) {
+- ob_inode->open_in_progress = _gf_false;
+- ob_inode_op_ret = ob_inode->op_ret;
+- ob_inode_op_errno = ob_inode->op_errno;
+- list_splice_init(&ob_inode->resume_fops,
+- &fops_waiting_on_inode);
+- }
++ /* We only queue a stub if the open has not been completed or
++ * cancelled. */
++ if (ob_inode->first_fd == fd) {
++ list_add_tail(&stub->list, &ob_inode->resume_fops);
++ stub = NULL;
+ }
+- UNLOCK(&fd->inode->lock);
+- }
+-
+- if (ob_fd)
+- ob_fd_free(ob_fd);
+-
+- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list)
+- {
+- list_del_init(&stub->list);
+-
+- if (op_ret < 0)
+- call_unwind_error(stub, -1, op_errno);
+- else
+- call_resume(stub);
+ }
++ UNLOCK(&ob_inode->inode->lock);
+
+- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list)
+- {
+- list_del_init(&stub->list);
+-
+- if (ob_inode_op_ret < 0)
+- call_unwind_error(stub, -1, ob_inode_op_errno);
+- else
+- call_resume(stub);
++ if (stub != NULL) {
++ call_resume(stub);
+ }
+
+- /* The background open is completed. We can release the 'fd' reference. */
+- fd_unref(fd);
+-
+- STACK_DESTROY(frame->root);
+-
+ return 0;
+ }
+
+-int
+-ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
++static int32_t
++ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
++ call_stub_t *stub)
+ {
+- call_frame_t *frame = NULL;
+-
+- if (ob_fd == NULL) {
+- LOCK(&fd->lock);
+- {
+- ob_fd = __ob_fd_ctx_get(this, fd);
+- if (!ob_fd)
+- goto unlock;
++ bool closed;
+
+- frame = ob_fd->open_frame;
+- ob_fd->open_frame = NULL;
+- }
+- unlock:
+- UNLOCK(&fd->lock);
+- } else {
+- LOCK(&fd->lock);
+- {
+- frame = ob_fd->open_frame;
+- ob_fd->open_frame = NULL;
++ LOCK(&ob_inode->inode->lock);
++ {
++ closed = ob_inode->first_fd != fd;
++ if (!closed) {
++ if (ob_inode->triggered) {
++ ob_inode->first_open = NULL;
++ } else {
++ ob_inode->first_open = stub;
++ stub = NULL;
++ }
+ }
+- UNLOCK(&fd->lock);
+ }
++ UNLOCK(&ob_inode->inode->lock);
+
+- if (frame) {
+- /* We don't need to take a reference here. We already have a reference
+- * while the open is pending. */
+- frame->local = fd;
+-
+- STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd,
+- ob_fd->xdata);
++ if (stub != NULL) {
++ if (closed) {
++ call_stub_destroy(stub);
++ fd_unref(fd);
++ } else {
++ call_resume(stub);
++ }
+ }
+
+ return 0;
+ }
+
+-void
+-ob_inode_wake(xlator_t *this, struct list_head *ob_fds)
++static void
++ob_resume_pending(struct list_head *list)
+ {
+- ob_fd_t *ob_fd = NULL, *tmp = NULL;
++ call_stub_t *stub;
+
+- if (!list_empty(ob_fds)) {
+- list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode)
+- {
+- ob_fd_wake(this, ob_fd->fd, ob_fd);
+- ob_fd_free(ob_fd);
+- }
+- }
+-}
++ while (!list_empty(list)) {
++ stub = list_first_entry(list, call_stub_t, list);
++ list_del_init(&stub->list);
+
+-/* called holding inode->lock and fd->lock */
+-void
+-ob_fd_copy(ob_fd_t *src, ob_fd_t *dst)
+-{
+- if (!src || !dst)
+- goto out;
+-
+- dst->fd = src->fd;
+- dst->loc.inode = inode_ref(src->loc.inode);
+- gf_uuid_copy(dst->loc.gfid, src->loc.gfid);
+- dst->flags = src->flags;
+- dst->xdata = dict_ref(src->xdata);
+- dst->ob_inode = src->ob_inode;
+-out:
+- return;
++ call_resume(stub);
++ }
+ }
+
+-int
+-open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode,
+- call_stub_t *stub)
++static void
++ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret,
++ int32_t op_errno)
+ {
+- ob_inode_t *ob_inode = NULL;
+- ob_fd_t *ob_fd = NULL, *tmp = NULL;
+- gf_boolean_t was_open_in_progress = _gf_false;
+- gf_boolean_t wait_for_open = _gf_false;
+- struct list_head ob_fds;
++ struct list_head list;
+
+- ob_inode = ob_inode_get(this, inode);
+- if (ob_inode == NULL)
+- goto out;
++ INIT_LIST_HEAD(&list);
+
+- INIT_LIST_HEAD(&ob_fds);
++ if (op_ret < 0) {
++ fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno);
++ }
+
+- LOCK(&inode->lock);
++ LOCK(&ob_inode->inode->lock);
+ {
+- was_open_in_progress = ob_inode->open_in_progress;
+- ob_inode->unlinked = 1;
+-
+- if (was_open_in_progress) {
+- list_add_tail(&stub->list, &ob_inode->resume_fops);
+- goto inode_unlock;
+- }
+-
+- list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode)
+- {
+- LOCK(&ob_fd->fd->lock);
+- {
+- if (ob_fd->opened)
+- goto fd_unlock;
+-
+- ob_inode->count++;
+- ob_fd->ob_inode_fops_waiting = _gf_true;
+-
+- if (ob_fd->open_frame == NULL) {
+- /* open in progress no need of wake */
+- } else {
+- tmp = ob_fd_new();
+- tmp->open_frame = ob_fd->open_frame;
+- ob_fd->open_frame = NULL;
+-
+- ob_fd_copy(ob_fd, tmp);
+- list_add_tail(&tmp->ob_fds_on_inode, &ob_fds);
+- }
+- }
+- fd_unlock:
+- UNLOCK(&ob_fd->fd->lock);
+- }
+-
+- if (ob_inode->count) {
+- wait_for_open = ob_inode->open_in_progress = _gf_true;
+- list_add_tail(&stub->list, &ob_inode->resume_fops);
++ /* Only update the fields if the file has not been closed before
++ * getting here. */
++ if (ob_inode->first_fd == fd) {
++ list_splice_init(&ob_inode->resume_fops, &list);
++ ob_inode->first_fd = NULL;
++ ob_inode->first_open = NULL;
++ ob_inode->triggered = false;
+ }
+ }
+-inode_unlock:
+- UNLOCK(&inode->lock);
++ UNLOCK(&ob_inode->inode->lock);
+
+-out:
+- if (!was_open_in_progress) {
+- if (!wait_for_open) {
+- call_resume(stub);
+- } else {
+- ob_inode_wake(this, &ob_fds);
+- }
+- }
++ ob_resume_pending(&list);
+
+- return 0;
++ fd_unref(fd);
+ }
+
+-int
+-open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub)
++static int32_t
++ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret,
++ int32_t op_errno, fd_t *fd, dict_t *xdata)
+ {
+- ob_fd_t *ob_fd = NULL;
+- int op_errno = 0;
+-
+- if (!fd)
+- goto nofd;
+-
+- LOCK(&fd->lock);
+- {
+- ob_fd = __ob_fd_ctx_get(this, fd);
+- if (!ob_fd)
+- goto unlock;
++ ob_inode_t *ob_inode;
+
+- if (ob_fd->op_errno) {
+- op_errno = ob_fd->op_errno;
+- goto unlock;
+- }
++ ob_inode = frame->local;
++ frame->local = NULL;
+
+- list_add_tail(&stub->list, &ob_fd->list);
+- }
+-unlock:
+- UNLOCK(&fd->lock);
++ ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno);
+
+-nofd:
+- if (op_errno)
+- call_unwind_error(stub, -1, op_errno);
+- else if (ob_fd)
+- ob_fd_wake(this, fd, NULL);
+- else
+- call_resume(stub);
++ STACK_DESTROY(frame->root);
+
+ return 0;
+ }
+
+-int
+-ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
++static int32_t
++ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
+ {
+- ob_fd_t *ob_fd = NULL;
+- int ret = -1;
+- ob_conf_t *conf = NULL;
+- ob_inode_t *ob_inode = NULL;
+- gf_boolean_t open_in_progress = _gf_false;
+- int unlinked = 0;
+-
+- conf = this->private;
+-
+- if (flags & O_TRUNC) {
+- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+- return 0;
+- }
+-
+- ob_inode = ob_inode_get(this, fd->inode);
+-
+- ob_fd = ob_fd_new();
+- if (!ob_fd)
+- goto enomem;
+-
+- ob_fd->ob_inode = ob_inode;
+-
+- ob_fd->fd = fd;
+-
+- ob_fd->open_frame = copy_frame(frame);
+- if (!ob_fd->open_frame)
+- goto enomem;
+- ret = loc_copy(&ob_fd->loc, loc);
+- if (ret)
+- goto enomem;
+-
+- ob_fd->flags = flags;
+- if (xdata)
+- ob_fd->xdata = dict_ref(xdata);
+-
+- LOCK(&fd->inode->lock);
+- {
+- open_in_progress = ob_inode->open_in_progress;
+- unlinked = ob_inode->unlinked;
+- if (!open_in_progress && !unlinked) {
+- ret = ob_fd_ctx_set(this, fd, ob_fd);
+- if (ret) {
+- UNLOCK(&fd->inode->lock);
+- goto enomem;
+- }
+-
+- list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds);
+- }
+- }
+- UNLOCK(&fd->inode->lock);
+-
+- /* We take a reference while the background open is pending or being
+- * processed. If we finally wind the request in the foreground, then
+- * ob_fd_free() will take care of this additional reference. */
+- fd_ref(fd);
+-
+- if (!open_in_progress && !unlinked) {
+- STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata);
+-
+- if (!conf->lazy_open)
+- ob_fd_wake(this, fd, NULL);
+- } else {
+- ob_fd_free(ob_fd);
+- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+- }
++ STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+
+ return 0;
+-enomem:
+- if (ob_fd) {
+- if (ob_fd->open_frame)
+- STACK_DESTROY(ob_fd->open_frame->root);
+-
+- loc_wipe(&ob_fd->loc);
+- if (ob_fd->xdata)
+- dict_unref(ob_fd->xdata);
+-
+- GF_FREE(ob_fd);
+- }
+-
+- return -1;
+ }
+
+-int
++static int32_t
+ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
+ {
+- fd_t *old_fd = NULL;
+- int ret = -1;
+- int op_errno = ENOMEM;
+- call_stub_t *stub = NULL;
+-
+- old_fd = fd_lookup(fd->inode, 0);
+- if (old_fd) {
+- /* open-behind only when this is the first FD */
+- stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata);
+- if (!stub) {
+- fd_unref(old_fd);
+- goto err;
+- }
+-
+- open_and_resume(this, old_fd, stub);
++ ob_inode_t *ob_inode;
++ call_frame_t *open_frame;
++ call_stub_t *stub;
++ fd_t *first_fd;
++ ob_state_t state;
++
++ state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd);
++ if (state == OB_STATE_READY) {
++ /* There's no pending open, but there are other file descriptors opened
++ * or the current flags require a synchronous open. */
++ return default_open(frame, this, loc, flags, fd, xdata);
++ }
+
+- fd_unref(old_fd);
++ if (state == OB_STATE_OPEN_TRIGGERED) {
++ /* The first open is in progress (either because it was already issued
++ * or because this request triggered it). We try to create a new stub
++ * to retry the operation once the initial open completes. */
++ stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata);
++ if (stub != NULL) {
++ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
++ }
+
+- return 0;
++ state = -ENOMEM;
+ }
+
+- ret = ob_open_behind(frame, this, loc, flags, fd, xdata);
+- if (ret) {
+- goto err;
+- }
++ if (state == OB_STATE_FIRST_OPEN) {
++ /* We try to create a stub for the new open. A new frame needs to be
++ * used because the current one may be destroyed soon after sending
++ * the open's reply. */
++ open_frame = copy_frame(frame);
++ if (open_frame != NULL) {
++ stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd,
++ xdata);
++ if (stub != NULL) {
++ open_frame->local = ob_inode;
+
+- return 0;
+-err:
+- gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s",
+- loc->path);
++ /* TODO: Previous version passed xdata back to the caller, but
++ * probably this doesn't make sense since it won't contain
++ * any requested data. I think it would be better to pass
++ * NULL for xdata. */
++ default_open_cbk(frame, NULL, this, 0, 0, fd, xdata);
+
+- STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0);
++ return ob_open_dispatch(this, ob_inode, first_fd, stub);
++ }
+
+- return 0;
+-}
++ STACK_DESTROY(open_frame->root);
++ }
+
+-fd_t *
+-ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag)
+-{
+- fd_t *wind_fd = NULL;
+- ob_fd_t *ob_fd = NULL;
+- ob_conf_t *conf = NULL;
++ /* In case of error, simulate a regular completion but with an error
++ * code. */
++ ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM);
+
+- conf = this->private;
++ state = -ENOMEM;
++ }
+
+- ob_fd = ob_fd_ctx_get(this, fd);
++ /* In case of failure we need to decrement the number of open files because
++ * ob_fdclose() won't be called. */
+
+- if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) {
+- wind_fd = fd_anonymous(fd->inode);
+- if ((ob_fd->flags & O_DIRECT) && (flag))
+- *flag = *flag | O_DIRECT;
+- } else {
+- wind_fd = fd_ref(fd);
++ LOCK(&fd->inode->lock);
++ {
++ ob_inode->open_count--;
+ }
++ UNLOCK(&fd->inode->lock);
+
+- return wind_fd;
++ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
++ "open", "path=%s", loc->path, NULL);
++
++ return default_open_failure_cbk(frame, -state);
+ }
+
+-int
++static int32_t
+ ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+- fd_t *wind_fd = NULL;
+- ob_conf_t *conf = NULL;
++ ob_conf_t *conf = this->private;
++ bool trigger = conf->read_after_open || !conf->use_anonymous_fd;
+
+- conf = this->private;
+-
+- if (!conf->read_after_open)
+- wind_fd = ob_get_wind_fd(this, fd, &flags);
+- else
+- wind_fd = fd_ref(fd);
+-
+- stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset,
+- flags, xdata);
+- fd_unref(wind_fd);
+-
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, wind_fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
++ OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset,
+- flags, iobref, xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0);
++ OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags,
++ iobref, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+- fd_t *wind_fd = NULL;
+-
+- wind_fd = ob_get_wind_fd(this, fd, NULL);
+-
+- stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata);
++ ob_conf_t *conf = this->private;
++ bool trigger = !conf->use_anonymous_fd;
+
+- fd_unref(wind_fd);
+-
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, wind_fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
++ OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+- fd_t *wind_fd = NULL;
+-
+- wind_fd = ob_get_wind_fd(this, fd, NULL);
++ ob_conf_t *conf = this->private;
++ bool trigger = !conf->use_anonymous_fd;
+
+- stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what,
+- xdata);
+-
+- fd_unref(wind_fd);
+-
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, wind_fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
++ OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+- ob_fd_t *ob_fd = NULL;
+- gf_boolean_t unwind = _gf_false;
+-
+- LOCK(&fd->lock);
+- {
+- ob_fd = __ob_fd_ctx_get(this, fd);
+- if (ob_fd && ob_fd->open_frame)
+- /* if open() was never wound to backend,
+- no need to wind flush() either.
+- */
+- unwind = _gf_true;
+- }
+- UNLOCK(&fd->lock);
+-
+- if (unwind)
+- goto unwind;
+-
+- stub = fop_flush_stub(frame, default_flush_resume, fd, xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0);
+-
+- return 0;
+-
+-unwind:
+- STACK_UNWIND_STRICT(flush, frame, 0, 0, 0);
++ OB_POST_FLUSH(this, frame, fd, fd, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0);
++ OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0);
++ OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset,
+- xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0);
++ OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags,
+- xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0);
++ OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0);
++ OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name,
+- xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0);
++ OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int cmd, struct gf_flock *flock, dict_t *xdata)
+ {
+- call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume,
+- volume, fd, cmd, flock, xdata);
+- if (stub)
+- open_and_resume(this, fd, stub);
+- else
+- STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0);
++ OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+ {
+- call_stub_t *stub = fop_fentrylk_stub(
+- frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata);
+- if (stub)
+- open_and_resume(this, fd, stub);
+- else
+- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0);
++ OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type,
++ xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+ {
+- call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd,
+- optype, xattr, xdata);
+- if (stub)
+- open_and_resume(this, fd, stub);
+- else
+- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0);
++ OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt,
+ int valid, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid,
+- xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0);
++ OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+ {
+- call_stub_t *stub;
+-
+- stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset,
+- len, xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
++ OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata);
+
+ return 0;
+-err:
+- STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+- return 0;
+ }
+
+-int
++static int32_t
+ ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+ {
+- call_stub_t *stub;
+-
+- stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len,
+- xdata);
+- if (!stub)
+- goto err;
+-
+- open_and_resume(this, fd, stub);
++ OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata);
+
+ return 0;
+-err:
+- STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
+- return 0;
+ }
+
+-int
++static int32_t
+ ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+ {
+- call_stub_t *stub;
+-
+- stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len,
+- xdata);
+- if (!stub)
+- goto err;
++ OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata);
+
+- open_and_resume(this, fd, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata);
+- if (!stub)
+- goto err;
+-
+- open_all_pending_fds_and_resume(this, loc->inode, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0);
++ OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata);
+
+ return 0;
+ }
+
+-int
++static int32_t
+ ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
+ dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata);
+- if (!stub)
+- goto err;
+-
+- open_all_pending_fds_and_resume(this, dst->inode, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0);
++ OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata);
+
+ return 0;
+ }
+
+-int32_t
++static int32_t
+ ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+-
+- stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid,
+- xdata);
+- if (!stub)
+- goto err;
++ OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid,
++ xdata);
+
+- open_all_pending_fds_and_resume(this, loc->inode, stub);
+-
+- return 0;
+-err:
+- STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+-int32_t
++static int32_t
+ ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+ {
+- call_stub_t *stub = NULL;
+- gf_boolean_t access_xattr = _gf_false;
+-
+ if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) ||
+ dict_get(dict, POSIX_ACL_ACCESS_XATTR) ||
+- dict_get(dict, GF_SELINUX_XATTR_KEY))
+- access_xattr = _gf_true;
+-
+- if (!access_xattr)
++ dict_get(dict, GF_SELINUX_XATTR_KEY)) {
+ return default_setxattr(frame, this, loc, dict, flags, xdata);
++ }
+
+- stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags,
+- xdata);
+- if (!stub)
+- goto err;
+-
+- open_all_pending_fds_and_resume(this, loc->inode, stub);
++ OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags,
++ xdata);
+
+ return 0;
+-err:
+- STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL);
+- return 0;
+ }
+
+-int
+-ob_release(xlator_t *this, fd_t *fd)
++static void
++ob_fdclose(xlator_t *this, fd_t *fd)
+ {
+- ob_fd_t *ob_fd = NULL;
++ struct list_head list;
++ ob_inode_t *ob_inode;
++ call_stub_t *stub;
++
++ INIT_LIST_HEAD(&list);
++ stub = NULL;
+
+- ob_fd = ob_fd_ctx_get(this, fd);
++ LOCK(&fd->inode->lock);
++ {
++ ob_inode = ob_inode_get_locked(this, fd->inode);
++ if (ob_inode != NULL) {
++ ob_inode->open_count--;
++
++ /* If this fd is the same as ob_inode->first_fd, it means that
++ * the initial open has not fully completed. We'll try to cancel
++ * it. */
++ if (ob_inode->first_fd == fd) {
++ if (ob_inode->first_open == OB_OPEN_PREPARING) {
++ /* In this case ob_open_dispatch() has not been called yet.
++ * We clear first_fd and first_open to allow that function
++ * to know that the open is not really needed. This also
++ * allows other requests to work as expected if they
++ * arrive before the dispatch function is called. If there
++ * are pending fops, we can directly process them here.
++ * (note that there shouldn't be any fd related fops, but
++ * if there are, it's fine if they fail). */
++ ob_inode->first_fd = NULL;
++ ob_inode->first_open = NULL;
++ ob_inode->triggered = false;
++ list_splice_init(&ob_inode->resume_fops, &list);
++ } else if (!ob_inode->triggered) {
++ /* If the open has already been dispatched, we can only
++ * cancel it if it has not been triggered. Otherwise we
++ * simply wait until it completes. While it's not triggered,
++ * first_open must be a valid stub and there can't be any
++ * pending fops. */
++ GF_ASSERT((ob_inode->first_open != NULL) &&
++ list_empty(&ob_inode->resume_fops));
++
++ ob_inode->first_fd = NULL;
++ stub = ob_inode->first_open;
++ ob_inode->first_open = NULL;
++ }
++ }
++ }
++ }
++ UNLOCK(&fd->inode->lock);
+
+- ob_fd_free(ob_fd);
++ if (stub != NULL) {
++ call_stub_destroy(stub);
++ fd_unref(fd);
++ }
+
+- return 0;
++ ob_resume_pending(&list);
+ }
+
+ int
+ ob_forget(xlator_t *this, inode_t *inode)
+ {
+- ob_inode_t *ob_inode = NULL;
++ ob_inode_t *ob_inode;
+ uint64_t value = 0;
+
+- inode_ctx_del(inode, this, &value);
+-
+- if (value) {
++ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
+ ob_inode = (ob_inode_t *)(uintptr_t)value;
+- ob_inode_free(ob_inode);
++ GF_FREE(ob_inode);
+ }
+
+ return 0;
+@@ -1153,20 +823,18 @@ ob_priv_dump(xlator_t *this)
+ int
+ ob_fdctx_dump(xlator_t *this, fd_t *fd)
+ {
+- ob_fd_t *ob_fd = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+- int ret = 0;
++ uint64_t value = 0;
++ int ret = 0, error = 0;
+
+ ret = TRY_LOCK(&fd->lock);
+ if (ret)
+ return 0;
+
+- ob_fd = __ob_fd_ctx_get(this, fd);
+- if (!ob_fd) {
+- UNLOCK(&fd->lock);
+- return 0;
++ if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) {
++ error = (int32_t)value;
+ }
+
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind",
+@@ -1175,17 +843,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd)
+
+ gf_proc_dump_write("fd", "%p", fd);
+
+- gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame);
+-
+- if (ob_fd->open_frame)
+- gf_proc_dump_write("open_frame.root.unique", "%" PRIu64,
+- ob_fd->open_frame->root->unique);
+-
+- gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path);
+-
+- gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid));
+-
+- gf_proc_dump_write("flags", "%d", ob_fd->flags);
++ gf_proc_dump_write("error", "%d", error);
+
+ UNLOCK(&fd->lock);
+
+@@ -1307,7 +965,7 @@ struct xlator_fops fops = {
+ };
+
+ struct xlator_cbks cbks = {
+- .release = ob_release,
++ .fdclose = ob_fdclose,
+ .forget = ob_forget,
+ };
+
+--
+1.8.3.1
+
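The rewrite above reduces open-behind to one queue-or-resume decision taken under the inode lock: while the first open is pending, incoming fops are parked as stubs on the inode, and once the open completes the queue is drained outside the lock. Below is a minimal standalone sketch of that pattern in plain pthreads C; all names are illustrative and none of this is the actual gluster API.

#include <pthread.h>
#include <stddef.h>

struct stub {
    struct stub *next;
    void (*resume)(struct stub *s);   /* continuation for the parked fop */
};

struct ob_state {
    pthread_mutex_t lock;
    int open_pending;                 /* first open still in flight? */
    struct stub *head;
    struct stub *tail;
};

/* Queue the stub if the open is pending, otherwise run it immediately. */
static void ob_dispatch(struct ob_state *st, struct stub *s)
{
    int queued = 0;

    pthread_mutex_lock(&st->lock);
    if (st->open_pending) {
        s->next = NULL;
        if (st->tail != NULL)
            st->tail->next = s;
        else
            st->head = s;
        st->tail = s;
        queued = 1;
    }
    pthread_mutex_unlock(&st->lock);

    if (!queued)
        s->resume(s);
}

/* Called once the background open finishes: splice under the lock,
 * drain outside it. */
static void ob_open_done(struct ob_state *st)
{
    struct stub *s, *next;

    pthread_mutex_lock(&st->lock);
    st->open_pending = 0;
    s = st->head;
    st->head = st->tail = NULL;
    pthread_mutex_unlock(&st->lock);

    while (s != NULL) {
        next = s->next;
        s->resume(s);
        s = next;
    }
}

Draining outside the lock matters: resuming a stub can issue new fops on the same inode, which would otherwise deadlock on the inode lock, and it mirrors how ob_resume_pending() is called after UNLOCK above.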
diff --git a/0524-open-behind-fix-call_frame-leak.patch b/0524-open-behind-fix-call_frame-leak.patch
new file mode 100644
index 0000000..75a243d
--- /dev/null
+++ b/0524-open-behind-fix-call_frame-leak.patch
@@ -0,0 +1,70 @@
+From 36dddf59a02d91d3db5b124be626ab6bc235ed5a Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Wed, 19 Aug 2020 23:27:38 +0200
+Subject: [PATCH 524/526] open-behind: fix call_frame leak
+
+When an open was delayed, a copy of the frame was created because the
+current frame was used to unwind the "fake" open. When the open was
+actually sent, the frame was correctly destroyed. However, if the file
+was closed before needing to send the open, the frame was not destroyed.
+
+This patch correctly destroys the frame in all cases.
+
+Upstream patch:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24892
+> Change-Id: I8c00fc7f15545c240e8151305d9e4cf06d653926
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+> Fixes: #1440
+
+BUG: 1830713
+Change-Id: I8c00fc7f15545c240e8151305d9e4cf06d653926
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224488
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/open-behind/src/open-behind.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index e43fe73..1ab635e 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -333,6 +333,14 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ return 0;
+ }
+
++static void
++ob_open_destroy(call_stub_t *stub, fd_t *fd)
++{
++ STACK_DESTROY(stub->frame->root);
++ call_stub_destroy(stub);
++ fd_unref(fd);
++}
++
+ static int32_t
+ ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ call_stub_t *stub)
+@@ -355,8 +363,7 @@ ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+
+ if (stub != NULL) {
+ if (closed) {
+- call_stub_destroy(stub);
+- fd_unref(fd);
++ ob_open_destroy(stub, fd);
+ } else {
+ call_resume(stub);
+ }
+@@ -776,8 +783,7 @@ ob_fdclose(xlator_t *this, fd_t *fd)
+ UNLOCK(&fd->inode->lock);
+
+ if (stub != NULL) {
+- call_stub_destroy(stub);
+- fd_unref(fd);
++ ob_open_destroy(stub, fd);
+ }
+
+ ob_resume_pending(&list);
+--
+1.8.3.1
+
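As a rough standalone model of the leak this patch fixes (illustrative names, not gluster's): a pending open owns a copied call frame plus an fd reference, and every path that abandons it must release both. Funneling all abandon paths through one destroy helper, as ob_open_destroy() does, keeps them from diverging again.

#include <stdlib.h>

struct fd_ref { int refs; };

struct pending_open {
    struct fd_ref *fd;
    void *frame;                  /* stands in for the copied call frame */
};

static void fd_unref(struct fd_ref *fd) { fd->refs--; }

/* Single teardown used by both the dispatch path and the early-close
 * path. Before the fix, the early-close path skipped freeing the
 * frame, which is exactly the leak described above. */
static void pending_open_destroy(struct pending_open *po)
{
    free(po->frame);
    fd_unref(po->fd);
    free(po);
}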
diff --git a/0525-open-behind-implement-create-fop.patch b/0525-open-behind-implement-create-fop.patch
new file mode 100644
index 0000000..c7a5329
--- /dev/null
+++ b/0525-open-behind-implement-create-fop.patch
@@ -0,0 +1,109 @@
+From 41aae052b5e3afe64d3e0668643726bab0e77265 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 4 Sep 2020 14:49:50 +0200
+Subject: [PATCH 525/526] open-behind: implement create fop
+
+Open-behind didn't implement the create fop. Because of that, created
+files were not counted toward the number of open fds, which could cause
+future opens to be delayed when they shouldn't be.
+
+This patch implements the create fop. It also fixes a problem when
+destroying the stack: when frame->local was not NULL, STACK_DESTROY()
+tried to mem_put() it, which is not correct.
+
+Upstream patch:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24953
+> Fixes: #1440
+> Change-Id: Ic982bad07d4af30b915d7eb1fbcef7a847a45869
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1830713
+Change-Id: Ic982bad07d4af30b915d7eb1fbcef7a847a45869
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224489
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/open-behind/src/open-behind.c | 52 +++++++++++++++++++++++
+ 1 file changed, 52 insertions(+)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index 1ab635e..600c3b6 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -336,6 +336,7 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ static void
+ ob_open_destroy(call_stub_t *stub, fd_t *fd)
+ {
++ stub->frame->local = NULL;
+ STACK_DESTROY(stub->frame->root);
+ call_stub_destroy(stub);
+ fd_unref(fd);
+@@ -516,6 +517,56 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ }
+
+ static int32_t
++ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
++{
++ ob_inode_t *ob_inode;
++ call_stub_t *stub;
++ fd_t *first_fd;
++ ob_state_t state;
++
++ /* Create requests are never delayed. We always send them synchronously. */
++ state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode,
++ &first_fd);
++ if (state == OB_STATE_READY) {
++ /* There's no pending open, but there are other file descriptors opened
++ * so we simply forward the request synchronously. */
++ return default_create(frame, this, loc, flags, mode, umask, fd, xdata);
++ }
++
++ if (state == OB_STATE_OPEN_TRIGGERED) {
++ /* The first open is in progress (either because it was already issued
++ * or because this request triggered it). We try to create a new stub
++ * to retry the operation once the initial open completes. */
++ stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd,
++ xdata);
++ if (stub != NULL) {
++ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
++ }
++
++ state = -ENOMEM;
++ }
++
++ /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never
++ * be returned by ob_open_and_resume_fd(). If we are here it can only be
++ * because there has been a problem. */
++
++ /* In case of failure we need to decrement the number of open files because
++ * ob_fdclose() won't be called. */
++
++ LOCK(&fd->inode->lock);
++ {
++ ob_inode->open_count--;
++ }
++ UNLOCK(&fd->inode->lock);
++
++ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
++ "create", "path=%s", loc->path, NULL);
++
++ return default_create_failure_cbk(frame, -state);
++}
++
++static int32_t
+ ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+ {
+@@ -946,6 +997,7 @@ fini(xlator_t *this)
+
+ struct xlator_fops fops = {
+ .open = ob_open,
++ .create = ob_create,
+ .readv = ob_readv,
+ .writev = ob_writev,
+ .flush = ob_flush,
+--
+1.8.3.1
+
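The invariant this patch restores can be sketched independently of gluster (hypothetical names): every path that produces an open fd, open and create alike, must increment the same per-inode counter that close decrements, otherwise the counter under-counts and a later open can be mistaken for the "first" one and delayed.

#include <pthread.h>

struct inode_ctx {
    pthread_mutex_t lock;
    int open_count;               /* fds produced by open *and* create */
};

static void fd_opened(struct inode_ctx *c)
{
    pthread_mutex_lock(&c->lock);
    c->open_count++;
    pthread_mutex_unlock(&c->lock);
}

static void fd_closed(struct inode_ctx *c)
{
    pthread_mutex_lock(&c->lock);
    c->open_count--;
    pthread_mutex_unlock(&c->lock);
}

Before the fix, the create path never performed the increment, so open_count drifted low and opens that should have been sent synchronously could be delayed.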
diff --git a/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch b/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch
new file mode 100644
index 0000000..fb74fd8
--- /dev/null
+++ b/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch
@@ -0,0 +1,44 @@
+From baeca3c9b70548463ceea0ae27e6f98cf06e96b7 Mon Sep 17 00:00:00 2001
+From: srijan-sivakumar <ssivakum@redhat.com>
+Date: Tue, 28 Jul 2020 22:27:34 +0530
+Subject: [PATCH 526/526] Quota quota_fsck.py, converting byte string to string
+
+Issue: The quota_fsck.py script throws a TypeError
+because the data is read as bytes and string
+operations are then applied to it. In Python 3,
+strings are unicode, hence the type error.
+
+Code Changes:
+Decoding the bytes value using utf-8.
+
+>Change-Id: Ia1ff52a821d664a371c8166692ff506ae39f6e40
+>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+>Fixes: #1401
+Upstream patch: https://review.gluster.org/c/glusterfs/+/24785
+
+BUG: 1719171
+Change-Id: Ia1ff52a821d664a371c8166692ff506ae39f6e40
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224780
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Kshithij Iyer <kiyer@redhat.com>
+Reviewed-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ extras/quota/quota_fsck.py | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py
+index 174f2a2..ea8d638 100755
+--- a/extras/quota/quota_fsck.py
++++ b/extras/quota/quota_fsck.py
+@@ -157,6 +157,7 @@ def get_quota_xattr_brick(dpath):
+ xattr_dict['parents'] = {}
+
+ for xattr in pairs:
++ xattr = xattr.decode("utf-8")
+ xattr_key = xattr.split("=")[0]
+ if re.search("# file:", xattr_key):
+ # skip the file comment
+--
+1.8.3.1
+
diff --git a/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch b/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch
new file mode 100644
index 0000000..133a24e
--- /dev/null
+++ b/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch
@@ -0,0 +1,200 @@
+From 4152c77defac24ace3b1b6b9cc81a4f614254e4f Mon Sep 17 00:00:00 2001
+From: srijan-sivakumar <ssivakum@redhat.com>
+Date: Sat, 18 Jul 2020 05:59:09 +0530
+Subject: [PATCH 527/532] Events: Socket creation after getaddrinfo and IPv4
+ and IPv6 packet capture
+
+Issue: Currently, the socket is created before
+getaddrinfo is invoked. This can cause a mismatch
+between the protocol and address families of the
+created socket and the result of the getaddrinfo
+API. Also, the glustereventsd UDP server by default
+only captures IPv4 packets, so IPv6 packets are not
+captured at all.
+
+Code Changes:
+1. Modified the socket creation in such a way that
+the parameters taken in are dependent upon the
+result of the getaddrinfo function.
+2. Created a subclass for adding address family
+in glustereventsd.py for both AF_INET and AF_INET6.
+3. Modified addresses in the eventsapiconf.py.in
+
+Reasoning behind the approach:
+1. If we are using the getaddrinfo function, then
+socket creation should happen only after we have
+checked that valid addresses were returned. Hence
+socket creation should come after the call to
+getaddrinfo.
+2. The listening server which pushes the events
+to the webhook has to listen for both IPv4 and
+IPv6 messages, since we cannot be sure which
+address family is picked in _gf_event.
+
+>Fixes: #1377
+>Change-Id: I568dcd1a977c8832f0fef981e1f81cac7043c760
+>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Upstream patch: https://review.gluster.org/c/glusterfs/+/24722
+
+BUG: 1814744
+Change-Id: I568dcd1a977c8832f0fef981e1f81cac7043c760
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/225567
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ events/src/eventsapiconf.py.in | 2 ++
+ events/src/glustereventsd.py | 37 ++++++++++++++++++++++++++++++-------
+ libglusterfs/src/events.c | 27 +++++++++++++++++++--------
+ 3 files changed, 51 insertions(+), 15 deletions(-)
+
+diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in
+index 76b5954..700093b 100644
+--- a/events/src/eventsapiconf.py.in
++++ b/events/src/eventsapiconf.py.in
+@@ -28,6 +28,8 @@ def get_glusterd_workdir():
+ return glusterd_workdir
+
+ SERVER_ADDRESS = "0.0.0.0"
++SERVER_ADDRESSv4 = "0.0.0.0"
++SERVER_ADDRESSv6 = "::1"
+ DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json"
+ CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json"
+ CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC
+diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py
+index c4c7b65..341a3b6 100644
+--- a/events/src/glustereventsd.py
++++ b/events/src/glustereventsd.py
+@@ -13,6 +13,7 @@
+ from __future__ import print_function
+ import sys
+ import signal
++import threading
+ try:
+ import socketserver
+ except ImportError:
+@@ -23,10 +24,17 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
+ from eventtypes import all_events
+ import handlers
+ import utils
+-from eventsapiconf import SERVER_ADDRESS, PID_FILE
++from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE
+ from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES
+ from utils import logger, PidFile, PidFileLockFailed, boolify
+
++# Subclass so that specifically IPv4 packets are captured
++class UDPServerv4(socketserver.ThreadingUDPServer):
++ address_family = socket.AF_INET
++
++# Subclass so that specifically IPv6 packets are captured
++class UDPServerv6(socketserver.ThreadingUDPServer):
++ address_family = socket.AF_INET6
+
+ class GlusterEventsRequestHandler(socketserver.BaseRequestHandler):
+
+@@ -89,6 +97,10 @@ def signal_handler_sigusr2(sig, frame):
+ utils.restart_webhook_pool()
+
+
++def UDP_server_thread(sock):
++ sock.serve_forever()
++
++
+ def init_event_server():
+ utils.setup_logger()
+ utils.load_all()
+@@ -99,15 +111,26 @@ def init_event_server():
+ sys.stderr.write("Unable to get Port details from Config\n")
+ sys.exit(1)
+
+- # Start the Eventing Server, UDP Server
++ # Creating the Eventing Server, UDP Server for IPv4 packets
++ try:
++ serverv4 = UDPServerv4((SERVER_ADDRESSv4, port),
++ GlusterEventsRequestHandler)
++ except socket.error as e:
++ sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e))
++ sys.exit(1)
++ # Creating the Eventing Server, UDP Server for IPv6 packets
+ try:
+- server = socketserver.ThreadingUDPServer(
+- (SERVER_ADDRESS, port),
+- GlusterEventsRequestHandler)
++ serverv6 = UDPServerv6((SERVER_ADDRESSv6, port),
++ GlusterEventsRequestHandler)
+ except socket.error as e:
+- sys.stderr.write("Failed to start Eventsd: {0}\n".format(e))
++ sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e))
+ sys.exit(1)
+- server.serve_forever()
++ server_thread1 = threading.Thread(target=UDP_server_thread,
++ args=(serverv4,))
++ server_thread2 = threading.Thread(target=UDP_server_thread,
++ args=(serverv6,))
++ server_thread1.start()
++ server_thread2.start()
+
+
+ def get_args():
+diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
+index 6d1e383..4d720ca 100644
+--- a/libglusterfs/src/events.c
++++ b/libglusterfs/src/events.c
+@@ -40,6 +40,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ char *host = NULL;
+ struct addrinfo hints;
+ struct addrinfo *result = NULL;
++ struct addrinfo *iter_result_ptr = NULL;
+ xlator_t *this = THIS;
+ char *volfile_server_transport = NULL;
+
+@@ -51,13 +52,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ goto out;
+ }
+
+- /* Initialize UDP socket */
+- sock = socket(AF_INET, SOCK_DGRAM, 0);
+- if (sock < 0) {
+- ret = EVENT_ERROR_SOCKET;
+- goto out;
+- }
+-
+ if (ctx) {
+ volfile_server_transport = ctx->cmd_args.volfile_server_transport;
+ }
+@@ -66,7 +60,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ }
+
+ /* host = NULL returns localhost */
+- host = NULL;
+ if (ctx && ctx->cmd_args.volfile_server &&
+ (strcmp(volfile_server_transport, "unix"))) {
+ /* If it is client code then volfile_server is set
+@@ -84,6 +77,24 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+ goto out;
+ }
+
++ // iterate over the results and break when socket creation succeeds.
++ for (iter_result_ptr = result; iter_result_ptr != NULL;
++ iter_result_ptr = iter_result_ptr->ai_next) {
++ sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype,
++ iter_result_ptr->ai_protocol);
++ if (sock != -1) {
++ break;
++ }
++ }
++ /*
++ * If none of the addrinfo structures lead to a successful socket
++ * creation, socket creation has failed.
++ */
++ if (sock < 0) {
++ ret = EVENT_ERROR_SOCKET;
++ goto out;
++ }
++
+ va_start(arguments, fmt);
+ ret = gf_vasprintf(&msg, fmt, arguments);
+ va_end(arguments);
+--
+1.8.3.1
+
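The resolve-first idiom the patch enforces can be shown as a self-contained sender: resolve the host, then create the socket from each result's family, type and protocol until one works. This is only a sketch of the approach with a hypothetical helper name, not code from the patch.

#include <netdb.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int send_udp_event(const char *host, const char *port,
                          const void *buf, size_t len)
{
    struct addrinfo hints, *res, *ai;
    int sock = -1;

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;      /* accept both IPv4 and IPv6 */
    hints.ai_socktype = SOCK_DGRAM;

    if (getaddrinfo(host, port, &hints, &res) != 0)
        return -1;

    /* Create the socket from the resolved address, never before it. */
    for (ai = res; ai != NULL; ai = ai->ai_next) {
        sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (sock == -1)
            continue;
        if (sendto(sock, buf, len, 0, ai->ai_addr, ai->ai_addrlen) >= 0)
            break;                    /* sent on this address family */
        close(sock);
        sock = -1;
    }
    freeaddrinfo(res);

    if (sock == -1)
        return -1;
    close(sock);
    return 0;
}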
diff --git a/0528-Extras-Removing-xattr_analysis-script.patch b/0528-Extras-Removing-xattr_analysis-script.patch
new file mode 100644
index 0000000..d04068d
--- /dev/null
+++ b/0528-Extras-Removing-xattr_analysis-script.patch
@@ -0,0 +1,134 @@
+From 3fc74ce6c282f0f43fdcfeda47b71a1b19945b6d Mon Sep 17 00:00:00 2001
+From: srijan-sivakumar <ssivakum@redhat.com>
+Date: Wed, 3 Feb 2021 10:11:04 +0530
+Subject: [PATCH 528/532] Extras: Removing xattr_analysis script
+
+The xattr_analysis.py script is rarely used for
+debugging, and since it has some dependencies, it
+is being removed from the release.
+
+If need be, it will be shared directly with the customer.
+
+Label: DOWNSTREAM ONLY
+BUG: 1719171
+
+Change-Id: I4bb0df3ebfa7e43e13858b4b6e3efbb02ea79d5f
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/226301
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/quota/Makefile.am | 4 +--
+ extras/quota/xattr_analysis.py | 73 ------------------------------------------
+ glusterfs.spec.in | 1 -
+ 3 files changed, 2 insertions(+), 76 deletions(-)
+ delete mode 100755 extras/quota/xattr_analysis.py
+
+diff --git a/extras/quota/Makefile.am b/extras/quota/Makefile.am
+index cdb6be1..e4d9322 100644
+--- a/extras/quota/Makefile.am
++++ b/extras/quota/Makefile.am
+@@ -2,7 +2,7 @@ scriptsdir = $(datadir)/glusterfs/scripts
+ scripts_SCRIPTS = log_accounting.sh
+
+ if WITH_SERVER
+-scripts_SCRIPTS += xattr_analysis.py quota_fsck.py
++scripts_SCRIPTS += quota_fsck.py
+ endif
+
+-EXTRA_DIST = log_accounting.sh xattr_analysis.py quota_fsck.py
++EXTRA_DIST = log_accounting.sh quota_fsck.py
+diff --git a/extras/quota/xattr_analysis.py b/extras/quota/xattr_analysis.py
+deleted file mode 100755
+index 7bd7d96..0000000
+--- a/extras/quota/xattr_analysis.py
++++ /dev/null
+@@ -1,73 +0,0 @@
+-#!/usr/bin/python3
+-# Below script has two purposes
+-# 1. Display xattr of entire FS tree in a human readable form
+-# 2. Display all the directory where contri and size mismatch.
+-# (If there are any directory with contri and size mismatch that are not dirty
+-# then that highlights a propagation issue)
+-# The script takes only one input LOG _FILE generated from the command,
+-# find <brick_path> | xargs getfattr -d -m. -e hex > log_gluster_xattr
+-
+-from __future__ import print_function
+-import re
+-import subprocess
+-import sys
+-from hurry.filesize import size
+-
+-if len(sys.argv) < 2:
+- sys.exit('Usage: %s log_gluster_xattr \n'
+- 'to generate log_gluster_xattr use: \n'
+- 'find <brick_path> | xargs getfattr -d -m. -e hex > log_gluster_xattr'
+- % sys.argv[0])
+-LOG_FILE=sys.argv[1]
+-
+-def get_quota_xattr_brick():
+- out = subprocess.check_output (["/usr/bin/cat", LOG_FILE])
+- pairs = out.splitlines()
+-
+- xdict = {}
+- mismatch_size = [('====contri_size===', '====size====')]
+- for xattr in pairs:
+- k = xattr.split("=")[0]
+- if re.search("# file:", k):
+- print(xdict)
+- filename=k
+- print("=====" + filename + "=======")
+- xdict = {}
+- elif k is "":
+- pass
+- else:
+- print(xattr)
+- v = xattr.split("=")[1]
+- if re.search("contri", k):
+- if len(v) == 34:
+- # for files size is obtained in iatt, file count should be 1, dir count=0
+- xdict['contri_file_count'] = int(v[18:34], 16)
+- xdict['contri_dir_count'] = 0
+- else:
+- xdict['contri_size'] = size(int(v[2:18], 16))
+- xdict['contri_file_count'] = int(v[18:34], 16)
+- xdict['contri_dir_count'] = int(v[34:], 16)
+- elif re.search("size", k):
+- xdict['size'] = size(int(v[2:18], 16))
+- xdict['file_count'] = int(v[18:34], 16)
+- xdict['dir_count'] = int(v[34:], 16)
+- elif re.search("dirty", k):
+- if v == '0x3000':
+- xdict['dirty'] = False
+- elif v == '0x3100':
+- xdict['dirty'] = True
+- elif re.search("limit_objects", k):
+- xdict['limit_objects'] = int(v[2:18], 16)
+- elif re.search("limit_set", k):
+- xdict['limit_set'] = size(int(v[2:18], 16))
+-
+- if 'size' in xdict and 'contri_size' in xdict and xdict['size'] != xdict['contri_size']:
+- mismatch_size.append((xdict['contri_size'], xdict['size'], filename))
+-
+- for values in mismatch_size:
+- print(values)
+-
+-
+-if __name__ == '__main__':
+- get_quota_xattr_brick()
+-
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 30d7162..2be7677 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1380,7 +1380,6 @@ exit 0
+ %if ( 0%{!?_without_server:1} )
+ %files server
+ %doc extras/clear_xattrs.sh
+-%{_datadir}/glusterfs/scripts/xattr_analysis.py*
+ %{_datadir}/glusterfs/scripts/quota_fsck.py*
+ # sysconf
+ %config(noreplace) %{_sysconfdir}/glusterfs
+--
+1.8.3.1
+
diff --git a/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch b/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch
new file mode 100644
index 0000000..671451d
--- /dev/null
+++ b/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch
@@ -0,0 +1,75 @@
+From 1c7e96e73273b7891ea6ef0d768c2bf7ff5de7b0 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <sacharya@redhat.com>
+Date: Thu, 4 Feb 2021 16:29:39 +0530
+Subject: [PATCH 529/532] geo-rep: prompt should work for ignore_deletes
+
+The python cli is intelligent enough to parse both "-" and "_" alike:
+
+Example:
+geo-replication config updated successfully
+sync_job 4
+geo-replication config updated successfully
+gluster volume geo-replication primary 127.0.0.1::secondary config | grep sync_jobs
+sync_jobs:5
+
+Thus the prompt which appears after setting ignore-deletes to true
+should work for both ignore-deletes and ignore_deletes.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1224906
+Change-Id: I89f854200a604d07d3ac6c374fe6d445ce9f22ca
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/226599
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c | 5 +++--
+ tests/00-geo-rep/bug-1708603.t | 12 ++++++++++--
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 34f17c9..dda8979 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -3107,8 +3107,9 @@ cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
+ if (!ret)
+ ret = dict_set_int32(dict, "type", type);
+ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+- if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") &&
+- !strcmp((char *)words[wordcount - 1], "true")) {
++ if ((((!strcmp((char *)words[wordcount - 2], "ignore_deletes")) ||
++ (!strcmp((char *)words[wordcount - 2], "ignore-deletes")))) &&
++ ((!strcmp((char *)words[wordcount - 1], "true")))) {
+ question =
+ "There exists ~15 seconds delay for the option to take"
+ " effect from stime of the corresponding brick. Please"
+diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t
+index 26913f1..edafb48 100644
+--- a/tests/00-geo-rep/bug-1708603.t
++++ b/tests/00-geo-rep/bug-1708603.t
+@@ -44,11 +44,19 @@ TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+ #Create geo-rep session
+ TEST create_georep_session $master $slave
+
+-echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1
+-EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes)
++echo n | $GEOREP_CLI $master $slave config ignore_deletes true >/dev/null 2>&1
++EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore_deletes)
++
++echo y | $GEOREP_CLI $master $slave config ignore_deletes true
++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore_deletes)
++
++$GEOREP_CLI $master $slave config ignore_deletes false
+ echo y | $GEOREP_CLI $master $slave config ignore-deletes true
+ EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes)
+
++echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1
++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes)
++
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave stop
+
+--
+1.8.3.1
+
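The parser change above enumerates both spellings explicitly. An alternative design, sketched here with a hypothetical helper that is not part of the patch, is to normalize '-' to '_' once and compare a single canonical spelling:

#include <string.h>

static int option_matches(const char *word, const char *canonical)
{
    char buf[64];
    size_t i;

    /* Copy with '-' folded to '_' so one comparison covers both. */
    for (i = 0; word[i] != '\0' && i + 1 < sizeof(buf); i++)
        buf[i] = (word[i] == '-') ? '_' : word[i];
    buf[i] = '\0';

    return strcmp(buf, canonical) == 0;
}

With this helper, option_matches("ignore-deletes", "ignore_deletes") and option_matches("ignore_deletes", "ignore_deletes") both return true.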
diff --git a/0530-gfapi-avoid-crash-while-logging-message.patch b/0530-gfapi-avoid-crash-while-logging-message.patch
new file mode 100644
index 0000000..aec73b7
--- /dev/null
+++ b/0530-gfapi-avoid-crash-while-logging-message.patch
@@ -0,0 +1,41 @@
+From 5a7348a266587704dae4f1ddda16b7c95f547251 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Sun, 7 Feb 2021 13:40:24 +0000
+Subject: [PATCH 530/532] gfapi: avoid crash while logging message.
+
+Breaking the format parameter into two separate
+parameters to avoid a crash.
+
+Upstream:
+> Reviewed-on: https://github.com/gluster/glusterfs/pull/2139
+> fixes: #2138
+> Change-Id: Idd5f3631488c1d892748f83e6847fb6fd2d0802a
+> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+
+BUG: 1691320
+
+Change-Id: Ifd6a96982ffd4e5334f8be2297de2ad826f3145b
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/226851
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/glfs-fops.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
+index 051541f..6dc3b66 100644
+--- a/api/src/glfs-fops.c
++++ b/api/src/glfs-fops.c
+@@ -1529,7 +1529,7 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ ret = -1;
+ errno = EINVAL;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+- "size >= %llu is not allowed", GF_UNIT_GB, NULL);
++ "Data size too large", "size=%llu", GF_UNIT_GB, NULL);
+ goto out;
+ }
+
+--
+1.8.3.1
+
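Why the original call crashed: in this style of structured logging the event string is fixed text and each key=value pair supplies its own small format plus argument, terminated by NULL, so embedding an extra %llu inside the event string desynchronizes the argument list. The following is a simplified model of that convention; it assumes one unsigned long long argument per pair and is not the real gf_smsg signature, which takes more arguments.

#include <stdarg.h>
#include <stdio.h>

static void log_smsg(const char *event, ...)
{
    va_list ap;
    const char *kv;

    fprintf(stderr, "%s:", event);    /* event is plain text, no %-specs */
    va_start(ap, event);
    while ((kv = va_arg(ap, const char *)) != NULL) {
        unsigned long long val = va_arg(ap, unsigned long long);
        fprintf(stderr, " ");
        fprintf(stderr, kv, val);     /* kv is e.g. "size=%llu" */
    }
    va_end(ap);
    fprintf(stderr, "\n");
}

/* usage, mirroring the fixed call:
 *   log_smsg("Data size too large", "size=%llu",
 *            (unsigned long long)(1ULL << 30), NULL);
 */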
diff --git a/0531-Glustereventsd-Default-port-change-2091.patch b/0531-Glustereventsd-Default-port-change-2091.patch
new file mode 100644
index 0000000..8c2ecbf
--- /dev/null
+++ b/0531-Glustereventsd-Default-port-change-2091.patch
@@ -0,0 +1,69 @@
+From 058a853a1438b2a62586c545f71150ade3de23b7 Mon Sep 17 00:00:00 2001
+From: schaffung <ssivakum@redhat.com>
+Date: Wed, 10 Feb 2021 13:43:48 +0530
+Subject: [PATCH 531/532] Glustereventsd Default port change (#2091)
+
+Issue: The default port of glustereventsd is currently 24009,
+which prevents glustereventsd from binding to the UDP port
+due to SELinux policies.
+
+Fix: Changing the default port to something in the
+ephemeral range.
+
+>Fixes: #2080
+>Change-Id: Ibdc87f83f82f69660dca95d6d14b226e10d8bd33
+>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Upstream Patch : https://github.com/gluster/glusterfs/pull/2091
+
+BUG: 1814744
+Change-Id: Ibdc87f83f82f69660dca95d6d14b226e10d8bd33
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/227249
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ events/src/eventsconfig.json | 2 +-
+ extras/firewalld/glusterfs.xml | 2 +-
+ libglusterfs/src/events.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/events/src/eventsconfig.json b/events/src/eventsconfig.json
+index 89e5b9c..14d8f84 100644
+--- a/events/src/eventsconfig.json
++++ b/events/src/eventsconfig.json
+@@ -1,5 +1,5 @@
+ {
+ "log-level": "INFO",
+- "port": 24009,
++ "port": 55555,
+ "disable-events-log": false
+ }
+diff --git a/extras/firewalld/glusterfs.xml b/extras/firewalld/glusterfs.xml
+index 7e17644..dc74b2e 100644
+--- a/extras/firewalld/glusterfs.xml
++++ b/extras/firewalld/glusterfs.xml
+@@ -4,7 +4,7 @@
+ <description>Default ports for gluster-distributed storage</description>
+ <port protocol="tcp" port="24007"/> <!--For glusterd -->
+ <port protocol="tcp" port="24008"/> <!--For glusterd RDMA port management -->
+-<port protocol="tcp" port="24009"/> <!--For glustereventsd -->
++<port protocol="tcp" port="55555"/> <!--For glustereventsd -->
+ <port protocol="tcp" port="38465"/> <!--Gluster NFS service -->
+ <port protocol="tcp" port="38466"/> <!--Gluster NFS service -->
+ <port protocol="tcp" port="38467"/> <!--Gluster NFS service -->
+diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
+index 4d720ca..3659606 100644
+--- a/libglusterfs/src/events.c
++++ b/libglusterfs/src/events.c
+@@ -26,7 +26,7 @@
+ #include "glusterfs/events.h"
+
+ #define EVENT_HOST "127.0.0.1"
+-#define EVENT_PORT 24009
++#define EVENT_PORT 55555
+
+ int
+ _gf_event(eventtypes_t event, const char *fmt, ...)
+--
+1.8.3.1
+
diff --git a/0532-glusterd-fix-for-starting-brick-on-new-port.patch b/0532-glusterd-fix-for-starting-brick-on-new-port.patch
new file mode 100644
index 0000000..97e5aa7
--- /dev/null
+++ b/0532-glusterd-fix-for-starting-brick-on-new-port.patch
@@ -0,0 +1,79 @@
+From 2dad17fdbaab2ab2cda6a05dec9dcd2d37ea32ff Mon Sep 17 00:00:00 2001
+From: Nikhil Ladha <nladha@redhat.com>
+Date: Wed, 10 Feb 2021 15:07:32 +0530
+Subject: [PATCH 532/532] glusterd: fix for starting brick on new port
+
+The errno set by the runner code was not correct when bind() failed
+to assign an already occupied port in __socket_server_bind().
+
+Fix:
+Updated the code to return the correct errno from
+__socket_server_bind() if bind() fails with an EADDRINUSE error, and
+to use the errno returned by runner_run() to retry allocating a new
+port for the brick process.
+
+>Fixes: #1101
+
+>Change-Id: If124337f41344a04f050754e402490529ef4ecdc
+>Signed-off-by: nik-redhat nladha@redhat.com
+
+Upstream patch: https://github.com/gluster/glusterfs/pull/2090
+
+BUG: 1865796
+
+Change-Id: If124337f41344a04f050754e402490529ef4ecdc
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/227261
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ rpc/rpc-transport/socket/src/socket.c | 3 +++
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 6 ++----
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index 1ee7320..96ed9f1 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -973,8 +973,11 @@ __socket_server_bind(rpc_transport_t *this)
+ this->myinfo.identifier, strerror(errno));
+ if (errno == EADDRINUSE) {
+ gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
++ ret = -EADDRINUSE;
++ goto out;
+ }
+ }
++
+ if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+ if (getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len) != 0) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index cf32bd9..bc188a2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -2151,6 +2151,7 @@ glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo,
+ ret = -1;
+ goto out;
+ }
++
+ /* Build the exp_path, before starting the glusterfsd even in
+ valgrind mode. Otherwise all the glusterfsd processes start
+ writing the valgrind log to the same file.
+@@ -2289,13 +2290,10 @@ retry:
+
+ if (wait) {
+ synclock_unlock(&priv->big_lock);
+- errno = 0;
+ ret = runner_run(&runner);
+- if (errno != 0)
+- ret = errno;
+ synclock_lock(&priv->big_lock);
+
+- if (ret == EADDRINUSE) {
++ if (ret == -EADDRINUSE) {
+ /* retry after getting a new port */
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ GD_MSG_SRC_BRICK_PORT_UNAVAIL,
+--
+1.8.3.1
+
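The retry contract the fix establishes can be sketched with hypothetical helpers (not the gluster code): the bind step returns -errno so the caller can tell EADDRINUSE apart from other failures and try the next port.

#include <errno.h>
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>

static int bind_any_port(int sock, uint16_t port)
{
    struct sockaddr_in addr;

    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_ANY);
    addr.sin_port = htons(port);

    if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
        return -errno;                /* e.g. -EADDRINUSE */
    return 0;
}

static int bind_with_retry(int sock, uint16_t first_port, int attempts)
{
    int ret = -EADDRINUSE;
    uint16_t port = first_port;

    /* Only an occupied port is worth retrying with the next one. */
    while (attempts-- > 0 && ret == -EADDRINUSE)
        ret = bind_any_port(sock, port++);

    return ret;                       /* 0 on success, -errno otherwise */
}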
diff --git a/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch b/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch
new file mode 100644
index 0000000..158b4b7
--- /dev/null
+++ b/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch
@@ -0,0 +1,250 @@
+From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Thu, 18 Feb 2021 09:40:44 +0530
+Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status
+ after reboot (#2172)
+
+Rebalance cli is not showing correct status after reboot.
+
+The CLI does not show the correct status because the defrag object is
+not valid at the time an rpc connection is created to show the status.
+The defrag object is not valid because, at the time glusterd starts,
+glusterd_restart_rebalance can be called almost at the same time by two
+different synctasks; glusterd gets a disconnect on the rpc object and
+cleans up the defrag object.
+
+Solution: To keep the defrag object valid, take a reference on it
+          before creating the defrag rpc object.
+>Fixes: #1339
+>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
+Upstream Patch : https://github.com/gluster/glusterfs/pull/2172
+
+BUG: 1832306
+Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/228249
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++-----
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 +
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 59 +++++++++++++++++++++++++-
+ xlators/mgmt/glusterd/src/glusterd-utils.h | 5 +++
+ xlators/mgmt/glusterd/src/glusterd.h | 1 +
+ 5 files changed, 90 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+index b419a89..fcd5318 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int pid = -1;
++ int refcnt = 0;
+
+ this = THIS;
+ if (!this)
+@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
+ }
+
+ case RPC_CLNT_DISCONNECT: {
+- if (!defrag->connected)
+- return 0;
+-
+ LOCK(&defrag->lock);
+ {
++ if (!defrag->connected) {
++ UNLOCK(&defrag->lock);
++ return 0;
++ }
+ defrag->connected = 0;
+ }
+ UNLOCK(&defrag->lock);
+@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
+ glusterd_defrag_rpc_put(defrag);
+ if (defrag->cbk_fn)
+ defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
+-
+- GF_FREE(defrag);
++ refcnt = glusterd_defrag_unref(defrag);
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
+- "Rebalance process for volume %s has disconnected.",
+- volinfo->volname);
++ "Rebalance process for volume %s has disconnected"
++ " and defrag refcnt is %d.",
++ volinfo->volname, refcnt);
+ break;
+ }
+ case RPC_CLNT_DESTROY:
+@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
+ gf_msg_debug("glusterd", 0, "rebalance command failed");
+ goto out;
+ }
+-
++ /* Take a reference before the sleep to protect the defrag
++ object from cleanup while glusterd_restart_rebalance is called
++ for other bricks by a synctask at the time glusterd restarts.
++ */
++ glusterd_defrag_ref(defrag);
+ sleep(5);
+
+ ret = glusterd_rebalance_rpc_create(volinfo);
+@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
++ struct rpc_clnt *rpc = NULL;
+
+ // rebalance process is not started
+ if (!defrag)
+@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
+ }
+
+ glusterd_volinfo_ref(volinfo);
+- ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
+- volinfo, _gf_true);
++ ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo,
++ _gf_false);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
+ "Glusterd RPC creation failed");
+ goto out;
+ }
++ LOCK(&defrag->lock);
++ {
++ if (!defrag->rpc)
++ defrag->rpc = rpc;
++ else
++ rpc_clnt_unref(rpc);
++ }
++ UNLOCK(&defrag->lock);
+ ret = 0;
+ out:
+ if (options)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index df78fef..05c9e11 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ if (!rpc) {
+ if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
+ volinfo = pending_node->node;
++ glusterd_defrag_ref(volinfo->rebal.defrag);
+ ret = glusterd_rebalance_rpc_create(volinfo);
+ if (ret) {
+ ret = 0;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index bc188a2..9fb8eab 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -93,6 +93,44 @@
+ #define NLMV4_VERSION 4
+ #define NLMV1_VERSION 1
+
++int
++glusterd_defrag_ref(glusterd_defrag_info_t *defrag)
++{
++ int refcnt = 0;
++
++ if (!defrag)
++ goto out;
++
++ LOCK(&defrag->lock);
++ {
++ refcnt = ++defrag->refcnt;
++ }
++ UNLOCK(&defrag->lock);
++
++out:
++ return refcnt;
++}
++
++int
++glusterd_defrag_unref(glusterd_defrag_info_t *defrag)
++{
++ int refcnt = -1;
++
++ if (!defrag)
++ goto out;
++
++ LOCK(&defrag->lock);
++ {
++ refcnt = --defrag->refcnt;
++ if (refcnt <= 0)
++ GF_FREE(defrag);
++ }
++ UNLOCK(&defrag->lock);
++
++out:
++ return refcnt;
++}
++
+ gf_boolean_t
+ is_brick_mx_enabled(void)
+ {
+@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
+ char pidfile[PATH_MAX] = "";
+ int ret = -1;
+ pid_t pid = 0;
++ int refcnt = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
+ volinfo->volname);
+ goto out;
+ }
+- ret = glusterd_rebalance_rpc_create(volinfo);
++ refcnt = glusterd_defrag_ref(volinfo->rebal.defrag);
++ /* If the refcnt value is 1, the defrag object was either
++ populated by glusterd_rebalance_defrag_init or the
++ previous rpc creation failed. If it is not 1, the defrag
++ object was populated when the rebalance daemon was started.
++ We need to create an rpc object only when a previous rpc
++ connection was not established successfully at the time
++ the rebalance daemon was restarted by
++ glusterd_handle_defrag_start; otherwise the rebalance cli
++ does not show the correct status right after a node reboot
++ when printing the rebalance status, because the defrag
++ object has been destroyed while handling the rpc disconnect.
++ */
++ if (refcnt == 1) {
++ ret = glusterd_rebalance_rpc_create(volinfo);
++ } else {
++ ret = 0;
++ glusterd_defrag_unref(volinfo->rebal.defrag);
++ }
+ break;
+ }
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index 02d85d2..4541471 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -886,4 +886,9 @@ int32_t
+ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
+ int32_t sub_count);
+
++int
++glusterd_defrag_ref(glusterd_defrag_info_t *defrag);
++
++int
++glusterd_defrag_unref(glusterd_defrag_info_t *defrag);
+ #endif
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index efe4d0e..9de3f28 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ {
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
++ int refcnt;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+--
+1.8.3.1
+
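
The core of the fix is a lock-guarded reference count on the defrag object, so the disconnect handler cannot free it while another synctask is still creating the rpc connection. Below is a standalone sketch of the ref side of that pattern using pthreads; the type and names are illustrative, not the glusterd code.

#include <pthread.h>
#include <stdlib.h>

typedef struct obj {
    pthread_mutex_t lock;
    int refcnt;
} obj_t;

static obj_t *
obj_new(void)
{
    obj_t *o = calloc(1, sizeof(*o));

    if (!o)
        return NULL;
    pthread_mutex_init(&o->lock, NULL);
    o->refcnt = 1; /* the creator's reference, as with the defrag object */
    return o;
}

/* Take a reference under the lock; the returned count is only a
 * diagnostic snapshot, as in the patched log message. */
static int
obj_ref(obj_t *o)
{
    int refcnt;

    pthread_mutex_lock(&o->lock);
    refcnt = ++o->refcnt;
    pthread_mutex_unlock(&o->lock);
    return refcnt;
}

Each path that can outlive the creator (the rpc-create retry in gd_brick_op_phase and the restart path above) takes its own reference before touching the object.
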
diff --git a/0534-glusterd-Resolve-use-after-free-bug-2181.patch b/0534-glusterd-Resolve-use-after-free-bug-2181.patch
new file mode 100644
index 0000000..2dc72c1
--- /dev/null
+++ b/0534-glusterd-Resolve-use-after-free-bug-2181.patch
@@ -0,0 +1,47 @@
+From b3647eb5415b2e3d9e1a11ad6c4689e520f17b39 Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Mon, 22 Feb 2021 10:09:34 +0530
+Subject: [PATCH 534/538] glusterd: Resolve use after free bug (#2181)
+
+Commit 61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea
+introduced a coverity-reported bug: the object is
+used after it has been cleaned up.
+
+Clean up the memory after coming out of the critical section.
+>Fixes: #2180
+
+>Change-Id: Iee2050c4883a0dd44b8523bb822b664462ab6041
+>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Upstream Patch : https://github.com/gluster/glusterfs/pull/2181
+
+BUG: 1832306
+Change-Id: Iee2050c4883a0dd44b8523bb822b664462ab6041
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/228578
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 9fb8eab..6d40be5 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -122,11 +122,10 @@ glusterd_defrag_unref(glusterd_defrag_info_t *defrag)
+ LOCK(&defrag->lock);
+ {
+ refcnt = --defrag->refcnt;
+- if (refcnt <= 0)
+- GF_FREE(defrag);
+ }
+ UNLOCK(&defrag->lock);
+-
++ if (refcnt <= 0)
++ GF_FREE(defrag);
+ out:
+ return refcnt;
+ }
+--
+1.8.3.1
+
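
Continuing the illustrative obj_t sketch shown after the previous patch, the unref side shows the exact hazard removed here: freeing the object inside its own critical section leaves UNLOCK() touching freed memory. Decrement under the lock; free only after releasing it.

/* Companion to obj_ref() above. The buggy version called free()
 * while still inside the locked region, so the object's own lock
 * was unlocked after the memory was gone (use after free). */
static int
obj_unref(obj_t *o)
{
    int refcnt;

    pthread_mutex_lock(&o->lock);
    refcnt = --o->refcnt;
    pthread_mutex_unlock(&o->lock); /* leave the critical section first */

    if (refcnt <= 0) {
        pthread_mutex_destroy(&o->lock);
        free(o); /* safe: no thread can hold a reference anymore */
    }
    return refcnt;
}
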
diff --git a/0535-multiple-files-use-dict_allocate_and_serialize-where.patch b/0535-multiple-files-use-dict_allocate_and_serialize-where.patch
new file mode 100644
index 0000000..e1622de
--- /dev/null
+++ b/0535-multiple-files-use-dict_allocate_and_serialize-where.patch
@@ -0,0 +1,270 @@
+From 775d500cd136bd8c940faaeffde1217c25a87e3d Mon Sep 17 00:00:00 2001
+From: Yaniv Kaul <ykaul@redhat.com>
+Date: Sun, 2 Jun 2019 21:14:18 +0300
+Subject: [PATCH 535/538] (multiple files) use dict_allocate_and_serialize()
+ where applicable.
+
+This function does length, allocation and serialization for you.
+
+Upstream patch:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/22800
+> Change-Id: I142a259952a2fe83dd719442afaefe4a43a8e55e
+> updates: bz#1193929
+> Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+
+Change-Id: I142a259952a2fe83dd719442afaefe4a43a8e55e
+BUG: 1911292
+Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/228611
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-inode-read.c | 34 +++++---------------------
+ xlators/cluster/ec/src/ec-combine.c | 16 +++---------
+ xlators/features/locks/src/posix.c | 23 +++--------------
+ xlators/protocol/client/src/client-handshake.c | 14 +++--------
+ xlators/protocol/server/src/server-handshake.c | 24 +++++++-----------
+ xlators/protocol/server/src/server-helpers.c | 27 +++-----------------
+ 6 files changed, 28 insertions(+), 110 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
+index 523a5b4..cf305af 100644
+--- a/xlators/cluster/afr/src/afr-inode-read.c
++++ b/xlators/cluster/afr/src/afr-inode-read.c
+@@ -948,24 +948,13 @@ unlock:
+ goto unwind;
+ }
+
+- len = dict_serialized_length(local->dict);
+- if (len <= 0) {
+- goto unwind;
+- }
+-
+- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char);
+- if (!lockinfo_buf) {
++ op_ret = dict_allocate_and_serialize(
++ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
++ if (op_ret != 0) {
+ local->op_ret = -1;
+- local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+- op_ret = dict_serialize(local->dict, lockinfo_buf);
+- if (op_ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = -op_ret;
+- }
+-
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+@@ -1064,24 +1053,13 @@ unlock:
+ goto unwind;
+ }
+
+- len = dict_serialized_length(local->dict);
+- if (len <= 0) {
+- goto unwind;
+- }
+-
+- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char);
+- if (!lockinfo_buf) {
++ op_ret = dict_allocate_and_serialize(
++ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
++ if (op_ret != 0) {
+ local->op_ret = -1;
+- local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+- op_ret = dict_serialize(local->dict, lockinfo_buf);
+- if (op_ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = -op_ret;
+- }
+-
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
+index 99e5534..9d712b3 100644
+--- a/xlators/cluster/ec/src/ec-combine.c
++++ b/xlators/cluster/ec/src/ec-combine.c
+@@ -486,22 +486,12 @@ ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key)
+
+ tmp = NULL;
+
+- len = dict_serialized_length(lockinfo);
+- if (len < 0) {
+- err = len;
+-
+- goto out;
+- }
+- ptr = GF_MALLOC(len, gf_common_mt_char);
+- if (ptr == NULL) {
+- err = -ENOMEM;
+-
+- goto out;
+- }
+- err = dict_serialize(lockinfo, ptr);
++ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr,
++ (unsigned int *)&len);
+ if (err != 0) {
+ goto out;
+ }
++
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ err = dict_set_dynptr(dict, key, ptr, len);
+ if (err != 0) {
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 5ae0125..cdd1ff7 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -1547,8 +1547,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
+ goto out;
+ }
+
+- len = dict_serialized_length(tmp);
+- if (len < 0) {
++ op_ret = dict_allocate_and_serialize(tmp, (char **)&buf,
++ (unsigned int *)&len);
++ if (op_ret != 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log(this->name, GF_LOG_WARNING,
+@@ -1558,24 +1559,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
+ goto out;
+ }
+
+- buf = GF_CALLOC(1, len, gf_common_mt_char);
+- if (buf == NULL) {
+- op_ret = -1;
+- *op_errno = ENOMEM;
+- goto out;
+- }
+-
+- op_ret = dict_serialize(tmp, buf);
+- if (op_ret < 0) {
+- *op_errno = -op_ret;
+- op_ret = -1;
+- gf_log(this->name, GF_LOG_WARNING,
+- "dict_serialize failed (%s) while handling lockinfo "
+- "for fd (ptr: %p inode-gfid:%s)",
+- strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid));
+- goto out;
+- }
+-
+ op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
+index 0002361..6b20d92 100644
+--- a/xlators/protocol/client/src/client-handshake.c
++++ b/xlators/protocol/client/src/client-handshake.c
+@@ -1286,18 +1286,10 @@ client_setvolume(xlator_t *this, struct rpc_clnt *rpc)
+ "Failed to set client opversion in handshake message");
+ }
+
+- ret = dict_serialized_length(options);
+- if (ret < 0) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_ERROR,
+- "failed to get serialized length of dict");
++ ret = dict_allocate_and_serialize(options, (char **)&req.dict.dict_val,
++ &req.dict.dict_len);
++ if (ret != 0) {
+ ret = -1;
+- goto fail;
+- }
+- req.dict.dict_len = ret;
+- req.dict.dict_val = GF_CALLOC(1, req.dict.dict_len,
+- gf_client_mt_clnt_req_buf_t);
+- ret = dict_serialize(options, req.dict.dict_val);
+- if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_SERIALIZE_FAIL,
+ "failed to serialize "
+ "dictionary");
+diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
+index eeca73c..54dc030 100644
+--- a/xlators/protocol/server/src/server-handshake.c
++++ b/xlators/protocol/server/src/server-handshake.c
+@@ -676,22 +676,16 @@ fail:
+ GF_ASSERT(rsp);
+
+ rsp->op_ret = 0;
+- ret = dict_serialized_length(reply);
+- if (ret > 0) {
+- rsp->dict.dict_len = ret;
+- rsp->dict.dict_val = GF_CALLOC(1, rsp->dict.dict_len,
+- gf_server_mt_rsp_buf_t);
+- if (rsp->dict.dict_val) {
+- ret = dict_serialize(reply, rsp->dict.dict_val);
+- if (ret < 0) {
+- gf_msg_debug("server-handshake", 0,
+- "failed "
+- "to serialize reply dict");
+- op_ret = -1;
+- op_errno = -ret;
+- }
+- }
++
++ ret = dict_allocate_and_serialize(reply, (char **)&rsp->dict.dict_val,
++ &rsp->dict.dict_len);
++ if (ret != 0) {
++ ret = -1;
++ gf_msg_debug("server-handshake", 0, "failed to serialize reply dict");
++ op_ret = -1;
++ op_errno = -ret;
+ }
++
+ rsp->op_ret = op_ret;
+ rsp->op_errno = gf_errno_to_error(op_errno);
+
+diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
+index e74a24d..33959b5 100644
+--- a/xlators/protocol/server/src/server-helpers.c
++++ b/xlators/protocol/server/src/server-helpers.c
+@@ -902,7 +902,6 @@ serialize_rsp_direntp(gf_dirent_t *entries, gfs3_readdirp_rsp *rsp)
+ gfs3_dirplist *trav = NULL;
+ gfs3_dirplist *prev = NULL;
+ int ret = -1;
+- int temp = 0;
+
+ GF_VALIDATE_OR_GOTO("server", entries, out);
+ GF_VALIDATE_OR_GOTO("server", rsp, out);
+@@ -923,28 +922,10 @@ serialize_rsp_direntp(gf_dirent_t *entries, gfs3_readdirp_rsp *rsp)
+
+ /* if 'dict' is present, pack it */
+ if (entry->dict) {
+- temp = dict_serialized_length(entry->dict);
+-
+- if (temp < 0) {
+- gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, PS_MSG_INVALID_ENTRY,
+- "failed to get "
+- "serialized length of reply dict");
+- errno = EINVAL;
+- trav->dict.dict_len = 0;
+- goto out;
+- }
+- trav->dict.dict_len = temp;
+-
+- trav->dict.dict_val = GF_CALLOC(1, trav->dict.dict_len,
+- gf_server_mt_rsp_buf_t);
+- if (!trav->dict.dict_val) {
+- errno = ENOMEM;
+- trav->dict.dict_len = 0;
+- goto out;
+- }
+-
+- ret = dict_serialize(entry->dict, trav->dict.dict_val);
+- if (ret < 0) {
++ ret = dict_allocate_and_serialize(entry->dict,
++ (char **)&trav->dict.dict_val,
++ &trav->dict.dict_len);
++ if (ret != 0) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, PS_MSG_DICT_SERIALIZE_FAIL,
+ "failed to serialize reply dict");
+ errno = -ret;
+--
+1.8.3.1
+
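
Every call site in this cleanup previously repeated the same three steps: measure the serialized length, allocate, then serialize. A minimal sketch of what such a combined helper looks like follows; the dict primitives are declared as stand-ins, and the real dict_allocate_and_serialize() differs in detail.

#include <stdlib.h>

typedef struct dict dict_t;                    /* stand-in type */
extern int dict_serialized_length(dict_t *d);  /* stand-in decls */
extern int dict_serialize(dict_t *d, char *buf);

/* One call, one error path: measure, allocate, serialize. */
static int
dict_alloc_and_serialize(dict_t *d, char **buf, unsigned int *len)
{
    int l = dict_serialized_length(d);

    if (l <= 0)
        return -1;

    char *b = calloc(1, (size_t)l);
    if (!b)
        return -1;

    if (dict_serialize(d, b) < 0) {
        free(b); /* no caller ever sees a half-built buffer */
        return -1;
    }

    *buf = b;
    *len = (unsigned int)l;
    return 0;
}
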
diff --git a/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch b/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch
new file mode 100644
index 0000000..94e0b64
--- /dev/null
+++ b/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch
@@ -0,0 +1,102 @@
+From 32281b4b5cf79d0ef6f0c65775bb81093e1ba479 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Wed, 24 Feb 2021 18:44:12 +0530
+Subject: [PATCH 536/538] dht: Ongoing IO is failed during volume shrink
+ operation (#2188)
+
+In commit (c878174) we introduced a check to avoid a stale
+layout issue. To avoid a stale layout issue, dht sets a key
+along with the layout at the time it winds a create fop, and
+posix validates the parent layout based on the key value. If
+the layout does not match, it throws an error. In case of a
+volume shrink the layout has been changed by the rebalance
+daemon, and if the layout does not match, dht is not able to
+wind a create fop successfully.
+
+Solution: To avoid the issue, populate the key only when
+          dht winds the fop for the first time. If that
+          attempt returns an error, dht takes a lock and
+          then reattempts to wind the fop.
+
+> Fixes: #2187
+> Change-Id: Ie018386e7823a11eea415496bb226ca032453a55
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit da6ce622b722f7d12619c5860293faf03f7cd00c
+> Reviewed on upstream link https://github.com/gluster/glusterfs/pull/2188
+
+Bug: 1924044
+Change-Id: I7670dbe2d562b83db0af3753f994653ffdd49591
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/228941
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 41 ++++++++++++++++++++++++++----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index fe1d0ee..7425c1a 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -8526,15 +8526,32 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ {
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
++ int lk_count = 0;
+
+ local = frame->local;
+
+ if (!dht_is_subvol_filled(this, subvol)) {
+- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+- subvol->name);
+-
+- dht_set_parent_layout_in_dict(loc, this, local);
+-
++ lk_count = local->lock[0].layout.parent_layout.lk_count;
++ gf_msg_debug(this->name, 0, "creating %s on %s with lock_count %d",
++ loc->path, subvol->name, lk_count);
++ /* The function dht_set_parent_layout_in_dict sets the layout
++ in the dictionary, and posix_create validates the layout before
++ creating a file. If the parent layout does not match the
++ on-disk layout, the posix xlator throws an error; but when the
++ volume is shrunk, the layout has been changed by the rebalance
++ daemon, so we must call this function only when the fop is wound
++ without taking any lock, otherwise we would not be able to update
++ the layout on disk after the layout has changed.
++ */
++ if (!lk_count) {
++ dht_set_parent_layout_in_dict(loc, this, local);
++ } else {
++ /* Delete the key to skip layout validation if it was set by
++ a previous STACK_WIND attempt made before dht_create had
++ taken the lock.
++ */
++ (void)dict_del_sizen(local->params, GF_PREOP_PARENT_KEY);
++ }
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
+@@ -8554,12 +8571,14 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+
+ goto out;
+ }
+-
+- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+- subvol->name);
+-
+- dht_set_parent_layout_in_dict(loc, this, local);
+-
++ lk_count = local->lock[0].layout.parent_layout.lk_count;
++ gf_msg_debug(this->name, 0, "creating %s on %s with lk_count %d",
++ loc->path, subvol->name, lk_count);
++ if (!lk_count) {
++ dht_set_parent_layout_in_dict(loc, this, local);
++ } else {
++ (void)dict_del_sizen(local->params, GF_PREOP_PARENT_KEY);
++ }
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
+--
+1.8.3.1
+
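
The decision this patch adds boils down to: on the first, lockless wind, set the parent-layout validation key; on the locked retry, delete it, because rebalance may legitimately have changed the layout. A minimal sketch of that branch, with stand-in dict helpers and an illustrative key name:

typedef struct dict dict_t;                         /* stand-in */
extern int dict_set_str(dict_t *d, const char *key, const char *val);
extern int dict_del(dict_t *d, const char *key);

/* Illustrative key name; the patch uses GF_PREOP_PARENT_KEY. */
#define PREOP_PARENT_KEY "glusterfs.preop.parent-key"

/* lk_count == 0: first, lockless wind; ask the brick to validate the
 * parent layout. lk_count != 0: retry under lock; drop the key so a
 * layout legitimately refreshed by rebalance is not rejected again. */
static void
prepare_create_params(dict_t *params, const char *parent_layout,
                      int lk_count)
{
    if (lk_count == 0)
        dict_set_str(params, PREOP_PARENT_KEY, parent_layout);
    else
        (void)dict_del(params, PREOP_PARENT_KEY);
}
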
diff --git a/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch b/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch
new file mode 100644
index 0000000..dcf0940
--- /dev/null
+++ b/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch
@@ -0,0 +1,387 @@
+From 7b7ec67680415c22773ebb2a5daacf298b6b1e06 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Sat, 13 Feb 2021 18:37:32 +0100
+Subject: [PATCH 537/538] cluster/afr: Fix race in lockinfo (f)getxattr
+
+A shared dictionary was updated outside the lock after having updated
+the number of remaining answers. This means that one thread may be
+processing the last answer and unwinding the request before another
+thread completes updating the dict.
+
+ Thread 1 Thread 2
+
+ LOCK()
+ call_cnt-- (=1)
+ UNLOCK()
+ LOCK()
+ call_cnt-- (=0)
+ UNLOCK()
+ update_dict(dict)
+ if (call_cnt == 0) {
+ STACK_UNWIND(dict);
+ }
+ update_dict(dict)
+ if (call_cnt == 0) {
+ STACK_UNWIND(dict);
+ }
+
+The updates from thread 1 are lost.
+
+This patch also reduces the work done inside the locked region and
+reduces code duplication.
+
+Upstream-patch:
+> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2162
+> Fixes: #2161
+> Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1911292
+Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/228924
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-inode-read.c | 254 ++++++++++++++-----------------
+ 1 file changed, 112 insertions(+), 142 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
+index cf305af..98e195a 100644
+--- a/xlators/cluster/afr/src/afr-inode-read.c
++++ b/xlators/cluster/afr/src/afr-inode-read.c
+@@ -15,6 +15,8 @@
+ #include <stdlib.h>
+ #include <signal.h>
+
++#include <urcu/uatomic.h>
++
+ #include <glusterfs/glusterfs.h>
+ #include "afr.h"
+ #include <glusterfs/dict.h>
+@@ -868,188 +870,121 @@ afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ return 0;
+ }
+
+-int32_t
+-afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *dict,
+- dict_t *xdata)
++static int32_t
++afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata)
+ {
+- int call_cnt = 0, len = 0;
+- char *lockinfo_buf = NULL;
+- dict_t *lockinfo = NULL, *newdict = NULL;
+- afr_local_t *local = NULL;
++ afr_local_t *local;
++ dict_t *local_dict;
++ dict_t *local_xdata;
++ int32_t ret;
+
+- LOCK(&frame->lock);
+- {
+- local = frame->local;
++ local = frame->local;
++ local_dict = NULL;
++ local_xdata = NULL;
+
+- call_cnt = --local->call_count;
++ ret = -ENOMEM;
+
+- if ((op_ret < 0) || (!dict && !xdata)) {
+- goto unlock;
+- }
+-
+- if (xdata) {
+- if (!local->xdata_rsp) {
+- local->xdata_rsp = dict_new();
+- if (!local->xdata_rsp) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto unlock;
+- }
+- }
++ if ((dict != NULL) && (local->dict == NULL)) {
++ local_dict = dict_new();
++ if (local_dict == NULL) {
++ goto done;
+ }
++ }
+
+- if (!dict) {
+- goto unlock;
++ if ((xdata != NULL) && (local->xdata_rsp == NULL)) {
++ local_xdata = dict_new();
++ if (local_xdata == NULL) {
++ goto done;
+ }
++ }
+
+- op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+- (void **)&lockinfo_buf, &len);
++ if ((local_dict != NULL) || (local_xdata != NULL)) {
++ /* TODO: Maybe it would be better to preallocate both dicts before
++ * sending the requests. This way we don't need to use a LOCK()
++ * here. */
++ LOCK(&frame->lock);
+
+- if (!lockinfo_buf) {
+- goto unlock;
++ if ((local_dict != NULL) && (local->dict == NULL)) {
++ local->dict = local_dict;
++ local_dict = NULL;
+ }
+
+- if (!local->dict) {
+- local->dict = dict_new();
+- if (!local->dict) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto unlock;
+- }
++ if ((local_xdata != NULL) && (local->xdata_rsp == NULL)) {
++ local->xdata_rsp = local_xdata;
++ local_xdata = NULL;
+ }
+- }
+-unlock:
+- UNLOCK(&frame->lock);
+
+- if (lockinfo_buf != NULL) {
+- lockinfo = dict_new();
+- if (lockinfo == NULL) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- } else {
+- op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
+-
+- if (lockinfo && local->dict) {
+- dict_copy(lockinfo, local->dict);
+- }
+- }
+- }
+-
+- if (xdata && local->xdata_rsp) {
+- dict_copy(xdata, local->xdata_rsp);
++ UNLOCK(&frame->lock);
+ }
+
+- if (!call_cnt) {
+- newdict = dict_new();
+- if (!newdict) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto unwind;
++ if (dict != NULL) {
++ if (dict_copy(dict, local->dict) < 0) {
++ goto done;
+ }
++ }
+
+- op_ret = dict_allocate_and_serialize(
+- local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+- if (op_ret != 0) {
+- local->op_ret = -1;
+- goto unwind;
++ if (xdata != NULL) {
++ if (dict_copy(xdata, local->xdata_rsp) < 0) {
++ goto done;
+ }
++ }
+
+- op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+- (void *)lockinfo_buf, len);
+- if (op_ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = -op_ret;
+- goto unwind;
+- }
++ ret = 0;
+
+- unwind:
+- AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
+- local->xdata_rsp);
++done:
++ if (local_dict != NULL) {
++ dict_unref(local_dict);
+ }
+
+- dict_unref(lockinfo);
++ if (local_xdata != NULL) {
++ dict_unref(local_xdata);
++ }
+
+- return 0;
++ return ret;
+ }
+
+-int32_t
+-afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *dict,
+- dict_t *xdata)
++static void
++afr_getxattr_lockinfo_cbk_common(call_frame_t *frame, int32_t op_ret,
++ int32_t op_errno, dict_t *dict, dict_t *xdata,
++ bool is_fgetxattr)
+ {
+- int call_cnt = 0, len = 0;
++ int len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+- LOCK(&frame->lock);
+- {
+- local = frame->local;
+-
+- call_cnt = --local->call_count;
+-
+- if ((op_ret < 0) || (!dict && !xdata)) {
+- goto unlock;
+- }
+-
+- if (xdata) {
+- if (!local->xdata_rsp) {
+- local->xdata_rsp = dict_new();
+- if (!local->xdata_rsp) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto unlock;
+- }
+- }
+- }
+-
+- if (!dict) {
+- goto unlock;
+- }
++ local = frame->local;
+
++ if ((op_ret >= 0) && (dict != NULL)) {
+ op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+-
+- if (!lockinfo_buf) {
+- goto unlock;
+- }
+-
+- if (!local->dict) {
+- local->dict = dict_new();
+- if (!local->dict) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- goto unlock;
++ if (lockinfo_buf != NULL) {
++ lockinfo = dict_new();
++ if (lockinfo == NULL) {
++ op_ret = -1;
++ } else {
++ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
+ }
+ }
+ }
+-unlock:
+- UNLOCK(&frame->lock);
+
+- if (lockinfo_buf != NULL) {
+- lockinfo = dict_new();
+- if (lockinfo == NULL) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- } else {
+- op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
+-
+- if (lockinfo && local->dict) {
+- dict_copy(lockinfo, local->dict);
+- }
++ if ((op_ret >= 0) && ((lockinfo != NULL) || (xdata != NULL))) {
++ op_ret = afr_update_local_dicts(frame, lockinfo, xdata);
++ if (lockinfo != NULL) {
++ dict_unref(lockinfo);
+ }
+ }
+
+- if (xdata && local->xdata_rsp) {
+- dict_copy(xdata, local->xdata_rsp);
++ if (op_ret < 0) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
+ }
+
+- if (!call_cnt) {
++ if (uatomic_sub_return(&local->call_count, 1) == 0) {
+ newdict = dict_new();
+ if (!newdict) {
+ local->op_ret = -1;
+- local->op_errno = ENOMEM;
++ local->op_errno = op_errno = ENOMEM;
+ goto unwind;
+ }
+
+@@ -1057,23 +992,58 @@ unlock:
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
+ local->op_ret = -1;
++ local->op_errno = op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+- local->op_ret = -1;
+- local->op_errno = -op_ret;
++ GF_FREE(lockinfo_buf);
++ local->op_ret = op_ret = -1;
++ local->op_errno = op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+- AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
+- local->xdata_rsp);
++ /* TODO: These unwinds use op_ret and op_errno instead of local->op_ret
++ * and local->op_errno. This doesn't seem right because any
++ * failure during processing of each answer could be silently
++ * ignored. This is kept as this was the old behavior and because
++ * local->op_ret is initialized as -1 and local->op_errno is
++ * initialized as EUCLEAN, which makes these values useless. */
++ if (is_fgetxattr) {
++ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
++ local->xdata_rsp);
++ } else {
++ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
++ local->xdata_rsp);
++ }
++
++ if (newdict != NULL) {
++ dict_unref(newdict);
++ }
+ }
++}
++
++static int32_t
++afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *dict,
++ dict_t *xdata)
++{
++ afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata,
++ false);
+
+- dict_unref(lockinfo);
++ return 0;
++}
++
++static int32_t
++afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *dict,
++ dict_t *xdata)
++{
++ afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata,
++ true);
+
+ return 0;
+ }
+--
+1.8.3.1
+
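
The ordering rule behind this fix: merge your answer into the shared state first, decrement the counter last, and let only the thread that reaches zero unwind. A standalone sketch with C11 atomics; the patch itself uses urcu's uatomic_sub_return, which serves the same purpose.

#include <stdatomic.h>

struct reply_ctx {
    atomic_int call_count; /* initialized to the number of winds */
    /* ... aggregated reply state, guarded by its own lock ... */
};

/* Per-answer callback: merge first, decrement last. */
static void
on_answer(struct reply_ctx *ctx)
{
    /* 1. merge this answer into the shared state (own locking) */

    /* 2. only then drop the count; the acq_rel ordering makes every
     *    earlier merge visible to whichever thread reaches zero */
    if (atomic_fetch_sub_explicit(&ctx->call_count, 1,
                                  memory_order_acq_rel) == 1) {
        /* last answer: safe to unwind the aggregated reply here */
    }
}
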
diff --git a/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch b/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch
new file mode 100644
index 0000000..de164a3
--- /dev/null
+++ b/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch
@@ -0,0 +1,46 @@
+From 31cd7627ff329a39691239322df3bc88e962ad02 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Mon, 1 Mar 2021 05:19:39 +0100
+Subject: [PATCH 538/538] afr: fix coverity issue introduced by 90cefde
+
+Fixes coverity issues 1447029 and 1447028.
+
+Backport of:
+> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2201
+> Updates: #2161
+> Change-Id: I6a564231d6aeb76de20675b7ced5d45eed8c377f
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1911292
+Change-Id: I6a564231d6aeb76de20675b7ced5d45eed8c377f
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/229200
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-inode-read.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
+index 98e195a..d874172 100644
+--- a/xlators/cluster/afr/src/afr-inode-read.c
++++ b/xlators/cluster/afr/src/afr-inode-read.c
+@@ -918,13 +918,13 @@ afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata)
+ }
+
+ if (dict != NULL) {
+- if (dict_copy(dict, local->dict) < 0) {
++ if (dict_copy(dict, local->dict) == NULL) {
+ goto done;
+ }
+ }
+
+ if (xdata != NULL) {
+- if (dict_copy(xdata, local->xdata_rsp) < 0) {
++ if (dict_copy(xdata, local->xdata_rsp) == NULL) {
+ goto done;
+ }
+ }
+--
+1.8.3.1
+
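
This two-character fix hides a whole class of bug: dict_copy() returns a pointer, so the earlier "< 0" comparison could never detect failure. A minimal sketch of the corrected check, with dict_copy() declared as a stand-in matching its use in the patch:

typedef struct dict dict_t;                          /* stand-in */
extern dict_t *dict_copy(dict_t *src, dict_t *dst);  /* dst or NULL */

static int
merge_into(dict_t *src, dict_t *dst)
{
    /* Broken check from the previous patch: a pointer is never < 0,
     * so failures were silently ignored:
     *
     *     if (dict_copy(src, dst) < 0) ...
     */
    if (dict_copy(src, dst) == NULL)
        return -1;
    return 0;
}
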
diff --git a/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch b/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch
new file mode 100644
index 0000000..18f851f
--- /dev/null
+++ b/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch
@@ -0,0 +1,62 @@
+From 88523814fe296c9cc9f7619e06210830f59c5edf Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 12 Mar 2021 10:32:09 +0100
+Subject: [PATCH 539/539] extras: disable lookup-optimize in virt and block
+ groups
+
+lookup-optimize doesn't provide any benefit for virtualized
+environments and gluster-block workloads, but it's known to cause
+corruption in some cases when sharding is also enabled and the volume
+is expanded or shrunk.
+
+For this reason, we disable lookup-optimize by default on those
+environments.
+
+Backport of:
+> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2254
+> Fixes: #2253
+> Change-Id: I25861aa50b335556a995a9c33318dd3afb41bf71
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1939372
+Change-Id: I25861aa50b335556a995a9c33318dd3afb41bf71
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/231173
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/group-distributed-virt | 1 +
+ extras/group-gluster-block | 1 +
+ extras/group-virt.example | 1 +
+ 3 files changed, 3 insertions(+)
+
+diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt
+index a960b76..6da3de0 100644
+--- a/extras/group-distributed-virt
++++ b/extras/group-distributed-virt
+@@ -8,3 +8,4 @@ user.cifs=off
+ client.event-threads=4
+ server.event-threads=4
+ performance.client-io-threads=on
++cluster.lookup-optimize=off
+diff --git a/extras/group-gluster-block b/extras/group-gluster-block
+index 1e39801..b8d3e8d 100644
+--- a/extras/group-gluster-block
++++ b/extras/group-gluster-block
+@@ -25,3 +25,4 @@ features.shard-block-size=64MB
+ user.cifs=off
+ server.allow-insecure=on
+ cluster.choose-local=off
++cluster.lookup-optimize=off
+diff --git a/extras/group-virt.example b/extras/group-virt.example
+index 3a441eb..155f5f5 100644
+--- a/extras/group-virt.example
++++ b/extras/group-virt.example
+@@ -21,3 +21,4 @@ server.tcp-user-timeout=20
+ server.keepalive-time=10
+ server.keepalive-interval=2
+ server.keepalive-count=5
++cluster.lookup-optimize=off
+--
+1.8.3.1
+
diff --git a/0540-extras-Disable-write-behind-for-group-samba.patch b/0540-extras-Disable-write-behind-for-group-samba.patch
new file mode 100644
index 0000000..0a89c64
--- /dev/null
+++ b/0540-extras-Disable-write-behind-for-group-samba.patch
@@ -0,0 +1,37 @@
+From 6895b6c67e9c29af3f966b4d9ee5cb40da763d24 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Wed, 14 Apr 2021 12:38:45 +0530
+Subject: [PATCH 540/540] extras: Disable write-behind for group samba.
+
+When write-behind is enabled with Samba, it can be a
+source of data corruption. The translator, while
+processing a write call, immediately returns success but continues
+writing the data to the server in the background. This can cause data
+corruption when two clients relying on Samba to provide data consistency
+are operating on the same file.
+
+> fixes: https://github.com/gluster/glusterfs/issues/2329
+
+Change-Id: I5265056ff315a5f3cd97ea11b18db0831b1b901d
+Solution: Disable write-behind for samba group
+BUG: 1948547
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/235876
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/group-samba | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/extras/group-samba b/extras/group-samba
+index eeee6e0..9611a1f 100644
+--- a/extras/group-samba
++++ b/extras/group-samba
+@@ -9,3 +9,4 @@ performance.nl-cache=on
+ performance.nl-cache-timeout=600
+ performance.readdir-ahead=on
+ performance.parallel-readdir=on
++performance.write-behind=off
+--
+1.8.3.1
+
diff --git a/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch b/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch
new file mode 100644
index 0000000..29135df
--- /dev/null
+++ b/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch
@@ -0,0 +1,545 @@
+From 23ab7175e64ab4d75fbcb6874008843cc78b65b8 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Fri, 16 Apr 2021 18:48:56 +0530
+Subject: [PATCH 541/542] glusterd-volgen: Add functionality to accept any
+ custom xlator
+
+Add a new function which allows users to insert any custom xlator.
+It provides a way to add custom processing into file operations.
+
+Users can deploy a plugin (an xlator shared object) and integrate it into glusterfsd.
+
+If users want to enable a custom xlator, do the following:
+
+1. put the xlator object (.so file) into "XLATOR_DIR/user/"
+2. set the option user.xlator.<xlator> to an existing xlator name to specify the position in the graph
+3. restart the gluster volume
+
+Options for a custom xlator can be set via "user.xlator.<xlator>.<optkey>".
+
+Backport of :
+>https://github.com/gluster/glusterfs/commit/ea86b664f3b1f54901ce1b7d7fba7d80456f2089
+>Fixes: https://github.com/gluster/glusterfs/issues/1943
+>Change-Id: Ife3ae1514ea474f5dae2897223012f9d04b64674
+>Signed-off-by:Ryo Furuhashi <ryo.furuhashi.nh@hitachi.com>
+>Co-authored-by: Yaniv Kaul <ykaul@redhat.com>
+>Co-authored-by: Xavi Hernandez <xhernandez@users.noreply.github.com>
+
+Change-Id: Ic8f28bfcfde67213eb1092b0ebf4822c874d37bb
+BUG: 1927235
+Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/236830
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ cli/src/cli-rpc-ops.c | 148 ++++++++++++++++++++------
+ cli/src/cli.h | 2 -
+ tests/basic/user-xlator.t | 65 ++++++++++++
+ tests/env.rc.in | 3 +
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 155 ++++++++++++++++++++++++++++
+ 5 files changed, 342 insertions(+), 31 deletions(-)
+ create mode 100755 tests/basic/user-xlator.t
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 4e91265..51b5447 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -2269,49 +2269,131 @@ out:
+ return ret;
+ }
+
+-char *
+-is_server_debug_xlator(void *myframe)
++/*
++ * returns
++ * 1 : is server debug xlator
++ * 0 : is not server debug xlator
++ * <0 : error
++ */
++static int
++is_server_debug_xlator(char *key, char *value)
++{
++ if (!key || !value)
++ return -1;
++
++ if (strcmp("debug.trace", key) == 0 ||
++ strcmp("debug.error-gen", key) == 0) {
++ if (strcmp("client", value) == 0)
++ return 0;
++ else
++ return 1;
++ }
++
++ return 0;
++}
++
++/*
++ * returns
++ * 1 : is user xlator
++ * 0 : is not user xlator
++ * <0 : error
++ */
++static int
++is_server_user_xlator(char *key, char *value)
++{
++ int ret = 0;
++
++ if (!key || !value)
++ return -1;
++
++ ret = fnmatch("user.xlator.*", key, 0);
++ if (ret < 0) {
++ ret = -1;
++ goto out;
++ } else if (ret == FNM_NOMATCH) {
++ ret = 0;
++ goto out;
++ }
++
++ ret = fnmatch("user.xlator.*.*", key, 0);
++ if (ret < 0) {
++ ret = -1;
++ goto out;
++ } else if (ret != FNM_NOMATCH) { // this is user xlator's option key
++ ret = 0;
++ goto out;
++ }
++
++ ret = 1;
++
++out:
++ return ret;
++}
++
++static int
++added_server_xlator(void *myframe, char **added_xlator)
+ {
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ char **words = NULL;
+ char *key = NULL;
+ char *value = NULL;
+- char *debug_xlator = NULL;
++ int ret = 0;
+
+ frame = myframe;
+ local = frame->local;
+ words = (char **)local->words;
+
+ while (*words != NULL) {
+- if (strstr(*words, "trace") == NULL &&
+- strstr(*words, "error-gen") == NULL) {
+- words++;
+- continue;
+- }
+-
+ key = *words;
+ words++;
+ value = *words;
+- if (value == NULL)
++
++ if (!value) {
+ break;
+- if (strstr(value, "client")) {
+- words++;
+- continue;
+- } else {
+- if (!(strstr(value, "posix") || strstr(value, "acl") ||
+- strstr(value, "locks") || strstr(value, "io-threads") ||
+- strstr(value, "marker") || strstr(value, "index"))) {
+- words++;
+- continue;
+- } else {
+- debug_xlator = gf_strdup(key);
+- break;
++ }
++
++ ret = is_server_debug_xlator(key, value);
++ if (ret < 0) {
++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
++ "failed to check that debug xlator was added");
++ ret = -1;
++ goto out;
++ }
++
++ if (ret) {
++ *added_xlator = gf_strdup(key);
++ if (!*added_xlator) {
++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
++ "Out of memory");
++ ret = -1;
++ goto out;
++ }
++ break;
++ }
++
++ ret = is_server_user_xlator(key, value);
++ if (ret < 0) {
++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
++ "failed to check that user xlator was added");
++ ret = -1;
++ goto out;
++ }
++
++ if (ret) {
++ *added_xlator = gf_strdup(key);
++ if (!*added_xlator) {
++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
++ "Out of memory");
++ ret = -1;
++ goto out;
+ }
++ break;
+ }
+ }
+
+- return debug_xlator;
++out:
++ return ret;
+ }
+
+ int
+@@ -2327,7 +2409,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ char msg[1024] = {
+ 0,
+ };
+- char *debug_xlator = NULL;
++ char *added_xlator = NULL;
+ char tmp_str[512] = {
+ 0,
+ };
+@@ -2365,18 +2447,26 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ * The process has to be restarted. So this is a check from the
+ * volume set option such that if debug xlators such as trace/errorgen
+ * are provided in the set command, warn the user.
++ * volume set option such that if user custom xlators or debug
++ * xlators such as trace/errorgen are provided in the set command,
++ * warn the user.
+ */
+- debug_xlator = is_server_debug_xlator(myframe);
++ ret = added_server_xlator(myframe, &added_xlator);
++ if (ret < 0) {
++ gf_log("cli", GF_LOG_ERROR,
++ "failed to check that server graph has been changed");
++ goto out;
++ }
+
+ if (dict_get_str(dict, "help-str", &help_str) && !msg[0])
+ snprintf(msg, sizeof(msg), "Set volume %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+- if (rsp.op_ret == 0 && debug_xlator) {
++ if (rsp.op_ret == 0 && added_xlator) {
+ snprintf(tmp_str, sizeof(tmp_str),
+ "\n%s translator has been "
+ "added to the server volume file. Please restart the"
+ " volume for enabling the translator",
+- debug_xlator);
++ added_xlator);
+ }
+
+ if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) {
+@@ -2394,7 +2484,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ cli_err("volume set: failed");
+ } else {
+ if (help_str == NULL) {
+- if (debug_xlator == NULL)
++ if (added_xlator == NULL)
+ cli_out("volume set: success");
+ else
+ cli_out("volume set: success%s", tmp_str);
+@@ -2408,7 +2498,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ out:
+ if (dict)
+ dict_unref(dict);
+- GF_FREE(debug_xlator);
++ GF_FREE(added_xlator);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+diff --git a/cli/src/cli.h b/cli/src/cli.h
+index 7b4f446..b5b69ea 100644
+--- a/cli/src/cli.h
++++ b/cli/src/cli.h
+@@ -502,8 +502,6 @@ cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno,
+ int
+ cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict,
+ char *key);
+-char *
+-is_server_debug_xlator(void *myframe);
+
+ int32_t
+ cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options,
+diff --git a/tests/basic/user-xlator.t b/tests/basic/user-xlator.t
+new file mode 100755
+index 0000000..a711f9f
+--- /dev/null
++++ b/tests/basic/user-xlator.t
+@@ -0,0 +1,65 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++
++#### patchy.dev.d-backends-patchy1.vol
++brick=${B0//\//-}
++SERVER_VOLFILE="/var/lib/glusterd/vols/${V0}/${V0}.${H0}.${brick:1}-${V0}1.vol"
++
++cleanup;
++
++TEST mkdir -p $B0/single-brick
++TEST mkdir -p ${GLUSTER_XLATOR_DIR}/user
++
++## deploy dummy user xlator
++TEST cp ${GLUSTER_XLATOR_DIR}/playground/template.so ${GLUSTER_XLATOR_DIR}/user/hoge.so
++
++TEST glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
++TEST $CLI volume set $V0 user.xlator.hoge posix
++TEST grep -q 'user/hoge' ${SERVER_VOLFILE}
++
++TEST $CLI volume set $V0 user.xlator.hoge.opt1 10
++TEST grep -q '"option opt1 10"' ${SERVER_VOLFILE}
++TEST $CLI volume set $V0 user.xlator.hoge.opt2 hogehoge
++TEST grep -q '"option opt2 hogehoge"' ${SERVER_VOLFILE}
++TEST $CLI volume set $V0 user.xlator.hoge.opt3 true
++TEST grep -q '"option opt3 true"' ${SERVER_VOLFILE}
++
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6
++
++TEST $CLI volume set $V0 user.xlator.hoge trash
++TEST grep -q 'user/hoge' ${SERVER_VOLFILE}
++
++TEST $CLI volume stop $V0
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6
++
++TEST ! $CLI volume set $V0 user.xlator.hoge unknown
++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} # When the CLI fails, the volfile is not modified.
++
++TEST $CLI volume stop $V0
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6
++
++#### teardown
++
++TEST rm -f ${GLUSTER_XLATOR_DIR}/user/hoge.so
++cleanup;
+diff --git a/tests/env.rc.in b/tests/env.rc.in
+index c7472a7..1f0ca88 100644
+--- a/tests/env.rc.in
++++ b/tests/env.rc.in
+@@ -40,3 +40,6 @@ export GLUSTER_LIBEXECDIR
+
+ RUN_NFS_TESTS=@BUILD_GNFS@
+ export RUN_NFS_TESTS
++
++GLUSTER_XLATOR_DIR=@libdir@/glusterfs/@PACKAGE_VERSION@/xlator
++export GLUSTER_XLATOR_DIR
+\ No newline at end of file
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 1920284..a242b5c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -45,6 +45,11 @@ struct gd_validate_reconf_opts {
+
+ extern struct volopt_map_entry glusterd_volopt_map[];
+
++struct check_and_add_user_xlator_t {
++ volgen_graph_t *graph;
++ char *volname;
++};
++
+ #define RPC_SET_OPT(XL, CLI_OPT, XLATOR_OPT, ERROR_CMD) \
+ do { \
+ char *_value = NULL; \
+@@ -2822,6 +2827,145 @@ out:
+ return ret;
+ }
+
++static gf_boolean_t
++check_user_xlator_position(dict_t *dict, char *key, data_t *value,
++ void *prev_xlname)
++{
++ if (strncmp(key, "user.xlator.", SLEN("user.xlator.")) != 0) {
++ return false;
++ }
++
++ if (fnmatch("user.xlator.*.*", key, 0) == 0) {
++ return false;
++ }
++
++ char *value_str = data_to_str(value);
++ if (!value_str) {
++ return false;
++ }
++
++ if (strcmp(value_str, prev_xlname) == 0) {
++ gf_log("glusterd", GF_LOG_INFO,
++ "found insert position of user-xlator(%s)", key);
++ return true;
++ }
++
++ return false;
++}
++
++static int
++set_user_xlator_option(dict_t *set_dict, char *key, data_t *value, void *data)
++{
++ xlator_t *xl = data;
++ char *optname = strrchr(key, '.') + 1;
++
++ gf_log("glusterd", GF_LOG_DEBUG, "set user xlator option %s = %s", key,
++ value->data);
++
++ return xlator_set_option(xl, optname, strlen(optname), data_to_str(value));
++}
++
++static int
++insert_user_xlator_to_graph(dict_t *set_dict, char *key, data_t *value,
++ void *action_data)
++{
++ int ret = -1;
++
++ struct check_and_add_user_xlator_t *data = action_data;
++
++ char *xlator_name = strrchr(key, '.') + 1; // user.xlator.<xlator_name>
++ char *xlator_option_matcher = NULL;
++ char *type = NULL;
++ xlator_t *xl = NULL;
++
++ // convert optkey to xlator type
++ if (gf_asprintf(&type, "user/%s", xlator_name) < 0) {
++ gf_log("glusterd", GF_LOG_ERROR, "failed to generate user-xlator type");
++ goto out;
++ }
++
++ gf_log("glusterd", GF_LOG_INFO, "add user xlator=%s to graph", type);
++
++ xl = volgen_graph_add(data->graph, type, data->volname);
++ if (!xl) {
++ goto out;
++ }
++
++ ret = gf_asprintf(&xlator_option_matcher, "user.xlator.%s.*", xlator_name);
++ if (ret < 0) {
++ gf_log("glusterd", GF_LOG_ERROR,
++ "failed to generate user-xlator option matcher");
++ goto out;
++ }
++
++ dict_foreach_fnmatch(set_dict, xlator_option_matcher,
++ set_user_xlator_option, xl);
++
++out:
++ if (type)
++ GF_FREE(type);
++ if (xlator_option_matcher)
++ GF_FREE(xlator_option_matcher);
++
++ return ret;
++}
++
++static int
++validate_user_xlator_position(dict_t *this, char *key, data_t *value,
++ void *unused)
++{
++ int ret = -1;
++ int i = 0;
++
++ if (!value)
++ goto out;
++
++ if (fnmatch("user.xlator.*.*", key, 0) == 0) {
++ ret = 0;
++ goto out;
++ }
++
++ char *value_str = data_to_str(value);
++ if (!value_str)
++ goto out;
++
++ int num_xlators = sizeof(server_graph_table) /
++ sizeof(server_graph_table[0]);
++ for (i = 0; i < num_xlators; i++) {
++ if (server_graph_table[i].dbg_key &&
++ strcmp(value_str, server_graph_table[i].dbg_key) == 0) {
++ ret = 0;
++ goto out;
++ }
++ }
++
++out:
++ if (ret == -1)
++ gf_log("glusterd", GF_LOG_ERROR, "invalid user xlator position %s = %s",
++ key, value->data);
++
++ return ret;
++}
++
++static int
++check_and_add_user_xl(volgen_graph_t *graph, dict_t *set_dict, char *volname,
++ char *prev_xlname)
++{
++ if (!prev_xlname)
++ goto out;
++
++ struct check_and_add_user_xlator_t data = {.graph = graph,
++ .volname = volname};
++
++ if (dict_foreach_match(set_dict, check_user_xlator_position, prev_xlname,
++ insert_user_xlator_to_graph, &data) < 0) {
++ return -1;
++ }
++
++out:
++ return 0;
++}
++
+ static int
+ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *set_dict, void *param)
+@@ -2831,6 +2975,12 @@ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ char *loglevel = NULL;
+ int i = 0;
+
++ if (dict_foreach_fnmatch(set_dict, "user.xlator.*",
++ validate_user_xlator_position, NULL) < 0) {
++ ret = -EINVAL;
++ goto out;
++ }
++
+ i = sizeof(server_graph_table) / sizeof(server_graph_table[0]) - 1;
+
+ while (i >= 0) {
+@@ -2848,6 +2998,11 @@ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ if (ret)
+ goto out;
+
++ ret = check_and_add_user_xl(graph, set_dict, volinfo->volname,
++ server_graph_table[i].dbg_key);
++ if (ret)
++ goto out;
++
+ i--;
+ }
+
+--
+1.8.3.1
+
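
The key-space convention this feature relies on, where user.xlator.<name> selects the insert position and user.xlator.<name>.<opt> carries an option for it, can be sketched in isolation. The helpers below mirror the fnmatch() classification used in the patch; they are illustrative, not the glusterd functions themselves.

#include <fnmatch.h>
#include <string.h>

/* Classify a volume-set key the way the patch does:
 *   "user.xlator.<name>"        -> position key (returns 1)
 *   "user.xlator.<name>.<opt>"  -> option key for that xlator (returns 0)
 *   anything else               -> not a user-xlator key (returns 0)
 */
static int
is_user_xlator_position_key(const char *key)
{
    if (fnmatch("user.xlator.*", key, 0) != 0)
        return 0;                     /* not a user.xlator key at all */
    if (fnmatch("user.xlator.*.*", key, 0) == 0)
        return 0;                     /* an option key, not a position */
    return 1;
}

/* For a position key, the xlator name is everything after the last dot. */
static const char *
user_xlator_name(const char *key)
{
    const char *dot = strrchr(key, '.');
    return dot ? dot + 1 : key;
}

In use, "gluster volume set <vol> user.xlator.hoge posix" positions the user/hoge xlator relative to posix in the brick graph, as the bundled tests/basic/user-xlator.t exercises.
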
diff --git a/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch b/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch
new file mode 100644
index 0000000..f6e0641
--- /dev/null
+++ b/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch
@@ -0,0 +1,64 @@
+From f3db0c99faf813e0f2e9ffcf599416555a59df1f Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Tue, 9 Feb 2021 16:43:35 +0530
+Subject: [PATCH 542/542] xlaotrs/mgmt: Fixing coverity issue 1445996
+
+Backport of https://github.com/gluster/glusterfs/pull/2148/commits/9785e96e0bdf6e60896570fdf5e4a6976a6f60ba
+
+Fixing "Null pointer dereferences"
+
+BUG: 1927235
+Change-Id: Idbc014e1302d2450f97bccd028681198c0d97424
+Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/237433
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index a242b5c..71aed08 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -2916,21 +2916,23 @@ validate_user_xlator_position(dict_t *this, char *key, data_t *value,
+ {
+ int ret = -1;
+ int i = 0;
++ char *value_str = NULL;
+
+ if (!value)
+ goto out;
+
++ value_str = data_to_str(value);
++ if (!value_str)
++ goto out;
++
+ if (fnmatch("user.xlator.*.*", key, 0) == 0) {
+ ret = 0;
+ goto out;
+ }
+
+- char *value_str = data_to_str(value);
+- if (!value_str)
+- goto out;
+-
+ int num_xlators = sizeof(server_graph_table) /
+ sizeof(server_graph_table[0]);
++
+ for (i = 0; i < num_xlators; i++) {
+ if (server_graph_table[i].dbg_key &&
+ strcmp(value_str, server_graph_table[i].dbg_key) == 0) {
+@@ -2942,7 +2944,7 @@ validate_user_xlator_position(dict_t *this, char *key, data_t *value,
+ out:
+ if (ret == -1)
+ gf_log("glusterd", GF_LOG_ERROR, "invalid user xlator position %s = %s",
+- key, value->data);
++ key, value_str);
+
+ return ret;
+ }
+--
+1.8.3.1
+
diff --git a/0543-glusterd-handle-custom-xlator-failure-cases.patch b/0543-glusterd-handle-custom-xlator-failure-cases.patch
new file mode 100644
index 0000000..c6194c7
--- /dev/null
+++ b/0543-glusterd-handle-custom-xlator-failure-cases.patch
@@ -0,0 +1,162 @@
+From 71fc5b7949e00c4448f5ec1291e756b201a70082 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Thu, 29 Apr 2021 18:34:57 +0530
+Subject: [PATCH 543/543] glusterd: handle custom xlator failure cases
+
+Problem-1:
+Custom xlator insertion was failing for those xlators in the brick graph
+whose dbg_key was NULL in server_graph_table. The git log shows that
+dbg_key was added in commit d1397dbd7d6cdbd2d81d5d36d608b6175d449db4
+for inserting debug xlators.
+
+Fix: It is fine to define dbg_key for all brick xlators below the
+server xlator.
+
+Problem-2:
+In the commit-op phase, glusterd_op_set_volume() updates the volinfo
+dict with the key-value pairs and then proceeds to create the volfiles.
+If any of the steps fails, the volinfo dict retains those key-value pairs
+until glusterd is restarted or `gluster vol reset $VOLNAME` is issued.
+
+Fix:
+Make a copy of the volinfo dict and, if any step of the set-volume
+logic fails, restore the dict to its original state.
+
+Backport of:
+> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2371
+> Change-Id: I9010dab33d0139b8e6d603308e331b6d220a4849
+> Updates: #2370
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+Change-Id: I9010dab33d0139b8e6d603308e331b6d220a4849
+BUG: 1953901
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/239889
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/basic/user-xlator.t | 16 ++++++++++++++--
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 16 ++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 14 +++++++-------
+ 3 files changed, 37 insertions(+), 9 deletions(-)
+
+diff --git a/tests/basic/user-xlator.t b/tests/basic/user-xlator.t
+index a711f9f..ed2d831 100755
+--- a/tests/basic/user-xlator.t
++++ b/tests/basic/user-xlator.t
+@@ -35,8 +35,18 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6
+
+-TEST $CLI volume set $V0 user.xlator.hoge trash
+-TEST grep -q 'user/hoge' ${SERVER_VOLFILE}
++# Test that the insertion at all positions between server and posix is successful.
++# It is not guaranteed that the brick process will start/work in all positions though.
++TESTS_EXPECTED_IN_LOOP=34
++declare -a brick_side_xlators=("decompounder" "io-stats" "quota" "index" "barrier"
++ "marker" "selinux" "io-threads" "upcall" "leases"
++ "read-only" "worm" "locks" "access-control"
++ "bitrot-stub" "changelog" "trash")
++for xlator in "${brick_side_xlators[@]}"
++ do
++ TEST_IN_LOOP $CLI volume set $V0 user.xlator.hoge $xlator
++ TEST_IN_LOOP grep -q 'user/hoge' ${SERVER_VOLFILE}
++ done
+
+ TEST $CLI volume stop $V0
+ TEST $CLI volume start $V0
+@@ -49,6 +59,8 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6
+
+ TEST ! $CLI volume set $V0 user.xlator.hoge unknown
+ TEST grep -q 'user/hoge' ${SERVER_VOLFILE} # When the CLI fails, the volfile is not modified.
++# User xlator insert failures must not prevent setting other volume options.
++TEST $CLI volume set $V0 storage.reserve 10%
+
+ TEST $CLI volume stop $V0
+ TEST $CLI volume start $V0
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 1e84f5f..893af29 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -2911,6 +2911,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ uint32_t new_op_version = 0;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_svc_t *svc = NULL;
++ dict_t *volinfo_dict_orig = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -2918,6 +2919,10 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ priv = this->private;
+ GF_ASSERT(priv);
+
++ volinfo_dict_orig = dict_new();
++ if (!volinfo_dict_orig)
++ goto out;
++
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &dict_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+@@ -2949,6 +2954,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+ goto out;
+ }
+
++ if (dict_copy(volinfo->dict, volinfo_dict_orig) == NULL) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
+ /* TODO: Remove this once v3.3 compatibility is not required */
+ check_op_version = dict_get_str_boolean(dict, "check-op-version",
+ _gf_false);
+@@ -3171,6 +3181,12 @@ out:
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ if (quorum_action)
+ glusterd_do_quorum_action();
++ if (ret < 0 && count > 1) {
++ if (dict_reset(volinfo->dict) == 0)
++ dict_copy(volinfo_dict_orig, volinfo->dict);
++ }
++ if (volinfo_dict_orig)
++ dict_unref(volinfo_dict_orig);
+ return ret;
+ }
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 71aed08..aa85bdb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -2706,24 +2706,24 @@ out:
+ static volgen_brick_xlator_t server_graph_table[] = {
+ {brick_graph_add_server, NULL},
+ {brick_graph_add_decompounder, "decompounder"},
+- {brick_graph_add_io_stats, "NULL"},
++ {brick_graph_add_io_stats, "io-stats"},
+ {brick_graph_add_sdfs, "sdfs"},
+ {brick_graph_add_namespace, "namespace"},
+- {brick_graph_add_cdc, NULL},
++ {brick_graph_add_cdc, "cdc" },
+ {brick_graph_add_quota, "quota"},
+ {brick_graph_add_index, "index"},
+- {brick_graph_add_barrier, NULL},
++ {brick_graph_add_barrier, "barrier" },
+ {brick_graph_add_marker, "marker"},
+ {brick_graph_add_selinux, "selinux"},
+ {brick_graph_add_fdl, "fdl"},
+ {brick_graph_add_iot, "io-threads"},
+ {brick_graph_add_upcall, "upcall"},
+ {brick_graph_add_leases, "leases"},
+- {brick_graph_add_pump, NULL},
+- {brick_graph_add_ro, NULL},
+- {brick_graph_add_worm, NULL},
++ {brick_graph_add_pump, "pump" },
++ {brick_graph_add_ro, "read-only" },
++ {brick_graph_add_worm, "worm" },
+ {brick_graph_add_locks, "locks"},
+- {brick_graph_add_acl, "acl"},
++ {brick_graph_add_acl, "access-control"},
+ {brick_graph_add_bitrot_stub, "bitrot-stub"},
+ {brick_graph_add_changelog, "changelog"},
+ #if USE_GFDB /* changetimerecorder depends on gfdb */
+--
+1.8.3.1
+
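Problem-2's fix is a plain snapshot-and-rollback on volinfo->dict: snapshot the dict before applying the options, and on any later failure reset it and copy the snapshot back. A standalone sketch of the pattern, assuming the libglusterfs dict API used in the hunk above; set_options_with_rollback() is a hypothetical wrapper, not a glusterd function:

    #include <errno.h>
    #include <glusterfs/dict.h>

    static int
    set_options_with_rollback(dict_t *target, dict_t *updates)
    {
        int ret = -1;
        dict_t *orig = dict_new(); /* snapshot of the pre-set state */

        if (!orig)
            goto out;
        if (dict_copy(target, orig) == NULL) {
            ret = -ENOMEM;
            goto out;
        }

        ret = (dict_copy(updates, target) != NULL) ? 0 : -ENOMEM;
        /* ... volfile generation would run here and may also fail ... */

    out:
        if (ret < 0 && orig) {
            /* Failure: drop the partial updates, restore the snapshot. */
            if (dict_reset(target) == 0)
                dict_copy(orig, target);
        }
        if (orig)
            dict_unref(orig);
        return ret;
    }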
diff --git a/0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch b/0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch
new file mode 100644
index 0000000..87f7be7
--- /dev/null
+++ b/0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch
@@ -0,0 +1,2389 @@
+From e0ee7d755bc90c1f829e4ec3b0d5dfd9f9d480b9 Mon Sep 17 00:00:00 2001
+From: Tamar Shacked <tshacked@redhat.com>
+Date: Mon, 26 Jul 2021 16:59:28 +0300
+Subject: rhel-9.0-beta build: fixing gcc-10 and LTO errors
+
+libgfapi symbol versions break LTO in Fedora rawhide/f33
+Upstream:
+> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24666/
+> fixes: #1352
+> Change-Id: I05fda580afacfff1bfc07be810dd1afc08a92fb8
+> Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+
+Duplicate definitions of cli_default_conn_timeout and cli_ten_minutes_timeout
+Upstream:
+> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/23956/
+> Change-Id: I54ea485736a4910254eeb21222ad263721cdef3c
+> Fixes: bz#1193929
+> Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+
+BUG: 1939340
+
+Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+Change-Id: If40e4733d12713d2821653cdbce7b6e931f12140
+---
+ api/src/glfs-fops.c | 322 ++++++++++++------------------------
+ api/src/glfs-handleops.c | 99 ++++-------
+ api/src/glfs-internal.h | 27 ++-
+ api/src/glfs-mgmt.c | 3 +-
+ api/src/glfs-resolve.c | 14 +-
+ api/src/glfs.c | 99 +++++------
+ cli/src/cli-cmd-global.c | 1 -
+ cli/src/cli-cmd-misc.c | 4 -
+ cli/src/cli-cmd-peer.c | 4 -
+ cli/src/cli-cmd-snapshot.c | 2 -
+ cli/src/cli-cmd-system.c | 4 -
+ cli/src/cli-cmd-volume.c | 4 -
+ cli/src/cli-quotad-client.c | 3 -
+ cli/src/cli-rpc-ops.c | 5 +-
+ cli/src/cli.c | 4 +
+ cli/src/cli.h | 10 +-
+ 16 files changed, 223 insertions(+), 382 deletions(-)
+
+diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
+index 6dc3b66c8..e772aa77a 100644
+--- a/api/src/glfs-fops.c
++++ b/api/src/glfs-fops.c
+@@ -292,6 +292,7 @@ glfs_iatt_to_statx(struct glfs *fs, const struct iatt *iatt,
+ statx->glfs_st_attributes_mask = 0;
+ }
+
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_iatt_from_statx, 6.0)
+ void
+ priv_glfs_iatt_from_statx(struct iatt *iatt, const struct glfs_stat *statx)
+ {
+@@ -371,7 +372,6 @@ priv_glfs_iatt_from_statx(struct iatt *iatt, const struct glfs_stat *statx)
+ iatt->ia_attributes = statx->glfs_st_attributes;
+ iatt->ia_attributes_mask = statx->glfs_st_attributes_mask;
+ }
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_iatt_from_statx, 6.0);
+
+ void
+ glfsflags_from_gfapiflags(struct glfs_stat *stat, int *glvalid)
+@@ -415,6 +415,7 @@ glfs_loc_unlink(loc_t *loc)
+ return 0;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_open, 3.4.0)
+ struct glfs_fd *
+ pub_glfs_open(struct glfs *fs, const char *path, int flags)
+ {
+@@ -509,8 +510,7 @@ invalid_fs:
+ return glfd;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_open, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_close, 3.4.0)
+ int
+ pub_glfs_close(struct glfs_fd *glfd)
+ {
+@@ -565,8 +565,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_close, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lstat, 3.4.0)
+ int
+ pub_glfs_lstat(struct glfs *fs, const char *path, struct stat *stat)
+ {
+@@ -607,8 +606,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lstat, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_stat, 3.4.0)
+ int
+ pub_glfs_stat(struct glfs *fs, const char *path, struct stat *stat)
+ {
+@@ -649,8 +647,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_stat, 3.4.0);
+-
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_statx, 6.0)
+ int
+ priv_glfs_statx(struct glfs *fs, const char *path, const unsigned int mask,
+ struct glfs_stat *statxbuf)
+@@ -704,8 +701,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_statx, 6.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fstat, 3.4.0)
+ int
+ pub_glfs_fstat(struct glfs_fd *glfd, struct stat *stat)
+ {
+@@ -754,8 +750,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fstat, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_creat, 3.4.0)
+ struct glfs_fd *
+ pub_glfs_creat(struct glfs *fs, const char *path, int flags, mode_t mode)
+ {
+@@ -902,8 +897,6 @@ invalid_fs:
+ return glfd;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_creat, 3.4.0);
+-
+ #ifdef HAVE_SEEK_HOLE
+ static int
+ glfs_seek(struct glfs_fd *glfd, off_t offset, int whence)
+@@ -957,6 +950,7 @@ out:
+ }
+ #endif
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lseek, 3.4.0)
+ off_t
+ pub_glfs_lseek(struct glfs_fd *glfd, off_t offset, int whence)
+ {
+@@ -1012,8 +1006,6 @@ invalid_fs:
+ return -1;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lseek, 3.4.0);
+-
+ static ssize_t
+ glfs_preadv_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags, struct glfs_stat *poststat)
+@@ -1091,6 +1083,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv, 3.4.0)
+ ssize_t
+ pub_glfs_preadv(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+@@ -1098,8 +1091,7 @@ pub_glfs_preadv(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ return glfs_preadv_common(glfd, iovec, iovcnt, offset, flags, NULL);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read, 3.4.0)
+ ssize_t
+ pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags)
+ {
+@@ -1116,8 +1108,7 @@ pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_pread34, glfs_pread, 3.4.0)
+ ssize_t
+ pub_glfs_pread34(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags)
+@@ -1135,8 +1126,7 @@ pub_glfs_pread34(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_pread34, glfs_pread, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread, 6.0)
+ ssize_t
+ pub_glfs_pread(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags, struct glfs_stat *poststat)
+@@ -1154,8 +1144,7 @@ pub_glfs_pread(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread, 6.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv, 3.4.0)
+ ssize_t
+ pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+@@ -1167,8 +1156,6 @@ pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv, 3.4.0);
+-
+ struct glfs_io {
+ struct glfs_fd *glfd;
+ int op;
+@@ -1370,6 +1357,7 @@ invalid_fs:
+ return -1;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_preadv_async34, glfs_preadv_async, 3.4.0)
+ int
+ pub_glfs_preadv_async34(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk34 fn,
+@@ -1379,8 +1367,7 @@ pub_glfs_preadv_async34(struct glfs_fd *glfd, const struct iovec *iovec,
+ (void *)fn, data);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_preadv_async34, glfs_preadv_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv_async, 6.0)
+ int
+ pub_glfs_preadv_async(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk fn,
+@@ -1390,8 +1377,7 @@ pub_glfs_preadv_async(struct glfs_fd *glfd, const struct iovec *iovec,
+ _gf_false, fn, data);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv_async, 6.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_read_async34, glfs_read_async, 3.4.0)
+ int
+ pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ glfs_io_cbk34 fn, void *data)
+@@ -1410,8 +1396,7 @@ pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_read_async34, glfs_read_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read_async, 6.0)
+ int
+ pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data)
+@@ -1430,8 +1415,7 @@ pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read_async, 6.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_pread_async34, glfs_pread_async, 3.4.0)
+ int
+ pub_glfs_pread_async34(struct glfs_fd *glfd, void *buf, size_t count,
+ off_t offset, int flags, glfs_io_cbk34 fn, void *data)
+@@ -1450,8 +1434,7 @@ pub_glfs_pread_async34(struct glfs_fd *glfd, void *buf, size_t count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_pread_async34, glfs_pread_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread_async, 6.0)
+ int
+ pub_glfs_pread_async(struct glfs_fd *glfd, void *buf, size_t count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+@@ -1470,8 +1453,7 @@ pub_glfs_pread_async(struct glfs_fd *glfd, void *buf, size_t count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread_async, 6.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_readv_async34, glfs_readv_async, 3.4.0)
+ int
+ pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk34 fn, void *data)
+@@ -1483,8 +1465,7 @@ pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_readv_async34, glfs_readv_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv_async, 6.0)
+ int
+ pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+@@ -1496,8 +1477,6 @@ pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv_async, 6.0);
+-
+ static ssize_t
+ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags, struct glfs_stat *prestat,
+@@ -1591,6 +1570,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, 6.0)
+ ssize_t
+ pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+ struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+@@ -1744,8 +1724,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, 6.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev, 3.4.0)
+ ssize_t
+ pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+@@ -1753,8 +1732,7 @@ pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ return glfs_pwritev_common(glfd, iovec, iovcnt, offset, flags, NULL, NULL);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write, 3.4.0)
+ ssize_t
+ pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags)
+ {
+@@ -1771,8 +1749,7 @@ pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev, 3.4.0)
+ ssize_t
+ pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+@@ -1784,8 +1761,7 @@ pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_pwrite34, glfs_pwrite, 3.4.0)
+ ssize_t
+ pub_glfs_pwrite34(struct glfs_fd *glfd, const void *buf, size_t count,
+ off_t offset, int flags)
+@@ -1803,8 +1779,7 @@ pub_glfs_pwrite34(struct glfs_fd *glfd, const void *buf, size_t count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_pwrite34, glfs_pwrite, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite, 6.0)
+ ssize_t
+ pub_glfs_pwrite(struct glfs_fd *glfd, const void *buf, size_t count,
+ off_t offset, int flags, struct glfs_stat *prestat,
+@@ -1823,8 +1798,6 @@ pub_glfs_pwrite(struct glfs_fd *glfd, const void *buf, size_t count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite, 6.0);
+-
+ extern glfs_t *
+ pub_glfs_from_glfd(glfs_fd_t *);
+
+@@ -1943,6 +1916,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_pwritev_async34, glfs_pwritev_async, 3.4.0)
+ int
+ pub_glfs_pwritev_async34(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk34 fn,
+@@ -1952,8 +1926,7 @@ pub_glfs_pwritev_async34(struct glfs_fd *glfd, const struct iovec *iovec,
+ _gf_true, (void *)fn, data);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_pwritev_async34, glfs_pwritev_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev_async, 6.0)
+ int
+ pub_glfs_pwritev_async(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk fn,
+@@ -1963,8 +1936,7 @@ pub_glfs_pwritev_async(struct glfs_fd *glfd, const struct iovec *iovec,
+ _gf_false, fn, data);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev_async, 6.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_write_async34, glfs_write_async, 3.4.0)
+ int
+ pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count,
+ int flags, glfs_io_cbk34 fn, void *data)
+@@ -1983,8 +1955,7 @@ pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_write_async34, glfs_write_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write_async, 6.0)
+ int
+ pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count,
+ int flags, glfs_io_cbk fn, void *data)
+@@ -2003,8 +1974,7 @@ pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write_async, 6.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_pwrite_async34, glfs_pwrite_async, 3.4.0)
+ int
+ pub_glfs_pwrite_async34(struct glfs_fd *glfd, const void *buf, int count,
+ off_t offset, int flags, glfs_io_cbk34 fn, void *data)
+@@ -2023,8 +1993,7 @@ pub_glfs_pwrite_async34(struct glfs_fd *glfd, const void *buf, int count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_pwrite_async34, glfs_pwrite_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite_async, 6.0)
+ int
+ pub_glfs_pwrite_async(struct glfs_fd *glfd, const void *buf, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+@@ -2043,8 +2012,7 @@ pub_glfs_pwrite_async(struct glfs_fd *glfd, const void *buf, int count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite_async, 6.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_writev_async34, glfs_writev_async, 3.4.0)
+ int
+ pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov,
+ int count, int flags, glfs_io_cbk34 fn, void *data)
+@@ -2056,8 +2024,7 @@ pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_writev_async34, glfs_writev_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev_async, 6.0)
+ int
+ pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+@@ -2069,8 +2036,6 @@ pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev_async, 6.0);
+-
+ static int
+ glfs_fsync_common(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+@@ -2135,14 +2100,14 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_fsync34, glfs_fsync, 3.4.0)
+ int
+ pub_glfs_fsync34(struct glfs_fd *glfd)
+ {
+ return glfs_fsync_common(glfd, NULL, NULL);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_fsync34, glfs_fsync, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync, 6.0)
+ int
+ pub_glfs_fsync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+@@ -2150,8 +2115,6 @@ pub_glfs_fsync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ return glfs_fsync_common(glfd, prestat, poststat);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync, 6.0);
+-
+ static int
+ glfs_fsync_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+@@ -2232,6 +2195,7 @@ out:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_fsync_async34, glfs_fsync_async, 3.4.0)
+ int
+ pub_glfs_fsync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data)
+ {
+@@ -2248,8 +2212,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_fsync_async34, glfs_fsync_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync_async, 6.0)
+ int
+ pub_glfs_fsync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+ {
+@@ -2266,8 +2229,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync_async, 6.0);
+-
+ static int
+ glfs_fdatasync_common(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+@@ -2332,14 +2293,14 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_fdatasync34, glfs_fdatasync, 3.4.0)
+ int
+ pub_glfs_fdatasync34(struct glfs_fd *glfd)
+ {
+ return glfs_fdatasync_common(glfd, NULL, NULL);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_fdatasync34, glfs_fdatasync, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync, 6.0)
+ int
+ pub_glfs_fdatasync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+@@ -2347,8 +2308,7 @@ pub_glfs_fdatasync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ return glfs_fdatasync_common(glfd, prestat, poststat);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync, 6.0);
+-
++GFAPI_SYMVER_PUBLIC(glfs_fdatasync_async34, glfs_fdatasync_async, 3.4.0)
+ int
+ pub_glfs_fdatasync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data)
+ {
+@@ -2365,8 +2325,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_fdatasync_async34, glfs_fdatasync_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync_async, 6.0)
+ int
+ pub_glfs_fdatasync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+ {
+@@ -2383,8 +2342,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync_async, 6.0);
+-
+ static int
+ glfs_ftruncate_common(struct glfs_fd *glfd, off_t offset,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+@@ -2450,14 +2407,14 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_ftruncate34, glfs_ftruncate, 3.4.0)
+ int
+ pub_glfs_ftruncate34(struct glfs_fd *glfd, off_t offset)
+ {
+ return glfs_ftruncate_common(glfd, offset, NULL, NULL);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_ftruncate34, glfs_ftruncate, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate, 6.0)
+ int
+ pub_glfs_ftruncate(struct glfs_fd *glfd, off_t offset,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+@@ -2465,8 +2422,7 @@ pub_glfs_ftruncate(struct glfs_fd *glfd, off_t offset,
+ return glfs_ftruncate_common(glfd, offset, prestat, poststat);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate, 6.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_truncate, 3.7.15)
+ int
+ pub_glfs_truncate(struct glfs *fs, const char *path, off_t length)
+ {
+@@ -2512,8 +2468,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_truncate, 3.7.15);
+-
+ static int
+ glfs_ftruncate_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+@@ -2606,6 +2560,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_ftruncate_async34, glfs_ftruncate_async, 3.4.0)
+ int
+ pub_glfs_ftruncate_async34(struct glfs_fd *glfd, off_t offset, glfs_io_cbk34 fn,
+ void *data)
+@@ -2614,8 +2569,7 @@ pub_glfs_ftruncate_async34(struct glfs_fd *glfd, off_t offset, glfs_io_cbk34 fn,
+ data);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_ftruncate_async34, glfs_ftruncate_async, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate_async, 6.0)
+ int
+ pub_glfs_ftruncate_async(struct glfs_fd *glfd, off_t offset, glfs_io_cbk fn,
+ void *data)
+@@ -2623,8 +2577,7 @@ pub_glfs_ftruncate_async(struct glfs_fd *glfd, off_t offset, glfs_io_cbk fn,
+ return glfs_ftruncate_async_common(glfd, offset, _gf_false, fn, data);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate_async, 6.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_access, 3.4.0)
+ int
+ pub_glfs_access(struct glfs *fs, const char *path, int mode)
+ {
+@@ -2670,8 +2623,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_access, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_symlink, 3.4.0)
+ int
+ pub_glfs_symlink(struct glfs *fs, const char *data, const char *path)
+ {
+@@ -2761,8 +2713,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_symlink, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readlink, 3.4.0)
+ int
+ pub_glfs_readlink(struct glfs *fs, const char *path, char *buf, size_t bufsiz)
+ {
+@@ -2819,8 +2770,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readlink, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mknod, 3.4.0)
+ int
+ pub_glfs_mknod(struct glfs *fs, const char *path, mode_t mode, dev_t dev)
+ {
+@@ -2910,8 +2860,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mknod, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mkdir, 3.4.0)
+ int
+ pub_glfs_mkdir(struct glfs *fs, const char *path, mode_t mode)
+ {
+@@ -3001,8 +2950,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mkdir, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unlink, 3.4.0)
+ int
+ pub_glfs_unlink(struct glfs *fs, const char *path)
+ {
+@@ -3058,8 +3006,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unlink, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rmdir, 3.4.0)
+ int
+ pub_glfs_rmdir(struct glfs *fs, const char *path)
+ {
+@@ -3114,8 +3061,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rmdir, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rename, 3.4.0)
+ int
+ pub_glfs_rename(struct glfs *fs, const char *oldpath, const char *newpath)
+ {
+@@ -3204,8 +3150,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rename, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_link, 3.4.0)
+ int
+ pub_glfs_link(struct glfs *fs, const char *oldpath, const char *newpath)
+ {
+@@ -3291,8 +3236,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_link, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_opendir, 3.4.0)
+ struct glfs_fd *
+ pub_glfs_opendir(struct glfs *fs, const char *path)
+ {
+@@ -3373,8 +3317,7 @@ invalid_fs:
+ return glfd;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_opendir, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_closedir, 3.4.0)
+ int
+ pub_glfs_closedir(struct glfs_fd *glfd)
+ {
+@@ -3395,16 +3338,14 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_closedir, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0)
+ long
+ pub_glfs_telldir(struct glfs_fd *fd)
+ {
+ return fd->offset;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_seekdir, 3.4.0)
+ void
+ pub_glfs_seekdir(struct glfs_fd *fd, long offset)
+ {
+@@ -3433,8 +3374,6 @@ pub_glfs_seekdir(struct glfs_fd *fd, long offset)
+ */
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_seekdir, 3.4.0);
+-
+ static int
+ glfs_discard_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+@@ -3525,6 +3464,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_discard_async35, glfs_discard_async, 3.5.0)
+ int
+ pub_glfs_discard_async35(struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk34 fn, void *data)
+@@ -3533,8 +3473,7 @@ pub_glfs_discard_async35(struct glfs_fd *glfd, off_t offset, size_t len,
+ data);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_discard_async35, glfs_discard_async, 3.5.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard_async, 6.0)
+ int
+ pub_glfs_discard_async(struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk fn, void *data)
+@@ -3542,8 +3481,6 @@ pub_glfs_discard_async(struct glfs_fd *glfd, off_t offset, size_t len,
+ return glfs_discard_async_common(glfd, offset, len, _gf_false, fn, data);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard_async, 6.0);
+-
+ static int
+ glfs_zerofill_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+@@ -3636,6 +3573,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_zerofill_async35, glfs_zerofill_async, 3.5.0)
+ int
+ pub_glfs_zerofill_async35(struct glfs_fd *glfd, off_t offset, off_t len,
+ glfs_io_cbk34 fn, void *data)
+@@ -3644,8 +3582,7 @@ pub_glfs_zerofill_async35(struct glfs_fd *glfd, off_t offset, off_t len,
+ data);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_zerofill_async35, glfs_zerofill_async, 3.5.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill_async, 6.0)
+ int
+ pub_glfs_zerofill_async(struct glfs_fd *glfd, off_t offset, off_t len,
+ glfs_io_cbk fn, void *data)
+@@ -3653,8 +3590,6 @@ pub_glfs_zerofill_async(struct glfs_fd *glfd, off_t offset, off_t len,
+ return glfs_zerofill_async_common(glfd, offset, len, _gf_false, fn, data);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill_async, 6.0);
+-
+ void
+ gf_dirent_to_dirent(gf_dirent_t *gf_dirent, struct dirent *dirent)
+ {
+@@ -3814,6 +3749,7 @@ unlock:
+ return buf;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus_r, 3.4.0)
+ int
+ pub_glfs_readdirplus_r(struct glfs_fd *glfd, struct stat *stat,
+ struct dirent *ext, struct dirent **res)
+@@ -3869,8 +3805,7 @@ invalid_fs:
+ return -1;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus_r, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir_r, 3.4.0)
+ int
+ pub_glfs_readdir_r(struct glfs_fd *glfd, struct dirent *buf,
+ struct dirent **res)
+@@ -3878,8 +3813,7 @@ pub_glfs_readdir_r(struct glfs_fd *glfd, struct dirent *buf,
+ return pub_glfs_readdirplus_r(glfd, 0, buf, res);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir_r, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus, 3.5.0)
+ struct dirent *
+ pub_glfs_readdirplus(struct glfs_fd *glfd, struct stat *stat)
+ {
+@@ -3893,16 +3827,14 @@ pub_glfs_readdirplus(struct glfs_fd *glfd, struct stat *stat)
+ return res;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus, 3.5.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir, 3.5.0)
+ struct dirent *
+ pub_glfs_readdir(struct glfs_fd *glfd)
+ {
+ return pub_glfs_readdirplus(glfd, NULL);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir, 3.5.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_statvfs, 3.4.0)
+ int
+ pub_glfs_statvfs(struct glfs *fs, const char *path, struct statvfs *buf)
+ {
+@@ -3948,8 +3880,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_statvfs, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setattr, 6.0)
+ int
+ pub_glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+ int follow)
+@@ -4009,8 +3940,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setattr, 6.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetattr, 6.0)
+ int
+ pub_glfs_fsetattr(struct glfs_fd *glfd, struct glfs_stat *stat)
+ {
+@@ -4063,8 +3993,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetattr, 6.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chmod, 3.4.0)
+ int
+ pub_glfs_chmod(struct glfs *fs, const char *path, mode_t mode)
+ {
+@@ -4081,8 +4010,7 @@ pub_glfs_chmod(struct glfs *fs, const char *path, mode_t mode)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chmod, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchmod, 3.4.0)
+ int
+ pub_glfs_fchmod(struct glfs_fd *glfd, mode_t mode)
+ {
+@@ -4099,8 +4027,7 @@ pub_glfs_fchmod(struct glfs_fd *glfd, mode_t mode)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchmod, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chown, 3.4.0)
+ int
+ pub_glfs_chown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+ {
+@@ -4125,8 +4052,7 @@ pub_glfs_chown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chown, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lchown, 3.4.0)
+ int
+ pub_glfs_lchown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+ {
+@@ -4151,8 +4077,7 @@ pub_glfs_lchown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lchown, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchown, 3.4.0)
+ int
+ pub_glfs_fchown(struct glfs_fd *glfd, uid_t uid, gid_t gid)
+ {
+@@ -4177,8 +4102,7 @@ pub_glfs_fchown(struct glfs_fd *glfd, uid_t uid, gid_t gid)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchown, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_utimens, 3.4.0)
+ int
+ pub_glfs_utimens(struct glfs *fs, const char *path,
+ const struct timespec times[2])
+@@ -4198,8 +4122,7 @@ pub_glfs_utimens(struct glfs *fs, const char *path,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_utimens, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lutimens, 3.4.0)
+ int
+ pub_glfs_lutimens(struct glfs *fs, const char *path,
+ const struct timespec times[2])
+@@ -4219,8 +4142,7 @@ pub_glfs_lutimens(struct glfs *fs, const char *path,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lutimens, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_futimens, 3.4.0)
+ int
+ pub_glfs_futimens(struct glfs_fd *glfd, const struct timespec times[2])
+ {
+@@ -4239,8 +4161,6 @@ pub_glfs_futimens(struct glfs_fd *glfd, const struct timespec times[2])
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_futimens, 3.4.0);
+-
+ int
+ glfs_getxattr_process(void *value, size_t size, dict_t *xattr, const char *name)
+ {
+@@ -4340,6 +4260,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getxattr, 3.4.0)
+ ssize_t
+ pub_glfs_getxattr(struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+@@ -4347,8 +4268,7 @@ pub_glfs_getxattr(struct glfs *fs, const char *path, const char *name,
+ return glfs_getxattr_common(fs, path, name, value, size, 1);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getxattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lgetxattr, 3.4.0)
+ ssize_t
+ pub_glfs_lgetxattr(struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+@@ -4356,8 +4276,7 @@ pub_glfs_lgetxattr(struct glfs *fs, const char *path, const char *name,
+ return glfs_getxattr_common(fs, path, name, value, size, 0);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lgetxattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fgetxattr, 3.4.0)
+ ssize_t
+ pub_glfs_fgetxattr(struct glfs_fd *glfd, const char *name, void *value,
+ size_t size)
+@@ -4420,8 +4339,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fgetxattr, 3.4.0);
+-
+ int
+ glfs_listxattr_process(void *value, size_t size, dict_t *xattr)
+ {
+@@ -4505,22 +4422,21 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_listxattr, 3.4.0)
+ ssize_t
+ pub_glfs_listxattr(struct glfs *fs, const char *path, void *value, size_t size)
+ {
+ return glfs_listxattr_common(fs, path, value, size, 1);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_listxattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_llistxattr, 3.4.0)
+ ssize_t
+ pub_glfs_llistxattr(struct glfs *fs, const char *path, void *value, size_t size)
+ {
+ return glfs_listxattr_common(fs, path, value, size, 0);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_llistxattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_flistxattr, 3.4.0)
+ ssize_t
+ pub_glfs_flistxattr(struct glfs_fd *glfd, void *value, size_t size)
+ {
+@@ -4570,8 +4486,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_flistxattr, 3.4.0);
+-
+ int
+ glfs_setxattr_common(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags, int follow)
+@@ -4651,6 +4565,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setxattr, 3.4.0)
+ int
+ pub_glfs_setxattr(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+@@ -4658,8 +4573,7 @@ pub_glfs_setxattr(struct glfs *fs, const char *path, const char *name,
+ return glfs_setxattr_common(fs, path, name, value, size, flags, 1);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setxattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lsetxattr, 3.4.0)
+ int
+ pub_glfs_lsetxattr(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+@@ -4667,8 +4581,7 @@ pub_glfs_lsetxattr(struct glfs *fs, const char *path, const char *name,
+ return glfs_setxattr_common(fs, path, name, value, size, flags, 0);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lsetxattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetxattr, 3.4.0)
+ int
+ pub_glfs_fsetxattr(struct glfs_fd *glfd, const char *name, const void *value,
+ size_t size, int flags)
+@@ -4743,8 +4656,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetxattr, 3.4.0);
+-
+ int
+ glfs_removexattr_common(struct glfs *fs, const char *path, const char *name,
+ int follow)
+@@ -4795,22 +4706,21 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_removexattr, 3.4.0)
+ int
+ pub_glfs_removexattr(struct glfs *fs, const char *path, const char *name)
+ {
+ return glfs_removexattr_common(fs, path, name, 1);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_removexattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lremovexattr, 3.4.0)
+ int
+ pub_glfs_lremovexattr(struct glfs *fs, const char *path, const char *name)
+ {
+ return glfs_removexattr_common(fs, path, name, 0);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lremovexattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fremovexattr, 3.4.0)
+ int
+ pub_glfs_fremovexattr(struct glfs_fd *glfd, const char *name)
+ {
+@@ -4853,8 +4763,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fremovexattr, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fallocate, 3.5.0)
+ int
+ pub_glfs_fallocate(struct glfs_fd *glfd, int keep_size, off_t offset,
+ size_t len)
+@@ -4905,8 +4814,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fallocate, 3.5.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard, 3.5.0)
+ int
+ pub_glfs_discard(struct glfs_fd *glfd, off_t offset, size_t len)
+ {
+@@ -4956,8 +4864,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard, 3.5.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill, 3.5.0)
+ int
+ pub_glfs_zerofill(struct glfs_fd *glfd, off_t offset, off_t len)
+ {
+@@ -5005,8 +4912,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill, 3.5.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chdir, 3.4.0)
+ int
+ pub_glfs_chdir(struct glfs *fs, const char *path)
+ {
+@@ -5056,8 +4962,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chdir, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchdir, 3.4.0)
+ int
+ pub_glfs_fchdir(struct glfs_fd *glfd)
+ {
+@@ -5109,8 +5014,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchdir, 3.4.0);
+-
+ static gf_boolean_t warn_realpath = _gf_true; /* log once */
+
+ static char *
+@@ -5193,22 +5096,21 @@ invalid_fs:
+ return retpath;
+ }
+
++GFAPI_SYMVER_PUBLIC(glfs_realpath34, glfs_realpath, 3.4.0)
+ char *
+ pub_glfs_realpath34(struct glfs *fs, const char *path, char *resolved_path)
+ {
+ return glfs_realpath_common(fs, path, resolved_path, _gf_true);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_realpath34, glfs_realpath, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_realpath, 3.7.17)
+ char *
+ pub_glfs_realpath(struct glfs *fs, const char *path, char *resolved_path)
+ {
+ return glfs_realpath_common(fs, path, resolved_path, _gf_false);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_realpath, 3.7.17);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getcwd, 3.4.0)
+ char *
+ pub_glfs_getcwd(struct glfs *fs, char *buf, size_t n)
+ {
+@@ -5257,8 +5159,6 @@ invalid_fs:
+ return buf;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getcwd, 3.4.0);
+-
+ static void
+ gf_flock_to_flock(struct gf_flock *gf_flock, struct flock *flock)
+ {
+@@ -5367,6 +5267,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_file_lock, 4.0.0)
+ int
+ pub_glfs_file_lock(struct glfs_fd *glfd, int cmd, struct flock *flock,
+ glfs_lock_mode_t lk_mode)
+@@ -5404,16 +5305,14 @@ out:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_file_lock, 4.0.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0)
+ int
+ pub_glfs_posix_lock(struct glfs_fd *glfd, int cmd, struct flock *flock)
+ {
+ return glfs_lock_common(glfd, cmd, flock, NULL);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7)
+ int
+ pub_glfs_fd_set_lkowner(struct glfs_fd *glfd, void *data, int len)
+ {
+@@ -5449,8 +5348,8 @@ out:
+ invalid_fs:
+ return ret;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dup, 3.4.0)
+ struct glfs_fd *
+ pub_glfs_dup(struct glfs_fd *glfd)
+ {
+@@ -5501,8 +5400,6 @@ invalid_fs:
+ return dupfd;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dup, 3.4.0);
+-
+ static void
+ glfs_enqueue_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+ {
+@@ -5986,6 +5883,7 @@ out:
+ * Otherwise all the upcall events are queued up in a list
+ * to be read/polled by the applications.
+ */
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_process_upcall_event, 3.7.0)
+ void
+ priv_glfs_process_upcall_event(struct glfs *fs, void *data)
+ {
+@@ -6053,7 +5951,6 @@ out:
+ err:
+ return;
+ }
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_process_upcall_event, 3.7.0);
+
+ ssize_t
+ glfs_anonymous_pwritev(struct glfs *fs, struct glfs_object *object,
+@@ -6241,6 +6138,7 @@ glfs_release_xreaddirp_stat(void *ptr)
+ * Given glfd of a directory, this function does readdirp and returns
+ * xstat along with dirents.
+ */
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0)
+ int
+ pub_glfs_xreaddirplus_r(struct glfs_fd *glfd, uint32_t flags,
+ struct glfs_xreaddirp_stat **xstat_p,
+@@ -6349,8 +6247,8 @@ out:
+ invalid_fs:
+ return -1;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0)
+ struct stat *
+ pub_glfs_xreaddirplus_get_stat(struct glfs_xreaddirp_stat *xstat)
+ {
+@@ -6366,7 +6264,6 @@ pub_glfs_xreaddirplus_get_stat(struct glfs_xreaddirp_stat *xstat)
+ out:
+ return NULL;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0);
+
+ void
+ gf_lease_to_glfs_lease(struct gf_lease *gf_lease, struct glfs_lease *lease)
+@@ -6386,6 +6283,7 @@ glfs_lease_to_gf_lease(struct glfs_lease *lease, struct gf_lease *gf_lease)
+ memcpy(gf_lease->lease_id, lease->lease_id, LEASE_ID_SIZE);
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lease, 4.0.0)
+ int
+ pub_glfs_lease(struct glfs_fd *glfd, struct glfs_lease *lease,
+ glfs_recall_cbk fn, void *data)
+@@ -6487,5 +6385,3 @@ out:
+ invalid_fs:
+ return ret;
+ }
+-
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lease, 4.0.0);
+diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c
+index 7b8ff1468..4315a34d2 100644
+--- a/api/src/glfs-handleops.c
++++ b/api/src/glfs-handleops.c
+@@ -60,6 +60,7 @@ glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt,
+ return;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lookupat, 3.7.4)
+ struct glfs_object *
+ pub_glfs_h_lookupat(struct glfs *fs, struct glfs_object *parent,
+ const char *path, struct stat *stat, int follow)
+@@ -126,8 +127,7 @@ invalid_fs:
+ return object;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lookupat, 3.7.4);
+-
++GFAPI_SYMVER_PUBLIC(glfs_h_lookupat34, glfs_h_lookupat, 3.4.2)
+ struct glfs_object *
+ pub_glfs_h_lookupat34(struct glfs *fs, struct glfs_object *parent,
+ const char *path, struct stat *stat)
+@@ -135,8 +135,7 @@ pub_glfs_h_lookupat34(struct glfs *fs, struct glfs_object *parent,
+ return pub_glfs_h_lookupat(fs, parent, path, stat, 0);
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_h_lookupat34, glfs_h_lookupat, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_statfs, 3.7.0)
+ int
+ pub_glfs_h_statfs(struct glfs *fs, struct glfs_object *object,
+ struct statvfs *statvfs)
+@@ -194,8 +193,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_statfs, 3.7.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_stat, 3.4.2)
+ int
+ pub_glfs_h_stat(struct glfs *fs, struct glfs_object *object, struct stat *stat)
+ {
+@@ -259,8 +257,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_stat, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getattrs, 3.4.2)
+ int
+ pub_glfs_h_getattrs(struct glfs *fs, struct glfs_object *object,
+ struct stat *stat)
+@@ -317,8 +314,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getattrs, 3.4.2);
+-
+ int
+ glfs_h_getxattrs_common(struct glfs *fs, struct glfs_object *object,
+ dict_t **xattr, const char *name,
+@@ -380,6 +375,7 @@ out:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getxattrs, 3.5.1)
+ int
+ pub_glfs_h_getxattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name, void *value, size_t size)
+@@ -416,8 +412,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getxattrs, 3.5.1);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setattrs, 3.4.2)
+ int
+ pub_glfs_h_setattrs(struct glfs *fs, struct glfs_object *object,
+ struct stat *stat, int valid)
+@@ -480,8 +475,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setattrs, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setxattrs, 3.5.0)
+ int
+ pub_glfs_h_setxattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name, const void *value, size_t size,
+@@ -568,8 +562,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setxattrs, 3.5.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_removexattrs, 3.5.1)
+ int
+ pub_glfs_h_removexattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name)
+@@ -626,8 +619,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_removexattrs, 3.5.1);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_open, 3.4.2)
+ struct glfs_fd *
+ pub_glfs_h_open(struct glfs *fs, struct glfs_object *object, int flags)
+ {
+@@ -727,8 +719,7 @@ invalid_fs:
+ return glfd;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_open, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2)
+ struct glfs_object *
+ pub_glfs_h_creat(struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat)
+@@ -840,7 +831,7 @@ invalid_fs:
+ return object;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2);
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 3.4.2);
+
+ struct glfs_object *
+ pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent,
+@@ -975,8 +966,7 @@ invalid_fs:
+ return object;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mkdir, 3.4.2)
+ struct glfs_object *
+ pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, struct stat *stat)
+@@ -1074,8 +1064,7 @@ invalid_fs:
+ return object;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mkdir, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mknod, 3.4.2)
+ struct glfs_object *
+ pub_glfs_h_mknod(struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, dev_t dev, struct stat *stat)
+@@ -1172,8 +1161,7 @@ invalid_fs:
+ return object;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mknod, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_unlink, 3.4.2)
+ int
+ pub_glfs_h_unlink(struct glfs *fs, struct glfs_object *parent, const char *path)
+ {
+@@ -1244,8 +1232,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_unlink, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_opendir, 3.4.2)
+ struct glfs_fd *
+ pub_glfs_h_opendir(struct glfs *fs, struct glfs_object *object)
+ {
+@@ -1327,8 +1314,7 @@ invalid_fs:
+ return glfd;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_opendir, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_access, 3.6.0)
+ int
+ pub_glfs_h_access(struct glfs *fs, struct glfs_object *object, int mask)
+ {
+@@ -1385,8 +1371,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_access, 3.6.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_extract_handle, 3.4.2)
+ ssize_t
+ pub_glfs_h_extract_handle(struct glfs_object *object, unsigned char *handle,
+ int len)
+@@ -1417,8 +1402,7 @@ out:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_extract_handle, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_create_from_handle, 3.4.2)
+ struct glfs_object *
+ pub_glfs_h_create_from_handle(struct glfs *fs, unsigned char *handle, int len,
+ struct stat *stat)
+@@ -1541,8 +1525,7 @@ invalid_fs:
+ return object;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_create_from_handle, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_close, 3.4.2)
+ int
+ pub_glfs_h_close(struct glfs_object *object)
+ {
+@@ -1555,8 +1538,7 @@ pub_glfs_h_close(struct glfs_object *object)
+ return 0;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_close, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_truncate, 3.4.2)
+ int
+ pub_glfs_h_truncate(struct glfs *fs, struct glfs_object *object, off_t offset)
+ {
+@@ -1616,8 +1598,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_truncate, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_symlink, 3.4.2)
+ struct glfs_object *
+ pub_glfs_h_symlink(struct glfs *fs, struct glfs_object *parent,
+ const char *name, const char *data, struct stat *stat)
+@@ -1716,8 +1697,7 @@ invalid_fs:
+ return object;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_symlink, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_readlink, 3.4.2)
+ int
+ pub_glfs_h_readlink(struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz)
+@@ -1782,8 +1762,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_readlink, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_link, 3.4.2)
+ int
+ pub_glfs_h_link(struct glfs *fs, struct glfs_object *linksrc,
+ struct glfs_object *parent, const char *name)
+@@ -1880,8 +1859,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_link, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_rename, 3.4.2)
+ int
+ pub_glfs_h_rename(struct glfs *fs, struct glfs_object *olddir,
+ const char *oldname, struct glfs_object *newdir,
+@@ -1991,8 +1969,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_rename, 3.4.2);
+-
+ /*
+ * Given a handle/gfid, find if the corresponding inode is present in
+ * the inode table. If yes create and return the corresponding glfs_object.
+@@ -2200,6 +2176,7 @@ glfs_release_upcall(void *ptr)
+ * calling glfs_fini(..). Hence making an assumption that 'fs' & ctx structures
+ * cannot be freed while in this routine.
+ */
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_poll_upcall, 3.7.16)
+ int
+ pub_glfs_h_poll_upcall(struct glfs *fs, struct glfs_upcall **up_arg)
+ {
+@@ -2317,8 +2294,6 @@ err:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_poll_upcall, 3.7.16);
+-
+ static gf_boolean_t log_upcall370 = _gf_true; /* log once */
+
+ /* The old glfs_h_poll_upcall interface requires intimate knowledge of the
+@@ -2332,6 +2307,7 @@ static gf_boolean_t log_upcall370 = _gf_true; /* log once */
+ *
+ * WARNING: this function will be removed in the future.
+ */
++GFAPI_SYMVER_PUBLIC(glfs_h_poll_upcall370, glfs_h_poll_upcall, 3.7.0)
+ int
+ pub_glfs_h_poll_upcall370(struct glfs *fs, struct glfs_callback_arg *up_arg)
+ {
+@@ -2399,12 +2375,11 @@ out:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC(glfs_h_poll_upcall370, glfs_h_poll_upcall, 3.7.0);
+-
+ #ifdef HAVE_ACL_LIBACL_H
+ #include <glusterfs/glusterfs-acl.h>
+ #include <acl/libacl.h>
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0)
+ int
+ pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type, const acl_t acl)
+@@ -2453,6 +2428,7 @@ invalid_fs:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0)
+ acl_t
+ pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type)
+@@ -2507,6 +2483,7 @@ invalid_fs:
+ return acl;
+ }
+ #else /* !HAVE_ACL_LIBACL_H */
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0)
+ acl_t
+ pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type)
+@@ -2515,6 +2492,7 @@ pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
+ return NULL;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0)
+ int
+ pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type, const acl_t acl)
+@@ -2523,10 +2501,9 @@ pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
+ return -1;
+ }
+ #endif
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0);
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0);
+
+ /* The API to perform read using anonymous fd */
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_read, 3.7.0)
+ ssize_t
+ pub_glfs_h_anonymous_read(struct glfs *fs, struct glfs_object *object,
+ const void *buf, size_t count, off_t offset)
+@@ -2550,9 +2527,8 @@ pub_glfs_h_anonymous_read(struct glfs *fs, struct glfs_object *object,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_read, 3.7.0);
+-
+ /* The API to perform write using anonymous fd */
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0)
+ ssize_t
+ pub_glfs_h_anonymous_write(struct glfs *fs, struct glfs_object *object,
+ const void *buf, size_t count, off_t offset)
+@@ -2576,8 +2552,7 @@ pub_glfs_h_anonymous_write(struct glfs *fs, struct glfs_object *object,
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0)
+ struct glfs_object *
+ pub_glfs_object_copy(struct glfs_object *src)
+ {
+@@ -2600,8 +2575,8 @@ pub_glfs_object_copy(struct glfs_object *src)
+ out:
+ return object;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0)
+ struct glfs_object *
+ pub_glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat)
+ {
+@@ -2618,8 +2593,8 @@ pub_glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat)
+ out:
+ return NULL;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lease, 4.0.0)
+ int
+ pub_glfs_h_lease(struct glfs *fs, struct glfs_object *object,
+ struct glfs_lease *lease)
+@@ -2681,5 +2656,3 @@ out:
+ invalid_fs:
+ return ret;
+ }
+-
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lease, 4.0.0);
+diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
+index 15cf0ee00..a6cd1ff20 100644
+--- a/api/src/glfs-internal.h
++++ b/api/src/glfs-internal.h
+@@ -81,25 +81,40 @@
+ #ifndef GFAPI_PRIVATE
+ #define GFAPI_PRIVATE(sym, ver) /**/
+ #endif
++#if __GNUC__ >= 10
+ #define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \
+- asm(".symver pub_" STR(fn) ", " STR(fn) "@@GFAPI_" STR(ver))
++ __attribute__((__symver__(STR(fn) "@@GFAPI_" STR(ver))))
+
+ #define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \
+- asm(".symver priv_" STR(fn) ", " STR(fn) "@@GFAPI_PRIVATE_" STR(ver))
++ __attribute__((__symver__(STR(fn) "@@GFAPI_PRIVATE_" STR(ver))))
+
+ #define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \
+- asm(".symver pub_" STR(fn1) ", " STR(fn2) "@GFAPI_" STR(ver))
++ __attribute__((__symver__(STR(fn2) "@GFAPI_" STR(ver))))
+
+ #define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \
+- asm(".symver priv_" STR(fn1) ", " STR(fn2) "@GFAPI_PRIVATE_" STR(ver))
++ __attribute__((__symver__(STR(fn2) "@GFAPI_PRIVATE_" STR(ver))))
++
++#else
++#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \
++ asm(".symver pub_" STR(fn) ", " STR(fn) "@@GFAPI_" STR(ver));
++
++#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \
++ asm(".symver priv_" STR(fn) ", " STR(fn) "@@GFAPI_PRIVATE_" STR(ver));
++
++#define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \
++ asm(".symver pub_" STR(fn1) ", " STR(fn2) "@GFAPI_" STR(ver));
++
++#define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \
++ asm(".symver priv_" STR(fn1) ", " STR(fn2) "@GFAPI_PRIVATE_" STR(ver));
++#endif
+ #define STR(str) #str
+ #else
+ #ifndef GFAPI_PUBLIC
+-#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver))
++#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver));
+ #endif
+ #ifndef GFAPI_PRIVATE
+ #define GFAPI_PRIVATE(sym, ver) \
+- __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver))
++ __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver));
+ #endif
+ #define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, dotver) /**/
+ #define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, dotver) /**/
+diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
+index 7476d5b64..07be8a403 100644
+--- a/api/src/glfs-mgmt.c
++++ b/api/src/glfs-mgmt.c
+@@ -383,6 +383,7 @@ out:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volumeid, 3.5.0)
+ int
+ pub_glfs_get_volumeid(struct glfs *fs, char *volid, size_t size)
+ {
+@@ -439,8 +440,6 @@ invalid_fs:
+ return -1;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volumeid, 3.5.0);
+-
+ int
+ glfs_get_volume_info(struct glfs *fs)
+ {
+diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
+index 58b6ace58..c3e114a39 100644
+--- a/api/src/glfs-resolve.c
++++ b/api/src/glfs-resolve.c
+@@ -163,6 +163,7 @@ __glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ return newinode;
+ }
+
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_loc_touchup, 3.4.0)
+ int
+ priv_glfs_loc_touchup(loc_t *loc)
+ {
+@@ -177,8 +178,6 @@ priv_glfs_loc_touchup(loc_t *loc)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_loc_touchup, 3.4.0);
+-
+ int
+ glfs_resolve_symlink(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ char **lpath)
+@@ -466,6 +465,7 @@ out:
+ return inode;
+ }
+
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve_at, 3.4.0)
+ int
+ priv_glfs_resolve_at(struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt,
+@@ -616,8 +616,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve_at, 3.4.0);
+-
+ int
+ glfs_resolve_path(struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int follow, int reval)
+@@ -646,6 +644,7 @@ out:
+ return ret;
+ }
+
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve, 3.7.0)
+ int
+ priv_glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+@@ -656,7 +655,6 @@ priv_glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
+
+ return ret;
+ }
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve, 3.7.0);
+
+ int
+ glfs_lresolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
+@@ -977,6 +975,7 @@ __glfs_active_subvol(struct glfs *fs)
+ return new_subvol;
+ }
+
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_subvol_done, 3.4.0)
+ void
+ priv_glfs_subvol_done(struct glfs *fs, xlator_t *subvol)
+ {
+@@ -1004,8 +1003,7 @@ priv_glfs_subvol_done(struct glfs *fs, xlator_t *subvol)
+ }
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_subvol_done, 3.4.0);
+-
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_active_subvol, 3.4.0)
+ xlator_t *
+ priv_glfs_active_subvol(struct glfs *fs)
+ {
+@@ -1033,8 +1031,6 @@ priv_glfs_active_subvol(struct glfs *fs)
+ return subvol;
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_active_subvol, 3.4.0);
+-
+ int
+ __glfs_cwd_set(struct glfs *fs, inode_t *inode)
+ {
+diff --git a/api/src/glfs.c b/api/src/glfs.c
+index ae994faaf..b85b1c4be 100644
+--- a/api/src/glfs.c
++++ b/api/src/glfs.c
+@@ -277,6 +277,7 @@ out:
+
+ ///////////////////////////////////////////////////////////////////////////////
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_xlator_option, 3.4.0)
+ int
+ pub_glfs_set_xlator_option(struct glfs *fs, const char *xlator, const char *key,
+ const char *value)
+@@ -326,8 +327,7 @@ invalid_fs:
+ return -1;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_xlator_option, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unset_volfile_server, 3.5.1)
+ int
+ pub_glfs_unset_volfile_server(struct glfs *fs, const char *transport,
+ const char *host, const int port)
+@@ -385,8 +385,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unset_volfile_server, 3.5.1);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile_server, 3.4.0)
+ int
+ pub_glfs_set_volfile_server(struct glfs *fs, const char *transport,
+ const char *host, int port)
+@@ -468,8 +467,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile_server, 3.4.0);
+-
+ /* *
+ * Used to free the arguments allocated by glfs_set_volfile_server()
+ */
+@@ -512,6 +509,7 @@ glfs_free_xlator_options(cmd_args_t *cmd_args)
+ }
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsuid, 3.4.2)
+ int
+ pub_glfs_setfsuid(uid_t fsuid)
+ {
+@@ -521,8 +519,7 @@ pub_glfs_setfsuid(uid_t fsuid)
+ return syncopctx_setfsuid(&fsuid);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsuid, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgid, 3.4.2)
+ int
+ pub_glfs_setfsgid(gid_t fsgid)
+ {
+@@ -532,8 +529,7 @@ pub_glfs_setfsgid(gid_t fsgid)
+ return syncopctx_setfsgid(&fsgid);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgid, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgroups, 3.4.2)
+ int
+ pub_glfs_setfsgroups(size_t size, const gid_t *list)
+ {
+@@ -543,8 +539,7 @@ pub_glfs_setfsgroups(size_t size, const gid_t *list)
+ return syncopctx_setfsgroups(size, list);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgroups, 3.4.2);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsleaseid, 4.0.0)
+ int
+ pub_glfs_setfsleaseid(glfs_leaseid_t leaseid)
+ {
+@@ -566,8 +561,6 @@ pub_glfs_setfsleaseid(glfs_leaseid_t leaseid)
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsleaseid, 4.0.0);
+-
+ int
+ get_fop_attr_glfd(dict_t **fop_attr, struct glfs_fd *glfd)
+ {
+@@ -655,14 +648,14 @@ unset_fop_attr(dict_t **fop_attr)
+ *fop_attr = NULL;
+ }
+ }
++
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0)
+ struct glfs *
+ pub_glfs_from_glfd(struct glfs_fd *glfd)
+ {
+ return glfd->fs;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0);
+-
+ static void
+ glfs_fd_destroy(struct glfs_fd *glfd)
+ {
+@@ -811,6 +804,7 @@ unlock:
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_new, 3.4.0)
+ struct glfs *
+ pub_glfs_new(const char *volname)
+ {
+@@ -899,8 +893,7 @@ out:
+ return fs;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_new, 3.4.0);
+-
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_new_from_ctx, 3.7.0)
+ struct glfs *
+ priv_glfs_new_from_ctx(glusterfs_ctx_t *ctx)
+ {
+@@ -919,8 +912,7 @@ out:
+ return fs;
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_new_from_ctx, 3.7.0);
+-
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_free_from_ctx, 3.7.0)
+ void
+ priv_glfs_free_from_ctx(struct glfs *fs)
+ {
+@@ -956,8 +948,7 @@ priv_glfs_free_from_ctx(struct glfs *fs)
+ FREE(fs);
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_free_from_ctx, 3.7.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile, 3.4.0)
+ int
+ pub_glfs_set_volfile(struct glfs *fs, const char *volfile)
+ {
+@@ -974,8 +965,7 @@ pub_glfs_set_volfile(struct glfs *fs, const char *volfile)
+ return 0;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_logging, 3.4.0)
+ int
+ pub_glfs_set_logging(struct glfs *fs, const char *logfile, int loglevel)
+ {
+@@ -1013,8 +1003,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_logging, 3.4.0);
+-
+ int
+ glfs_init_wait(struct glfs *fs)
+ {
+@@ -1033,6 +1021,7 @@ glfs_init_wait(struct glfs *fs)
+ return ret;
+ }
+
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_init_done, 3.4.0)
+ void
+ priv_glfs_init_done(struct glfs *fs, int ret)
+ {
+@@ -1064,8 +1053,6 @@ out:
+ return;
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_init_done, 3.4.0);
+-
+ int
+ glfs_init_common(struct glfs *fs)
+ {
+@@ -1106,6 +1093,7 @@ glfs_init_async(struct glfs *fs, glfs_init_cbk cbk)
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_init, 3.4.0)
+ int
+ pub_glfs_init(struct glfs *fs)
+ {
+@@ -1139,8 +1127,6 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_init, 3.4.0);
+-
+ static int
+ glusterfs_ctx_destroy(glusterfs_ctx_t *ctx)
+ {
+@@ -1218,6 +1204,7 @@ glusterfs_ctx_destroy(glusterfs_ctx_t *ctx)
+ return ret;
+ }
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fini, 3.4.0)
+ int
+ pub_glfs_fini(struct glfs *fs)
+ {
+@@ -1412,8 +1399,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fini, 3.4.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0)
+ ssize_t
+ pub_glfs_get_volfile(struct glfs *fs, void *buf, size_t len)
+ {
+@@ -1439,8 +1425,7 @@ invalid_fs:
+ return res;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0);
+-
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0)
+ int
+ priv_glfs_ipc(struct glfs *fs, int opcode, void *xd_in, void **xd_out)
+ {
+@@ -1468,8 +1453,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0);
+-
++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1)
+ int
+ priv_glfs_setfspid(struct glfs *fs, pid_t pid)
+ {
+@@ -1483,107 +1467,104 @@ priv_glfs_setfspid(struct glfs *fs, pid_t pid)
+
+ return ret;
+ }
+-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_free, 3.7.16)
+ void
+ pub_glfs_free(void *ptr)
+ {
+ GLFS_FREE(ptr);
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_free, 3.7.16);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_fs, 3.7.16)
+ struct glfs *
+ pub_glfs_upcall_get_fs(struct glfs_upcall *arg)
+ {
+ return arg->fs;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_fs, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_reason, 3.7.16)
+ enum glfs_upcall_reason
+ pub_glfs_upcall_get_reason(struct glfs_upcall *arg)
+ {
+ return arg->reason;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_reason, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_event, 3.7.16)
+ void *
+ pub_glfs_upcall_get_event(struct glfs_upcall *arg)
+ {
+ return arg->event;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_event, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_object, 3.7.16)
+ struct glfs_object *
+ pub_glfs_upcall_inode_get_object(struct glfs_upcall_inode *arg)
+ {
+ return arg->object;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_object, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_flags, 3.7.16)
+ uint64_t
+ pub_glfs_upcall_inode_get_flags(struct glfs_upcall_inode *arg)
+ {
+ return arg->flags;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_flags, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_stat, 3.7.16)
+ struct stat *
+ pub_glfs_upcall_inode_get_stat(struct glfs_upcall_inode *arg)
+ {
+ return &arg->buf;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_stat, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_expire, 3.7.16)
+ uint64_t
+ pub_glfs_upcall_inode_get_expire(struct glfs_upcall_inode *arg)
+ {
+ return arg->expire_time_attr;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_expire, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pobject, 3.7.16)
+ struct glfs_object *
+ pub_glfs_upcall_inode_get_pobject(struct glfs_upcall_inode *arg)
+ {
+ return arg->p_object;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pobject, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pstat, 3.7.16)
+ struct stat *
+ pub_glfs_upcall_inode_get_pstat(struct glfs_upcall_inode *arg)
+ {
+ return &arg->p_buf;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pstat, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpobject, 3.7.16)
+ struct glfs_object *
+ pub_glfs_upcall_inode_get_oldpobject(struct glfs_upcall_inode *arg)
+ {
+ return arg->oldp_object;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpobject, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpstat, 3.7.16)
+ struct stat *
+ pub_glfs_upcall_inode_get_oldpstat(struct glfs_upcall_inode *arg)
+ {
+ return &arg->oldp_buf;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpstat, 3.7.16);
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_object, 4.1.6)
+ struct glfs_object *
+ pub_glfs_upcall_lease_get_object(struct glfs_upcall_lease *arg)
+ {
+ return arg->object;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_object, 4.1.6);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_lease_type, 4.1.6)
+ uint32_t
+ pub_glfs_upcall_lease_get_lease_type(struct glfs_upcall_lease *arg)
+ {
+ return arg->lease_type;
+ }
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_lease_type, 4.1.6);
+
+ /* definitions of the GLFS_SYSRQ_* chars are in glfs.h */
+ static struct glfs_sysrq_help {
+@@ -1593,6 +1574,7 @@ static struct glfs_sysrq_help {
+ {GLFS_SYSRQ_STATEDUMP, "(S)tatedump"},
+ {0, NULL}};
+
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_sysrq, 3.10.0)
+ int
+ pub_glfs_sysrq(struct glfs *fs, char sysrq)
+ {
+@@ -1641,8 +1623,7 @@ out:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_sysrq, 3.10.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_register, 3.13.0)
+ int
+ pub_glfs_upcall_register(struct glfs *fs, uint32_t event_list,
+ glfs_upcall_cbk cbk, void *data)
+@@ -1698,8 +1679,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_register, 3.13.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0)
+ int
+ pub_glfs_upcall_unregister(struct glfs *fs, uint32_t event_list)
+ {
+@@ -1746,8 +1726,7 @@ invalid_fs:
+ return ret;
+ }
+
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0);
+-
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 6.4)
+ int
+ pub_glfs_set_statedump_path(struct glfs *fs, const char *path)
+ {
+@@ -1807,5 +1786,3 @@ err:
+ invalid_fs:
+ return -1;
+ }
+-
+-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 6.4);
+diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c
+index 270b76f44..c44ef1087 100644
+--- a/cli/src/cli-cmd-global.c
++++ b/cli/src/cli-cmd-global.c
+@@ -27,7 +27,6 @@
+ #include <glusterfs/syscall.h>
+ #include <glusterfs/common-utils.h>
+
+-extern rpc_clnt_prog_t *cli_rpc_prog;
+
+ int
+ cli_cmd_global_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c
+index 04dd2efc2..6eaac8b92 100644
+--- a/cli/src/cli-cmd-misc.c
++++ b/cli/src/cli-cmd-misc.c
+@@ -18,10 +18,6 @@
+ #include "cli-mem-types.h"
+ #include "protocol-common.h"
+
+-extern struct rpc_clnt *global_rpc;
+-
+-extern rpc_clnt_prog_t *cli_rpc_prog;
+-
+ extern struct cli_cmd volume_cmds[];
+ extern struct cli_cmd bitrot_cmds[];
+ extern struct cli_cmd quota_cmds[];
+diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c
+index e42a1139b..55deb4369 100644
+--- a/cli/src/cli-cmd-peer.c
++++ b/cli/src/cli-cmd-peer.c
+@@ -20,10 +20,6 @@
+ #include "protocol-common.h"
+ #include <glusterfs/events.h>
+
+-extern struct rpc_clnt *global_rpc;
+-
+-extern rpc_clnt_prog_t *cli_rpc_prog;
+-
+ int
+ cli_cmd_peer_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c
+index 814ab82f6..3c523ad17 100644
+--- a/cli/src/cli-cmd-snapshot.c
++++ b/cli/src/cli-cmd-snapshot.c
+@@ -17,8 +17,6 @@
+ #include "cli-cmd.h"
+ #include "cli-mem-types.h"
+
+-extern rpc_clnt_prog_t *cli_rpc_prog;
+-
+ int
+ cli_cmd_snapshot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c
+index cb3a9ea74..ca802187a 100644
+--- a/cli/src/cli-cmd-system.c
++++ b/cli/src/cli-cmd-system.c
+@@ -18,10 +18,6 @@
+ #include "cli-mem-types.h"
+ #include "protocol-common.h"
+
+-extern struct rpc_clnt *global_rpc;
+-
+-extern rpc_clnt_prog_t *cli_rpc_prog;
+-
+ int
+ cli_cmd_system_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index b6bef80f1..8186ed7bc 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -28,10 +28,6 @@
+ #include <glusterfs/common-utils.h>
+ #include <glusterfs/events.h>
+
+-extern struct rpc_clnt *global_rpc;
+-extern struct rpc_clnt *global_quotad_rpc;
+-
+-extern rpc_clnt_prog_t *cli_rpc_prog;
+ extern rpc_clnt_prog_t cli_quotad_clnt;
+
+ int
+diff --git a/cli/src/cli-quotad-client.c b/cli/src/cli-quotad-client.c
+index 52ab97ee8..1da7b3f0a 100644
+--- a/cli/src/cli-quotad-client.c
++++ b/cli/src/cli-quotad-client.c
+@@ -10,9 +10,6 @@
+
+ #include "cli-quotad-client.h"
+
+-extern struct rpc_clnt global_quotad_rpc;
+-extern struct rpc_clnt_program cli_quotad_clnt;
+-
+ int
+ cli_quotad_submit_request(void *req, call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref, xlator_t *this,
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 51b544786..ef320b019 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -48,10 +48,7 @@
+
+ enum gf_task_types { GF_TASK_TYPE_REBALANCE, GF_TASK_TYPE_REMOVE_BRICK };
+
+-extern struct rpc_clnt *global_quotad_rpc;
+-extern rpc_clnt_prog_t cli_quotad_clnt;
+-extern rpc_clnt_prog_t *cli_rpc_prog;
+-extern int cli_op_ret;
++rpc_clnt_prog_t cli_quotad_clnt;
+ extern int connected;
+
+ int32_t
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index a76c5a263..74e055222 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -74,6 +74,10 @@ rpc_clnt_prog_t *cli_rpc_prog;
+
+ extern struct rpc_clnt_program cli_prog;
+
++int cli_default_conn_timeout = 120;
++int cli_ten_minutes_timeout = 600;
++
++
+ static int
+ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
+ {
+diff --git a/cli/src/cli.h b/cli/src/cli.h
+index b5b69ea48..21982594e 100644
+--- a/cli/src/cli.h
++++ b/cli/src/cli.h
+@@ -42,8 +42,8 @@ enum argp_option_keys {
+ ARGP_PORT_KEY = 'p',
+ };
+
+-int cli_default_conn_timeout;
+-int cli_ten_minutes_timeout;
++extern int cli_default_conn_timeout;
++extern int cli_ten_minutes_timeout;
+
+ typedef enum {
+ COLD_BRICK_COUNT,
+@@ -191,6 +191,12 @@ typedef ssize_t (*cli_serialize_t)(struct iovec outmsg, void *args);
+
+ extern struct cli_state *global_state; /* use only in readline callback */
+
++extern struct rpc_clnt *global_quotad_rpc;
++
++extern struct rpc_clnt *global_rpc;
++
++extern rpc_clnt_prog_t *cli_rpc_prog;
++
+ typedef const char *(*cli_selector_t)(void *wcon);
+
+ char *
+--
+2.27.0
+
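+The glfs-internal.h hunk above is the core of this patch: on GCC >= 10 the
+GFAPI_SYMVER_* macros switch from a top-level ".symver" asm directive placed
+after each function body to a __symver__ function attribute, which must be
+attached to the definition itself -- which is why every macro invocation moves
+from below a function to directly above it. Below is a minimal, self-contained
+sketch of the two styles; symver_demo.c, gfapi.map, glfs_demo, pub_glfs_demo
+and libdemo.so are illustrative names, not part of the patch:
+
+    /* symver_demo.c -- minimal sketch of GNU symbol versioning, assuming
+     * GNU ld and a version script (gfapi.map) declaring the node, e.g.
+     *
+     *   GFAPI_3.4.0 { global: glfs_demo; local: *; };
+     *
+     * built with: gcc -shared -fPIC symver_demo.c \
+     *                 -Wl,--version-script=gfapi.map -o libdemo.so
+     */
+    #if defined(__GNUC__) && __GNUC__ >= 10
+    /* GCC >= 10: attach the version directly to the definition; "@@"
+     * marks this as the default version of the symbol. */
+    __attribute__((__symver__("glfs_demo@@GFAPI_3.4.0")))
+    #endif
+    int
+    pub_glfs_demo(void)
+    {
+        return 0;
+    }
+
+    #if !defined(__GNUC__) || __GNUC__ < 10
+    /* Older toolchains: a file-scope asm directive after the definition,
+     * matching the pre-patch GFAPI_SYMVER_* macros. */
+    __asm__(".symver pub_glfs_demo, glfs_demo@@GFAPI_3.4.0");
+    #endif
+
+A single "@" instead of "@@" binds an extra non-default compatibility version,
+which is what the non-DEFAULT GFAPI_SYMVER_PUBLIC/PRIVATE macros do. The cli/
+hunks in the same patch are presumably the companion fix for the same compiler
+bump: GCC 10 defaults to -fno-common, so the tentative definitions such as
+"int cli_default_conn_timeout;" in cli.h, included by several translation
+units, become multiple-definition link errors; the patch leaves one definition
+in cli.c, extern declarations in cli.h, and drops the duplicate extern
+declarations scattered across the cli-cmd-*.c files.
+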
diff --git a/0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch b/0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch
new file mode 100644
index 0000000..99459af
--- /dev/null
+++ b/0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch
@@ -0,0 +1,250 @@
+From 04a43bb831b98ce9942ac6daa7e14ff88ecd9bee Mon Sep 17 00:00:00 2001
+From: Tamar Shacked <tshacked@redhat.com>
+Date: Sun, 1 Aug 2021 09:30:18 +0300
+Subject: contrib: remove contrib/sunrpc/xdr_sizeof.c
+
+It's not needed, and it has a license that Fedora is not very happy with.
+Just removed that file.
+
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24761/
+> Fixes: #1383
+> Change-Id: Ia753f0058c8a7c6482aca40c3b3dc8f6aa4a266d
+> Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+
+BUG: 1939340
+
+Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+Change-Id: Ibbc3871e66f542f4cb0c5a6c3182792eb125d0f1
+---
+ contrib/sunrpc/xdr_sizeof.c | 204 ------------------------------------
+ rpc/rpc-lib/src/Makefile.am | 2 +-
+ 2 files changed, 1 insertion(+), 205 deletions(-)
+ delete mode 100644 contrib/sunrpc/xdr_sizeof.c
+
+diff --git a/contrib/sunrpc/xdr_sizeof.c b/contrib/sunrpc/xdr_sizeof.c
+deleted file mode 100644
+index ca1f7bf0a..000000000
+--- a/contrib/sunrpc/xdr_sizeof.c
++++ /dev/null
+@@ -1,204 +0,0 @@
+-/*
+- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+- *
+- * @APPLE_LICENSE_HEADER_START@
+- *
+- * Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
+- * Reserved. This file contains Original Code and/or Modifications of
+- * Original Code as defined in and that are subject to the Apple Public
+- * Source License Version 1.1 (the "License"). You may not use this file
+- * except in compliance with the License. Please obtain a copy of the
+- * License at http://www.apple.com/publicsource and read it before using
+- * this file.
+- *
+- * The Original Code and all software distributed under the License are
+- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE OR NON- INFRINGEMENT. Please see the
+- * License for the specific language governing rights and limitations
+- * under the License.
+- *
+- * @APPLE_LICENSE_HEADER_END@
+- */
+-
+-/*
+- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
+- * unrestricted use provided that this legend is included on all tape
+- * media and as a part of the software program in whole or part. Users
+- * may copy or modify Sun RPC without charge, but are not authorized
+- * to license or distribute it to anyone else except as part of a product or
+- * program developed by the user.
+- *
+- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
+- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
+- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
+- *
+- * Sun RPC is provided with no support and without any obligation on the
+- * part of Sun Microsystems, Inc. to assist in its use, correction,
+- * modification or enhancement.
+- *
+- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
+- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
+- * OR ANY PART THEREOF.
+- *
+- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
+- * or profits or other special, indirect and consequential damages, even if
+- * Sun has been advised of the possibility of such damages.
+- *
+- * Sun Microsystems, Inc.
+- * 2550 Garcia Avenue
+- * Mountain View, California 94043
+- */
+-
+-/*
+- * xdr_sizeof.c
+- *
+- * Copyright 1990 Sun Microsystems, Inc.
+- *
+- * General purpose routine to see how much space something will use
+- * when serialized using XDR.
+- */
+-
+-#ifdef GF_DARWIN_HOST_OS
+-
+-#include <rpc/types.h>
+-#include <rpc/xdr.h>
+-#include <sys/types.h>
+-#include <sys/cdefs.h>
+-
+-#include <stdlib.h>
+-
+-/* ARGSUSED */
+-#ifdef GF_DARWIN_HOST_OS
+-static bool_t
+-x_putlong (XDR *xdrs, const int *longp)
+-{
+- xdrs->x_handy += BYTES_PER_XDR_UNIT;
+- return TRUE;
+-}
+-
+-#else
+-static bool_t
+-x_putlong (XDR *xdrs, const long *longp)
+-{
+- xdrs->x_handy += BYTES_PER_XDR_UNIT;
+- return TRUE;
+-}
+-#endif
+-
+-/* ARGSUSED */
+-static bool_t
+-x_putbytes (XDR *xdrs, const char *bp, u_int len)
+-{
+- xdrs->x_handy += len;
+- return TRUE;
+-}
+-
+-#ifdef GF_DARWIN_HOST_OS
+-static u_int
+-x_getpostn (XDR *xdrs)
+-{
+- return xdrs->x_handy;
+-}
+-#else
+-static u_int
+-x_getpostn (const XDR *xdrs)
+-{
+- return xdrs->x_handy;
+-}
+-#endif
+-
+-/* ARGSUSED */
+-static bool_t
+-x_setpostn (XDR *xdrs, u_int len)
+-{
+- /* This is not allowed */
+- return FALSE;
+-}
+-
+-static int32_t *
+-x_inline (XDR *xdrs, u_int len)
+-{
+- if (len == 0)
+- return NULL;
+- if (xdrs->x_op != XDR_ENCODE)
+- return NULL;
+- if (len < (u_int) (long int) xdrs->x_base)
+- {
+- /* x_private was already allocated */
+- xdrs->x_handy += len;
+- return (int32_t *) xdrs->x_private;
+- }
+- else
+- {
+- /* Free the earlier space and allocate new area */
+- free (xdrs->x_private);
+- if ((xdrs->x_private = (caddr_t) malloc (len)) == NULL)
+- {
+- xdrs->x_base = 0;
+- return NULL;
+- }
+- xdrs->x_base = (void *) (long) len;
+- xdrs->x_handy += len;
+- return (int32_t *) xdrs->x_private;
+- }
+-}
+-
+-static int
+-harmless (void)
+-{
+- /* Always return FALSE/NULL, as the case may be */
+- return 0;
+-}
+-
+-static void
+-x_destroy (XDR *xdrs)
+-{
+- xdrs->x_handy = 0;
+- xdrs->x_base = 0;
+- if (xdrs->x_private)
+- {
+- free (xdrs->x_private);
+- xdrs->x_private = NULL;
+- }
+- return;
+-}
+-
+-unsigned long
+-xdr_sizeof (xdrproc_t func, void *data)
+-{
+- XDR x;
+- struct xdr_ops ops;
+- bool_t stat;
+-
+-#ifdef GF_DARWIN_HOST_OS
+- typedef bool_t (*dummyfunc1) (XDR *, int *);
+-#else
+- typedef bool_t (*dummyfunc1) (XDR *, long *);
+-#endif
+- typedef bool_t (*dummyfunc2) (XDR *, caddr_t, u_int);
+-
+- ops.x_putlong = x_putlong;
+- ops.x_putbytes = x_putbytes;
+- ops.x_inline = x_inline;
+- ops.x_getpostn = x_getpostn;
+- ops.x_setpostn = x_setpostn;
+- ops.x_destroy = x_destroy;
+-
+- /* the other harmless ones */
+- ops.x_getlong = (dummyfunc1) harmless;
+- ops.x_getbytes = (dummyfunc2) harmless;
+-
+- x.x_op = XDR_ENCODE;
+- x.x_ops = &ops;
+- x.x_handy = 0;
+- x.x_private = (caddr_t) NULL;
+- x.x_base = (caddr_t) 0;
+-
+- stat = func (&x, data, 0);
+- if (x.x_private)
+- free (x.x_private);
+- return (stat == TRUE ? (unsigned) x.x_handy : 0);
+-}
+-#endif /* GF_DARWIN_HOST_OS */
+diff --git a/rpc/rpc-lib/src/Makefile.am b/rpc/rpc-lib/src/Makefile.am
+index 81a964768..35c9db07e 100644
+--- a/rpc/rpc-lib/src/Makefile.am
++++ b/rpc/rpc-lib/src/Makefile.am
+@@ -2,7 +2,7 @@ lib_LTLIBRARIES = libgfrpc.la
+
+ libgfrpc_la_SOURCES = auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c \
+ rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c \
+- rpc-drc.c $(CONTRIBDIR)/sunrpc/xdr_sizeof.c rpc-clnt-ping.c \
++ rpc-drc.c rpc-clnt-ping.c \
+ autoscale-threads.c mgmt-pmap.c
+
+ EXTRA_DIST = libgfrpc.sym
+--
+2.27.0
+
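+The deleted contrib file reimplemented xdr_sizeof() by running the XDR encode
+routine against counting stubs: x_putlong()/x_putbytes() never touch a buffer,
+they only advance x_handy, so the final x_handy value is the serialized size.
+After this patch gluster relies on the xdr_sizeof() shipped by glibc/libtirpc
+instead (the spec below still avoids libtirpc on EL7 precisely because that
+build lacked it). A minimal usage sketch, assuming a libtirpc that provides
+the function; sizeof_demo.c and the build line are illustrative:
+
+    /* sizeof_demo.c -- measure the XDR-encoded size of a value, using the
+     * library xdr_sizeof() that replaces the removed contrib copy.
+     * Build guess: gcc sizeof_demo.c $(pkg-config --cflags --libs libtirpc)
+     */
+    #include <stdio.h>
+    #include <rpc/rpc.h>
+    #include <rpc/xdr.h>
+
+    int
+    main(void)
+    {
+        int value = 42;
+
+        /* xdr_sizeof() drives the encoder against counting stubs (no real
+         * buffer) and returns the byte count it would have produced. */
+        unsigned long n = xdr_sizeof((xdrproc_t)xdr_int, &value);
+        printf("xdr_int encodes to %lu bytes\n", n);   /* expect 4 */
+        return 0;
+    }
+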
diff --git a/glusterfs.spec b/glusterfs.spec
new file mode 100644
index 0000000..3e63faa
--- /dev/null
+++ b/glusterfs.spec
@@ -0,0 +1,2823 @@
+%global _hardened_build 1
+
+%global _for_fedora_koji_builds 0
+
+# uncomment and add '%' to use the prereltag for pre-releases
+# %%global prereltag qa3
+
+##-----------------------------------------------------------------------------
+## All argument definitions should be placed here and kept sorted
+##
+
+# asan
+# if you wish to compile an rpm with address sanitizer...
+# rpmbuild -ta glusterfs-6.0.tar.gz --with asan
+%{?_with_asan:%global _with_asan --enable-asan}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+%global _with_asan %{nil}
+%endif
+
+# bd
+# if you wish to compile an rpm without the BD map support...
+# rpmbuild -ta glusterfs-6.0.tar.gz --without bd
+%{?_without_bd:%global _without_bd --disable-bd-xlator}
+
+%if ( 0%{?rhel} && 0%{?rhel} > 7 )
+%global _without_bd --without-bd
+%endif
+
+# cmocka
+# if you wish to compile an rpm with cmocka unit testing...
+# rpmbuild -ta glusterfs-6.0.tar.gz --with cmocka
+%{?_with_cmocka:%global _with_cmocka --enable-cmocka}
+
+# debug
+# if you wish to compile an rpm with debugging...
+# rpmbuild -ta glusterfs-6.0.tar.gz --with debug
+%{?_with_debug:%global _with_debug --enable-debug}
+
+# epoll
+# if you wish to compile an rpm without epoll...
+# rpmbuild -ta glusterfs-6.0.tar.gz --without epoll
+%{?_without_epoll:%global _without_epoll --disable-epoll}
+
+# fusermount
+# if you wish to compile an rpm without fusermount...
+# rpmbuild -ta glusterfs-6.0.tar.gz --without fusermount
+%{?_without_fusermount:%global _without_fusermount --disable-fusermount}
+
+# geo-rep
+# if you wish to compile an rpm without geo-replication support, compile like this...
+# rpmbuild -ta glusterfs-6.0.tar.gz --without georeplication
+%{?_without_georeplication:%global _without_georeplication --disable-georeplication}
+
+# ipv6default
+# if you wish to compile an rpm with IPv6 default...
+# rpmbuild -ta glusterfs-6.0.tar.gz --with ipv6default
+%{?_with_ipv6default:%global _with_ipv6default --with-ipv6-default}
+
+# libtirpc
+# if you wish to compile an rpm without TIRPC (i.e. use legacy glibc rpc)
+# rpmbuild -ta glusterfs-6.0.tar.gz --without libtirpc
+%{?_without_libtirpc:%global _without_libtirpc --without-libtirpc}
+
+# Do not use libtirpc on EL6; it does not have xdr_uint64_t() or xdr_uint32_t()
+# Do not use libtirpc on EL7; it does not have xdr_sizeof()
+%if ( 0%{?rhel} && 0%{?rhel} <= 7 )
+%global _without_libtirpc --without-libtirpc
+%endif
+
+
+# ocf
+# if you wish to compile an rpm without the OCF resource agents...
+# rpmbuild -ta glusterfs-6.0.tar.gz --without ocf
+%{?_without_ocf:%global _without_ocf --without-ocf}
+
+# rdma
+# if you wish to compile an rpm without rdma support, compile like this...
+# rpmbuild -ta glusterfs-6.0.tar.gz --without rdma
+%{?_without_rdma:%global _without_rdma --disable-ibverbs}
+
+# No RDMA support on 32-bit ARM
+%ifarch armv7hl
+%global _without_rdma --disable-ibverbs
+%endif
+
+# server
+# if you wish to build rpms without server components, compile like this
+# rpmbuild -ta glusterfs-6.0.tar.gz --without server
+%{?_without_server:%global _without_server --without-server}
+
+# forcefully disable server components on RHEL unless building for an RHGS dist
+%if ( 0%{?rhel} )
+%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ) || ( "%{?dist}" == ".el8rhgs" )))
+%global _without_server --without-server
+%endif
+%endif
+
+%global _without_extra_xlators 1
+%global _without_regression_tests 1
+
+# syslog
+# if you wish to build rpms without syslog logging, compile like this
+# rpmbuild -ta glusterfs-6.0.tar.gz --without syslog
+%{?_without_syslog:%global _without_syslog --disable-syslog}
+
+# disable syslog forcefully as rhel <= 6 doesn't have rsyslog or rsyslog-mmcount
+# Fedora deprecated syslog, see
+# https://fedoraproject.org/wiki/Changes/NoDefaultSyslog
+# (And what about RHEL7?)
+%if ( 0%{?fedora} && 0%{?fedora} >= 20 ) || ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%global _without_syslog --disable-syslog
+%endif
+
+# tsan
+# if you wish to compile an rpm with thread sanitizer...
+# rpmbuild -ta glusterfs-6.0.tar.gz --with tsan
+%{?_with_tsan:%global _with_tsan --enable-tsan}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+%global _with_tsan %{nil}
+%endif
+
+# valgrind
+# if you wish to compile an rpm to run all processes under valgrind...
+# rpmbuild -ta glusterfs-6.0.tar.gz --with valgrind
+%{?_with_valgrind:%global _with_valgrind --enable-valgrind}
+
+##-----------------------------------------------------------------------------
+## All %%global definitions should be placed here and kept sorted
+##
+
+# selinux booleans whose default value needs modification
+# these booleans will be consumed by the "%%selinux_set_booleans" macro.
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%global selinuxbooleans rsync_full_access=1 rsync_client=1
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%global _with_systemd true
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 7 )
+%global _with_firewalld --enable-firewalld
+%endif
+
+%if 0%{?_tmpfilesdir:1}
+%global _with_tmpfilesdir --with-tmpfilesdir=%{_tmpfilesdir}
+%else
+%global _with_tmpfilesdir --without-tmpfilesdir
+%endif
+
+# building without server should also disable some server-only components
+%if 0%{?_without_server:1}
+%global _without_events --disable-events
+%global _without_georeplication --disable-georeplication
+%global _without_tiering --disable-tiering
+%global _without_ocf --without-ocf
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+%global _usepython3 1
+%global _pythonver 3
+%else
+%global _usepython3 0
+%global _pythonver 2
+%endif
+
+# From https://fedoraproject.org/wiki/Packaging:Python#Macros
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%{!?python2_sitelib: %global python2_sitelib %(python2 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
+%{!?python2_sitearch: %global python2_sitearch %(python2 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")}
+%global _rundir %{_localstatedir}/run
+%endif
+
+%if ( 0%{?_with_systemd:1} )
+%global service_enable() /bin/systemctl --quiet enable %1.service || : \
+%{nil}
+%global service_start() /bin/systemctl --quiet start %1.service || : \
+%{nil}
+%global service_stop() /bin/systemctl --quiet stop %1.service || :\
+%{nil}
+%global service_install() install -D -p -m 0644 %1.service %{buildroot}%2 \
+%{nil}
+# can't seem to make a generic macro that works
+%global glusterd_svcfile %{_unitdir}/glusterd.service
+%global glusterfsd_svcfile %{_unitdir}/glusterfsd.service
+%global glusterta_svcfile %{_unitdir}/gluster-ta-volume.service
+%global glustereventsd_svcfile %{_unitdir}/glustereventsd.service
+%global glusterfssharedstorage_svcfile %{_unitdir}/glusterfssharedstorage.service
+%else
+%global service_enable() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \
+%{nil}
+%global systemd_preun() /sbin/chkconfig --del %1 >/dev/null 2>&1 || : \
+%{nil}
+%global systemd_postun_with_restart() /sbin/service %1 condrestart >/dev/null 2>&1 || : \
+%{nil}
+%global service_start() /sbin/service %1 start >/dev/null 2>&1 || : \
+%{nil}
+%global service_stop() /sbin/service %1 stop >/dev/null 2>&1 || : \
+%{nil}
+%global service_install() install -D -p -m 0755 %1.init %{buildroot}%2 \
+%{nil}
+# can't seem to make a generic macro that works
+%global glusterd_svcfile %{_sysconfdir}/init.d/glusterd
+%global glusterfsd_svcfile %{_sysconfdir}/init.d/glusterfsd
+%global glustereventsd_svcfile %{_sysconfdir}/init.d/glustereventsd
+%endif
+
+%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}}
+
+# We do not want useless provides and requires for xlator .so files
+# to be generated for the glusterfs packages.
+# Filter all of the generated ones:
+#
+# TODO: RHEL5 does not have a convenient solution
+%if ( 0%{?rhel} == 6 )
+# filter_setup exists in RHEL6 only
+%filter_provides_in %{_libdir}/glusterfs/%{version}/
+%global __filter_from_req %{?__filter_from_req} | grep -v -P '^(?!lib).*\.so.*$'
+%filter_setup
+%else
+# modern rpm and current Fedora do not generate requires when the
+# provides are filtered
+%global __provides_exclude_from ^%{_libdir}/glusterfs/%{version}/.*$
+%endif
+
+
+##-----------------------------------------------------------------------------
+## All package definitions should be placed here in alphabetical order
+##
+Summary: Distributed File System
+%if ( 0%{_for_fedora_koji_builds} )
+Name: glusterfs
+Version: 3.8.0
+Release: 0.2%{?prereltag:.%{prereltag}}%{?dist}
+%else
+Name: glusterfs
+Version: 6.0
+Release: 57.4%{?dist}
+ExcludeArch: i686
+%endif
+License: GPLv2 or LGPLv3+
+URL: http://docs.gluster.org/
+%if ( 0%{_for_fedora_koji_builds} )
+Source0: http://bits.gluster.org/pub/gluster/glusterfs/src/glusterfs-%{version}%{?prereltag}.tar.gz
+Source1: glusterd.sysconfig
+Source2: glusterfsd.sysconfig
+Source7: glusterfsd.service
+Source8: glusterfsd.init
+%else
+Source0: glusterfs-6.0.tar.gz
+%endif
+
+Requires(pre): shadow-utils
+%if ( 0%{?_with_systemd:1} )
+BuildRequires: systemd
+%endif
+
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%endif
+%if 0%{?_with_asan:1} && !( 0%{?rhel} && 0%{?rhel} < 7 )
+BuildRequires: libasan
+%endif
+%if 0%{?_with_tsan:1} && !( 0%{?rhel} && 0%{?rhel} < 7 )
+BuildRequires: libtsan
+%endif
+BuildRequires: git
+BuildRequires: bison flex
+BuildRequires: gcc make libtool
+BuildRequires: ncurses-devel readline-devel
+BuildRequires: libxml2-devel openssl-devel
+BuildRequires: libaio-devel libacl-devel
+BuildRequires: python%{_pythonver}-devel
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+BuildRequires: python-ctypes
+%endif
+%if ( 0%{?_with_ipv6default:1} ) || ( 0%{!?_without_libtirpc:1} ) || ( 0%{?rhel} && ( 0%{?rhel} >= 8 ) )
+BuildRequires: libtirpc-devel
+%endif
+%if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+BuildRequires: rpcgen
+%endif
+BuildRequires: userspace-rcu-devel >= 0.7
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+BuildRequires: automake
+%endif
+BuildRequires: libuuid-devel
+%if ( 0%{?_with_cmocka:1} )
+BuildRequires: libcmocka-devel >= 1.0.1
+%endif
+%if ( 0%{!?_without_tiering:1} )
+BuildRequires: sqlite-devel
+%endif
+%if ( 0%{!?_without_georeplication:1} )
+BuildRequires: libattr-devel
+%endif
+
+%if (0%{?_with_firewalld:1})
+BuildRequires: firewalld
+%endif
+
+Obsoletes: hekafs
+Obsoletes: %{name}-common < %{version}-%{release}
+Obsoletes: %{name}-core < %{version}-%{release}
+Obsoletes: %{name}-ufo
+%if ( 0%{!?_with_gnfs:1} )
+Obsoletes: %{name}-gnfs
+%endif
+%if ( 0%{?rhel} < 7 )
+Obsoletes: %{name}-ganesha
+%endif
+Provides: %{name}-common = %{version}-%{release}
+Provides: %{name}-core = %{version}-%{release}
+
+# Patch0001: 0001-Update-rfc.sh-to-rhgs-3.5.0.patch
+Patch0002: 0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch
+Patch0003: 0003-rpc-set-bind-insecure-to-off-by-default.patch
+Patch0004: 0004-glusterd-spec-fixing-autogen-issue.patch
+Patch0005: 0005-libglusterfs-glusterd-Fix-compilation-errors.patch
+Patch0006: 0006-build-remove-ghost-directory-entries.patch
+Patch0007: 0007-build-add-RHGS-specific-changes.patch
+Patch0008: 0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch
+Patch0009: 0009-build-introduce-security-hardening-flags-in-gluster.patch
+Patch0010: 0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch
+Patch0011: 0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch
+Patch0012: 0012-build-add-pretrans-check.patch
+Patch0013: 0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch
+Patch0014: 0014-build-spec-file-conflict-resolution.patch
+Patch0015: 0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch
+Patch0016: 0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch
+Patch0017: 0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch
+Patch0018: 0018-cli-Add-message-for-user-before-modifying-brick-mult.patch
+Patch0019: 0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch
+Patch0020: 0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch
+Patch0021: 0021-cli-glusterfsd-remove-copyright-information.patch
+Patch0022: 0022-cli-Remove-upstream-doc-reference.patch
+Patch0023: 0023-hooks-remove-selinux-hooks.patch
+Patch0024: 0024-glusterd-Make-localtime-logging-option-invisible-in-.patch
+Patch0025: 0025-build-make-RHGS-version-available-for-server.patch
+Patch0026: 0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch
+Patch0027: 0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch
+Patch0028: 0028-glusterd-Reset-op-version-for-features.shard-deletio.patch
+Patch0029: 0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch
+Patch0030: 0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch
+Patch0031: 0031-glusterd-turn-off-selinux-feature-in-downstream.patch
+Patch0032: 0032-glusterd-update-gd-op-version-to-3_7_0.patch
+Patch0033: 0033-build-add-missing-explicit-package-dependencies.patch
+Patch0034: 0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch
+Patch0035: 0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch
+Patch0036: 0036-build-add-conditional-dependency-on-server-for-devel.patch
+Patch0037: 0037-cli-change-the-warning-message.patch
+Patch0038: 0038-spec-avoid-creation-of-temp-file-in-lua-script.patch
+Patch0039: 0039-cli-fix-query-to-user-during-brick-mux-selection.patch
+Patch0040: 0040-build-Remove-unsupported-test-cases-failing-consiste.patch
+Patch0041: 0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch
+Patch0042: 0042-spec-client-server-Builds-are-failing-on-rhel-6.patch
+Patch0043: 0043-inode-don-t-dump-the-whole-table-to-CLI.patch
+Patch0044: 0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch
+Patch0045: 0045-glusterd-fix-txn-id-mem-leak.patch
+Patch0046: 0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch
+Patch0047: 0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch
+Patch0048: 0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch
+Patch0049: 0049-transport-socket-log-shutdown-msg-occasionally.patch
+Patch0050: 0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch
+Patch0051: 0051-spec-update-rpm-install-condition.patch
+Patch0052: 0052-geo-rep-IPv6-support.patch
+Patch0053: 0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch
+Patch0054: 0054-Revert-glusterd-storhaug-remove-ganesha.patch
+Patch0055: 0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch
+Patch0056: 0056-common-ha-fixes-for-Debian-based-systems.patch
+Patch0057: 0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch
+Patch0058: 0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch
+Patch0059: 0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch
+Patch0060: 0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch
+Patch0061: 0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch
+Patch0062: 0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch
+Patch0063: 0063-glusterd-ganesha-update-cache-invalidation-properly-.patch
+Patch0064: 0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch
+Patch0065: 0065-ganesha-scripts-remove-dependency-over-export-config.patch
+Patch0066: 0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch
+Patch0067: 0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch
+Patch0068: 0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch
+Patch0069: 0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
+Patch0070: 0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch
+Patch0071: 0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch
+Patch0072: 0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch
+Patch0073: 0073-build-remove-ganesha-dependency-on-selinux-policy.patch
+Patch0074: 0074-common-ha-enable-pacemaker-at-end-of-setup.patch
+Patch0075: 0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch
+Patch0076: 0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch
+Patch0077: 0077-glusterd-ganesha-create-remove-export-file-only-from.patch
+Patch0078: 0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch
+Patch0079: 0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch
+Patch0080: 0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch
+Patch0081: 0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch
+Patch0082: 0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch
+Patch0083: 0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
+Patch0084: 0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch
+Patch0085: 0085-Revert-all-remove-code-which-is-not-being-considered.patch
+Patch0086: 0086-Revert-tiering-remove-the-translator-from-build-and-.patch
+Patch0087: 0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch
+Patch0088: 0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch
+Patch0089: 0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch
+Patch0090: 0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch
+Patch0091: 0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch
+Patch0092: 0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch
+Patch0093: 0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch
+Patch0094: 0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch
+Patch0095: 0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch
+Patch0096: 0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch
+Patch0097: 0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch
+Patch0098: 0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch
+Patch0099: 0099-client-fini-return-fini-after-rpc-cleanup.patch
+Patch0100: 0100-clnt-rpc-ref-leak-during-disconnect.patch
+Patch0101: 0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch
+Patch0102: 0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch
+Patch0103: 0103-dht-NULL-check-before-setting-error-flag.patch
+Patch0104: 0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch
+Patch0105: 0105-core-Log-level-changes-do-not-effect-on-running-clie.patch
+Patch0106: 0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch
+Patch0107: 0107-gfapi-add-function-to-set-client-pid.patch
+Patch0108: 0108-afr-add-client-pid-to-all-gf_event-calls.patch
+Patch0109: 0109-glusterd-Optimize-glusterd-handshaking-code-path.patch
+Patch0110: 0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch
+Patch0111: 0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch
+Patch0112: 0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
+Patch0113: 0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch
+Patch0114: 0114-core-fix-hang-issue-in-__gf_free.patch
+Patch0115: 0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch
+Patch0116: 0116-cluster-ec-fix-fd-reopen.patch
+Patch0117: 0117-spec-Remove-thin-arbiter-package.patch
+Patch0118: 0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch
+Patch0119: 0119-glusterd-provide-a-way-to-detach-failed-node.patch
+Patch0120: 0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch
+Patch0121: 0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch
+Patch0122: 0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch
+Patch0123: 0123-ctime-Fix-log-repeated-logging-during-open.patch
+Patch0124: 0124-spec-remove-duplicate-references-to-files.patch
+Patch0125: 0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch
+Patch0126: 0126-cluster-dht-refactor-dht-lookup-functions.patch
+Patch0127: 0127-cluster-dht-Refactor-dht-lookup-functions.patch
+Patch0128: 0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch
+Patch0129: 0129-core-handle-memory-accounting-correctly.patch
+Patch0130: 0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch
+Patch0131: 0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch
+Patch0132: 0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch
+Patch0133: 0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch
+Patch0134: 0134-performance-write-behind-remove-request-from-wip-lis.patch
+Patch0135: 0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
+Patch0136: 0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch
+Patch0137: 0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch
+Patch0138: 0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch
+Patch0139: 0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch
+Patch0140: 0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch
+Patch0141: 0141-socket-ssl-fix-crl-handling.patch
+Patch0142: 0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch
+Patch0143: 0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch
+Patch0144: 0144-cluster-ec-honor-contention-notifications-for-partia.patch
+Patch0145: 0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch
+Patch0146: 0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch
+Patch0147: 0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch
+Patch0148: 0148-cluster-dht-Lookup-all-files-when-processing-directo.patch
+Patch0149: 0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch
+Patch0150: 0150-libglusterfs-define-macros-needed-for-cloudsync.patch
+Patch0151: 0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch
+Patch0152: 0152-storage-posix-changes-with-respect-to-cloudsync.patch
+Patch0153: 0153-features-cloudsync-Added-some-new-functions.patch
+Patch0154: 0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch
+Patch0155: 0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch
+Patch0156: 0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch
+Patch0157: 0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch
+Patch0158: 0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch
+Patch0159: 0159-glusterd-add-an-op-version-check.patch
+Patch0160: 0160-geo-rep-Geo-rep-help-text-issue.patch
+Patch0161: 0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch
+Patch0162: 0162-geo-rep-Fix-sync-method-config.patch
+Patch0163: 0163-geo-rep-Fix-sync-hang-with-tarssh.patch
+Patch0164: 0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch
+Patch0165: 0165-tests-shd-Add-test-coverage-for-shd-mux.patch
+Patch0166: 0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch
+Patch0167: 0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch
+Patch0168: 0168-cluster-dht-Fix-directory-perms-during-selfheal.patch
+Patch0169: 0169-Build-Fix-spec-to-enable-rhel8-client-build.patch
+Patch0170: 0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch
+Patch0171: 0171-posix-add-storage.reserve-size-option.patch
+Patch0172: 0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch
+Patch0173: 0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch
+Patch0174: 0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch
+Patch0175: 0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch
+Patch0176: 0176-features-shard-Fix-crash-during-background-shard-del.patch
+Patch0177: 0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch
+Patch0178: 0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch
+Patch0179: 0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch
+Patch0180: 0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch
+Patch0181: 0181-afr-log-before-attempting-data-self-heal.patch
+Patch0182: 0182-geo-rep-fix-mountbroker-setup.patch
+Patch0183: 0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch
+Patch0184: 0184-tests-Add-gating-configuration-file-for-rhel8.patch
+Patch0185: 0185-gfapi-provide-an-api-for-setting-statedump-path.patch
+Patch0186: 0186-cli-Remove-brick-warning-seems-unnecessary.patch
+Patch0187: 0187-gfapi-statedump_path-add-proper-version-number.patch
+Patch0188: 0188-features-shard-Fix-integer-overflow-in-block-count-a.patch
+Patch0189: 0189-features-shard-Fix-block-count-accounting-upon-trunc.patch
+Patch0190: 0190-Build-removing-the-hardcoded-usage-of-python3.patch
+Patch0191: 0191-Build-Update-python-shebangs-based-on-version.patch
+Patch0192: 0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch
+Patch0193: 0193-spec-fixed-python-dependency-for-rhel6.patch
+Patch0194: 0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch
+Patch0195: 0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch
+Patch0196: 0196-posix-ctime-Fix-ctime-upgrade-issue.patch
+Patch0197: 0197-posix-fix-crash-in-posix_cs_set_state.patch
+Patch0198: 0198-cluster-ec-Prevent-double-pre-op-xattrops.patch
+Patch0199: 0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch
+Patch0200: 0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch
+Patch0201: 0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch
+Patch0202: 0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch
+Patch0203: 0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch
+Patch0204: 0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch
+Patch0205: 0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch
+Patch0206: 0206-glusterd-ignore-user.-options-from-compatibility-che.patch
+Patch0207: 0207-glusterd-fix-use-after-free-of-a-dict_t.patch
+Patch0208: 0208-mem-pool-remove-dead-code.patch
+Patch0209: 0209-core-avoid-dynamic-TLS-allocation-when-possible.patch
+Patch0210: 0210-mem-pool.-c-h-minor-changes.patch
+Patch0211: 0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch
+Patch0212: 0212-core-fix-memory-allocation-issues.patch
+Patch0213: 0213-cluster-dht-Strip-out-dht-xattrs.patch
+Patch0214: 0214-geo-rep-Upgrading-config-file-to-new-version.patch
+Patch0215: 0215-posix-modify-storage.reserve-option-to-take-size-and.patch
+Patch0216: 0216-Test-case-fixe-for-downstream-3.5.0.patch
+Patch0217: 0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch
+Patch0218: 0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch
+Patch0219: 0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch
+Patch0220: 0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch
+Patch0221: 0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch
+Patch0222: 0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch
+Patch0223: 0223-change-get_real_filename-implementation-to-use-ENOAT.patch
+Patch0224: 0224-core-replace-inet_addr-with-inet_pton.patch
+Patch0225: 0225-tests-utils-Fix-py2-py3-util-python-scripts.patch
+Patch0226: 0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch
+Patch0227: 0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch
+Patch0228: 0228-locks-enable-notify-contention-by-default.patch
+Patch0229: 0229-glusterd-Show-the-correct-brick-status-in-get-state.patch
+Patch0230: 0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch
+Patch0231: 0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch
+Patch0232: 0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch
+Patch0233: 0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch
+Patch0234: 0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch
+Patch0235: 0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch
+Patch0236: 0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch
+Patch0237: 0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch
+Patch0238: 0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch
+Patch0239: 0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch
+Patch0240: 0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch
+Patch0241: 0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch
+Patch0242: 0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch
+Patch0243: 0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch
+Patch0244: 0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch
+Patch0245: 0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch
+Patch0246: 0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch
+Patch0247: 0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch
+Patch0248: 0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch
+Patch0249: 0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch
+Patch0250: 0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch
+Patch0251: 0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch
+Patch0252: 0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch
+Patch0253: 0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch
+Patch0254: 0254-Detach-iot_worker-to-release-its-resources.patch
+Patch0255: 0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch
+Patch0256: 0256-features-snapview-server-use-the-same-volfile-server.patch
+Patch0257: 0257-geo-rep-Test-case-for-upgrading-config-file.patch
+Patch0258: 0258-geo-rep-Fix-mount-broker-setup-issue.patch
+Patch0259: 0259-gluster-block-tuning-perf-options.patch
+Patch0260: 0260-ctime-Set-mdata-xattr-on-legacy-files.patch
+Patch0261: 0261-features-utime-Fix-mem_put-crash.patch
+Patch0262: 0262-glusterd-ctime-Disable-ctime-by-default.patch
+Patch0263: 0263-tests-fix-ctime-related-tests.patch
+Patch0264: 0264-gfapi-Fix-deadlock-while-processing-upcall.patch
+Patch0265: 0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch
+Patch0266: 0266-geo-rep-Fix-mount-broker-setup-issue.patch
+Patch0267: 0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch
+Patch0268: 0268-rpc-transport-have-default-listen-port.patch
+Patch0269: 0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch
+Patch0270: 0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch
+Patch0271: 0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch
+Patch0272: 0272-cluster-ec-Always-read-from-good-mask.patch
+Patch0273: 0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch
+Patch0274: 0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch
+Patch0275: 0275-cluster-ec-Create-heal-task-with-heal-process-id.patch
+Patch0276: 0276-features-utime-always-update-ctime-at-setattr.patch
+Patch0277: 0277-geo-rep-Fix-Config-Get-Race.patch
+Patch0278: 0278-geo-rep-Fix-worker-connection-issue.patch
+Patch0279: 0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch
+Patch0280: 0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch
+Patch0281: 0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch
+Patch0282: 0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch
+Patch0283: 0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch
+Patch0284: 0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch
+Patch0285: 0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch
+Patch0286: 0286-glusterfs.spec.in-added-script-files-for-machine-com.patch
+Patch0287: 0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch
+Patch0288: 0288-cluster-ec-Fix-coverity-issues.patch
+Patch0289: 0289-cluster-ec-quorum-count-implementation.patch
+Patch0290: 0290-glusterd-tag-disperse.quorum-count-for-31306.patch
+Patch0291: 0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch
+Patch0292: 0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch
+Patch0293: 0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch
+Patch0294: 0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch
+Patch0295: 0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch
+Patch0296: 0296-glusterfind-pre-command-failure-on-a-modify.patch
+Patch0297: 0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
+Patch0298: 0298-geo-rep-fix-sub-command-during-worker-connection.patch
+Patch0299: 0299-geo-rep-performance-improvement-while-syncing-rename.patch
+Patch0300: 0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
+Patch0301: 0301-posix-Brick-is-going-down-unexpectedly.patch
+Patch0302: 0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
+Patch0303: 0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch
+Patch0304: 0304-cluster-dht-Correct-fd-processing-loop.patch
+Patch0305: 0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch
+Patch0306: 0306-cli-fix-distCount-value.patch
+Patch0307: 0307-ssl-fix-RHEL8-regression-failure.patch
+Patch0308: 0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch
+Patch0309: 0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch
+Patch0310: 0310-tests-test-case-for-non-root-geo-rep-setup.patch
+Patch0311: 0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch
+Patch0312: 0312-Scripts-quota_fsck-script-KeyError-contri_size.patch
+Patch0313: 0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch
+Patch0314: 0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
+Patch0315: 0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch
+Patch0316: 0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch
+Patch0317: 0317-Update-rfc.sh-to-rhgs-3.5.1.patch
+Patch0318: 0318-Update-rfc.sh-to-rhgs-3.5.1.patch
+Patch0319: 0319-features-snapview-server-obtain-the-list-of-snapshot.patch
+Patch0320: 0320-gf-event-Handle-unix-volfile-servers.patch
+Patch0321: 0321-Adding-white-spaces-to-description-of-set-group.patch
+Patch0322: 0322-glusterd-display-correct-rebalance-data-size-after-g.patch
+Patch0323: 0323-cli-display-detailed-rebalance-info.patch
+Patch0324: 0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch
+Patch0325: 0325-extras-hooks-Install-and-package-newly-added-post-ad.patch
+Patch0326: 0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch
+Patch0327: 0327-glusterfind-integrate-with-gfid2path.patch
+Patch0328: 0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch
+Patch0329: 0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch
+Patch0330: 0330-mount.glusterfs-change-the-error-message.patch
+Patch0331: 0331-features-locks-Do-special-handling-for-op-version-3..patch
+Patch0332: 0332-Removing-one-top-command-from-gluster-v-help.patch
+Patch0333: 0333-rpc-Synchronize-slot-allocation-code.patch
+Patch0334: 0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch
+Patch0335: 0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch
+Patch0336: 0336-spec-check-and-return-exit-code-in-rpm-scripts.patch
+Patch0337: 0337-fuse-Set-limit-on-invalidate-queue-size.patch
+Patch0338: 0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch
+Patch0339: 0339-geo-rep-fix-integer-config-validation.patch
+Patch0340: 0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch
+Patch0341: 0341-socket-fix-error-handling.patch
+Patch0342: 0342-Revert-hooks-remove-selinux-hooks.patch
+Patch0343: 0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch
+Patch0344: 0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch
+Patch0345: 0345-read-ahead-io-cache-turn-off-by-default.patch
+Patch0346: 0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch
+Patch0347: 0347-tools-glusterfind-handle-offline-bricks.patch
+Patch0348: 0348-glusterfind-Fix-py2-py3-issues.patch
+Patch0349: 0349-glusterfind-python3-compatibility.patch
+Patch0350: 0350-tools-glusterfind-Remove-an-extra-argument.patch
+Patch0351: 0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch
+Patch0352: 0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch
+Patch0353: 0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch
+Patch0354: 0354-core-fix-memory-pool-management-races.patch
+Patch0355: 0355-core-Prevent-crash-on-process-termination.patch
+Patch0356: 0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch
+Patch0357: 0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch
+Patch0358: 0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch
+Patch0359: 0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch
+Patch0360: 0360-rpc-Make-ssl-log-more-useful.patch
+Patch0361: 0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch
+Patch0362: 0362-write-behind-fix-data-corruption.patch
+Patch0363: 0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
+Patch0364: 0364-dht-fixing-rebalance-failures-for-files-with-holes.patch
+Patch0365: 0365-build-geo-rep-requires-relevant-selinux-permission-f.patch
+Patch0366: 0366-snapshot-fix-python3-issue-in-gcron.patch
+Patch0367: 0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch
+Patch0368: 0368-Update-rfc.sh-to-rhgs-3.5.2.patch
+Patch0369: 0369-cluster-ec-Return-correct-error-code-and-log-message.patch
+Patch0370: 0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch
+Patch0371: 0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
+Patch0372: 0372-posix-fix-seek-functionality.patch
+Patch0373: 0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch
+Patch0374: 0374-open-behind-fix-missing-fd-reference.patch
+Patch0375: 0375-features-shard-Send-correct-size-when-reads-are-sent.patch
+Patch0376: 0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch
+Patch0377: 0377-syncop-improve-scaling-and-implement-more-tools.patch
+Patch0378: 0378-Revert-open-behind-fix-missing-fd-reference.patch
+Patch0379: 0379-glusterd-add-missing-synccond_broadcast.patch
+Patch0380: 0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch
+Patch0381: 0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch
+Patch0382: 0382-features-shard-Aggregate-file-size-block-count-befor.patch
+Patch0383: 0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch
+Patch0384: 0384-Update-rfc.sh-to-rhgs-3.5.3.patch
+Patch0385: 0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch
+Patch0386: 0386-glusterd-increase-the-StartLimitBurst.patch
+Patch0387: 0387-To-fix-readdir-ahead-memory-leak.patch
+Patch0388: 0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch
+Patch0389: 0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch
+Patch0390: 0390-glusterd-deafult-options-after-volume-reset.patch
+Patch0391: 0391-glusterd-unlink-the-file-after-killing-the-process.patch
+Patch0392: 0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch
+Patch0393: 0393-afr-restore-timestamp-of-files-during-metadata-heal.patch
+Patch0394: 0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch
+Patch0395: 0395-Cli-Removing-old-log-rotate-command.patch
+Patch0396: 0396-Updating-gluster-manual.patch
+Patch0397: 0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch
+Patch0398: 0398-ec-change-error-message-for-heal-commands-for-disper.patch
+Patch0399: 0399-glusterd-coverity-fixes.patch
+Patch0400: 0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch
+Patch0401: 0401-cli-change-the-warning-message.patch
+Patch0402: 0402-afr-wake-up-index-healer-threads.patch
+Patch0403: 0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch
+Patch0404: 0404-tests-Fix-spurious-failure.patch
+Patch0405: 0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch
+Patch0406: 0406-afr-support-split-brain-CLI-for-replica-3.patch
+Patch0407: 0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch
+Patch0408: 0408-geo-rep-Fix-ssh-port-validation.patch
+Patch0409: 0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch
+Patch0410: 0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch
+Patch0411: 0411-tools-glusterfind-validate-session-name.patch
+Patch0412: 0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch
+Patch0413: 0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch
+Patch0414: 0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch
+Patch0415: 0415-dht-Fix-stale-layout-and-create-issue.patch
+Patch0416: 0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch
+Patch0417: 0417-events-fix-IPv6-memory-corruption.patch
+Patch0418: 0418-md-cache-avoid-clearing-cache-when-not-necessary.patch
+Patch0419: 0419-cluster-afr-fix-race-when-bricks-come-up.patch
+Patch0420: 0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch
+Patch0421: 0421-Improve-logging-in-EC-client-and-lock-translator.patch
+Patch0422: 0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch
+Patch0423: 0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch
+Patch0424: 0424-afr-make-heal-info-lockless.patch
+Patch0425: 0425-tests-Fix-spurious-self-heald.t-failure.patch
+Patch0426: 0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch
+Patch0427: 0427-storage-posix-Fixing-a-coverity-issue.patch
+Patch0428: 0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch
+Patch0429: 0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch
+Patch0430: 0430-Fix-some-Null-pointer-dereference-coverity-issues.patch
+Patch0431: 0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch
+Patch0432: 0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch
+Patch0433: 0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch
+Patch0434: 0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch
+Patch0435: 0435-glusterd-coverity-fix.patch
+Patch0436: 0436-glusterd-coverity-fixes.patch
+Patch0437: 0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch
+Patch0438: 0438-dht-sparse-files-rebalance-enhancements.patch
+Patch0439: 0439-cluster-afr-Delay-post-op-for-fsync.patch
+Patch0440: 0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch
+Patch0441: 0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch
+Patch0442: 0442-fuse-correctly-handle-setxattr-values.patch
+Patch0443: 0443-fuse-fix-high-sev-coverity-issue.patch
+Patch0444: 0444-mount-fuse-Fixing-a-coverity-issue.patch
+Patch0445: 0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch
+Patch0446: 0446-bitrot-Make-number-of-signer-threads-configurable.patch
+Patch0447: 0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch
+Patch0448: 0448-Posix-Use-simple-approach-to-close-fd.patch
+Patch0449: 0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch
+Patch0450: 0450-tests-basic-ctime-enable-ctime-before-testing.patch
+Patch0451: 0451-extras-Modify-group-virt-to-include-network-related-.patch
+Patch0452: 0452-Tier-DHT-Handle-the-pause-case-missed-out.patch
+Patch0453: 0453-glusterd-add-brick-command-failure.patch
+Patch0454: 0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch
+Patch0455: 0455-locks-prevent-deletion-of-locked-entries.patch
+Patch0456: 0456-add-clean-local-after-grant-lock.patch
+Patch0457: 0457-cluster-ec-Improve-detection-of-new-heals.patch
+Patch0458: 0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch
+Patch0459: 0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
+Patch0460: 0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
+Patch0461: 0461-geo-replication-Fix-IPv6-parsing.patch
+Patch0462: 0462-Issue-with-gf_fill_iatt_for_dirent.patch
+Patch0463: 0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch
+Patch0464: 0464-storage-posix-Remove-nr_files-usage.patch
+Patch0465: 0465-posix-Implement-a-janitor-thread-to-close-fd.patch
+Patch0466: 0466-cluster-ec-Change-stale-index-handling.patch
+Patch0467: 0467-build-Added-dependency-for-glusterfs-selinux.patch
+Patch0468: 0468-build-Update-the-glusterfs-selinux-version.patch
+Patch0469: 0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch
+Patch0470: 0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch
+Patch0471: 0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch
+Patch0472: 0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch
+Patch0473: 0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch
+Patch0474: 0474-features-locks-posixlk-clear-lock-should-set-error-a.patch
+Patch0475: 0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch
+Patch0476: 0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch
+Patch0477: 0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch
+Patch0478: 0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch
+Patch0479: 0479-ganesha-ha-revised-regex-exprs-for-status.patch
+Patch0480: 0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch
+Patch0481: 0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch
+Patch0482: 0482-logger-Always-print-errors-in-english.patch
+Patch0483: 0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch
+Patch0484: 0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch
+Patch0485: 0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch
+Patch0486: 0486-glusterd-brick-sock-file-deleted-log-error-1560.patch
+Patch0487: 0487-Events-Log-file-not-re-opened-after-logrotate.patch
+Patch0488: 0488-glusterd-afr-enable-granular-entry-heal-by-default.patch
+Patch0489: 0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch
+Patch0490: 0490-Segmentation-fault-occurs-during-truncate.patch
+Patch0491: 0491-glusterd-mount-directory-getting-truncated-on-mounti.patch
+Patch0492: 0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch
+Patch0493: 0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch
+Patch0494: 0494-glusterd-start-the-brick-on-a-different-port.patch
+Patch0495: 0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch
+Patch0496: 0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch
+Patch0497: 0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch
+Patch0498: 0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch
+Patch0499: 0499-gfapi-give-appropriate-error-when-size-exceeds.patch
+Patch0500: 0500-features-shard-Convert-shard-block-indices-to-uint64.patch
+Patch0501: 0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch
+Patch0502: 0502-dht-fixing-a-permission-update-issue.patch
+Patch0503: 0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch
+Patch0504: 0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch
+Patch0505: 0505-trash-Create-inode_table-only-while-feature-is-enabl.patch
+Patch0506: 0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch
+Patch0507: 0507-inode-make-critical-section-smaller.patch
+Patch0508: 0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch
+Patch0509: 0509-core-configure-optimum-inode-table-hash_size-for-shd.patch
+Patch0510: 0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch
+Patch0511: 0511-features-shard-Missing-format-specifier.patch
+Patch0512: 0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch
+Patch0513: 0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch
+Patch0514: 0514-afr-event-gen-changes.patch
+Patch0515: 0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch
+Patch0516: 0516-afr-return-EIO-for-gfid-split-brains.patch
+Patch0517: 0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch
+Patch0518: 0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch
+Patch0519: 0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch
+Patch0520: 0520-performance-open-behind-seek-fop-should-open_and_res.patch
+Patch0521: 0521-open-behind-fix-missing-fd-reference.patch
+Patch0522: 0522-lcov-improve-line-coverage.patch
+Patch0523: 0523-open-behind-rewrite-of-internal-logic.patch
+Patch0524: 0524-open-behind-fix-call_frame-leak.patch
+Patch0525: 0525-open-behind-implement-create-fop.patch
+Patch0526: 0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch
+Patch0527: 0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch
+Patch0528: 0528-Extras-Removing-xattr_analysis-script.patch
+Patch0529: 0529-geo-rep-prompt-should-work-for-ignore_deletes.patch
+Patch0530: 0530-gfapi-avoid-crash-while-logging-message.patch
+Patch0531: 0531-Glustereventsd-Default-port-change-2091.patch
+Patch0532: 0532-glusterd-fix-for-starting-brick-on-new-port.patch
+Patch0533: 0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch
+Patch0534: 0534-glusterd-Resolve-use-after-free-bug-2181.patch
+Patch0535: 0535-multiple-files-use-dict_allocate_and_serialize-where.patch
+Patch0536: 0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch
+Patch0537: 0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch
+Patch0538: 0538-afr-fix-coverity-issue-introduced-by-90cefde.patch
+Patch0539: 0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch
+Patch0540: 0540-extras-Disable-write-behind-for-group-samba.patch
+Patch0541: 0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch
+Patch0542: 0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch
+Patch0543: 0543-glusterd-handle-custom-xlator-failure-cases.patch
+Patch0900: 0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch
+Patch0901: 0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch
+
+%description
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package includes the glusterfs binary, the glusterfsd daemon and the
+libglusterfs and glusterfs translator modules common to both GlusterFS server
+and client framework.
+
+%package api
+Summary: GlusterFS API library
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+%description api
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the glusterfs libgfapi library.
+
+%package api-devel
+Summary: Development Libraries
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-devel%{?_isa} = %{version}-%{release}
+Requires: libacl-devel
+Requires: %{name}-api%{?_isa} = %{version}-%{release}
+
+%description api-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the API include files.
+
+%package cli
+Summary: GlusterFS CLI
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+%description cli
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the GlusterFS CLI application and its man page.
+
+%package cloudsync-plugins
+Summary: Cloudsync Plugins
+BuildRequires: libcurl-devel
+Requires: glusterfs-libs = %{version}-%{release}
+
+%description cloudsync-plugins
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides cloudsync plugins for the archival feature.
+
+%package devel
+Summary: Development Libraries
+Requires: %{name}%{?_isa} = %{version}-%{release}
+# Needed for the Glupy examples to work
+%if ( 0%{!?_without_extra_xlators:1} )
+Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release}
+%endif
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+%if ( 0%{!?_without_server:1} )
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+%endif
+
+%description devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the development libraries and include files.
+
+%if ( 0%{!?_without_extra_xlators:1} )
+%package extra-xlators
+Summary: Extra Gluster filesystem Translators
+# We need python-gluster rpm for gluster module's __init__.py in Python
+# site-packages area
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+Requires: python%{_pythonver}
+
+%description extra-xlators
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides extra filesystem Translators, such as Glupy,
+for GlusterFS.
+%endif
+
+%package fuse
+Summary: Fuse client
+BuildRequires: fuse-devel
+Requires: attr
+Requires: psmisc
+
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+
+Obsoletes: %{name}-client < %{version}-%{release}
+Provides: %{name}-client = %{version}-%{release}
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+%description fuse
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides support for FUSE-based clients and includes the
+glusterfs(d) binary.
+
+%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 )
+%package ganesha
+Summary: NFS-Ganesha configuration
+Group: Applications/File
+
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: nfs-ganesha-selinux >= 2.7.3
+Requires: nfs-ganesha-gluster >= 2.7.3
+Requires: pcs, dbus
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+Requires: cman, pacemaker, corosync
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 5 )
+# we need the portblock resource-agent, shipped in resource-agents 3.9.5 and later.
+Requires: resource-agents >= 3.9.5
+Requires: net-tools
+%endif
+
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: selinux-policy >= 3.13.1-160
+Requires(post): policycoreutils-python
+Requires(postun): policycoreutils-python
+%else
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+%endif
+%endif
+
+%description ganesha
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the configuration and related files for using
+NFS-Ganesha as the NFS server with GlusterFS.
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%package geo-replication
+Summary: GlusterFS Geo-replication
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: python%{_pythonver}
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+Requires: python-prettytable
+%else
+Requires: python%{_pythonver}-prettytable
+%endif
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+
+Requires: rsync
+Requires: util-linux
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+# required for setting selinux bools
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+Requires: selinux-policy-targeted
+Requires(post): selinux-policy-targeted
+BuildRequires: selinux-policy-devel
+%endif
+
+%description geo-replication
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides support for geo-replication.
+%endif
+
+%package libs
+Summary: GlusterFS common libraries
+
+%description libs
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the base GlusterFS libraries.
+
+%package -n python%{_pythonver}-gluster
+Summary: GlusterFS Python library
+Requires: python%{_pythonver}
+%if ( ! %{_usepython3} )
+%{?python_provide:%python_provide python-gluster}
+Provides: python-gluster = %{version}-%{release}
+Obsoletes: python-gluster < 3.10
+%endif
+
+%description -n python%{_pythonver}-gluster
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package contains the Python modules of GlusterFS under their own
+gluster namespace.
+
+%if ( 0%{!?_without_rdma:1} )
+%package rdma
+Summary: GlusterFS rdma support for ib-verbs
+%if ( 0%{?fedora} && 0%{?fedora} > 26 )
+BuildRequires: rdma-core-devel
+%else
+BuildRequires: libibverbs-devel
+BuildRequires: librdmacm-devel >= 1.0.15
+%endif
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+%description rdma
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides support for the ib-verbs library.
+%endif
+
+%if ( 0%{!?_without_regression_tests:1} )
+%package regression-tests
+Summary: Development Tools
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+## thin provisioning support
+Requires: lvm2 >= 2.02.89
+Requires: perl(App::Prove) perl(Test::Harness) gcc util-linux-ng
+Requires: python%{_pythonver}
+Requires: attr dbench file git libacl-devel net-tools
+Requires: nfs-utils xfsprogs yajl psmisc bc
+
+%description regression-tests
+The Gluster Test Framework is a suite of scripts used for
+regression testing of Gluster.
+%endif
+
+%if ( 0%{!?_without_ocf:1} )
+%package resource-agents
+Summary: OCF Resource Agents for GlusterFS
+License: GPLv3+
+BuildArch: noarch
+# this Group handling comes from the Fedora resource-agents package
+# for glusterd
+Requires: %{name}-server = %{version}-%{release}
+# depending on the distribution, we need pacemaker or resource-agents
+Requires: %{_prefix}/lib/ocf/resource.d
+
+%description resource-agents
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the resource agents which plug glusterd into
+Open Cluster Framework (OCF) compliant cluster resource managers,
+like Pacemaker.
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%package server
+Summary: Clustered file-system server
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-cli%{?_isa} = %{version}-%{release}
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) )
+Requires: glusterfs-selinux >= 1.0-1
+%endif
+# some daemons (like quota) use a fuse-mount; glusterfsd is part of -fuse
+Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+# self-heal daemon, rebalance, nfs-server etc. are actually clients
+Requires: %{name}-api%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+# lvm2 for snapshot, and nfs-utils and rpcbind/portmap for gnfs server
+Requires: lvm2
+Requires: nfs-utils
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%else
+Requires(post): /sbin/chkconfig
+Requires(preun): /sbin/service
+Requires(preun): /sbin/chkconfig
+Requires(postun): /sbin/service
+%endif
+%if (0%{?_with_firewalld:1})
+# we install firewalld rules, so we need to have the directory owned
+%if ( 0%{!?rhel} )
+# not on RHEL because firewalld-filesystem appeared in 7.3
+# when EL7 rpm gets weak dependencies we can add a Suggests:
+Requires: firewalld-filesystem
+%endif
+%endif
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+Requires: rpcbind
+%else
+Requires: portmap
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+Requires: python-argparse
+%endif
+%if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+Requires: python%{_pythonver}-pyxattr
+%else
+Requires: pyxattr
+%endif
+%if (0%{?_with_valgrind:1})
+Requires: valgrind
+%endif
+
+%description server
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the glusterfs server daemon.
+%endif
+
+%package client-xlators
+Summary: GlusterFS client-side translators
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+%description client-xlators
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from the GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the translators needed on any GlusterFS client.
+
+%if ( 0%{!?_without_events:1} )
+%package events
+Summary: GlusterFS Events
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: python%{_pythonver}
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: python-requests
+%else
+Requires: python%{_pythonver}-requests
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+Requires: python-prettytable
+Requires: python-argparse
+%else
+Requires: python%{_pythonver}-prettytable
+%endif
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%endif
+
+%description events
+GlusterFS Events
+
+%endif
+
+%prep
+%setup -q -n %{name}-%{version}%{?prereltag}
+
+# sanitization scriptlet for patches with file renames
+ls %{_topdir}/SOURCES/*.patch | sort | \
+while read p
+do
+    # if the destination file exists, it's most probably stale
+ # so we must remove it
+ rename_to=( $(grep -i 'rename to' $p | cut -f 3 -d ' ') )
+ if [ ${#rename_to[*]} -gt 0 ]; then
+ for f in ${rename_to[*]}
+ do
+ if [ -f $f ]; then
+ rm -f $f
+ elif [ -d $f ]; then
+ rm -rf $f
+ fi
+ done
+ fi
+
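+    # scan the unified diff headers: any doc file this patch touches that is
+    # not yet present in the tree gets collected and later excluded from the
+    # 'git apply' invocation below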
+ SOURCE_FILES=( $(egrep '^\-\-\- a/' $p | cut -f 2- -d '/') )
+ DEST_FILES=( $(egrep '^\+\+\+ b/' $p | cut -f 2- -d '/') )
+ EXCLUDE_DOCS=()
+ for idx in ${!SOURCE_FILES[@]}; do
+ # skip the doc
+ source_file=${SOURCE_FILES[$idx]}
+ dest_file=${DEST_FILES[$idx]}
+ if [[ "$dest_file" =~ ^doc/.+ ]]; then
+ if [ "$source_file" != "dev/null" ] && [ ! -f "$dest_file" ]; then
+ # if patch is being applied to a doc file and if the doc file
+ # hasn't been added so far then we need to exclude it
+ EXCLUDE_DOCS=( ${EXCLUDE_DOCS[*]} "$dest_file" )
+ fi
+ fi
+ done
+ EXCLUDE_DOCS_OPT=""
+    for doc in "${EXCLUDE_DOCS[@]}"; do
+ EXCLUDE_DOCS_OPT="--exclude=$doc $EXCLUDE_DOCS_OPT"
+ done
+
+    # HACK to fix the build: the patches handled below do not apply cleanly
+    # with 'git apply', so fall back to plain 'patch' and tolerate rejects
+ bn=$(basename $p)
+ if [ "$bn" == "0085-Revert-all-remove-code-which-is-not-being-considered.patch" ]; then
+ (patch -p1 -u -F3 < $p || :)
+ if [ -f libglusterfs/Makefile.am.rej ]; then
+ sed -i -e 's/^SUBDIRS = src/SUBDIRS = src src\/gfdb/g;s/^CLEANFILES = /CLEANFILES =/g' libglusterfs/Makefile.am
+ fi
+ elif [ "$bn" == "0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch" ]; then
+ (patch -p1 < $p || :)
+ elif [ "$bn" == "0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch" ]; then
+ (patch -p1 < $p || :)
+ elif [ "$bn" == "0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch" ]; then
+ (patch -p1 < $p || :)
+ elif [ "$bn" == "0117-spec-Remove-thin-arbiter-package.patch" ]; then
+ (patch -p1 < $p || :)
+ elif [ "$bn" == "0023-hooks-remove-selinux-hooks.patch" ]; then
+ (patch -p1 < $p || :)
+ elif [ "$bn" == "0042-spec-client-server-Builds-are-failing-on-rhel-6.patch" ]; then
+ (patch -p1 < $p || :)
+ else
+ # apply the patch with 'git apply'
+ git apply -p1 --exclude=rfc.sh \
+ --exclude=.gitignore \
+ --exclude=.testignore \
+ --exclude=MAINTAINERS \
+ --exclude=extras/checkpatch.pl \
+ --exclude=build-aux/checkpatch.pl \
+ --exclude='tests/*' \
+ ${EXCLUDE_DOCS_OPT} \
+ $p
+ fi
+
+done
+
+echo "fixing python shebangs..."
+%if ( %{_usepython3} )
+ for i in `find . -type f -exec bash -c "if file {} | grep 'Python script, ASCII text executable' >/dev/null; then echo {}; fi" ';'`; do
+ sed -i -e 's|^#!/usr/bin/python.*|#!%{__python3}|' -e 's|^#!/usr/bin/env python.*|#!%{__python3}|' $i
+ done
+%else
+ for f in api events extras geo-replication libglusterfs tools xlators; do
+ find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \;
+ done
+%endif
+
+%build
+
+# In RHEL7 few hardening flags are available by default, however the RELRO
+# default behaviour is partial, convert to full
+%if ( 0%{?rhel} && 0%{?rhel} >= 7 )
+LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now"
+export LDFLAGS
+%else
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+CFLAGS="$RPM_OPT_FLAGS -fPIE -DPIE"
+LDFLAGS="$RPM_LD_FLAGS -pie -Wl,-z,relro,-z,now"
+%else
+# It appears that with gcc-4.1.2 in RHEL5 there is an issue using both -fPIC and
+# -fPIE that makes -z relro not work; -fPIE seems to undo what -fPIC does
+CFLAGS="$CFLAGS $RPM_OPT_FLAGS"
+LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now"
+%endif
+export CFLAGS
+export LDFLAGS
+%endif
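+# A quick manual check (a sketch, assuming binutils is available): running
+# `readelf -d` against a built binary should report BIND_NOW once full RELRO
+# is in effect.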
+
+./autogen.sh && %configure \
+ %{?_with_asan} \
+ %{?_with_cmocka} \
+ %{?_with_debug} \
+ %{?_with_firewalld} \
+ %{?_with_tmpfilesdir} \
+ %{?_with_tsan} \
+ %{?_with_valgrind} \
+ %{?_without_epoll} \
+ %{?_without_events} \
+ %{?_without_fusermount} \
+ %{?_without_georeplication} \
+ %{?_without_ocf} \
+ %{?_without_rdma} \
+ %{?_without_server} \
+ %{?_without_syslog} \
+ %{?_without_tiering} \
+ %{?_with_ipv6default} \
+ %{?_without_libtirpc}
+
+# fix hardening and remove rpath in shlibs
+%if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+sed -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool
+%endif
+sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool
+sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool
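+# (likewise a sketch: `readelf -d` on the resulting shared libraries should
+# list no RPATH or RUNPATH entries after the substitutions above)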
+
+make %{?_smp_mflags}
+
+%check
+make check
+
+%install
+rm -rf %{buildroot}
+make install DESTDIR=%{buildroot}
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+install -D -p -m 0644 %{SOURCE1} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+install -D -p -m 0644 %{SOURCE2} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterfsd
+%else
+install -D -p -m 0644 extras/glusterd-sysconfig \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+%endif
+%endif
+
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterd
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfs
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfsd
+mkdir -p %{buildroot}%{_rundir}/gluster
+
+# Remove static archives (.a) and libtool files (.la) from the library directories
+find %{buildroot}%{_libdir} -name '*.a' -delete
+find %{buildroot}%{_libdir} -name '*.la' -delete
+
+# Remove installed docs, the ones we want are included by %%doc, in
+# /usr/share/doc/glusterfs or /usr/share/doc/glusterfs-x.y.z depending
+# on the distribution
+%if ( 0%{?fedora} && 0%{?fedora} > 19 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+rm -rf %{buildroot}%{_pkgdocdir}/*
+%else
+rm -rf %{buildroot}%{_defaultdocdir}/%{name}
+mkdir -p %{buildroot}%{_pkgdocdir}
+%endif
+head -50 ChangeLog > ChangeLog.head && mv ChangeLog.head ChangeLog
+cat << EOM >> ChangeLog
+
+More commit messages for this ChangeLog can be found at
+https://forge.gluster.org/glusterfs-core/glusterfs/commits/v%{version}%{?prereltag}
+EOM
+
+# Remove benchmarking and other unpackaged files
+# make install always puts these in %%{_defaultdocdir}/%%{name} so don't
+# use %%{_pkgdocdir}; that will be wrong on later Fedora distributions
+rm -rf %{buildroot}%{_defaultdocdir}/%{name}/benchmarking
+rm -f %{buildroot}%{_defaultdocdir}/%{name}/glusterfs-mode.el
+rm -f %{buildroot}%{_defaultdocdir}/%{name}/glusterfs.vim
+
+%if ( 0%{!?_without_server:1} )
+# Create working directory
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd
+
+# Update configuration file to /var/lib working directory
+sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sharedstatedir}/glusterd|g' \
+ %{buildroot}%{_sysconfdir}/glusterfs/glusterd.vol
+%endif
+
+# Install glusterfsd .service or init.d file
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+%service_install glusterfsd %{glusterfsd_svcfile}
+%endif
+%endif
+
+install -D -p -m 0644 extras/glusterfs-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
+
+# ganesha ghosts
+%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 )
+mkdir -p %{buildroot}%{_sysconfdir}/ganesha
+touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf
+mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/
+touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
+touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
+touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+install -D -p -m 0644 extras/glusterfs-georep-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
+%endif
+
+%if ( 0%{!?_without_server:1} )
+touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
+touch %{buildroot}%{_sharedstatedir}/glusterd/options
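+# pre-create the hook-script directory tree: a pre/ and post/ directory for
+# each volume life-cycle event that glusterd runs hooks for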
+subdirs=(add-brick create copy-file delete gsync-create remove-brick reset set start stop)
+for dir in ${subdirs[@]}; do
+ mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/"$dir"/{pre,post}
+done
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/glustershd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/peers
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/vols
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/nfs/run
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/bitd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/quotad
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/scrub
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/snaps
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/ss_brick
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%endif
+
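+# copy the regression test suite verbatim into the shared data directory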
+find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs
+
+## Install bash completion for cli
+install -p -m 0744 -D extras/command-completion/gluster.bash \
+ %{buildroot}%{_sysconfdir}/bash_completion.d/gluster
+
+%if ( 0%{!?_without_server:1} )
+echo "RHGS 3.5" > %{buildroot}%{_datadir}/glusterfs/release
+%endif
+
+%clean
+rm -rf %{buildroot}
+
+##-----------------------------------------------------------------------------
+## All %%post should be placed here and keep them sorted
+##
+%post
+/sbin/ldconfig
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%systemd_postun_with_restart rsyslog
+%endif
+%endif
+exit 0
+
+%post api
+/sbin/ldconfig
+
+%if ( 0%{!?_without_events:1} )
+%post events
+%service_enable glustereventsd
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%post ganesha
+semanage boolean -m ganesha_use_fusefs --on
+exit 0
+%endif
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%post geo-replication
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%selinux_set_booleans %{selinuxbooleans}
+%endif
+if [ $1 -ge 1 ]; then
+ %systemd_postun_with_restart glusterd
+fi
+exit 0
+%endif
+
+%post libs
+/sbin/ldconfig
+
+%if ( 0%{!?_without_server:1} )
+%post server
+# Legacy server
+%service_enable glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%service_enable glusterfsd
+%endif
+# ".cmd_log_history" is renamed to "cmd_history.log" in GlusterFS-3.7.
+# While upgrading the glusterfs-server package from GlusterFS version <= 3.6 to
+# GlusterFS version 3.7, ".cmd_log_history" should be renamed to
+# "cmd_history.log" to retain cli command history contents.
+if [ -f %{_localstatedir}/log/glusterfs/.cmd_log_history ]; then
+ mv %{_localstatedir}/log/glusterfs/.cmd_log_history \
+ %{_localstatedir}/log/glusterfs/cmd_history.log
+fi
+
+# Genuine Fedora (and EPEL) builds never put gluster files in /etc; if
+# there are any files in /etc from a prior gluster.org install, move them
+# to /var/lib. (N.B. Starting with 3.3.0 all gluster files are in /var/lib
+# in gluster.org RPMs.) Be careful to copy them on the off chance that
+# /etc and /var/lib are on separate file systems
+if [ -d /etc/glusterd -a ! -h %{_sharedstatedir}/glusterd ]; then
+ mkdir -p %{_sharedstatedir}/glusterd
+ cp -a /etc/glusterd %{_sharedstatedir}/glusterd
+ rm -rf /etc/glusterd
+ ln -sf %{_sharedstatedir}/glusterd /etc/glusterd
+fi
+
+# Rename old volfiles in an RPM-standard way. These aren't actually
+# considered package config files, so %%config doesn't work for them.
+if [ -d %{_sharedstatedir}/glusterd/vols ]; then
+ for file in $(find %{_sharedstatedir}/glusterd/vols -name '*.vol'); do
+ newfile=${file}.rpmsave
+ echo "warning: ${file} saved as ${newfile}"
+ cp ${file} ${newfile}
+ done
+fi
+
+# add marker translator
+# but first make certain that there are no old libs around to bite us
+# BZ 834847
+if [ -e /etc/ld.so.conf.d/glusterfs.conf ]; then
+ rm -f /etc/ld.so.conf.d/glusterfs.conf
+ /sbin/ldconfig
+fi
+
+%if (0%{?_with_firewalld:1})
+ %firewalld_reload
+%endif
+
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%pre should be placed here and keep them sorted
+##
+%pre
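+# create the gluster system group and user on first install; the getent
+# guards keep this idempotent across upgrades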
+getent group gluster > /dev/null || groupadd -r gluster
+getent passwd gluster > /dev/null || useradd -r -g gluster -d %{_rundir}/gluster -s /sbin/nologin -c "GlusterFS daemons" gluster
+exit 0
+
+##-----------------------------------------------------------------------------
+## All %%preun should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_events:1} )
+%preun events
+if [ $1 -eq 0 ]; then
+ if [ -f %glustereventsd_svcfile ]; then
+ %service_stop glustereventsd
+ %systemd_preun glustereventsd
+ fi
+fi
+exit 0
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%preun server
+if [ $1 -eq 0 ]; then
+ if [ -f %glusterfsd_svcfile ]; then
+ %service_stop glusterfsd
+ fi
+ %service_stop glusterd
+ if [ -f %glusterfsd_svcfile ]; then
+ %systemd_preun glusterfsd
+ fi
+ %systemd_preun glusterd
+fi
+if [ $1 -ge 1 ]; then
+ if [ -f %glusterfsd_svcfile ]; then
+ %systemd_postun_with_restart glusterfsd
+ fi
+ %systemd_postun_with_restart glusterd
+fi
+exit 0
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%postun should be placed here and keep them sorted
+##
+%postun
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%systemd_postun_with_restart rsyslog
+%endif
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%postun server
+%if (0%{?_with_firewalld:1})
+ %firewalld_reload
+%endif
+exit 0
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%postun ganesha
+semanage boolean -m ganesha_use_fusefs --off
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%trigger should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%trigger ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --on
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%triggerun should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%triggerun ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --off
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%files should be placed here and keep them grouped
+##
+%files
+%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README.md THANKS COMMITMENT
+%{_mandir}/man8/*gluster*.8*
+%if ( 0%{!?_without_server:1} )
+%exclude %{_mandir}/man8/gluster.8*
+%endif
+%dir %{_localstatedir}/log/glusterfs
+%if ( 0%{!?_without_rdma:1} )
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
+%endif
+%if 0%{?!_without_server:1}
+%dir %{_datadir}/glusterfs
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh
+ %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh
+%endif
+%{_datadir}/glusterfs/scripts/identify-hangs.sh
+%{_datadir}/glusterfs/scripts/collect-system-stats.sh
+%{_datadir}/glusterfs/scripts/log_accounting.sh
+# xlators that are needed on the client- and on the server-side
+%dir %{_libdir}/glusterfs
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/auth
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/auth/addr.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/auth/login.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/socket.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/error-gen.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/delay-gen.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/io-stats.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/sink.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/trace.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/access-control.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cdc.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changelog.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/utime.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/gfid-access.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/namespace.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/read-only.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/shard.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-client.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/worm.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cloudsync.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/meta.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/io-cache.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/io-threads.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/md-cache.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/open-behind.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/quick-read.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/read-ahead.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/readdir-ahead.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/stat-prefetch.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/write-behind.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/nl-cache.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system/posix-acl.so
+%dir %attr(0775,gluster,gluster) %{_rundir}/gluster
+%if 0%{?_tmpfilesdir:1} && 0%{!?_without_server:1}
+%{_tmpfilesdir}/gluster.conf
+%endif
+%if ( 0%{?_without_extra_xlators:1} )
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
+%endif
+%if ( 0%{?_without_regression_tests:1} )
+%exclude %{_datadir}/glusterfs/run-tests.sh
+%exclude %{_datadir}/glusterfs/tests
+%endif
+%if 0%{?_without_server:1}
+%if ( 0%{?_with_systemd:1} )
+%exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+%exclude %{_datadir}/glusterfs/scripts/control-mem.sh
+%endif
+%endif
+
+%if ( 0%{?_without_server:1} || 0%{?rhel} < 7 )
+# exclude ganesha-related files for RHEL 6 and client builds
+%exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+%exclude %{_libexecdir}/ganesha/*
+%exclude %{_prefix}/lib/ocf/resource.d/heartbeat/*
+%if ( 0%{!?_without_server:1} )
+%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
+%endif
+%endif
+
+%exclude %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh
+
+%if ( 0%{?_without_server:1} )
+%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
+%endif
+
+%files api
+%exclude %{_libdir}/*.so
+# libgfapi files
+%{_libdir}/libgfapi.*
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api.so
+
+%files api-devel
+%{_libdir}/pkgconfig/glusterfs-api.pc
+%{_libdir}/libgfapi.so
+%dir %{_includedir}/glusterfs
+%dir %{_includedir}/glusterfs/api
+ %{_includedir}/glusterfs/api/*
+
+%files cli
+%{_sbindir}/gluster
+%{_mandir}/man8/gluster.8*
+%{_sysconfdir}/bash_completion.d/gluster
+
+%files cloudsync-plugins
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsyncs3.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsynccvlt.so
+
+%files devel
+%dir %{_includedir}/glusterfs
+ %{_includedir}/glusterfs/*
+%exclude %{_includedir}/glusterfs/api
+%exclude %{_libdir}/libgfapi.so
+%{_libdir}/*.so
+%if ( 0%{?_without_server:1} )
+%exclude %{_libdir}/pkgconfig/libgfchangelog.pc
+%exclude %{_libdir}/libgfchangelog.so
+%if ( 0%{!?_without_tiering:1} )
+%exclude %{_libdir}/pkgconfig/libgfdb.pc
+%endif
+%else
+%{_libdir}/pkgconfig/libgfchangelog.pc
+%if ( 0%{!?_without_tiering:1} )
+%{_libdir}/pkgconfig/libgfdb.pc
+%endif
+%endif
+
+%files client-xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/*.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/client.so
+
+%if ( 0%{!?_without_extra_xlators:1} )
+%files extra-xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
+%endif
+
+%files fuse
+# glusterfs is a symlink to glusterfsd; -server depends on -fuse.
+%{_sbindir}/glusterfs
+%{_sbindir}/glusterfsd
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/fuse.so
+/sbin/mount.glusterfs
+%if ( 0%{!?_without_fusermount:1} )
+%{_bindir}/fusermount-glusterfs
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%files geo-replication
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-georep
+
+%{_sbindir}/gfind_missing_files
+%{_sbindir}/gluster-mountbroker
+%dir %{_libexecdir}/glusterfs
+%dir %{_libexecdir}/glusterfs/python
+%dir %{_libexecdir}/glusterfs/python/syncdaemon
+ %{_libexecdir}/glusterfs/gsyncd
+ %{_libexecdir}/glusterfs/python/syncdaemon/*
+ %{_libexecdir}/glusterfs/gverify.sh
+ %{_libexecdir}/glusterfs/set_geo_rep_pem_keys.sh
+ %{_libexecdir}/glusterfs/peer_gsec_create
+ %{_libexecdir}/glusterfs/peer_mountbroker
+ %{_libexecdir}/glusterfs/peer_mountbroker.py*
+ %{_libexecdir}/glusterfs/gfind_missing_files
+ %{_libexecdir}/glusterfs/peer_georep-sshkey.py*
+%{_sbindir}/gluster-georep-sshkey
+
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/geo-replication
+%ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post/S56glusterd-geo-rep-create-post.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre
+
+%dir %{_datadir}/glusterfs
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/get-gfid.sh
+ %{_datadir}/glusterfs/scripts/slave-upgrade.sh
+ %{_datadir}/glusterfs/scripts/gsync-upgrade.sh
+ %{_datadir}/glusterfs/scripts/generate-gfid-file.sh
+ %{_datadir}/glusterfs/scripts/gsync-sync-gfid
+ %{_datadir}/glusterfs/scripts/schedule_georep.py*
+%endif
+
+%files libs
+%{_libdir}/*.so.*
+%exclude %{_libdir}/libgfapi.*
+%if ( 0%{!?_without_tiering:1} )
+# libgfdb is only needed server-side
+%exclude %{_libdir}/libgfdb.*
+%endif
+
+%files -n python%{_pythonver}-gluster
+# Introduce the gluster module in site-packages so that all other
+# gluster submodules can reside in the same namespace.
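+# (CLI helpers such as gluster-mountbroker import from gluster.cliutils)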
+%if ( %{_usepython3} )
+%dir %{python3_sitelib}/gluster
+ %{python3_sitelib}/gluster/__init__.*
+ %{python3_sitelib}/gluster/__pycache__
+ %{python3_sitelib}/gluster/cliutils
+%else
+%dir %{python2_sitelib}/gluster
+ %{python2_sitelib}/gluster/__init__.*
+ %{python2_sitelib}/gluster/cliutils
+%endif
+
+%if ( 0%{!?_without_rdma:1} )
+%files rdma
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
+%endif
+
+%if ( 0%{!?_without_regression_tests:1} )
+%files regression-tests
+%dir %{_datadir}/glusterfs
+ %{_datadir}/glusterfs/run-tests.sh
+ %{_datadir}/glusterfs/tests
+%exclude %{_datadir}/glusterfs/tests/vagrant
+%endif
+
+%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 )
+%files ganesha
+%dir %{_libexecdir}/ganesha
+%{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+%{_libexecdir}/ganesha/*
+%{_prefix}/lib/ocf/resource.d/heartbeat/*
+%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
+%ghost %attr(0644,-,-) %config(noreplace) %{_sysconfdir}/ganesha/ganesha-ha.conf
+%ghost %dir %attr(0755,-,-) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha
+%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
+%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+%endif
+
+%if ( 0%{!?_without_ocf:1} )
+%files resource-agents
+# /usr/lib is the standard for OCF, also on x86_64
+%{_prefix}/lib/ocf/resource.d/glusterfs
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%files server
+%doc extras/clear_xattrs.sh
+%{_datadir}/glusterfs/scripts/xattr_analysis.py*
+%{_datadir}/glusterfs/scripts/quota_fsck.py*
+# sysconf
+%config(noreplace) %{_sysconfdir}/glusterfs
+%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
+%exclude %{_sysconfdir}/glusterfs/eventsconfig.json
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd
+%endif
+
+# init files
+%glusterd_svcfile
+%if ( 0%{_for_fedora_koji_builds} )
+%glusterfsd_svcfile
+%endif
+%if ( 0%{?_with_systemd:1} )
+%glusterfssharedstorage_svcfile
+%endif
+
+# binaries
+%{_sbindir}/glusterd
+%{_sbindir}/glfsheal
+%{_sbindir}/gf_attach
+%{_sbindir}/gluster-setgfid2path
+# {_sbindir}/glusterfsd is the actual binary, but glusterfs (client) is a
+# symlink. The binary itself (and the symlink) is part of the glusterfs-fuse
+# package, because glusterfs-server depends on that anyway.
+
+# Manpages
+%{_mandir}/man8/gluster-setgfid2path.8*
+
+# xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so
+%if ( 0%{!?_without_tiering:1} )
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so
+ %{_libdir}/libgfdb.so.*
+%endif
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota*
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs*
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so
+
+# snap_scheduler
+%{_sbindir}/snap_scheduler.py
+%{_sbindir}/gcron.py
+%{_sbindir}/conf.py
+
+# /var/lib/glusterd, e.g. hook scripts, etc.
+%ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info
+%ghost %attr(0600,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/options
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/bitd
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/virt
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/metadata-cache
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/gluster-block
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/nl-cache
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/db-workload
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/distributed-virt
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/samba
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind/.keys
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+ %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post/S30samba-set.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post/S32gluster_enable_shared_storage.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S30samba-stop.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S29CTDB-teardown.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/snaps
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/ss_brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/vols
+
+# Extra utility scripts
+%dir %{_libexecdir}/glusterfs
+ %{_datadir}/glusterfs/release
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
+%if ( 0%{?_with_systemd:1} )
+ %{_libexecdir}/glusterfs/mount-shared-storage.sh
+ %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+ %{_datadir}/glusterfs/scripts/control-mem.sh
+%endif
+
+# Incremental API (glusterfind)
+ %{_libexecdir}/glusterfs/glusterfind
+%{_bindir}/glusterfind
+ %{_libexecdir}/glusterfs/peer_add_secret_pub
+
+%if ( 0%{?_with_firewalld:1} )
+%{_prefix}/lib/firewalld/services/glusterfs.xml
+%endif
+# end of server files
+%endif
+
+# Events
+%if ( 0%{!?_without_events:1} )
+%files events
+%config(noreplace) %{_sysconfdir}/glusterfs/eventsconfig.json
+%dir %{_sharedstatedir}/glusterd
+%dir %{_sharedstatedir}/glusterd/events
+%dir %{_libexecdir}/glusterfs
+ %{_libexecdir}/glusterfs/gfevents
+ %{_libexecdir}/glusterfs/peer_eventsapi.py*
+%{_sbindir}/glustereventsd
+%{_sbindir}/gluster-eventsapi
+%{_datadir}/glusterfs/scripts/eventsdash.py*
+%if ( 0%{?_with_systemd:1} )
+%{_unitdir}/glustereventsd.service
+%else
+%{_sysconfdir}/init.d/glustereventsd
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %pretrans scriptlets should be placed here and kept sorted
+##
+%if 0%{!?_without_server:1}
+%pretrans -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
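+        # type=0 in the info file is a plain distribute volume; status=1 means started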
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ echo "ERROR: Distribute volumes detected. In-service rolling upgrade requires distribute volume(s) to be stopped."
+ echo "ERROR: Please stop distribute volume(s) before proceeding... exiting!"
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ echo "WARNING: Updating glusterfs requires its processes to be killed. This action does NOT incur downtime."
+ echo "WARNING: Ensure to wait for the upgraded server to finish healing before proceeding."
+ echo "WARNING: Refer upgrade section of install guide for more details"
+ echo "Please run # service glusterd stop; pkill glusterfs; pkill glusterfsd; pkill gsyncd.py;"
+ exit 1;
+fi
+]]
+
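+-- os.execute() returns a bare numeric status on Lua 5.1, but a boolean plus
+-- "exit"/"signal" and the status on Lua 5.2+; "val or ok" covers both.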
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
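+
+-- A commented-out sketch of the volume scan the TODO above asks for, done
+-- natively in Lua. It is only an illustration: it assumes rpm's embedded
+-- Lua provides posix.dir(), as the posix extension in recent rpm releases
+-- does.
+--
+--   local vols = posix.dir("/var/lib/glusterd/vols") or {}
+--   for _, vol in ipairs(vols) do
+--       if vol ~= "." and vol ~= ".." then
+--           local f = io.open("/var/lib/glusterd/vols/"..vol.."/info", "r")
+--           if f then
+--               local vol_type, started
+--               for line in f:lines() do
+--                   vol_type = line:match("^type=([0-9]+)") or vol_type
+--                   started  = line:match("^status=([0-9]+)") or started
+--               end
+--               f:close()
+--               if vol_type == "0" and started == "1" then
+--                   error("Distribute volume started; stop it before upgrading")
+--               end
+--           end
+--       end
+--   end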
+
+
+
+%pretrans api -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+
+
+
+%pretrans api-devel -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+
+
+
+%pretrans cli -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+
+
+%pretrans client-xlators -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+
+
+%pretrans fuse -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+
+
+
+%if ( 0%{!?_without_georeplication:1} )
+%pretrans geo-replication -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+%endif
+
+
+
+%pretrans libs -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+
+
+
+%if ( 0%{!?_without_rdma:1} )
+%pretrans rdma -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+%endif
+
+
+
+%if ( 0%{!?_without_ocf:1} )
+%pretrans resource-agents -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+%endif
+
+
+
+%pretrans server -p <lua>
+if not posix.access("/bin/bash", "x") then
+ -- initial installation, no shell, no running glusterfsd
+ return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we keep the check as a shell script string and run it with os.execute().
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd >/dev/null 2>&1
+
+if [ $? -eq 0 ]; then
+ pushd . > /dev/null 2>&1
+ for volume in /var/lib/glusterd/vols/*; do cd $volume;
+ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+ exit 1;
+ fi
+ done
+
+ popd > /dev/null 2>&1
+ exit 1;
+fi
+]]
+
+ok, how, val = os.execute(script)
+rc = val or ok
+if not (rc == 0) then
+ error("Detected running glusterfs processes", rc)
+end
+
+%posttrans server
+pidof -c -o %PPID -x glusterd > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+    kill -9 `pgrep -f gsyncd.py` > /dev/null 2>&1
+
+    killall --wait -SIGTERM glusterd > /dev/null 2>&1
+
+ if [ "$?" != "0" ]; then
+ echo "killall failed while killing glusterd"
+ fi
+
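+    # Run glusterd once in the foreground (-N) in upgrade mode so it
+    # regenerates the volfiles for the new version and then exits.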
+ glusterd --xlator-option *.upgrade=on -N
+
+    # Clean up the leftover glusterd socket file, which glusterd creates
+    # while running in the rpm_script_t SELinux context.
+    rm -f /var/run/glusterd.socket
+
+ # glusterd _was_ running, we killed it, it exited after *.upgrade=on,
+ # so start it again
+ %service_start glusterd
+else
+ glusterd --xlator-option *.upgrade=on -N
+
+    # Clean up the leftover glusterd socket file, which glusterd creates
+    # while running in the rpm_script_t SELinux context.
+    rm -f /var/run/glusterd.socket
+fi
+
+%endif
+
+%changelog
+* Mon Aug 09 2021 Mohan Boddu <mboddu@redhat.com> - 6.0-57.4
+- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags
+ Related: rhbz#1991688
+
+* Sun Aug 1 2021 Tamar Shacked <tshacked@redhat.com> - 6.0-56.4
+- remove unneeded file with ambiguous licence
+- fixes bug bz#1939340
+
+* Mon Jul 26 2021 Tamar Shacked <tshacked@redhat.com> - 6.0-56.3
+- Rebase with latest RHGS-3.5.4
+- Fix changelog chronological order by removing unneeded changelogs
+- fixes bug bz#1939340
+
+* Thu May 06 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56.2
+- fixes bugs bz#1953901
+
+* Thu Apr 22 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56.1
+- fixes bugs bz#1927235
+
+* Wed Apr 14 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56
+- fixes bugs bz#1948547
+
+* Fri Mar 19 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-55
+- fixes bugs bz#1939372
+
+* Wed Mar 03 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-54
+- fixes bugs bz#1832306 bz#1911292 bz#1924044
+
+* Thu Feb 11 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-53
+- fixes bugs bz#1224906 bz#1691320 bz#1719171 bz#1814744 bz#1865796
+
+* Thu Jan 28 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-52
+- fixes bugs bz#1600459 bz#1719171 bz#1830713 bz#1856574
+
+* Mon Dec 28 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-51
+- fixes bugs bz#1640148 bz#1856574 bz#1910119
+
+* Tue Dec 15 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-50
+- fixes bugs bz#1224906 bz#1412494 bz#1612973 bz#1663821 bz#1691320
+ bz#1726673 bz#1749304 bz#1752739 bz#1779238 bz#1813866 bz#1814744 bz#1821599
+ bz#1832306 bz#1835229 bz#1842449 bz#1865796 bz#1878077 bz#1882923 bz#1885966
+ bz#1890506 bz#1896425 bz#1898776 bz#1898777 bz#1898778 bz#1898781 bz#1898784
+ bz#1903468
+
+* Wed Nov 25 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-49
+- fixes bugs bz#1286171
+
+* Tue Nov 10 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-48
+- fixes bugs bz#1895301
+
+* Thu Nov 05 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-47
+- fixes bugs bz#1286171 bz#1821743 bz#1837926
+
+* Wed Oct 21 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-46
+- fixes bugs bz#1873469 bz#1881823
+
+* Wed Sep 09 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-45
+- fixes bugs bz#1785714
+
+* Thu Sep 03 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-44
+- fixes bugs bz#1460657
+
+* Thu Sep 03 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-43
+- fixes bugs bz#1460657
+
+* Wed Sep 02 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-42
+- fixes bugs bz#1785714
+
+* Tue Aug 25 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-41
+- fixes bugs bz#1785714 bz#1851424 bz#1851989 bz#1852736 bz#1853189 bz#1855966
+
+* Tue Jul 21 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-40
+- fixes bugs bz#1812789 bz#1844359 bz#1847081 bz#1854165
+
+* Wed Jun 17 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-39
+- fixes bugs bz#1844359 bz#1845064
+
+* Wed Jun 10 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-38
+- fixes bugs bz#1234220 bz#1286171 bz#1487177 bz#1524457 bz#1640573
+ bz#1663557 bz#1667954 bz#1683602 bz#1686897 bz#1721355 bz#1748865 bz#1750211
+ bz#1754391 bz#1759875 bz#1761531 bz#1761932 bz#1763124 bz#1763129 bz#1764091
+ bz#1775637 bz#1776901 bz#1781550 bz#1781649 bz#1781710 bz#1783232 bz#1784211
+ bz#1784415 bz#1786516 bz#1786681 bz#1787294 bz#1787310 bz#1787331 bz#1787994
+ bz#1790336 bz#1792873 bz#1794663 bz#1796814 bz#1804164 bz#1810924 bz#1815434
+ bz#1836099 bz#1837467 bz#1837926 bz#1838479 bz#1839137 bz#1844359
+
+* Fri May 29 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-37
+- fixes bugs bz#1840794
+
+* Wed May 27 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-36
+- fixes bugs bz#1812789 bz#1823423
+
+* Fri May 22 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-35
+- fixes bugs bz#1810516 bz#1830713 bz#1836233
+
+* Sun May 17 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-34
+- fixes bugs bz#1802013 bz#1823706 bz#1825177 bz#1830713 bz#1831403 bz#1833017
+
+* Wed Apr 29 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-33
+- fixes bugs bz#1812789 bz#1813917 bz#1823703 bz#1823706 bz#1825195
+
+* Sat Apr 04 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-32
+- fixes bugs bz#1781543 bz#1812789 bz#1812824 bz#1817369 bz#1819059
+
+* Tue Mar 17 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-31
+- fixes bugs bz#1802727
+
+* Thu Feb 20 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-30.1
+- fixes bugs bz#1800703
+
+* Sat Feb 01 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-30
+- fixes bugs bz#1775564 bz#1794153
+
+* Thu Jan 23 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-29
+- fixes bugs bz#1793035
+
+* Tue Jan 14 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-28
+- fixes bugs bz#1789447
+
+* Mon Jan 13 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-27
+- fixes bugs bz#1789447
+
+* Fri Jan 10 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-26
+- fixes bugs bz#1763208 bz#1788656
+
+* Mon Dec 23 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-25
+- fixes bugs bz#1686800 bz#1763208 bz#1779696 bz#1781444 bz#1782162
+
+* Thu Nov 28 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-24
+- fixes bugs bz#1768786
+
+* Thu Nov 21 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-23
+- fixes bugs bz#1344758 bz#1599802 bz#1685406 bz#1686800 bz#1724021
+ bz#1726058 bz#1727755 bz#1731513 bz#1741193 bz#1758923 bz#1761326 bz#1761486
+ bz#1762180 bz#1764095 bz#1766640
+
+* Thu Nov 14 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-22
+- fixes bugs bz#1771524 bz#1771614
+
+* Fri Oct 25 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-21
+- fixes bugs bz#1765555
+
+* Wed Oct 23 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-20
+- fixes bugs bz#1719171 bz#1763412 bz#1764202
+
+* Thu Oct 17 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-19
+- fixes bugs bz#1760939
+
+* Wed Oct 16 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-18
+- fixes bugs bz#1758432
+
+* Fri Oct 11 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-17
+- fixes bugs bz#1704562 bz#1758618 bz#1760261
+
+* Wed Oct 09 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-16
+- fixes bugs bz#1752713 bz#1756325
+
+* Fri Sep 27 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-15
+- fixes bugs bz#1726000 bz#1731826 bz#1754407 bz#1754790 bz#1755227
+
+* Fri Sep 20 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-14
+- fixes bugs bz#1719171 bz#1728673 bz#1731896 bz#1732443 bz#1733970
+ bz#1745107 bz#1746027 bz#1748688 bz#1750241 bz#1572163
+
+* Fri Aug 23 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-13
+- fixes bugs bz#1729915 bz#1732376 bz#1743611 bz#1743627 bz#1743634 bz#1744518
+
+* Fri Aug 09 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-12
+- fixes bugs bz#1730914 bz#1731448 bz#1732770 bz#1732792 bz#1733531
+ bz#1734305 bz#1734534 bz#1734734 bz#1735514 bz#1737705 bz#1732774
+ bz#1732793
+
+* Tue Aug 06 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-11
+- fixes bugs bz#1733520 bz#1734423
+
+* Fri Aug 02 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-10
+- fixes bugs bz#1713890
+
+* Tue Jul 23 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-9
+- fixes bugs bz#1708064 bz#1708180 bz#1715422 bz#1720992 bz#1722757
+
+* Tue Jul 16 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-8
+- fixes bugs bz#1698435 bz#1712591 bz#1715447 bz#1720488 bz#1722209
+ bz#1722512 bz#1724089 bz#1726991 bz#1727785 bz#1729108
+
+* Fri Jun 28 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-7
+- fixes bugs bz#1573077 bz#1600918 bz#1703423 bz#1704207 bz#1708064
+ bz#1709301 bz#1713664 bz#1716760 bz#1717784 bz#1720163 bz#1720192
+ bz#1720551 bz#1721351 bz#1721357 bz#1721477 bz#1722131 bz#1722331
+ bz#1722509 bz#1722801 bz#1720248
+
+* Fri Jun 14 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-6
+- fixes bugs bz#1668001 bz#1708043 bz#1708183 bz#1710701
+ bz#1719640 bz#1720079 bz#1720248 bz#1720318 bz#1720461
+
+* Tue Jun 11 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-5
+- fixes bugs bz#1573077 bz#1694595 bz#1703434 bz#1714536 bz#1714588
+ bz#1715407 bz#1715438 bz#1705018
+
+* Fri Jun 07 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-4
+- fixes bugs bz#1480907 bz#1702298 bz#1703455 bz#1704181 bz#1707246
+ bz#1708067 bz#1708116 bz#1708121 bz#1709087 bz#1711249 bz#1711296
+ bz#1714078 bz#1714124 bz#1716385 bz#1716626 bz#1716821 bz#1716865 bz#1717927
+
+* Tue May 14 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-3
+- fixes bugs bz#1583585 bz#1671862 bz#1702686 bz#1703434 bz#1703753
+ bz#1703897 bz#1704562 bz#1704769 bz#1704851 bz#1706683 bz#1706776 bz#1706893
+
+* Thu Apr 25 2019 Milind Changire <mchangir@redhat.com> - 6.0-2
+- fixes bugs bz#1471742 bz#1652461 bz#1671862 bz#1676495 bz#1691620
+ bz#1696334 bz#1696903 bz#1697820 bz#1698436 bz#1698728 bz#1699709 bz#1699835
+ bz#1702240
+
+* Mon Apr 08 2019 Milind Changire <mchangir@redhat.com> - 6.0-1
+- rebase to upstream glusterfs at v6.0
+- fixes bugs bz#1493284 bz#1578703 bz#1600918 bz#1670415 bz#1691620
+ bz#1693935 bz#1695057
+
diff --git a/sources b/sources
new file mode 100644
index 0000000..4fa11ef
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+d4aede64fb096b1a32f5113aaafc0d78 glusterfs-6.0.tar.gz