Diffstat (limited to '0354-core-fix-memory-pool-management-races.patch')
-rw-r--r--  0354-core-fix-memory-pool-management-races.patch  466
1 file changed, 466 insertions, 0 deletions
diff --git a/0354-core-fix-memory-pool-management-races.patch b/0354-core-fix-memory-pool-management-races.patch
new file mode 100644
index 0000000..a7cdfc0
--- /dev/null
+++ b/0354-core-fix-memory-pool-management-races.patch
@@ -0,0 +1,466 @@
+From 75a9d946d252ce70460144615ca17dbdf2e80fab Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 7 Feb 2020 10:19:57 +0100
+Subject: [PATCH 354/355] core: fix memory pool management races
+
+Objects allocated from a per-thread memory pool keep a reference to it
+to be able to return the object to the pool when it's no longer used. The
+object holding this reference can have a long life cycle that could
+survive a glfs_fini() call.
+
+This means that it's unsafe to destroy memory pools from glfs_fini().
+
+Another side effect of destroying memory pools from glfs_fini() is that
+the TLS variable that points to one of those pools cannot be reset for
+all live threads. This means that any attempt to allocate memory from
+those threads will access already freed memory, which is very
+dangerous.
+
+To fix these issues, mem_pools_fini() no longer destroys the pool
+lists. The pools are destroyed only at process termination.
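
As an editorial aside, here is a minimal C sketch of the hazard described above, using hypothetical names (pool_t, my_pool, get_pool, unsafe_fini) rather than the real glusterfs structures: each thread caches a pointer to its pool in TLS, so freeing pools from a fini() entry point leaves that cached pointer dangling in any thread that outlives the call.

    #include <pthread.h>
    #include <stdlib.h>

    typedef struct pool {
        void *free_list; /* recycled objects would hang off here */
    } pool_t;

    /* Per-thread cached reference, as in the glusterfs per-thread pools. */
    static __thread pool_t *my_pool = NULL;

    static pool_t *
    get_pool(void)
    {
        if (my_pool == NULL) {
            my_pool = calloc(1, sizeof(pool_t));
        }
        return my_pool;
    }

    static void
    unsafe_fini(pool_t *pool)
    {
        /* After this, the owning thread's my_pool still points to freed
         * memory; its next get_pool() call returns a dangling pointer. */
        free(pool);
    }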
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24099
+> Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965
+> Fixes: bz#1801684
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965
+BUG: 1800703
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/192262
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/globals.c | 13 ++-
+ libglusterfs/src/glusterfs/globals.h | 3 +
+ libglusterfs/src/glusterfs/mem-pool.h | 28 ++---
+ libglusterfs/src/mem-pool.c | 201 ++++++++++++++++++----------------
+ libglusterfs/src/syncop.c | 7 ++
+ 5 files changed, 146 insertions(+), 106 deletions(-)
+
+diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
+index 02098e6..e433ee8 100644
+--- a/libglusterfs/src/globals.c
++++ b/libglusterfs/src/globals.c
+@@ -319,7 +319,18 @@ glusterfs_cleanup(void *ptr)
+ GF_FREE(thread_syncopctx.groups);
+ }
+
+- mem_pool_thread_destructor();
++ mem_pool_thread_destructor(NULL);
++}
++
++void
++gf_thread_needs_cleanup(void)
++{
++ /* The value stored in free_key TLS is not really used for anything, but
++ * the pthread implementation doesn't call the TLS destructor unless
++ * it's != NULL. This function must be called whenever something is
++ * allocated for this thread so that glusterfs_cleanup() will be called
++ * and resources can be released. */
++ (void)pthread_setspecific(free_key, (void *)1);
+ }
+
+ static void
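
As a standalone sketch of the TLS trick this new gf_thread_needs_cleanup() relies on (the key and function names here are illustrative): POSIX runs a key's destructor at thread exit only for threads whose value for that key is non-NULL, so storing any dummy non-NULL value is enough to arm the cleanup callback.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_key_t cleanup_key;

    static void
    on_thread_exit(void *value)
    {
        (void)value; /* the stored value itself is never used */
        printf("per-thread cleanup ran\n");
    }

    static void *
    worker(void *arg)
    {
        (void)arg;
        /* Without this call, on_thread_exit() would never run. */
        (void)pthread_setspecific(cleanup_key, (void *)1);
        return NULL;
    }

    int
    main(void)
    {
        pthread_t t;

        (void)pthread_key_create(&cleanup_key, on_thread_exit);
        (void)pthread_create(&t, NULL, worker, NULL);
        (void)pthread_join(t, NULL);
        return 0;
    }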
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index e218285..31717ed 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -181,6 +181,9 @@ glusterfs_leaseid_exist(void);
+ int
+ glusterfs_globals_init(glusterfs_ctx_t *ctx);
+
++void
++gf_thread_needs_cleanup(void);
++
+ struct tvec_base *
+ glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx);
+ void
+diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
+index be0a26d..97bf76c 100644
+--- a/libglusterfs/src/glusterfs/mem-pool.h
++++ b/libglusterfs/src/glusterfs/mem-pool.h
+@@ -245,24 +245,26 @@ typedef struct per_thread_pool {
+ } per_thread_pool_t;
+
+ typedef struct per_thread_pool_list {
+- /*
+- * These first two members are protected by the global pool lock. When
+- * a thread first tries to use any pool, we create one of these. We
+- * link it into the global list using thr_list so the pool-sweeper
+- * thread can find it, and use pthread_setspecific so this thread can
+- * find it. When the per-thread destructor runs, we "poison" the pool
+- * list to prevent further allocations. This also signals to the
+- * pool-sweeper thread that the list should be detached and freed after
+- * the next time it's swept.
+- */
++ /* thr_list is used to place the TLS pool_list into the active global list
++ * (pool_threads) or the inactive global list (pool_free_threads). It's
++ * protected by the global pool_lock. */
+ struct list_head thr_list;
+- unsigned int poison;
++
++ /* This lock is used to update poison and the hot/cold lists of members
++ * of the 'pools' array. */
++ pthread_spinlock_t lock;
++
++ /* This field is used to mark a pool_list as not being owned by any thread.
++ * This means that the sweeper thread won't clean objects stored in its
++ * pools. mem_put() uses it to decide whether the object being released
++ * is placed into its original pool_list or directly destroyed. */
++ bool poison;
++
+ /*
+ * There's really more than one pool, but the actual number is hidden
+ * in the implementation code so we just make it a single-element array
+ * here.
+ */
+- pthread_spinlock_t lock;
+ per_thread_pool_t pools[1];
+ } per_thread_pool_list_t;
+
+@@ -307,7 +309,7 @@ void
+ mem_pool_destroy(struct mem_pool *pool);
+
+ void
+-mem_pool_thread_destructor(void);
++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
+
+ void
+ gf_mem_acct_enable_set(void *ctx);
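
To make the poison semantics above concrete, here is a hedged sketch of a release path in the spirit the struct comment describes (put_obj and the simplified pool_list_t are illustrative, not the actual mem_put() code): the spinlock makes "check poison, then touch the hot list" atomic with respect to the thread destructor setting poison and draining the lists.

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct obj {
        struct obj *next;
    } obj_t;

    typedef struct pool_list {
        pthread_spinlock_t lock; /* owner initializes via pthread_spin_init() */
        bool poison;
        obj_t *hot_list;
    } pool_list_t;

    static void
    put_obj(pool_list_t *pl, obj_t *obj)
    {
        bool dead;

        pthread_spin_lock(&pl->lock);
        dead = pl->poison;
        if (!dead) {
            /* Owner thread is alive: recycle into its hot list. */
            obj->next = pl->hot_list;
            pl->hot_list = obj;
        }
        pthread_spin_unlock(&pl->lock);

        if (dead) {
            /* Owner is gone and its lists were drained: destroy directly. */
            free(obj);
        }
    }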
+diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
+index d88041d..2b41c01 100644
+--- a/libglusterfs/src/mem-pool.c
++++ b/libglusterfs/src/mem-pool.c
+@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL;
+ #define POOL_SWEEP_SECS 30
+
+ typedef struct {
+- struct list_head death_row;
+ pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
+ unsigned int n_cold_lists;
+ } sweep_state_t;
+@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
+ static unsigned int init_count = 0;
+ static pthread_t sweeper_tid;
+
+-gf_boolean_t
++static bool
+ collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
+ {
+ unsigned int i;
+ per_thread_pool_t *pt_pool;
+- gf_boolean_t poisoned;
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+- poisoned = pool_list->poison != 0;
+- if (!poisoned) {
+- for (i = 0; i < NPOOLS; ++i) {
+- pt_pool = &pool_list->pools[i];
+- if (pt_pool->cold_list) {
+- if (state->n_cold_lists >= N_COLD_LISTS) {
+- break;
+- }
+- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
++ for (i = 0; i < NPOOLS; ++i) {
++ pt_pool = &pool_list->pools[i];
++ if (pt_pool->cold_list) {
++ if (state->n_cold_lists >= N_COLD_LISTS) {
++ (void)pthread_spin_unlock(&pool_list->lock);
++ return true;
+ }
+- pt_pool->cold_list = pt_pool->hot_list;
+- pt_pool->hot_list = NULL;
++ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
+ }
++ pt_pool->cold_list = pt_pool->hot_list;
++ pt_pool->hot_list = NULL;
+ }
+
+ (void)pthread_spin_unlock(&pool_list->lock);
+
+- return poisoned;
++ return false;
+ }
+
+-void
++static void
+ free_obj_list(pooled_obj_hdr_t *victim)
+ {
+ pooled_obj_hdr_t *next;
+@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim)
+ }
+ }
+
+-void *
++static void *
+ pool_sweeper(void *arg)
+ {
+ sweep_state_t state;
+ per_thread_pool_list_t *pool_list;
+- per_thread_pool_list_t *next_pl;
+- per_thread_pool_t *pt_pool;
+- unsigned int i;
+- gf_boolean_t poisoned;
++ uint32_t i;
++ bool pending;
+
+ /*
+ * This is all a bit inelegant, but the point is to avoid doing
+ * expensive things (like freeing thousands of objects) while holding a
+- * global lock. Thus, we split each iteration into three passes, with
++ * global lock. Thus, we split each iteration into two passes, with
+ * only the first and fastest holding the lock.
+ */
+
++ pending = true;
++
+ for (;;) {
+- sleep(POOL_SWEEP_SECS);
++ /* If we know there's pending work to do (or it's the first run), we
++ * collect garbage more often. */
++ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
++
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+- INIT_LIST_HEAD(&state.death_row);
+ state.n_cold_lists = 0;
++ pending = false;
+
+ /* First pass: collect stuff that needs our attention. */
+ (void)pthread_mutex_lock(&pool_lock);
+- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list)
++ list_for_each_entry(pool_list, &pool_threads, thr_list)
+ {
+- (void)pthread_mutex_unlock(&pool_lock);
+- poisoned = collect_garbage(&state, pool_list);
+- (void)pthread_mutex_lock(&pool_lock);
+-
+- if (poisoned) {
+- list_move(&pool_list->thr_list, &state.death_row);
++ if (collect_garbage(&state, pool_list)) {
++ pending = true;
+ }
+ }
+ (void)pthread_mutex_unlock(&pool_lock);
+
+- /* Second pass: free dead pools. */
+- (void)pthread_mutex_lock(&pool_free_lock);
+- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list)
+- {
+- for (i = 0; i < NPOOLS; ++i) {
+- pt_pool = &pool_list->pools[i];
+- free_obj_list(pt_pool->cold_list);
+- free_obj_list(pt_pool->hot_list);
+- pt_pool->hot_list = pt_pool->cold_list = NULL;
+- }
+- list_del(&pool_list->thr_list);
+- list_add(&pool_list->thr_list, &pool_free_threads);
+- }
+- (void)pthread_mutex_unlock(&pool_free_lock);
+-
+- /* Third pass: free cold objects from live pools. */
++ /* Second pass: free cold objects from live pools. */
+ for (i = 0; i < state.n_cold_lists; ++i) {
+ free_obj_list(state.cold_lists[i]);
+ }
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ }
++
++ return NULL;
+ }
+
+ void
+-mem_pool_thread_destructor(void)
++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
+ {
+- per_thread_pool_list_t *pool_list = thread_pool_list;
+-
+- /* The pool-sweeper thread will take it from here.
+- *
+- * We can change 'poison' here without taking locks because the change
+- * itself doesn't interact with other parts of the code and a simple write
+- * is already atomic from the point of view of the processor.
+- *
+- * This change can modify what mem_put() does, but both possibilities are
+- * fine until the sweeper thread kicks in. The real synchronization must be
+- * between mem_put() and the sweeper thread. */
++ per_thread_pool_t *pt_pool;
++ uint32_t i;
++
++ if (pool_list == NULL) {
++ pool_list = thread_pool_list;
++ }
++
++ /* The current thread is terminating. None of the allocated objects will
++ * be used again. We can directly destroy them here instead of delaying
++ * it until the next sweeper loop. */
+ if (pool_list != NULL) {
+- pool_list->poison = 1;
++ /* Remove pool_list from the global list so that the sweeper
++ * thread can't touch it. */
++ pthread_mutex_lock(&pool_lock);
++ list_del(&pool_list->thr_list);
++ pthread_mutex_unlock(&pool_lock);
++
++ /* We need to protect hot/cold changes from potential mem_put() calls
++ * that reference this pool_list. Once poison is set to true, we are
++ * sure that no one else will touch hot/cold lists. The only possible
++ * race is a concurrent mem_put() adding a new item to the hot
++ * list. We protect against that by taking pool_list->lock.
++ * After that we don't need the lock to destroy the hot/cold lists. */
++ pthread_spin_lock(&pool_list->lock);
++ pool_list->poison = true;
++ pthread_spin_unlock(&pool_list->lock);
++
++ for (i = 0; i < NPOOLS; i++) {
++ pt_pool = &pool_list->pools[i];
++
++ free_obj_list(pt_pool->hot_list);
++ pt_pool->hot_list = NULL;
++
++ free_obj_list(pt_pool->cold_list);
++ pt_pool->cold_list = NULL;
++ }
++
++ pthread_mutex_lock(&pool_free_lock);
++ list_add(&pool_list->thr_list, &pool_free_threads);
++ pthread_mutex_unlock(&pool_free_lock);
++
+ thread_pool_list = NULL;
+ }
+ }
+@@ -528,6 +538,30 @@ mem_pools_preinit(void)
+ init_done = GF_MEMPOOL_INIT_EARLY;
+ }
+
++static __attribute__((destructor)) void
++mem_pools_postfini(void)
++{
++ per_thread_pool_list_t *pool_list, *next;
++
++ /* This is part of a process shutdown (or dlclose()), which means that
++ * most probably all threads have been stopped. However, this is not the
++ * case for gluster and there are even legitimate situations in which we
++ * could have some threads alive. What is certain is that none of those
++ * threads should be using anything from this library, so destroying
++ * everything here should be fine and safe. */
++
++ list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list)
++ {
++ mem_pool_thread_destructor(pool_list);
++ }
++
++ list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list)
++ {
++ list_del(&pool_list->thr_list);
++ FREE(pool_list);
++ }
++}
++
+ /* Call mem_pools_init() once threading has been configured completely. This
+ * prevents the pool_sweeper thread from getting killed once the main() thread
+ * exits during daemonizing. */
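
The mem_pools_postfini() function added above relies on a standard GCC/Clang feature; as a minimal standalone sketch of that mechanism, a function marked __attribute__((destructor)) runs automatically at process termination (or at dlclose() for a shared library), without any explicit call, which is what lets the pools be destroyed even though glfs_fini() no longer does it.

    #include <stdio.h>

    static __attribute__((constructor)) void
    lib_setup(void)
    {
        printf("runs before main()\n");
    }

    static __attribute__((destructor)) void
    lib_teardown(void)
    {
        printf("runs at process termination\n");
    }

    int
    main(void)
    {
        printf("main body\n");
        return 0;
    }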
+@@ -560,10 +594,6 @@ mem_pools_fini(void)
+ */
+ break;
+ case 1: {
+- per_thread_pool_list_t *pool_list;
+- per_thread_pool_list_t *next_pl;
+- unsigned int i;
+-
+ /* if mem_pools_init() was not called, sweeper_tid will be invalid
+ * and the functions will error out. That is not critical. In all
+ * other cases, the sweeper_tid will be valid and the thread gets
+@@ -571,32 +601,11 @@ mem_pools_fini(void)
+ (void)pthread_cancel(sweeper_tid);
+ (void)pthread_join(sweeper_tid, NULL);
+
+- /* At this point all threads should have already terminated, so
+- * it should be safe to destroy all pending per_thread_pool_list_t
+- * structures that are stored for each thread. */
+- mem_pool_thread_destructor();
+-
+- /* free all objects from all pools */
+- list_for_each_entry_safe(pool_list, next_pl, &pool_threads,
+- thr_list)
+- {
+- for (i = 0; i < NPOOLS; ++i) {
+- free_obj_list(pool_list->pools[i].hot_list);
+- free_obj_list(pool_list->pools[i].cold_list);
+- pool_list->pools[i].hot_list = NULL;
+- pool_list->pools[i].cold_list = NULL;
+- }
+-
+- list_del(&pool_list->thr_list);
+- FREE(pool_list);
+- }
+-
+- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads,
+- thr_list)
+- {
+- list_del(&pool_list->thr_list);
+- FREE(pool_list);
+- }
++ /* There could be threads still running in some cases, so we can't
++ * destroy pool_lists that are in use. Nor can we destroy unused
++ * pool_lists, because some allocated objects may still be pointing
++ * to them. */
++ mem_pool_thread_destructor(NULL);
+
+ init_done = GF_MEMPOOL_INIT_DESTROY;
+ /* Fall through. */
+@@ -617,7 +626,7 @@ mem_pools_fini(void)
+ {
+ }
+ void
+-mem_pool_thread_destructor(void)
++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
+ {
+ }
+
+@@ -738,13 +747,21 @@ mem_get_pool_list(void)
+ }
+ }
+
++ /* There's no need to take pool_list->lock, because this is already an
++ * atomic operation and we don't need to synchronize it with any change
++ * in hot/cold lists. */
++ pool_list->poison = false;
++
+ (void)pthread_mutex_lock(&pool_lock);
+- pool_list->poison = 0;
+ list_add(&pool_list->thr_list, &pool_threads);
+ (void)pthread_mutex_unlock(&pool_lock);
+
+ thread_pool_list = pool_list;
+
++ /* Ensure that all memory objects associated with the new pool_list are
++ * destroyed when the thread terminates. */
++ gf_thread_needs_cleanup();
++
+ return pool_list;
+ }
+
+diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
+index 2eb7b49..0de53c6 100644
+--- a/libglusterfs/src/syncop.c
++++ b/libglusterfs/src/syncop.c
+@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups)
+
+ /* set/reset the ngrps, this is where reset of groups is handled */
+ opctx->ngrps = count;
++
++ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) {
++ /* This is the first time we are storing groups into the TLS structure,
++ * so mark the current thread to make sure it will be properly cleaned
++ * up when it terminates. */
++ gf_thread_needs_cleanup();
++ }
+ opctx->valid |= SYNCOPCTX_GROUPS;
+
+ out:
+--
+1.8.3.1
+