diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h
index cadfc8f9..5b33769f 100644
--- a/include/jemalloc/internal/arena_inlines_b.h
+++ b/include/jemalloc/internal/arena_inlines_b.h
@@ -148,14 +148,14 @@ arena_decay_extent(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks,
 	extent_dalloc_wrapper(tsdn, arena, ehooks, edata);
 	if (config_stats) {
 		/* Update stats accordingly. */
-		arena_stats_lock(tsdn, &arena->stats);
-		arena_stats_add_u64(tsdn, &arena->stats,
+		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->decay_dirty.stats->nmadvise, 1);
-		arena_stats_add_u64(tsdn, &arena->stats,
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE);
-		arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped,
-		    extent_size);
-		arena_stats_unlock(tsdn, &arena->stats);
+		locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+		    &arena->stats.mapped, extent_size);
+		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
 }
diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h
index ab103619..0a1ec734 100644
--- a/include/jemalloc/internal/arena_stats.h
+++ b/include/jemalloc/internal/arena_stats.h
@@ -2,6 +2,7 @@
 #define JEMALLOC_INTERNAL_ARENA_STATS_H
 
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/lockedint.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mutex_prof.h"
 #include "jemalloc/internal/pa.h"
@@ -9,40 +10,28 @@
 
 JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
 
-/*
- * In those architectures that support 64-bit atomics, we use atomic updates for
- * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize
- * externally.
- */
-#ifdef JEMALLOC_ATOMIC_U64
-typedef atomic_u64_t arena_stats_u64_t;
-#else
-/* Must hold the arena stats mutex while reading atomically. */
-typedef uint64_t arena_stats_u64_t;
-#endif
-
 typedef struct arena_stats_large_s arena_stats_large_t;
 struct arena_stats_large_s {
 	/*
 	 * Total number of allocation/deallocation requests served directly by
 	 * the arena.
 	 */
-	arena_stats_u64_t	nmalloc;
-	arena_stats_u64_t	ndalloc;
+	locked_u64_t	nmalloc;
+	locked_u64_t	ndalloc;
 
 	/*
 	 * Number of allocation requests that correspond to this size class.
 	 * This includes requests served by tcache, though tcache only
 	 * periodically merges into this counter.
 	 */
-	arena_stats_u64_t	nrequests; /* Partially derived. */
+	locked_u64_t	nrequests; /* Partially derived. */
 	/*
 	 * Number of tcache fills / flushes for large (similarly, periodically
 	 * merged).  Note that there is no large tcache batch-fill currently
 	 * (i.e. only fill 1 at a time); however flush may be batched.
 	 */
-	arena_stats_u64_t	nfills; /* Partially derived. */
-	arena_stats_u64_t	nflushes; /* Partially derived. */
+	locked_u64_t	nfills; /* Partially derived. */
+	locked_u64_t	nflushes; /* Partially derived. */
 
 	/* Current number of allocations of this size class. */
 	size_t		curlextents; /* Derived. */
@@ -51,11 +40,11 @@ struct arena_stats_large_s {
 typedef struct arena_stats_decay_s arena_stats_decay_t;
 struct arena_stats_decay_s {
 	/* Total number of purge sweeps. */
-	arena_stats_u64_t	npurge;
+	locked_u64_t	npurge;
 	/* Total number of madvise calls made. */
-	arena_stats_u64_t	nmadvise;
+	locked_u64_t	nmadvise;
 	/* Total number of pages purged. */
-	arena_stats_u64_t	purged;
+	locked_u64_t	purged;
 };
 
 typedef struct arena_stats_extents_s arena_stats_extents_t;
@@ -81,19 +70,19 @@ struct arena_stats_extents_s {
  */
 typedef struct arena_stats_s arena_stats_t;
 struct arena_stats_s {
-#ifndef JEMALLOC_ATOMIC_U64
-	malloc_mutex_t		mtx;
-#endif
+	LOCKEDINT_MTX_DECLARE(mtx)
 
-	/* Number of bytes currently mapped, excluding retained memory. */
-	atomic_zu_t		mapped; /* Partially derived. */
+	/*
+	 * Number of bytes currently mapped, excluding retained memory.
+	 */
+	locked_zu_t		mapped; /* Partially derived. */
 
 	/*
 	 * Number of unused virtual memory bytes currently retained.  Retained
 	 * bytes are technically mapped (though always decommitted or purged),
 	 * but they are excluded from the mapped statistic (above).
 	 */
-	atomic_zu_t		retained; /* Derived. */
+	locked_zu_t		retained; /* Derived. */
 
 	/* Number of edata_t structs allocated by base, but not being used. */
 	atomic_zu_t		edata_avail;
@@ -107,11 +96,11 @@ struct arena_stats_s {
 	atomic_zu_t		metadata_thp;
 
 	atomic_zu_t		allocated_large; /* Derived. */
-	arena_stats_u64_t	nmalloc_large; /* Derived. */
-	arena_stats_u64_t	ndalloc_large; /* Derived. */
-	arena_stats_u64_t	nfills_large; /* Derived. */
-	arena_stats_u64_t	nflushes_large; /* Derived. */
-	arena_stats_u64_t	nrequests_large; /* Derived. */
+	locked_u64_t	nmalloc_large; /* Derived. */
+	locked_u64_t	ndalloc_large; /* Derived. */
+	locked_u64_t	nfills_large; /* Derived. */
+	locked_u64_t	nflushes_large; /* Derived. */
+	locked_u64_t	nrequests_large; /* Derived. */
 
 	/*
 	 * The stats logically owned by the pa_shard in the same arena.  This
@@ -139,138 +128,32 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) {
 			assert(((char *)arena_stats)[i] == 0);
 		}
 	}
-#ifndef JEMALLOC_ATOMIC_U64
-	if (malloc_mutex_init(&arena_stats->mtx, "arena_stats",
+	if (LOCKEDINT_MTX_INIT(LOCKEDINT_MTX(arena_stats->mtx), "arena_stats",
 	    WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
-#endif
 	/* Memory is zeroed, so there is no need to clear stats. */
 	return false;
 }
 
-static inline void
-arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
-#ifndef JEMALLOC_ATOMIC_U64
-	malloc_mutex_lock(tsdn, &arena_stats->mtx);
-#endif
-}
-
-static inline void
-arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
-#ifndef JEMALLOC_ATOMIC_U64
-	malloc_mutex_unlock(tsdn, &arena_stats->mtx);
-#endif
-}
-
-static inline uint64_t
-arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
-    arena_stats_u64_t *p) {
-#ifdef JEMALLOC_ATOMIC_U64
-	return atomic_load_u64(p, ATOMIC_RELAXED);
-#else
-	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	return *p;
-#endif
-}
-
-static inline void
-arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
-    arena_stats_u64_t *p, uint64_t x) {
-#ifdef JEMALLOC_ATOMIC_U64
-	atomic_fetch_add_u64(p, x, ATOMIC_RELAXED);
-#else
-	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	*p += x;
-#endif
-}
-
-static inline void
-arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
-    arena_stats_u64_t *p, uint64_t x) {
-#ifdef JEMALLOC_ATOMIC_U64
-	uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED);
-	assert(r - x <= r);
-#else
-	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	*p -= x;
-	assert(*p + x >= *p);
-#endif
-}
-
-/*
- * Non-atomically sets *dst += src. *dst needs external synchronization.
- * This lets us avoid the cost of a fetch_add when its unnecessary (note that
- * the types here are atomic).
- */
-static inline void
-arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) {
-#ifdef JEMALLOC_ATOMIC_U64
-	uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED);
-	atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED);
-#else
-	*dst += src;
-#endif
-}
-
-static inline size_t
-arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
-    atomic_zu_t *p) {
-#ifdef JEMALLOC_ATOMIC_U64
-	return atomic_load_zu(p, ATOMIC_RELAXED);
-#else
-	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	return atomic_load_zu(p, ATOMIC_RELAXED);
-#endif
-}
-
-static inline void
-arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
-    atomic_zu_t *p, size_t x) {
-#ifdef JEMALLOC_ATOMIC_U64
-	atomic_fetch_add_zu(p, x, ATOMIC_RELAXED);
-#else
-	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
-	atomic_store_zu(p, cur + x, ATOMIC_RELAXED);
-#endif
-}
-
-static inline void
-arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
-    atomic_zu_t *p, size_t x) {
-#ifdef JEMALLOC_ATOMIC_U64
-	size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
-	assert(r - x <= r);
-#else
-	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
-	atomic_store_zu(p, cur - x, ATOMIC_RELAXED);
-#endif
-}
-
-/* Like the _u64 variant, needs an externally synchronized *dst. */
-static inline void
-arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
-	size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
-	atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED);
-}
-
 static inline void
 arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
     szind_t szind, uint64_t nrequests) {
-	arena_stats_lock(tsdn, arena_stats);
+	LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx);
 	arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS];
-	arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests);
-	arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1);
-	arena_stats_unlock(tsdn, arena_stats);
+	locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx),
+	    &lstats->nrequests, nrequests);
+	locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx),
+	    &lstats->nflushes, 1);
+	LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx);
 }
 
 static inline void
 arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) {
-	arena_stats_lock(tsdn, arena_stats);
-	arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size);
-	arena_stats_unlock(tsdn, arena_stats);
+	LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx);
+	locked_inc_zu(tsdn, LOCKEDINT_MTX(arena_stats->mtx),
+	    &arena_stats->mapped, size);
+	LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx);
 }
 
 #endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */
diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h
index a76f54ce..e5afb202 100644
--- a/include/jemalloc/internal/atomic.h
+++ b/include/jemalloc/internal/atomic.h
@@ -51,6 +51,20 @@
 #define ATOMIC_ACQ_REL atomic_memory_order_acq_rel
 #define ATOMIC_SEQ_CST atomic_memory_order_seq_cst
 
+/*
+ * Another convenience -- simple atomic helper functions: a relaxed
+ * load-add-store, cheaper than a fetch_add but not a single atomic
+ * read-modify-write, so the destination needs external synchronization.
+ */
+#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type,	\
+    lg_size)								\
+	JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size)	\
+	ATOMIC_INLINE void						\
+	atomic_load_add_store_##short_type(atomic_##short_type##_t *a,	\
+	    type inc) {							\
+		type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \
+		type newval = oldval + inc;				\
+		atomic_store_##short_type(a, newval, ATOMIC_RELAXED);	\
+	}
+
 /*
  * Not all platforms have 64-bit atomics.  If we do, this #define exposes that
  * fact.
@@ -67,18 +81,18 @@ JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
  */
 JEMALLOC_GENERATE_ATOMICS(bool, b, 0)
 
-JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
 
-JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
 
-JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR)
 
-JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint8_t, u8, 0)
 
-JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint32_t, u32, 2)
 
 #ifdef JEMALLOC_ATOMIC_U64
-JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint64_t, u64, 3)
 #endif
 
 #undef ATOMIC_INLINE
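For reference, the helper that JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS adds on top of the existing JEMALLOC_GENERATE_INT_ATOMICS expands, for size_t, to the equivalent of the sketch below (a hand expansion of the macro above, not an additional change). Because the sequence is a relaxed load, a plain add, and a relaxed store rather than one atomic read-modify-write, concurrent updaters can lose increments; the stats-merge callers introduced later in this patch are safe only because each destination is externally synchronized.

	ATOMIC_INLINE void
	atomic_load_add_store_zu(atomic_zu_t *a, size_t inc) {
		/* Relaxed load, plain add, relaxed store: not one atomic RMW. */
		size_t oldval = atomic_load_zu(a, ATOMIC_RELAXED);
		size_t newval = oldval + inc;
		atomic_store_zu(a, newval, ATOMIC_RELAXED);
	}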
diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h
new file mode 100644
index 00000000..6a1f9ad1
--- /dev/null
+++ b/include/jemalloc/internal/lockedint.h
@@ -0,0 +1,151 @@
+#ifndef JEMALLOC_INTERNAL_LOCKEDINT_H
+#define JEMALLOC_INTERNAL_LOCKEDINT_H
+
+/*
+ * On architectures that support 64-bit atomics, we use atomic updates for
+ * our 64-bit values.  Otherwise, we use a plain uint64_t and synchronize
+ * externally.
+ */
+
+typedef struct locked_u64_s locked_u64_t;
+#ifdef JEMALLOC_ATOMIC_U64
+struct locked_u64_s {
+	atomic_u64_t val;
+};
+#else
+/* Must hold the associated mutex. */
+struct locked_u64_s {
+	uint64_t val;
+};
+#endif
+
+typedef struct locked_zu_s locked_zu_t;
+struct locked_zu_s {
+	atomic_zu_t val;
+};
+
+#ifndef JEMALLOC_ATOMIC_U64
+# define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name;
+# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode)			\
+    malloc_mutex_init(ptr, name, rank, rank_mode)
+# define LOCKEDINT_MTX(mtx) (&(mtx))
+# define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu))
+# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu))
+#else
+# define LOCKEDINT_MTX_DECLARE(name)
+# define LOCKEDINT_MTX(ptr) NULL
+# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) false
+# define LOCKEDINT_MTX_LOCK(tsdn, mu) do {} while (0)
+# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) do {} while (0)
+#endif
+
+static inline uint64_t
+locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+	return atomic_load_u64(&p->val, ATOMIC_RELAXED);
+#else
+	malloc_mutex_assert_owner(tsdn, mtx);
+	return p->val;
+#endif
+}
+
+static inline void
+locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
+    uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+	atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED);
+#else
+	malloc_mutex_assert_owner(tsdn, mtx);
+	p->val += x;
+#endif
+}
+
+static inline void
+locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
+    uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+	uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED);
+	assert(r - x <= r);
+#else
+	malloc_mutex_assert_owner(tsdn, mtx);
+	p->val -= x;
+	assert(p->val + x >= p->val);
+#endif
+}
+
+/*
+ * Non-atomically sets *dst += src.  *dst needs external synchronization.
+ * This lets us avoid the cost of a fetch_add when it's unnecessary (note that
+ * the types here are atomic).
+ */
+static inline void
+locked_inc_u64_unsynchronized(locked_u64_t *dst, uint64_t src) {
+#ifdef JEMALLOC_ATOMIC_U64
+	uint64_t cur_dst = atomic_load_u64(&dst->val, ATOMIC_RELAXED);
+	atomic_store_u64(&dst->val, src + cur_dst, ATOMIC_RELAXED);
+#else
+	dst->val += src;
+#endif
+}
+
+static inline uint64_t
+locked_read_u64_unsynchronized(locked_u64_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+	return atomic_load_u64(&p->val, ATOMIC_RELAXED);
+#else
+	return p->val;
+#endif
+}
+
+static inline size_t
+locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+	return atomic_load_zu(&p->val, ATOMIC_RELAXED);
+#else
+	malloc_mutex_assert_owner(tsdn, mtx);
+	return atomic_load_zu(&p->val, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p,
+    size_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+	atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED);
+#else
+	malloc_mutex_assert_owner(tsdn, mtx);
+	size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED);
+	atomic_store_zu(&p->val, cur + x, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p,
+    size_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+	size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED);
+	assert(r - x <= r);
+#else
+	malloc_mutex_assert_owner(tsdn, mtx);
+	size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED);
+	atomic_store_zu(&p->val, cur - x, ATOMIC_RELAXED);
+#endif
+}
+
+/* Like the _u64 variant, needs an externally synchronized *dst. */
+static inline void
+locked_inc_zu_unsynchronized(locked_zu_t *dst, size_t src) {
+	size_t cur_dst = atomic_load_zu(&dst->val, ATOMIC_RELAXED);
+	atomic_store_zu(&dst->val, src + cur_dst, ATOMIC_RELAXED);
+}
+
+/*
+ * Unlike the _u64 variant, this is safe to call unconditionally: locked_zu_t
+ * is always backed by an atomic.
+ */
+static inline size_t
+locked_read_atomic_zu(locked_zu_t *p) {
+	return atomic_load_zu(&p->val, ATOMIC_RELAXED);
+}
+
+#endif /* JEMALLOC_INTERNAL_LOCKEDINT_H */
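The intended calling convention, shown on a hypothetical stats struct (the "foo" names are illustrative, not part of the patch): declare the mutex with LOCKEDINT_MTX_DECLARE and pass LOCKEDINT_MTX(...) to the locked_* accessors. When JEMALLOC_ATOMIC_U64 is defined, the mutex macros compile away entirely (their arguments are discarded at preprocessing time, so the struct need not even have a mtx field) and the update becomes a relaxed atomic; otherwise the mutex is real and locked_inc_u64 asserts that it is held.

	typedef struct foo_stats_s {
		LOCKEDINT_MTX_DECLARE(mtx)	/* No field at all with 64-bit atomics. */
		locked_u64_t nfoo;
	} foo_stats_t;

	static void
	foo_stats_bump(tsdn_t *tsdn, foo_stats_t *stats, uint64_t n) {
		LOCKEDINT_MTX_LOCK(tsdn, stats->mtx);
		locked_inc_u64(tsdn, LOCKEDINT_MTX(stats->mtx), &stats->nfoo, n);
		LOCKEDINT_MTX_UNLOCK(tsdn, stats->mtx);
	}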
diff --git a/src/arena.c b/src/arena.c
index ced01d73..d4e200cf 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -93,80 +93,89 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	base_stats_get(tsdn, arena->base, &base_allocated, &base_resident,
 	    &base_mapped, &metadata_thp);
 
-	arena_stats_lock(tsdn, &arena->stats);
+	LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
 
-	arena_stats_accum_zu(&astats->mapped, base_mapped
-	    + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped));
-	arena_stats_accum_zu(&astats->retained,
+	locked_inc_zu_unsynchronized(&astats->mapped, base_mapped
+	    + locked_read_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+	    &arena->stats.mapped));
+	locked_inc_zu_unsynchronized(&astats->retained,
 	    ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE);
 
 	atomic_store_zu(&astats->edata_avail,
 	    atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED),
 	    ATOMIC_RELAXED);
 
-	arena_stats_accum_u64(&astats->decay_dirty.npurge,
-	    arena_stats_read_u64(tsdn, &arena->stats,
+	locked_inc_u64_unsynchronized(&astats->decay_dirty.npurge,
+	    locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 	    &arena->stats.decay_dirty.npurge));
-	arena_stats_accum_u64(&astats->decay_dirty.nmadvise,
-	    arena_stats_read_u64(tsdn, &arena->stats,
+	locked_inc_u64_unsynchronized(&astats->decay_dirty.nmadvise,
+	    locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 	    &arena->stats.decay_dirty.nmadvise));
-	arena_stats_accum_u64(&astats->decay_dirty.purged,
-	    arena_stats_read_u64(tsdn, &arena->stats,
+	locked_inc_u64_unsynchronized(&astats->decay_dirty.purged,
+	    locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 	    &arena->stats.decay_dirty.purged));
 
-	arena_stats_accum_u64(&astats->decay_muzzy.npurge,
-	    arena_stats_read_u64(tsdn, &arena->stats,
+	locked_inc_u64_unsynchronized(&astats->decay_muzzy.npurge,
+	    locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 	    &arena->stats.decay_muzzy.npurge));
-	arena_stats_accum_u64(&astats->decay_muzzy.nmadvise,
-	    arena_stats_read_u64(tsdn, &arena->stats,
+	locked_inc_u64_unsynchronized(&astats->decay_muzzy.nmadvise,
+	    locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 	    &arena->stats.decay_muzzy.nmadvise));
-	arena_stats_accum_u64(&astats->decay_muzzy.purged,
-	    arena_stats_read_u64(tsdn, &arena->stats,
+	locked_inc_u64_unsynchronized(&astats->decay_muzzy.purged,
+	    locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 	    &arena->stats.decay_muzzy.purged));
 
-	arena_stats_accum_zu(&astats->base, base_allocated);
-	arena_stats_accum_zu(&astats->internal, arena_internal_get(arena));
-	arena_stats_accum_zu(&astats->metadata_thp, metadata_thp);
-	arena_stats_accum_zu(&astats->resident, base_resident +
+	atomic_load_add_store_zu(&astats->base, base_allocated);
+	atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena));
+	atomic_load_add_store_zu(&astats->metadata_thp, metadata_thp);
+	atomic_load_add_store_zu(&astats->resident, base_resident +
 	    (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) +
 	    ecache_npages_get(&arena->pa_shard.ecache_dirty) +
 	    ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE)));
-	arena_stats_accum_zu(&astats->pa_shard_stats.abandoned_vm,
+	atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm,
 	    atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm,
 	    ATOMIC_RELAXED));
 
 	for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) {
-		uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats,
+		uint64_t nmalloc = locked_read_u64(tsdn,
+		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nmalloc);
-		arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc);
-		arena_stats_accum_u64(&astats->nmalloc_large, nmalloc);
+		locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc);
+		locked_inc_u64_unsynchronized(&astats->nmalloc_large,
+		    nmalloc);
 
-		uint64_t ndalloc = arena_stats_read_u64(tsdn, &arena->stats,
+		uint64_t ndalloc = locked_read_u64(tsdn,
+		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].ndalloc);
-		arena_stats_accum_u64(&lstats[i].ndalloc, ndalloc);
-		arena_stats_accum_u64(&astats->ndalloc_large, ndalloc);
+		locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc);
+		locked_inc_u64_unsynchronized(&astats->ndalloc_large,
+		    ndalloc);
 
-		uint64_t nrequests = arena_stats_read_u64(tsdn, &arena->stats,
+		uint64_t nrequests = locked_read_u64(tsdn,
+		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nrequests);
-		arena_stats_accum_u64(&lstats[i].nrequests,
+		locked_inc_u64_unsynchronized(&lstats[i].nrequests,
 		    nmalloc + nrequests);
-		arena_stats_accum_u64(&astats->nrequests_large,
+		locked_inc_u64_unsynchronized(&astats->nrequests_large,
 		    nmalloc + nrequests);
 
 		/* nfill == nmalloc for large currently. */
-		arena_stats_accum_u64(&lstats[i].nfills, nmalloc);
-		arena_stats_accum_u64(&astats->nfills_large, nmalloc);
+		locked_inc_u64_unsynchronized(&lstats[i].nfills, nmalloc);
+		locked_inc_u64_unsynchronized(&astats->nfills_large,
+		    nmalloc);
 
-		uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats,
+		uint64_t nflush = locked_read_u64(tsdn,
+		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nflushes);
-		arena_stats_accum_u64(&lstats[i].nflushes, nflush);
-		arena_stats_accum_u64(&astats->nflushes_large, nflush);
+		locked_inc_u64_unsynchronized(&lstats[i].nflushes, nflush);
+		locked_inc_u64_unsynchronized(&astats->nflushes_large,
+		    nflush);
 
 		assert(nmalloc >= ndalloc);
 		assert(nmalloc - ndalloc <= SIZE_T_MAX);
 		size_t curlextents = (size_t)(nmalloc - ndalloc);
 		lstats[i].curlextents += curlextents;
-		arena_stats_accum_zu(&astats->allocated_large,
+		atomic_load_add_store_zu(&astats->allocated_large,
 		    curlextents * sz_index2size(SC_NBINS + i));
 	}
 
@@ -195,7 +204,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		    ATOMIC_RELAXED);
 	}
 
-	arena_stats_unlock(tsdn, &arena->stats);
+	LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 
 	/* tcache_bytes counts currently cached bytes. */
 	atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED);
@@ -204,13 +213,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
 		for (szind_t i = 0; i < SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_small[i];
-			arena_stats_accum_zu(&astats->tcache_bytes,
-			    cache_bin_ncached_get(tbin, &tcache_bin_info[i])
-			    * sz_index2size(i));
+			atomic_load_add_store_zu(&astats->tcache_bytes,
+			    cache_bin_ncached_get(tbin,
+			    &tcache_bin_info[i]) * sz_index2size(i));
 		}
 		for (szind_t i = 0; i < nhbins - SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_large[i];
-			arena_stats_accum_zu(&astats->tcache_bytes,
+			atomic_load_add_store_zu(&astats->tcache_bytes,
 			    cache_bin_ncached_get(tbin,
 			    &tcache_bin_info[i + SC_NBINS]) *
 			    sz_index2size(i + SC_NBINS));
@@ -397,7 +406,7 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
 	index = sz_size2index(usize);
 	hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0;
 
-	arena_stats_add_u64(tsdn, &arena->stats,
+	locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 	    &arena->stats.lstats[hindex].nmalloc, 1);
 }
 
@@ -413,7 +422,7 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
 	index = sz_size2index(usize);
 	hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0;
 
-	arena_stats_add_u64(tsdn, &arena->stats,
+	locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 	    &arena->stats.lstats[hindex].ndalloc, 1);
 }
 
@@ -466,13 +475,14 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize,
 
 	if (edata != NULL) {
 		if (config_stats) {
-			arena_stats_lock(tsdn, &arena->stats);
+			LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
 			arena_large_malloc_stats_update(tsdn, arena, usize);
 			if (mapped_add != 0) {
-				arena_stats_add_zu(tsdn, &arena->stats,
+				locked_inc_zu(tsdn,
+				    LOCKEDINT_MTX(arena->stats.mtx),
 				    &arena->stats.mapped, mapped_add);
 			}
-			arena_stats_unlock(tsdn, &arena->stats);
+			LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 		}
 		arena_nactive_add(arena, esize >> LG_PAGE);
 	}
@@ -487,10 +497,10 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize,
 void
 arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) {
 	if (config_stats) {
-		arena_stats_lock(tsdn, &arena->stats);
+		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
 		arena_large_dalloc_stats_update(tsdn, arena,
 		    edata_usize_get(edata));
-		arena_stats_unlock(tsdn, &arena->stats);
+		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
 	arena_nactive_sub(arena, edata_size_get(edata) >> LG_PAGE);
 }
@@ -502,9 +512,9 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
 	size_t udiff = oldusize - usize;
 
 	if (config_stats) {
-		arena_stats_lock(tsdn, &arena->stats);
+		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
 		arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize);
-		arena_stats_unlock(tsdn, &arena->stats);
+		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
 	arena_nactive_sub(arena, udiff >> LG_PAGE);
 }
@@ -516,9 +526,9 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
 	size_t udiff = usize - oldusize;
 
 	if (config_stats) {
-		arena_stats_lock(tsdn, &arena->stats);
+		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
 		arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize);
-		arena_stats_unlock(tsdn, &arena->stats);
+		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
 	arena_nactive_add(arena, udiff >> LG_PAGE);
 }
@@ -894,16 +904,16 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks,
 	}
 
 	if (config_stats) {
-		arena_stats_lock(tsdn, &arena->stats);
-		arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->npurge,
-		    1);
-		arena_stats_add_u64(tsdn, &arena->stats,
+		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+		    &decay->stats->npurge, 1);
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
 		    &decay->stats->nmadvise, nmadvise);
-		arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->purged,
-		    npurged);
-		arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped,
-		    nunmapped << LG_PAGE);
-		arena_stats_unlock(tsdn, &arena->stats);
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+		    &decay->stats->purged, npurged);
+		locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+		    &arena->stats.mapped, nunmapped << LG_PAGE);
+		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
 
 	return npurged;
diff --git a/src/ctl.c b/src/ctl.c
index 1a9b0d9f..56d30000 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -676,28 +676,19 @@ static const ctl_named_node_t super_root_node[] = {
  * synchronized by the ctl mutex.
 */
 static void
-ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) {
-#ifdef JEMALLOC_ATOMIC_U64
-	uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED);
-	uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED);
-	atomic_store_u64(dst, cur_dst + cur_src, ATOMIC_RELAXED);
-#else
-	*dst += *src;
-#endif
-}
-
-/* Likewise: with ctl mutex synchronization, reading is simple. */
-static uint64_t
-ctl_arena_stats_read_u64(arena_stats_u64_t *p) {
-#ifdef JEMALLOC_ATOMIC_U64
-	return atomic_load_u64(p, ATOMIC_RELAXED);
-#else
-	return *p;
-#endif
+ctl_accum_locked_u64(locked_u64_t *dst, locked_u64_t *src) {
+	locked_inc_u64_unsynchronized(dst,
+	    locked_read_u64_unsynchronized(src));
 }
 
 static void
-accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) {
+ctl_accum_locked_zu(locked_zu_t *dst, locked_zu_t *src) {
+	locked_inc_zu_unsynchronized(dst,
+	    locked_read_atomic_zu(src));
+}
+
+static void
+ctl_accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) {
 	size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
 	size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED);
 	atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED);
@@ -870,26 +861,26 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
 		ctl_arena_stats_t *astats = ctl_arena->astats;
 
 		if (!destroyed) {
-			accum_atomic_zu(&sdstats->astats.mapped,
+			ctl_accum_locked_zu(&sdstats->astats.mapped,
 			    &astats->astats.mapped);
-			accum_atomic_zu(&sdstats->astats.retained,
+			ctl_accum_locked_zu(&sdstats->astats.retained,
 			    &astats->astats.retained);
-			accum_atomic_zu(&sdstats->astats.edata_avail,
+			ctl_accum_atomic_zu(&sdstats->astats.edata_avail,
 			    &astats->astats.edata_avail);
 		}
 
-		ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge,
+		ctl_accum_locked_u64(&sdstats->astats.decay_dirty.npurge,
 		    &astats->astats.decay_dirty.npurge);
-		ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise,
+		ctl_accum_locked_u64(&sdstats->astats.decay_dirty.nmadvise,
 		    &astats->astats.decay_dirty.nmadvise);
-		ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged,
+		ctl_accum_locked_u64(&sdstats->astats.decay_dirty.purged,
 		    &astats->astats.decay_dirty.purged);
 
-		ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge,
+		ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.npurge,
 		    &astats->astats.decay_muzzy.npurge);
-		ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise,
+		ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.nmadvise,
 		    &astats->astats.decay_muzzy.nmadvise);
-		ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged,
+		ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.purged,
 		    &astats->astats.decay_muzzy.purged);
 
 #define OP(mtx) malloc_mutex_prof_merge(				\
@@ -900,13 +891,13 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
 MUTEX_PROF_ARENA_MUTEXES
 #undef OP
 		if (!destroyed) {
-			accum_atomic_zu(&sdstats->astats.base,
+			ctl_accum_atomic_zu(&sdstats->astats.base,
 			    &astats->astats.base);
-			accum_atomic_zu(&sdstats->astats.internal,
+			ctl_accum_atomic_zu(&sdstats->astats.internal,
 			    &astats->astats.internal);
-			accum_atomic_zu(&sdstats->astats.resident,
+			ctl_accum_atomic_zu(&sdstats->astats.resident,
 			    &astats->astats.resident);
-			accum_atomic_zu(&sdstats->astats.metadata_thp,
+			ctl_accum_atomic_zu(&sdstats->astats.metadata_thp,
 			    &astats->astats.metadata_thp);
 		} else {
 			assert(atomic_load_zu(
@@ -925,24 +916,25 @@ MUTEX_PROF_ARENA_MUTEXES
 		sdstats->nflushes_small += astats->nflushes_small;
 
 		if (!destroyed) {
-			accum_atomic_zu(&sdstats->astats.allocated_large,
+			ctl_accum_atomic_zu(&sdstats->astats.allocated_large,
 			    &astats->astats.allocated_large);
 		} else {
 			assert(atomic_load_zu(&astats->astats.allocated_large,
 			    ATOMIC_RELAXED) == 0);
 		}
 
-		ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large,
+		ctl_accum_locked_u64(&sdstats->astats.nmalloc_large,
 		    &astats->astats.nmalloc_large);
-		ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large,
+		ctl_accum_locked_u64(&sdstats->astats.ndalloc_large,
 		    &astats->astats.ndalloc_large);
-		ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large,
+		ctl_accum_locked_u64(&sdstats->astats.nrequests_large,
 		    &astats->astats.nrequests_large);
-		ctl_accum_arena_stats_u64(&sdstats->astats.nflushes_large,
+		ctl_accum_locked_u64(&sdstats->astats.nflushes_large,
 		    &astats->astats.nflushes_large);
-		accum_atomic_zu(&sdstats->astats.pa_shard_stats.abandoned_vm,
+		ctl_accum_atomic_zu(
+		    &sdstats->astats.pa_shard_stats.abandoned_vm,
 		    &astats->astats.pa_shard_stats.abandoned_vm);
 
-		accum_atomic_zu(&sdstats->astats.tcache_bytes,
+		ctl_accum_atomic_zu(&sdstats->astats.tcache_bytes,
 		    &astats->astats.tcache_bytes);
 
 		if (ctl_arena->arena_ind == 0) {
@@ -978,11 +970,11 @@ MUTEX_PROF_ARENA_MUTEXES
 		/* Merge stats for large allocations. */
 		for (i = 0; i < SC_NSIZES - SC_NBINS; i++) {
-			ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc,
+			ctl_accum_locked_u64(&sdstats->lstats[i].nmalloc,
 			    &astats->lstats[i].nmalloc);
-			ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc,
+			ctl_accum_locked_u64(&sdstats->lstats[i].ndalloc,
 			    &astats->lstats[i].ndalloc);
-			ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests,
+			ctl_accum_locked_u64(&sdstats->lstats[i].nrequests,
 			    &astats->lstats[i].nrequests);
 			if (!destroyed) {
 				sdstats->lstats[i].curlextents +=
@@ -994,17 +986,17 @@ MUTEX_PROF_ARENA_MUTEXES
 
 		/* Merge extents stats. */
 		for (i = 0; i < SC_NPSIZES; i++) {
-			accum_atomic_zu(&sdstats->estats[i].ndirty,
+			ctl_accum_atomic_zu(&sdstats->estats[i].ndirty,
 			    &astats->estats[i].ndirty);
-			accum_atomic_zu(&sdstats->estats[i].nmuzzy,
+			ctl_accum_atomic_zu(&sdstats->estats[i].nmuzzy,
 			    &astats->estats[i].nmuzzy);
-			accum_atomic_zu(&sdstats->estats[i].nretained,
+			ctl_accum_atomic_zu(&sdstats->estats[i].nretained,
 			    &astats->estats[i].nretained);
-			accum_atomic_zu(&sdstats->estats[i].dirty_bytes,
+			ctl_accum_atomic_zu(&sdstats->estats[i].dirty_bytes,
 			    &astats->estats[i].dirty_bytes);
-			accum_atomic_zu(&sdstats->estats[i].muzzy_bytes,
+			ctl_accum_atomic_zu(&sdstats->estats[i].muzzy_bytes,
 			    &astats->estats[i].muzzy_bytes);
-			accum_atomic_zu(&sdstats->estats[i].retained_bytes,
+			ctl_accum_atomic_zu(&sdstats->estats[i].retained_bytes,
 			    &astats->estats[i].retained_bytes);
 		}
 	}
@@ -1104,10 +1096,10 @@ ctl_refresh(tsdn_t *tsdn) {
 		    &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED);
 		ctl_stats->resident = atomic_load_zu(
 		    &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED);
-		ctl_stats->mapped = atomic_load_zu(
-		    &ctl_sarena->astats->astats.mapped, ATOMIC_RELAXED);
-		ctl_stats->retained = atomic_load_zu(
-		    &ctl_sarena->astats->astats.retained, ATOMIC_RELAXED);
+		ctl_stats->mapped = locked_read_atomic_zu(
+		    &ctl_sarena->astats->astats.mapped);
+		ctl_stats->retained = locked_read_atomic_zu(
+		    &ctl_sarena->astats->astats.retained);
 
 		ctl_background_thread_stats_read(tsdn);
 
@@ -2916,10 +2908,10 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t)
 CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t)
 CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_mapped,
-    atomic_load_zu(&arenas_i(mib[2])->astats->astats.mapped, ATOMIC_RELAXED),
+    locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.mapped),
     size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_retained,
-    atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED),
+    locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained),
     size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail,
     atomic_load_zu(&arenas_i(mib[2])->astats->astats.edata_avail,
@@ -2927,23 +2919,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail,
     size_t)
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_base,
@@ -2982,23 +2974,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated,
     atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large,
     ATOMIC_RELAXED), size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t)
 /*
  * Note: "nmalloc_large" here instead of "nfills" in the read.  This is
  * intentional (large has no batch fill).
 */
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t)
 
 /* Lock profiling related APIs below. */
@@ -3124,13 +3116,13 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib,
 }
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests,
-    ctl_arena_stats_read_u64(
+    locked_read_u64_unsynchronized(
     &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents,
     arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t)
diff --git a/src/extent.c b/src/extent.c
index 7c00525a..a023d3e2 100644
--- a/src/extent.c
+++ b/src/extent.c
@@ -199,7 +199,8 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks,
     ecache_t *ecache, edata_t *edata, bool growing_retained) {
 	size_t sz = edata_size_get(edata);
 	if (config_stats) {
-		arena_stats_accum_zu(&arena->pa_shard.stats->abandoned_vm, sz);
+		atomic_fetch_add_zu(&arena->pa_shard.stats->abandoned_vm, sz,
+		    ATOMIC_RELAXED);
 	}
 	/*
 	 * Leak extent after making sure its pages have already been purged, so
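Taken together, the patch preserves the original two-level scheme: mutator paths update arena->stats under LOCKEDINT_MTX, while the merge and ctl paths read with locked_read_* and fold the values into snapshot structures guarded only by the ctl mutex, where the cheaper *_unsynchronized variants suffice. A condensed sketch of that flow (the wrapper function is hypothetical; the field names come from the patch):

	static uint64_t
	merge_and_report_npurge(tsdn_t *tsdn, arena_stats_t *live,
	    arena_stats_t *snap) {
		LOCKEDINT_MTX_LOCK(tsdn, live->mtx);
		/* Read under the arena stats mutex (a no-op with 64-bit atomics). */
		uint64_t npurge = locked_read_u64(tsdn, LOCKEDINT_MTX(live->mtx),
		    &live->decay_dirty.npurge);
		LOCKEDINT_MTX_UNLOCK(tsdn, live->mtx);
		/* The snapshot is synchronized externally (by the ctl mutex). */
		locked_inc_u64_unsynchronized(&snap->decay_dirty.npurge, npurge);
		return locked_read_u64_unsynchronized(&snap->decay_dirty.npurge);
	}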