From 99b1291d1760ad164346073b35ac03ce2eb35e68 Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Tue, 18 Feb 2020 17:21:40 -0800
Subject: [PATCH] Edata cache: add edata_cache_small_t.

This can be used to amortize the synchronization costs of edata_cache
accesses.
---
 include/jemalloc/internal/edata_cache.h | 28 ++++++++++++
 src/edata_cache.c                       | 60 ++++++++++++++++++++++--
 test/unit/edata_cache.c                 | 61 ++++++++++++++++++++-----
 3 files changed, 134 insertions(+), 15 deletions(-)

diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h
index 73ac7af8..620360d1 100644
--- a/include/jemalloc/internal/edata_cache.h
+++ b/include/jemalloc/internal/edata_cache.h
@@ -25,4 +25,32 @@ void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache);
 void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache);
 void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache);
 
+typedef struct edata_cache_small_s edata_cache_small_t;
+struct edata_cache_small_s {
+    edata_list_t list;
+    size_t count;
+    edata_cache_t *fallback;
+};
+
+/*
+ * An edata_cache_small is like an edata_cache, but it relies on external
+ * synchronization and avoids first-fit strategies.  You can call "prepare" to
+ * acquire at least num edata_t objects, and then "finish" to flush all
+ * excess ones back to their fallback edata_cache_t.  Once they have been
+ * acquired, they can be allocated without failing (and in fact, this is
+ * required -- it's not permitted to attempt to get an edata_t without first
+ * preparing for it).
+ */
+
+void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback);
+
+/* Returns whether or not an error occurred. */
+bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs,
+    size_t num);
+edata_t *edata_cache_small_get(edata_cache_small_t *ecs);
+
+void edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata);
+void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs,
+    size_t num);
+
 #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */

diff --git a/src/edata_cache.c b/src/edata_cache.c
index 1af7b96f..b62972a1 100644
--- a/src/edata_cache.c
+++ b/src/edata_cache.c
@@ -40,14 +40,68 @@ edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) {
     malloc_mutex_unlock(tsdn, &edata_cache->mtx);
 }
 
-void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache) {
+void
+edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache) {
     malloc_mutex_prefork(tsdn, &edata_cache->mtx);
 }
 
-void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache) {
+void
+edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache) {
     malloc_mutex_postfork_parent(tsdn, &edata_cache->mtx);
 }
 
-void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) {
+void
+edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) {
     malloc_mutex_postfork_child(tsdn, &edata_cache->mtx);
 }
+
+void
+edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) {
+    edata_list_init(&ecs->list);
+    ecs->count = 0;
+    ecs->fallback = fallback;
+}
+
+edata_t *
+edata_cache_small_get(edata_cache_small_t *ecs) {
+    assert(ecs->count > 0);
+    edata_t *edata = edata_list_first(&ecs->list);
+    assert(edata != NULL);
+    edata_list_remove(&ecs->list, edata);
+    ecs->count--;
+    return edata;
+}
+
+void
+edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata) {
+    assert(edata != NULL);
+    edata_list_append(&ecs->list, edata);
+    ecs->count++;
+}
+
+bool
+edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs,
+    size_t num) {
+    while (ecs->count < num) {
+        /*
+         * Obviously, we can be smarter here and batch the locking that
+         * happens inside of edata_cache_get.  But for now, something
+         * quick-and-dirty is fine.
+         */
+        edata_t *edata = edata_cache_get(tsdn, ecs->fallback);
+        if (edata == NULL) {
+            return true;
+        }
+        ql_elm_new(edata, ql_link);
+        edata_cache_small_put(ecs, edata);
+    }
+    return false;
+}
+
+void
+edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs,
+    size_t num) {
+    while (ecs->count > num) {
+        /* Same deal here -- we should be batching. */
+        edata_t *edata = edata_cache_small_get(ecs);
+        edata_cache_put(tsdn, ecs->fallback, edata);
+    }
+}

diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c
index 638e2292..22c9dcb8 100644
--- a/test/unit/edata_cache.c
+++ b/test/unit/edata_cache.c
@@ -17,38 +17,75 @@ test_edata_cache_destroy(edata_cache_t *edata_cache) {
 }
 
 TEST_BEGIN(test_edata_cache) {
-    edata_cache_t edc;
-    test_edata_cache_init(&edc);
+    edata_cache_t ec;
+    test_edata_cache_init(&ec);
 
     /* Get one */
-    edata_t *ed1 = edata_cache_get(TSDN_NULL, &edc);
+    edata_t *ed1 = edata_cache_get(TSDN_NULL, &ec);
     assert_ptr_not_null(ed1, "");
 
     /* Cache should be empty */
-    assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
 
     /* Get another */
-    edata_t *ed2 = edata_cache_get(TSDN_NULL, &edc);
+    edata_t *ed2 = edata_cache_get(TSDN_NULL, &ec);
     assert_ptr_not_null(ed2, "");
 
     /* Still empty */
-    assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
 
     /* Put one back, and the cache should now have one item */
-    edata_cache_put(TSDN_NULL, &edc, ed1);
-    assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 1, "");
+    edata_cache_put(TSDN_NULL, &ec, ed1);
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, "");
 
     /* Reallocating should reuse the item, and leave an empty cache. */
-    edata_t *ed1_again = edata_cache_get(TSDN_NULL, &edc);
+    edata_t *ed1_again = edata_cache_get(TSDN_NULL, &ec);
     assert_ptr_eq(ed1, ed1_again, "");
-    assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
 
-    test_edata_cache_destroy(&edc);
+    test_edata_cache_destroy(&ec);
+}
+TEST_END
+
+TEST_BEGIN(test_edata_cache_small) {
+    edata_cache_t ec;
+    edata_cache_small_t ecs;
+
+    test_edata_cache_init(&ec);
+    edata_cache_small_init(&ecs, &ec);
+
+    bool err = edata_cache_small_prepare(TSDN_NULL, &ecs, 2);
+    assert_false(err, "");
+    assert_zu_eq(ecs.count, 2, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+
+    edata_t *ed1 = edata_cache_small_get(&ecs);
+    assert_zu_eq(ecs.count, 1, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+
+    edata_t *ed2 = edata_cache_small_get(&ecs);
+    assert_zu_eq(ecs.count, 0, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+
+    edata_cache_small_put(&ecs, ed1);
+    assert_zu_eq(ecs.count, 1, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+
+    edata_cache_small_put(&ecs, ed2);
+    assert_zu_eq(ecs.count, 2, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+
+    edata_cache_small_finish(TSDN_NULL, &ecs, 1);
+    assert_zu_eq(ecs.count, 1, "");
+    assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, "");
+
+    test_edata_cache_destroy(&ec);
 }
 TEST_END
 
 int
 main(void) {
     return test(
-        test_edata_cache);
+        test_edata_cache,
+        test_edata_cache_small);
 }
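
Note (not part of the patch): a minimal usage sketch of the new API, assuming
the jemalloc-internal types above; the caller, its name (ecs_usage_example),
and the NEEDED count are hypothetical. It shows the intended lifecycle:
"prepare" makes one locked trip to the fallback edata_cache_t to stock at
least NEEDED edata_t objects, the subsequent get/put calls then run without
taking the fallback's mutex and without failure paths (external
synchronization is the caller's responsibility), and "finish" flushes any
excess back to the fallback.

    /* Hypothetical caller, for illustration only. */
    static bool
    ecs_usage_example(tsdn_t *tsdn, edata_cache_small_t *ecs) {
        enum { NEEDED = 4 };
        /* One synchronized trip to the fallback edata_cache_t. */
        if (edata_cache_small_prepare(tsdn, ecs, NEEDED)) {
            return true;    /* The fallback failed to allocate. */
        }
        /* These cannot fail: prepare guaranteed at least NEEDED items. */
        edata_t *ed1 = edata_cache_small_get(ecs);
        edata_t *ed2 = edata_cache_small_get(ecs);
        /* ... use ed1 and ed2, then release them ... */
        edata_cache_small_put(ecs, ed1);
        edata_cache_small_put(ecs, ed2);
        /* Keep at most one item cached; the excess returns to the fallback. */
        edata_cache_small_finish(tsdn, ecs, 1);
        return false;
    }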