From 7e11b389aac64ed59a286b91887bcf68c2a597c4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 11 Sep 2010 22:47:39 -0700 Subject: [PATCH] Move size class table to man page. Move the table of size classes from jemalloc.c to the manual page. When manually formatting the manual page, it is now necessary to use: nroff -man -t jemalloc.3 --- jemalloc/INSTALL | 7 ++++ jemalloc/doc/jemalloc.3.in | 62 +++++++++++++++++++++++++++- jemalloc/src/jemalloc.c | 82 -------------------------------------- 3 files changed, 67 insertions(+), 84 deletions(-) diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL index e9a87989..a8957436 100644 --- a/jemalloc/INSTALL +++ b/jemalloc/INSTALL @@ -217,3 +217,10 @@ directory, issue configuration and build commands: cd obj ../configure --enable-autogen make + +=== Documentation ============================================================== + +The manual page that the configure script generates can be manually formatted +prior to installation via the following command: + + nroff -man -t doc/jemalloc.3 diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in index dfc4d763..02340e62 100644 --- a/jemalloc/doc/jemalloc.3.in +++ b/jemalloc/doc/jemalloc.3.in @@ -564,8 +564,8 @@ However, it may make sense to reduce the number of arenas if an application does not make much use of the allocation functions. .Pp @roff_tcache@In addition to multiple arenas, this allocator supports -@roff_tcache@thread-specific caching for small objects, in order to make it -@roff_tcache@possible to completely avoid synchronization for most small +@roff_tcache@thread-specific caching for small and large objects, in order to +@roff_tcache@make it possible to completely avoid synchronization for most small @roff_tcache@allocation requests. 
@roff_tcache@Such caching allows very fast allocation in the common case, but it @roff_tcache@increases memory usage and fragmentation, since a bounded number of @@ -619,6 +619,64 @@ option), are rounded up to the nearest run size. Allocation requests that are too large to fit in an arena-managed chunk are rounded up to the nearest multiple of the chunk size. .Pp +Assuming 4 MiB chunks, 4 KiB pages, and a 16 byte quantum on a 64-bit system, +the size classes in each category are as follows: +.TS +expand allbox tab(;); +LLR +LLR +^LR +^^R +^^R +^^R +^^R +^LR +^^R +^^R +^^R +^^R +^LR +^^R +^^R +^^R +^^R +LsR +^^R +^^R +^^R +^^R +LsR +^^R +^^R +^^R. +Category;Subcategory;Size +Small;Tiny;8 +;Quantum-spaced;16 +;;32 +;;48 +;;... +;;128 +;Cacheline-spaced;192 +;;256 +;;320 +;;... +;;512 +;Sub-page;768 +;;1024 +;;1280 +;;... +;;3840 +Large;4 KiB +;;8 KiB +;;12 KiB +;;... +;;4084 KiB +Huge;4 MiB +;;8 MiB +;;12 MiB +;;... +.TE +.Pp Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not suffer from cacheline sharing, diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index ebce3ca0..bf4ccc05 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -1,85 +1,3 @@ -/*- - * This allocator implementation is designed to provide scalable performance - * for multi-threaded programs on multi-processor systems. The following - * features are included for this purpose: - * - * + Multiple arenas are used if there are multiple CPUs, which reduces lock - * contention and cache sloshing. - * - * + Thread-specific caching is used if there are multiple threads, which - * reduces the amount of locking. - * - * + Cache line sharing between arenas is avoided for internal data - * structures. - * - * + Memory is managed in chunks and runs (chunks can be split into runs), - * rather than as individual pages. 
This provides a constant-time - * mechanism for associating allocations with particular arenas. - * - * Allocation requests are rounded up to the nearest size class, and no record - * of the original request size is maintained. Allocations are broken into - * categories according to size class. Assuming 1 MiB chunks, 4 KiB pages and - * a 16 byte quantum on a 32-bit system, the size classes in each category are - * as follows: - * - * |========================================| - * | Category | Subcategory | Size | - * |========================================| - * | Small | Tiny | 2 | - * | | | 4 | - * | | | 8 | - * | |------------------+----------| - * | | Quantum-spaced | 16 | - * | | | 32 | - * | | | 48 | - * | | | ... | - * | | | 96 | - * | | | 112 | - * | | | 128 | - * | |------------------+----------| - * | | Cacheline-spaced | 192 | - * | | | 256 | - * | | | 320 | - * | | | 384 | - * | | | 448 | - * | | | 512 | - * | |------------------+----------| - * | | Sub-page | 760 | - * | | | 1024 | - * | | | 1280 | - * | | | ... | - * | | | 3328 | - * | | | 3584 | - * | | | 3840 | - * |========================================| - * | Large | 4 KiB | - * | | 8 KiB | - * | | 12 KiB | - * | | ... | - * | | 1012 KiB | - * | | 1016 KiB | - * | | 1020 KiB | - * |========================================| - * | Huge | 1 MiB | - * | | 2 MiB | - * | | 3 MiB | - * | | ... | - * |========================================| - * - * Different mechanisms are used accoding to category: - * - * Small: Each size class is segregated into its own set of runs. Each run - * maintains a bitmap of which regions are free/allocated. - * - * Large : Each allocation is backed by a dedicated run. Metadata are stored - * in the associated arena chunk header maps. - * - * Huge : Each allocation is backed by a dedicated contiguous set of chunks. - * Metadata are stored in a separate red-black tree. 
- * - ******************************************************************************* - */ - #define JEMALLOC_C_ #include "jemalloc/internal/jemalloc_internal.h"