diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL
index e9a87989..a8957436 100644
--- a/jemalloc/INSTALL
+++ b/jemalloc/INSTALL
@@ -217,3 +217,10 @@ directory, issue configuration and build commands:
     cd obj
     ../configure --enable-autogen
     make
+
+=== Documentation ==============================================================
+
+The manual page that the configure script generates can be manually formatted
+prior to installation via the following command:
+
+    nroff -man -t doc/jemalloc.3
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index dfc4d763..02340e62 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -564,8 +564,8 @@ However, it may make sense to reduce the number of arenas if an application
 does not make much use of the allocation functions.
 .Pp
 @roff_tcache@In addition to multiple arenas, this allocator supports
-@roff_tcache@thread-specific caching for small objects, in order to make it
-@roff_tcache@possible to completely avoid synchronization for most small
+@roff_tcache@thread-specific caching for small and large objects, in order to
+@roff_tcache@make it possible to completely avoid synchronization for most small
 @roff_tcache@allocation requests.
 @roff_tcache@Such caching allows very fast allocation in the common case, but it
 @roff_tcache@increases memory usage and fragmentation, since a bounded number of
@@ -619,6 +619,64 @@ option), are rounded up to the nearest run size.
 Allocation requests that are too large to fit in an arena-managed chunk are
 rounded up to the nearest multiple of the chunk size.
 .Pp
+Assuming 4 MiB chunks, 4 KiB pages, and a 16 byte quantum on a 64-bit system,
+the size classes in each category are as follows:
+.TS
+expand allbox tab(;);
+LLR
+LLR
+^LR
+^^R
+^^R
+^^R
+^^R
+^LR
+^^R
+^^R
+^^R
+^^R
+^LR
+^^R
+^^R
+^^R
+^^R
+LsR
+^^R
+^^R
+^^R
+^^R
+LsR
+^^R
+^^R
+^^R.
+Category;Subcategory;Size
+Small;Tiny;8
+;Quantum-spaced;16
+;;32
+;;48
+;;...
+;;128
+;Cacheline-spaced;192
+;;256
+;;320
+;;...
+;;512
+;Sub-page;768
+;;1024
+;;1280
+;;...
+;;3840
+Large;4 KiB
+;;8 KiB
+;;12 KiB
+;;...
+;;4084 KiB
+Huge;4 MiB
+;;8 MiB
+;;12 MiB
+;;...
+.TE
+.Pp
 Allocations are packed tightly together, which can be an issue for
 multi-threaded applications.
 If you need to assure that allocations do not suffer from cacheline sharing,
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index ebce3ca0..bf4ccc05 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -1,85 +1,3 @@
-/*-
- * This allocator implementation is designed to provide scalable performance
- * for multi-threaded programs on multi-processor systems.  The following
- * features are included for this purpose:
- *
- *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
- *     contention and cache sloshing.
- *
- *   + Thread-specific caching is used if there are multiple threads, which
- *     reduces the amount of locking.
- *
- *   + Cache line sharing between arenas is avoided for internal data
- *     structures.
- *
- *   + Memory is managed in chunks and runs (chunks can be split into runs),
- *     rather than as individual pages.  This provides a constant-time
- *     mechanism for associating allocations with particular arenas.
- *
- * Allocation requests are rounded up to the nearest size class, and no record
- * of the original request size is maintained.  Allocations are broken into
- * categories according to size class.  Assuming 1 MiB chunks, 4 KiB pages and
- * a 16 byte quantum on a 32-bit system, the size classes in each category are
- * as follows:
- *
- *   |========================================|
- *   | Category | Subcategory      |     Size |
- *   |========================================|
- *   | Small    | Tiny             |        2 |
- *   |          |                  |        4 |
- *   |          |                  |        8 |
- *   |          |------------------+----------|
- *   |          | Quantum-spaced   |       16 |
- *   |          |                  |       32 |
- *   |          |                  |       48 |
- *   |          |                  |      ... |
- *   |          |                  |       96 |
- *   |          |                  |      112 |
- *   |          |                  |      128 |
- *   |          |------------------+----------|
- *   |          | Cacheline-spaced |      192 |
- *   |          |                  |      256 |
- *   |          |                  |      320 |
- *   |          |                  |      384 |
- *   |          |                  |      448 |
- *   |          |                  |      512 |
- *   |          |------------------+----------|
- *   |          | Sub-page         |      760 |
- *   |          |                  |     1024 |
- *   |          |                  |     1280 |
- *   |          |                  |      ... |
- *   |          |                  |     3328 |
- *   |          |                  |     3584 |
- *   |          |                  |     3840 |
- *   |========================================|
- *   | Large                       |    4 KiB |
- *   |                             |    8 KiB |
- *   |                             |   12 KiB |
- *   |                             |      ... |
- *   |                             | 1012 KiB |
- *   |                             | 1016 KiB |
- *   |                             | 1020 KiB |
- *   |========================================|
- *   | Huge                        |    1 MiB |
- *   |                             |    2 MiB |
- *   |                             |    3 MiB |
- *   |                             |      ... |
- *   |========================================|
- *
- * Different mechanisms are used accoding to category:
- *
- *   Small: Each size class is segregated into its own set of runs.  Each run
- *          maintains a bitmap of which regions are free/allocated.
- *
- *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
- *           in the associated arena chunk header maps.
- *
- *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
- *          Metadata are stored in a separate red-black tree.
- *
- *******************************************************************************
- */
-
 #define	JEMALLOC_C_
 #include "jemalloc/internal/jemalloc_internal.h"