Refactor chunk map.

Break the chunk map into two separate arrays, in order to improve cache
locality. This is related to issue #23.
Author:     Qinfan Wu
Date:       2014-08-29 13:34:40 -07:00
Committer:  Jason Evans
Parent:     f34f6037e8
Commit:     ff6a31d3b9

7 changed files with 186 additions and 149 deletions
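
The core of the change is an array-of-structs to struct-of-arrays split of the per-page chunk map: the hot packed "bits" word and the cold tree/list linkage now live in two parallel arrays. A minimal sketch of the idea, with illustrative type and field names rather than jemalloc's actual definitions:

/* Before: one wide map element per page.  Scanning the hot "bits" word
 * also drags the cold linkage (and prof_tctx) into the cache. */
struct map_elm_aos {
    size_t bits;              /* hot: consulted on most map lookups */
    void   *run_tree_link;    /* cold: run-tree linkage */
    void   *dirty_list_link;  /* cold: dirty-run list linkage */
};

/* After: two parallel arrays indexed by the same (pageind - map_bias),
 * so hot scans walk a densely packed array of one size_t per page. */
struct map_bits_soa { size_t bits; };
struct map_misc_soa { void *run_tree_link; void *dirty_list_link; };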


@@ -43,7 +43,8 @@
 */
 #define LG_DIRTY_MULT_DEFAULT 3
-typedef struct arena_chunk_map_s arena_chunk_map_t;
+typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
+typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
 typedef struct arena_chunk_s arena_chunk_t;
 typedef struct arena_run_s arena_run_t;
 typedef struct arena_bin_info_s arena_bin_info_t;
@@ -55,34 +56,7 @@ typedef struct arena_s arena_t;
 #ifdef JEMALLOC_H_STRUCTS
 /* Each element of the chunk map corresponds to one page within the chunk. */
-struct arena_chunk_map_s {
-#ifndef JEMALLOC_PROF
-    /*
-     * Overlay prof_tctx in order to allow it to be referenced by dead code.
-     * Such antics aren't warranted for per arena data structures, but
-     * chunk map overhead accounts for a percentage of memory, rather than
-     * being just a fixed cost.
-     */
-    union {
-#endif
-    /*
-     * Linkage for run trees.  There are two disjoint uses:
-     *
-     *   1) arena_t's runs_avail tree.
-     *   2) arena_run_t conceptually uses this linkage for in-use non-full
-     *      runs, rather than directly embedding linkage.
-     */
-    rb_node(arena_chunk_map_t) rb_link;
-    /* Profile counters, used for large object runs. */
-    prof_tctx_t *prof_tctx;
-#ifndef JEMALLOC_PROF
-    }; /* union { ... }; */
-#endif
-    /* Linkage for list of dirty runs. */
-    ql_elm(arena_chunk_map_t) dr_link;
+struct arena_chunk_map_bits_s {
     /*
      * Run address (or size) and various flags are stored together.  The bit
      * layout looks like (assuming 32-bit system):
@@ -149,9 +123,43 @@ struct arena_chunk_map_s {
 #define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
 #define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
 };
-typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
-typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
-typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t;
+/*
+ * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just
+ * like arena_chunk_map_bits_t.  Two separate arrays are stored within each
+ * chunk header in order to improve cache locality.
+ */
+struct arena_chunk_map_misc_s {
+#ifndef JEMALLOC_PROF
+    /*
+     * Overlay prof_tctx in order to allow it to be referenced by dead code.
+     * Such antics aren't warranted for per arena data structures, but
+     * chunk map overhead accounts for a percentage of memory, rather than
+     * being just a fixed cost.
+     */
+    union {
+#endif
+    /*
+     * Linkage for run trees.  There are two disjoint uses:
+     *
+     *   1) arena_t's runs_avail tree.
+     *   2) arena_run_t conceptually uses this linkage for in-use non-full
+     *      runs, rather than directly embedding linkage.
+     */
+    rb_node(arena_chunk_map_misc_t) rb_link;
+    /* Profile counters, used for large object runs. */
+    prof_tctx_t *prof_tctx;
+#ifndef JEMALLOC_PROF
+    }; /* union { ... }; */
+#endif
+    /* Linkage for list of dirty runs. */
+    ql_elm(arena_chunk_map_misc_t) dr_link;
+};
+typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t;
+typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t;
+typedef ql_head(arena_chunk_map_misc_t) arena_chunk_miscelms_t;
 /* Arena chunk header. */
 struct arena_chunk_s {
@@ -164,7 +172,7 @@ struct arena_chunk_s {
      * need to be tracked in the map.  This omission saves a header page
      * for common chunk sizes (e.g. 4 MiB).
      */
-    arena_chunk_map_t map[1]; /* Dynamically sized. */
+    arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */
 };
 struct arena_run_s {
@@ -335,7 +343,7 @@ struct arena_s {
     arena_avail_tree_t runs_avail;
     /* List of dirty runs this arena manages. */
-    arena_chunk_mapelms_t runs_dirty;
+    arena_chunk_miscelms_t runs_dirty;
     /*
      * user-configurable chunk allocation and deallocation functions.
@@ -393,9 +401,9 @@ void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
 void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero);
 void arena_prof_promoted(const void *ptr, size_t size);
 void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    arena_chunk_map_t *mapelm);
+    arena_chunk_map_bits_t *bitselm);
 void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t pageind, arena_chunk_map_t *mapelm);
+    size_t pageind, arena_chunk_map_bits_t *bitselm);
 void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t pageind);
#ifdef JEMALLOC_JET
@@ -439,7 +447,10 @@ size_t small_bin2size(size_t binind);
 size_t small_s2u_compute(size_t size);
 size_t small_s2u_lookup(size_t size);
 size_t small_s2u(size_t size);
-arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
+arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk,
+    size_t pageind);
+arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk,
+    size_t pageind);
 size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
 size_t arena_mapbitsp_read(size_t *mapbitsp);
 size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
@@ -623,21 +634,32 @@ small_s2u(size_t size)
 # endif /* JEMALLOC_ARENA_INLINE_A */
 # ifdef JEMALLOC_ARENA_INLINE_B
-JEMALLOC_ALWAYS_INLINE arena_chunk_map_t *
-arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t *
+arena_bitselm_get(arena_chunk_t *chunk, size_t pageind)
 {
     assert(pageind >= map_bias);
     assert(pageind < chunk_npages);
-    return (&chunk->map[pageind-map_bias]);
+    return (&chunk->map_bits[pageind-map_bias]);
 }
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
+arena_miscelm_get(arena_chunk_t *chunk, size_t pageind)
+{
+    assert(pageind >= map_bias);
+    assert(pageind < chunk_npages);
+    return ((arena_chunk_map_misc_t *)((uintptr_t)chunk +
+        (uintptr_t)map_misc_offset) + pageind-map_bias);
+}
 JEMALLOC_ALWAYS_INLINE size_t *
 arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
 {
-    return (&arena_mapp_get(chunk, pageind)->bits);
+    return (&arena_bitselm_get(chunk, pageind)->bits);
 }
 JEMALLOC_ALWAYS_INLINE size_t
@@ -1005,7 +1027,7 @@ arena_prof_tctx_get(const void *ptr)
     if ((mapbits & CHUNK_MAP_LARGE) == 0)
         ret = (prof_tctx_t *)(uintptr_t)1U;
     else
-        ret = arena_mapp_get(chunk, pageind)->prof_tctx;
+        ret = arena_miscelm_get(chunk, pageind)->prof_tctx;
     return (ret);
 }
@@ -1025,7 +1047,7 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
     assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
     if (arena_mapbits_large_get(chunk, pageind) != 0)
-        arena_mapp_get(chunk, pageind)->prof_tctx = tctx;
+        arena_miscelm_get(chunk, pageind)->prof_tctx = tctx;
 }
 JEMALLOC_ALWAYS_INLINE void *


@@ -41,6 +41,7 @@ extern size_t chunksize;
 extern size_t chunksize_mask; /* (chunksize - 1). */
 extern size_t chunk_npages;
 extern size_t map_bias; /* Number of arena chunk header pages. */
+extern size_t map_misc_offset;
 extern size_t arena_maxclass; /* Max size class for arenas. */
 void *chunk_alloc_base(size_t size);
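
The src/arena.c half of the diff is not shown in this excerpt. As a rough sketch of what the new map_misc_offset denotes (identifier names come from the headers above; the exact boot-time computation here is an assumption), the misc array starts right after the densely packed bits array that forms the flexible tail of arena_chunk_t:

/* Sketch only: the real boot code also derives map_bias iteratively and
 * rounds the chunk header up to a page boundary. */
map_misc_offset = offsetof(arena_chunk_t, map_bits) +
    (chunk_npages - map_bias) * sizeof(arena_chunk_map_bits_t);

arena_miscelm_get() in the first file then reaches the element for a page by adding this offset to the chunk base, rather than indexing a single wide map array.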


@@ -4,6 +4,7 @@ a0malloc
 arena_alloc_junk_small
 arena_bin_index
 arena_bin_info
+arena_bitselm_get
 arena_boot
 arena_chunk_alloc_huge
 arena_chunk_dalloc_huge
@@ -38,8 +39,8 @@ arena_mapbits_unzeroed_set
 arena_mapbitsp_get
 arena_mapbitsp_read
 arena_mapbitsp_write
-arena_mapp_get
 arena_maxclass
+arena_miscelm_get
 arena_new
 arena_palloc
 arena_postfork_child
@@ -254,6 +255,7 @@ malloc_vcprintf
 malloc_vsnprintf
 malloc_write
 map_bias
+map_misc_offset
 mb_write
 mutex_boot
 narenas_auto


@@ -240,7 +240,7 @@ cat <<EOF
  * The small_size2bin lookup table uses uint8_t to encode each bin index, so we
  * cannot support more than 256 small size classes.  Further constrain NBINS to
  * 255 since all small size classes, plus a "not small" size class must be
- * stored in 8 bits of arena_chunk_map_t's bits field.
+ * stored in 8 bits of arena_chunk_map_bits_t's bits field.
  */
 #if (NBINS > 255)
 # error "Too many small size classes"
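
For illustration, the 8-bit constraint amounts to packing a bin index, with one value reserved as the "not small" marker, into the per-page bits word next to the run flags. A generic sketch with a made-up shift and mask, not jemalloc's actual CHUNK_MAP_* layout:

#include <assert.h>
#include <stddef.h>

/* Hypothetical layout: 8 bits of bin index at an arbitrary shift. */
#define EX_BININD_SHIFT   4
#define EX_BININD_MASK    ((size_t)0xffU << EX_BININD_SHIFT)
#define EX_BININD_INVALID ((size_t)0xffU)   /* the "not small" marker */

size_t
ex_pack_binind(size_t bits, size_t binind)
{
    assert(binind <= EX_BININD_INVALID);
    return ((bits & ~EX_BININD_MASK) | (binind << EX_BININD_SHIFT));
}

size_t
ex_unpack_binind(size_t bits)
{
    return ((bits & EX_BININD_MASK) >> EX_BININD_SHIFT);
}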