From 9c3a10fdf6baa5ddb042b6adbef1ff1b3c613ce3 Mon Sep 17 00:00:00 2001 From: Richard Diamond Date: Wed, 28 May 2014 21:37:02 -0500 Subject: [PATCH] Try to use __builtin_ffsl if ffsl is unavailable. Some platforms (like those using Newlib) don't have ffs/ffsl. This commit adds a check to configure.ac for __builtin_ffsl if ffsl isn't found. __builtin_ffsl performs the same function as ffsl, and has the added benefit of being available on any platform utilizing a GCC-compatible compiler. This change does not address the use of ffs in the MALLOCX_ARENA() macro. --- configure.ac | 30 +++++++++++++++---- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/bitmap.h | 4 +-- .../jemalloc/internal/jemalloc_internal.h.in | 3 ++ .../internal/jemalloc_internal_decls.h | 10 ++++--- .../internal/jemalloc_internal_defs.h.in | 7 +++++ include/jemalloc/internal/util.h | 26 ++++++++++++++-- src/arena.c | 2 +- src/rtree.c | 4 +-- 9 files changed, 71 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index 4944c44a..3d36b5f8 100644 --- a/configure.ac +++ b/configure.ac @@ -1109,9 +1109,11 @@ elif test "x${force_tls}" = "x1" ; then fi dnl ============================================================================ -dnl Check for ffsl(3), and fail if not found. This function exists on all -dnl platforms that jemalloc currently has a chance of functioning on without -dnl modification. +dnl Check for ffsl(3), then __builtin_ffsl(), and fail if neither are found. +dnl One of those two functions should (theoretically) exist on all platforms +dnl that jemalloc currently has a chance of functioning on without modification. +dnl We additionally assume ffs() or __builtin_ffs() are defined if +dnl ffsl() or __builtin_ffsl() are defined, respectively.
JE_COMPILABLE([a program using ffsl], [ #include #include @@ -1122,8 +1124,26 @@ JE_COMPILABLE([a program using ffsl], [ printf("%d\n", rv); } ], [je_cv_function_ffsl]) -if test "x${je_cv_function_ffsl}" != "xyes" ; then - AC_MSG_ERROR([Cannot build without ffsl(3)]) +if test "x${je_cv_function_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) +else + JE_COMPILABLE([a program using __builtin_ffsl], [ + #include + #include + #include + ], [ + { + int rv = __builtin_ffsl(0x08); + printf("%d\n", rv); + } + ], [je_cv_gcc_builtin_ffsl]) + if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) + else + AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()]) + fi fi dnl ============================================================================ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2dc9501d..cb73283b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -970,7 +970,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) /* Rescale (factor powers of 2 out of the numerator and denominator).
*/ interval = bin_info->reg_interval; - shift = ffs(interval) - 1; + shift = jemalloc_ffs(interval) - 1; diff >>= shift; interval >>= shift; diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 605ebac5..6db4ab70 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -130,11 +130,11 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = ffsl(g) - 1; + bit = jemalloc_ffsl(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); } bitmap_set(bitmap, binfo, bit); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 491345c9..f2cd743f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -215,6 +215,9 @@ static const bool config_ivsalloc = # ifdef __tile__ # define LG_QUANTUM 4 # endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif # ifndef LG_QUANTUM # error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" # endif diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 7775ab38..fa590404 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -15,11 +15,13 @@ #else # include # include -# include -# if !defined(SYS_write) && defined(__NR_write) -# define SYS_write __NR_write +# if !defined(__pnacl__) && !defined(__native_client__) +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include # endif -# include # include # include #endif diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in 
b/include/jemalloc/internal/jemalloc_internal_defs.h.in index a9a50f14..65ac76c0 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -152,6 +152,13 @@ /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS +/* + * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; + * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. + */ +#undef JEMALLOC_INTERNAL_FFSL +#undef JEMALLOC_INTERNAL_FFS + /* * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside * within jemalloc-owned chunks before dereferencing them. diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 54aed8ec..d2b7a967 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -109,6 +109,8 @@ void malloc_printf(const char *format, ...) #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +int jemalloc_ffsl(long bitmap); +int jemalloc_ffs(int bitmap); size_t pow2_ceil(size_t x); size_t lg_floor(size_t x); void set_errno(int errnum); @@ -116,6 +118,26 @@ int get_errno(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) + +/* Sanity check: */ +#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) +# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure +#endif + +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffsl(long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffs(int bitmap) +{ + + return (JEMALLOC_INTERNAL_FFS(bitmap)); +} + /* Compute the smallest power of 2 that is >= x. 
*/ JEMALLOC_INLINE size_t pow2_ceil(size_t x) @@ -174,12 +196,12 @@ lg_floor(size_t x) if (x == KZU(0xffffffffffffffff)) return (63); x++; - return (ffsl(x) - 2); + return (jemalloc_ffsl(x) - 2); #elif (LG_SIZEOF_PTR == 2) if (x == KZU(0xffffffff)) return (31); x++; - return (ffs(x) - 2); + return (jemalloc_ffs(x) - 2); #else # error "Unsupported type sizes for lg_floor()" #endif diff --git a/src/arena.c b/src/arena.c index c392419e..d3fe0fba 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2483,7 +2483,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) * be twice as large in order to maintain alignment. */ if (config_fill && opt_redzone) { - size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1); + size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; diff --git a/src/rtree.c b/src/rtree.c index 205957ac..87b0b154 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -9,8 +9,8 @@ rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); - bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; - bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; + bits_per_level = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; + bits_in_leaf = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; if (bits > bits_in_leaf) { height = 1 + (bits - bits_in_leaf) / bits_per_level; if ((height-1) * bits_per_level + bits_in_leaf != bits)