Support C++17 over-aligned allocation

Summary:
Add support for C++17 over-aligned allocation:
http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0035r4.html

Supporting all 10 operators means we avoid thunking thru libstdc++-v3/libsupc++ and just call jemalloc directly.

It's also worth noting that there is now an aligned *and sized* operator delete:
```
void operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept;
```

If JeMalloc did not provide this, the default implementation would ignore the size parameter entirely:
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/libsupc%2B%2B/del_opsa.cc#L30-L33

(I must also update ax_cxx_compile_stdcxx.m4 to a newer version with C++17 support.)

Test Plan:
Wrote a simple test that allocates and then deletes an over-aligned type:
```
struct alignas(32) Foo {};
Foo *f;

int main()
{
  f = new Foo;
  delete f;
}
```

Before this change, both new and delete go thru PLT, and we end up calling regular old free:
```
(gdb) disassemble
Dump of assembler code for function main():
...
   0x00000000004029b7 <+55>:    call   0x4022d0 <_ZnwmSt11align_val_t@plt>
...
   0x00000000004029d5 <+85>:    call   0x4022e0 <_ZdlPvmSt11align_val_t@plt>
...
(gdb) s
free (ptr=0x7ffff6408020) at /home/engshare/third-party2/jemalloc/master/src/jemalloc.git-trunk/src/jemalloc.c:2842
2842            if (!free_fastpath(ptr, 0, false)) {
```

After this change, we directly call new/delete and ultimately call sdallocx:
```
(gdb) disassemble
Dump of assembler code for function main():
...
   0x0000000000402b77 <+55>:    call   0x496ca0 <operator new(unsigned long, std::align_val_t)>
...
   0x0000000000402b95 <+85>:    call   0x496e60 <operator delete(void*, unsigned long, std::align_val_t)>
...
(gdb) s
116             je_sdallocx_noflags(ptr, size);
```
This commit is contained in:
Mark Santaniello 2019-10-26 23:28:42 -07:00 committed by Qi Wang
parent 9a3c738009
commit 8b2c2a596d
4 changed files with 536 additions and 42 deletions

View File

@ -290,8 +290,11 @@ if test "x$enable_cxx" = "x1" ; then
dnl Require at least c++14, which is the first version to support sized dnl Require at least c++14, which is the first version to support sized
dnl deallocation. C++ support is not compiled otherwise. dnl deallocation. C++ support is not compiled otherwise.
m4_include([m4/ax_cxx_compile_stdcxx.m4]) m4_include([m4/ax_cxx_compile_stdcxx.m4])
AX_CXX_COMPILE_STDCXX([17], [noext], [optional])
if test "x${HAVE_CXX17}" != "x1"; then
AX_CXX_COMPILE_STDCXX([14], [noext], [optional]) AX_CXX_COMPILE_STDCXX([14], [noext], [optional])
if test "x${HAVE_CXX14}" = "x1" ; then fi
if test "x${HAVE_CXX14}" = "x1" -o "x${HAVE_CXX17}" = "x1"; then
JE_CXXFLAGS_ADD([-Wall]) JE_CXXFLAGS_ADD([-Wall])
JE_CXXFLAGS_ADD([-Wextra]) JE_CXXFLAGS_ADD([-Wextra])
JE_CXXFLAGS_ADD([-g3]) JE_CXXFLAGS_ADD([-g3])

View File

@ -5,6 +5,7 @@
#ifdef _WIN32 #ifdef _WIN32
# include <windows.h> # include <windows.h>
# include "msvc_compat/windows_extra.h" # include "msvc_compat/windows_extra.h"
# include "msvc_compat/strings.h"
# ifdef _WIN64 # ifdef _WIN64
# if LG_VADDR <= 32 # if LG_VADDR <= 32
# error Generate the headers using x64 vcargs # error Generate the headers using x64 vcargs

View File

@ -1,5 +1,5 @@
# =========================================================================== # ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html # https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html
# =========================================================================== # ===========================================================================
# #
# SYNOPSIS # SYNOPSIS
@ -33,21 +33,23 @@
# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov <sokolov@google.com> # Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov <sokolov@google.com>
# Copyright (c) 2015 Paul Norman <penorman@mac.com> # Copyright (c) 2015 Paul Norman <penorman@mac.com>
# Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu> # Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu>
# Copyright (c) 2016, 2018 Krzesimir Nowak <qdlacz@gmail.com>
# Copyright (c) 2019 Enji Cooper <yaneurabeya@gmail.com>
# #
# Copying and distribution of this file, with or without modification, are # Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice # permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any # and this notice are preserved. This file is offered as-is, without any
# warranty. # warranty.
#serial 4 #serial 11
dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro
dnl (serial version number 13). dnl (serial version number 13).
AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
m4_if([$1], [11], [], m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"],
[$1], [14], [], [$1], [14], [ax_cxx_compile_alternatives="14 1y"],
[$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])], [$1], [17], [ax_cxx_compile_alternatives="17 1z"],
[m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl
m4_if([$2], [], [], m4_if([$2], [], [],
[$2], [ext], [], [$2], [ext], [],
@ -59,18 +61,11 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
[m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])])
AC_LANG_PUSH([C++])dnl AC_LANG_PUSH([C++])dnl
ac_success=no ac_success=no
AC_CACHE_CHECK(whether $CXX supports C++$1 features by default,
ax_cv_cxx_compile_cxx$1,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
[ax_cv_cxx_compile_cxx$1=yes],
[ax_cv_cxx_compile_cxx$1=no])])
if test x$ax_cv_cxx_compile_cxx$1 = xyes; then
ac_success=yes
fi
m4_if([$2], [noext], [], [dnl m4_if([$2], [noext], [], [dnl
if test x$ac_success = xno; then if test x$ac_success = xno; then
for switch in -std=gnu++$1 -std=gnu++0x; do for alternative in ${ax_cxx_compile_alternatives}; do
switch="-std=gnu++${alternative}"
cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
$cachevar, $cachevar,
@ -96,7 +91,8 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
dnl HP's aCC needs +std=c++11 according to: dnl HP's aCC needs +std=c++11 according to:
dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf
dnl Cray's crayCC needs "-h std=c++11" dnl Cray's crayCC needs "-h std=c++11"
for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do for alternative in ${ax_cxx_compile_alternatives}; do
for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do
cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
$cachevar, $cachevar,
@ -115,6 +111,10 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
break break
fi fi
done done
if test x$ac_success = xyes; then
break
fi
done
fi]) fi])
AC_LANG_POP([C++]) AC_LANG_POP([C++])
if test x$ax_cxx_compile_cxx$1_required = xtrue; then if test x$ax_cxx_compile_cxx$1_required = xtrue; then
@ -148,6 +148,11 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14],
_AX_CXX_COMPILE_STDCXX_testbody_new_in_14 _AX_CXX_COMPILE_STDCXX_testbody_new_in_14
) )
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17],
_AX_CXX_COMPILE_STDCXX_testbody_new_in_11
_AX_CXX_COMPILE_STDCXX_testbody_new_in_14
_AX_CXX_COMPILE_STDCXX_testbody_new_in_17
)
dnl Tests for new features in C++11 dnl Tests for new features in C++11
@ -185,11 +190,13 @@ namespace cxx11
struct Base struct Base
{ {
virtual ~Base() {}
virtual void f() {} virtual void f() {}
}; };
struct Derived : public Base struct Derived : public Base
{ {
virtual ~Derived() override {}
virtual void f() override {} virtual void f() override {}
}; };
@ -518,7 +525,7 @@ namespace cxx14
} }
namespace test_digit_seperators namespace test_digit_separators
{ {
constexpr auto ten_million = 100'000'000; constexpr auto ten_million = 100'000'000;
@ -560,3 +567,385 @@ namespace cxx14
#endif // __cplusplus >= 201402L #endif // __cplusplus >= 201402L
]]) ]])
dnl Tests for new features in C++17
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[
// If the compiler admits that it is not ready for C++17, why torture it?
// Hopefully, this will speed up the test.
#ifndef __cplusplus
#error "This is not a C++ compiler"
#elif __cplusplus < 201703L
#error "This is not a C++17 compiler"
#else
#include <initializer_list>
#include <utility>
#include <type_traits>
namespace cxx17
{
namespace test_constexpr_lambdas
{
constexpr int foo = [](){return 42;}();
}
namespace test::nested_namespace::definitions
{
}
namespace test_fold_expression
{
template<typename... Args>
int multiply(Args... args)
{
return (args * ... * 1);
}
template<typename... Args>
bool all(Args... args)
{
return (args && ...);
}
}
namespace test_extended_static_assert
{
static_assert (true);
}
namespace test_auto_brace_init_list
{
auto foo = {5};
auto bar {5};
static_assert(std::is_same<std::initializer_list<int>, decltype(foo)>::value);
static_assert(std::is_same<int, decltype(bar)>::value);
}
namespace test_typename_in_template_template_parameter
{
template<template<typename> typename X> struct D;
}
namespace test_fallthrough_nodiscard_maybe_unused_attributes
{
int f1()
{
return 42;
}
[[nodiscard]] int f2()
{
[[maybe_unused]] auto unused = f1();
switch (f1())
{
case 17:
f1();
[[fallthrough]];
case 42:
f1();
}
return f1();
}
}
namespace test_extended_aggregate_initialization
{
struct base1
{
int b1, b2 = 42;
};
struct base2
{
base2() {
b3 = 42;
}
int b3;
};
struct derived : base1, base2
{
int d;
};
derived d1 {{1, 2}, {}, 4}; // full initialization
derived d2 {{}, {}, 4}; // value-initialized bases
}
namespace test_general_range_based_for_loop
{
struct iter
{
int i;
int& operator* ()
{
return i;
}
const int& operator* () const
{
return i;
}
iter& operator++()
{
++i;
return *this;
}
};
struct sentinel
{
int i;
};
bool operator== (const iter& i, const sentinel& s)
{
return i.i == s.i;
}
bool operator!= (const iter& i, const sentinel& s)
{
return !(i == s);
}
struct range
{
iter begin() const
{
return {0};
}
sentinel end() const
{
return {5};
}
};
void f()
{
range r {};
for (auto i : r)
{
[[maybe_unused]] auto v = i;
}
}
}
namespace test_lambda_capture_asterisk_this_by_value
{
struct t
{
int i;
int foo()
{
return [*this]()
{
return i;
}();
}
};
}
namespace test_enum_class_construction
{
enum class byte : unsigned char
{};
byte foo {42};
}
namespace test_constexpr_if
{
template <bool cond>
int f ()
{
if constexpr(cond)
{
return 13;
}
else
{
return 42;
}
}
}
namespace test_selection_statement_with_initializer
{
int f()
{
return 13;
}
int f2()
{
if (auto i = f(); i > 0)
{
return 3;
}
switch (auto i = f(); i + 4)
{
case 17:
return 2;
default:
return 1;
}
}
}
namespace test_template_argument_deduction_for_class_templates
{
template <typename T1, typename T2>
struct pair
{
pair (T1 p1, T2 p2)
: m1 {p1},
m2 {p2}
{}
T1 m1;
T2 m2;
};
void f()
{
[[maybe_unused]] auto p = pair{13, 42u};
}
}
namespace test_non_type_auto_template_parameters
{
template <auto n>
struct B
{};
B<5> b1;
B<'a'> b2;
}
namespace test_structured_bindings
{
int arr[2] = { 1, 2 };
std::pair<int, int> pr = { 1, 2 };
auto f1() -> int(&)[2]
{
return arr;
}
auto f2() -> std::pair<int, int>&
{
return pr;
}
struct S
{
int x1 : 2;
volatile double y1;
};
S f3()
{
return {};
}
auto [ x1, y1 ] = f1();
auto& [ xr1, yr1 ] = f1();
auto [ x2, y2 ] = f2();
auto& [ xr2, yr2 ] = f2();
const auto [ x3, y3 ] = f3();
}
namespace test_exception_spec_type_system
{
struct Good {};
struct Bad {};
void g1() noexcept;
void g2();
template<typename T>
Bad
f(T*, T*);
template<typename T1, typename T2>
Good
f(T1*, T2*);
static_assert (std::is_same_v<Good, decltype(f(g1, g2))>);
}
namespace test_inline_variables
{
template<class T> void f(T)
{}
template<class T> inline T g(T)
{
return T{};
}
template<> inline void f<>(int)
{}
template<> int g<>(int)
{
return 5;
}
}
} // namespace cxx17
#endif // __cplusplus < 201703L
]])

View File

@ -39,6 +39,20 @@ void operator delete(void *ptr, std::size_t size) noexcept;
void operator delete[](void *ptr, std::size_t size) noexcept; void operator delete[](void *ptr, std::size_t size) noexcept;
#endif #endif
#if __cpp_aligned_new >= 201606
/* C++17's over-aligned operators. */
void *operator new(std::size_t size, std::align_val_t);
void *operator new(std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept;
void *operator new[](std::size_t size, std::align_val_t);
void *operator new[](std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept;
void operator delete(void* ptr, std::align_val_t) noexcept;
void operator delete(void* ptr, std::align_val_t, const std::nothrow_t &) noexcept;
void operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept;
void operator delete[](void* ptr, std::align_val_t) noexcept;
void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t &) noexcept;
void operator delete[](void* ptr, std::size_t size, std::align_val_t al) noexcept;
#endif
JEMALLOC_NOINLINE JEMALLOC_NOINLINE
static void * static void *
handleOOM(std::size_t size, bool nothrow) { handleOOM(std::size_t size, bool nothrow) {
@ -76,12 +90,46 @@ JEMALLOC_ALWAYS_INLINE
void * void *
newImpl(std::size_t size) noexcept(IsNoExcept) { newImpl(std::size_t size) noexcept(IsNoExcept) {
void *ptr = je_malloc(size); void *ptr = je_malloc(size);
if (likely(ptr != nullptr)) if (likely(ptr != nullptr)) {
return ptr; return ptr;
}
return handleOOM(size, IsNoExcept); return handleOOM(size, IsNoExcept);
} }
template <bool IsNoExcept>
JEMALLOC_ALWAYS_INLINE
void *
alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) {
void *ptr = je_aligned_alloc(static_cast<std::size_t>(alignment), size);
if (likely(ptr != nullptr)) {
return ptr;
}
return handleOOM(size, IsNoExcept);
}
JEMALLOC_ALWAYS_INLINE
void
sizedDeleteImpl(void* ptr, std::size_t size) noexcept {
if (unlikely(ptr == nullptr)) {
return;
}
je_sdallocx_noflags(ptr, size);
}
JEMALLOC_ALWAYS_INLINE
void
alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept {
if (config_debug) {
assert(((size_t)alignment & ((size_t)alignment - 1)) == 0);
}
if (unlikely(ptr == nullptr)) {
return;
}
je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment));
}
void * void *
operator new(std::size_t size) { operator new(std::size_t size) {
return newImpl<false>(size); return newImpl<false>(size);
@ -102,6 +150,30 @@ operator new[](std::size_t size, const std::nothrow_t &) noexcept {
return newImpl<true>(size); return newImpl<true>(size);
} }
#if __cpp_aligned_new >= 201606
void *
operator new(std::size_t size, std::align_val_t alignment) {
return alignedNewImpl<false>(size, alignment);
}
void *
operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept {
return alignedNewImpl<true>(size, alignment);
}
void *
operator new[](std::size_t size, std::align_val_t alignment) {
return alignedNewImpl<false>(size, alignment);
}
void *
operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept {
return alignedNewImpl<true>(size, alignment);
}
#endif // __cpp_aligned_new
void void
operator delete(void *ptr) noexcept { operator delete(void *ptr) noexcept {
je_free(ptr); je_free(ptr);
@ -125,17 +197,46 @@ void operator delete[](void *ptr, const std::nothrow_t &) noexcept {
void void
operator delete(void *ptr, std::size_t size) noexcept { operator delete(void *ptr, std::size_t size) noexcept {
if (unlikely(ptr == nullptr)) { sizedDeleteImpl(ptr, size);
return;
}
je_sdallocx_noflags(ptr, size);
} }
void operator delete[](void *ptr, std::size_t size) noexcept { void
if (unlikely(ptr == nullptr)) { operator delete[](void *ptr, std::size_t size) noexcept {
return; sizedDeleteImpl(ptr, size);
}
je_sdallocx_noflags(ptr, size);
} }
#endif // __cpp_sized_deallocation #endif // __cpp_sized_deallocation
#if __cpp_aligned_new >= 201606
void
operator delete(void* ptr, std::align_val_t) noexcept {
je_free(ptr);
}
void
operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept {
je_free(ptr);
}
void
operator delete[](void* ptr, std::align_val_t) noexcept {
je_free(ptr);
}
void
operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept {
je_free(ptr);
}
void
operator delete(void* ptr, std::size_t size, std::align_val_t alignment) noexcept {
alignedSizedDeleteImpl(ptr, size, alignment);
}
void
operator delete[](void* ptr, std::size_t size, std::align_val_t alignment) noexcept {
alignedSizedDeleteImpl(ptr, size, alignment);
}
#endif // __cpp_aligned_new