From 83c6e0c63acdcab2c58c4ed7220bfa58879b1d57 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 11 Jan 2017 16:39:35 -0800 Subject: Switch open-source to use jemalloc for CPU Tensor memory allocation, gRPC, and other places where we call malloc/free. - Only enabled on Linux for now. - Added as a ./configure option defaulting to enabled. Change: 144266237 --- configure | 18 ++ tensorflow/c/c_api.cc | 15 +- tensorflow/compiler/jit/xla_device_context.cc | 5 +- .../compiler/tf2xla/xla_compilation_device.cc | 5 +- tensorflow/core/BUILD | 11 +- tensorflow/core/common_runtime/gpu/gpu_tracer.cc | 4 +- .../core/common_runtime/gpu/pool_allocator.h | 4 +- tensorflow/core/distributed_runtime/rpc/BUILD | 1 + .../distributed_runtime/rpc/grpc_server_lib.cc | 7 + tensorflow/core/framework/allocator.cc | 4 +- tensorflow/core/framework/load_library.cc | 3 +- .../core/framework/tracking_allocator_test.cc | 5 +- tensorflow/core/kernels/conv_ops.h | 5 +- tensorflow/core/lib/core/arena.cc | 30 +- tensorflow/core/lib/gtl/inlined_vector.h | 5 +- tensorflow/core/lib/gtl/manual_constructor.h | 6 +- .../core/platform/cloud/http_request_test.cc | 6 +- tensorflow/core/platform/default/build_config.bzl | 25 +- tensorflow/core/platform/mem.h | 11 +- tensorflow/core/platform/port_test.cc | 6 +- tensorflow/core/platform/posix/port.cc | 45 ++- tensorflow/core/platform/windows/port.cc | 10 +- tensorflow/tools/pip_package/BUILD | 1 + tensorflow/workspace.bzl | 11 + third_party/jemalloc.BUILD | 321 +++++++++++++++++++++ 25 files changed, 505 insertions(+), 59 deletions(-) create mode 100644 third_party/jemalloc.BUILD diff --git a/configure b/configure index 64add33bd5..1e4d786974 100755 --- a/configure +++ b/configure @@ -57,9 +57,27 @@ done if is_windows; then TF_NEED_GCP=0 TF_NEED_HDFS=0 + TF_NEED_JEMALLOC=0 TF_NEED_OPENCL=0 fi +while [ "$TF_NEED_JEMALLOC" == "" ]; do + read -p "Do you wish to use jemalloc as the malloc implementation? "\ +"(Linux only) [Y/n] " INPUT + case $INPUT in + [Yy]* ) echo "jemalloc enabled on Linux"; TF_NEED_JEMALLOC=1;; + [Nn]* ) echo "jemalloc disabled on Linux"; TF_NEED_JEMALLOC=0;; + "" ) echo "jemalloc enabled on Linux"; TF_NEED_JEMALLOC=1;; + * ) echo "Invalid selection: " $INPUT;; + esac +done + +if [ "$TF_NEED_JEMALLOC" == "1" ]; then + sed -i -e "s/WITH_JEMALLOC = False/WITH_JEMALLOC = True/" tensorflow/core/platform/default/build_config.bzl +else + sed -i -e "s/WITH_JEMALLOC = True/WITH_JEMALLOC = False/" tensorflow/core/platform/default/build_config.bzl +fi + while [ "$TF_NEED_GCP" == "" ]; do read -p "Do you wish to build TensorFlow with "\ "Google Cloud Platform support? [y/N] " INPUT diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 14988fbc4d..5e236a81fb 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -37,6 +37,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/thread_annotations.h" @@ -159,11 +160,13 @@ Status MessageToBuffer(const tensorflow::protobuf::Message& in, return InvalidArgument("Passing non-empty TF_Buffer is invalid."); } const auto proto_size = in.ByteSize(); - void* buf = malloc(proto_size); + void* buf = tensorflow::port::Malloc(proto_size); in.SerializeToArray(buf, proto_size); out->data = buf; out->length = proto_size; - out->data_deallocator = [](void* data, size_t length) { free(data); }; + out->data_deallocator = [](void* data, size_t length) { + tensorflow::port::Free(data); + }; return Status::OK(); } @@ -287,13 +290,15 @@ void TF_SetConfig(TF_SessionOptions* options, const void* proto, TF_Buffer* TF_NewBuffer() { return new TF_Buffer{nullptr, 0, nullptr}; } TF_Buffer* TF_NewBufferFromString(const void* proto, size_t proto_len) { - void* copy = malloc(proto_len); + void* copy = tensorflow::port::Malloc(proto_len); memcpy(copy, proto, proto_len); TF_Buffer* buf = new TF_Buffer; buf->data = copy; buf->length = proto_len; - buf->data_deallocator = [](void* data, size_t length) { free(data); }; + buf->data_deallocator = [](void* data, size_t length) { + tensorflow::port::Free(data); + }; return buf; } @@ -694,7 +699,7 @@ TF_Library* TF_LoadLibrary(const char* library_filename, TF_Status* status) { TF_Buffer TF_GetOpList(TF_Library* lib_handle) { return lib_handle->op_list; } void TF_DeleteLibraryHandle(TF_Library* lib_handle) { - free(const_cast(lib_handle->op_list.data)); + tensorflow::port::Free(const_cast(lib_handle->op_list.data)); delete lib_handle; } diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index 250960d395..f329e83e14 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/platform/mem.h" namespace tensorflow { @@ -41,7 +42,7 @@ void* XlaDeviceAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { // Regardless of the size requested, always allocate a XlaGlobalData. Respect // the aligment request because there is alignment checking even for Tensors // whose data is never accessed. - void* p = port::aligned_malloc(sizeof(XlaGlobalData), alignment); + void* p = port::AlignedMalloc(sizeof(XlaGlobalData), alignment); VLOG(2) << "Allocated XLA device tensor " << p; return new (p) XlaGlobalData(); } @@ -50,7 +51,7 @@ void XlaDeviceAllocator::DeallocateRaw(void* ptr) { XlaGlobalData* global_data = reinterpret_cast(ptr); VLOG(2) << "Deallocated XLA device tensor " << ptr; global_data->~XlaGlobalData(); - port::aligned_free(ptr); + port::AlignedFree(ptr); } void XlaDeviceAllocator::GetStats(AllocatorStats* stats) { stats->Clear(); } diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.cc b/tensorflow/compiler/tf2xla/xla_compilation_device.cc index 86a53c929e..ad3c921744 100644 --- a/tensorflow/compiler/tf2xla/xla_compilation_device.cc +++ b/tensorflow/compiler/tf2xla/xla_compilation_device.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/local_device.h" #include "tensorflow/core/framework/device_base.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" namespace tensorflow { @@ -47,7 +48,7 @@ class XlaCompilationAllocator : public Allocator { // XlaExpression. Respect the aligment request because there is // alignment checking even for Tensors whose data is never // accessed. - void* p = port::aligned_malloc(sizeof(XlaExpression), alignment); + void* p = port::AlignedMalloc(sizeof(XlaExpression), alignment); XlaExpression* expression = reinterpret_cast(p); new (expression) XlaExpression(); return expression; @@ -56,7 +57,7 @@ class XlaCompilationAllocator : public Allocator { void DeallocateRaw(void* ptr) override { XlaExpression* expression = reinterpret_cast(ptr); expression->~XlaExpression(); - port::aligned_free(ptr); + port::AlignedFree(ptr); } // Make sure that even tensors with 0 elements have allocated diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 0fc610a76e..324183c053 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -84,12 +84,14 @@ load( "//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library", "tf_proto_library_cc", + "tf_additional_core_deps", + "tf_additional_lib_defines", + "tf_additional_lib_deps", "tf_additional_lib_hdrs", "tf_additional_lib_srcs", "tf_additional_minimal_lib_srcs", "tf_additional_proto_hdrs", "tf_additional_proto_srcs", - "tf_additional_lib_deps", "tf_additional_stream_executor_srcs", "tf_additional_cupti_wrapper_deps", "tf_additional_libdevice_data", @@ -1126,12 +1128,13 @@ cc_library( "platform/tracing.h", ], copts = tf_copts(), + defines = tf_additional_lib_defines(), linkopts = ["-ldl"], - deps = [ + deps = tf_additional_lib_deps() + [ ":lib_proto_parsing", ":protos_all_cc", - "//tensorflow/core/platform/default/build_config:platformlib", "//third_party/eigen3", + "//tensorflow/core/platform/default/build_config:platformlib", "@zlib_archive//:zlib", ], ) @@ -1351,7 +1354,7 @@ tf_cuda_library( ":protos_all_cc", "//third_party/eigen3", "//tensorflow/core/kernels:required", - ] + tf_additional_lib_deps(), + ] + tf_additional_core_deps(), alwayslink = 1, ) diff --git a/tensorflow/core/common_runtime/gpu/gpu_tracer.cc b/tensorflow/core/common_runtime/gpu/gpu_tracer.cc index ee93b19d29..981a654988 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_tracer.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_tracer.cc @@ -215,7 +215,7 @@ Status CUPTIManager::DisableTrace() { void CUPTIManager::InternalBufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) { VLOG(2) << "BufferRequested"; - void *p = port::aligned_malloc(kBufferSize, kBufferAlignment); + void *p = port::AlignedMalloc(kBufferSize, kBufferAlignment); *size = kBufferSize; *buffer = reinterpret_cast(p); *maxNumRecords = 0; @@ -246,7 +246,7 @@ void CUPTIManager::InternalBufferCompleted(CUcontext ctx, uint32_t streamId, LOG(WARNING) << "Dropped " << dropped << " activity records"; } } - port::aligned_free(buffer); + port::AlignedFree(buffer); } CUPTIManager *GetCUPTIManager() { diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator.h b/tensorflow/core/common_runtime/gpu/pool_allocator.h index 5842758f0e..91ce830df8 100644 --- a/tensorflow/core/common_runtime/gpu/pool_allocator.h +++ b/tensorflow/core/common_runtime/gpu/pool_allocator.h @@ -171,9 +171,9 @@ class BasicCPUAllocator : public SubAllocator { 
~BasicCPUAllocator() override {} void* Alloc(size_t alignment, size_t num_bytes) override { - return port::aligned_malloc(num_bytes, alignment); + return port::AlignedMalloc(num_bytes, alignment); } - void Free(void* ptr, size_t num_bytes) override { port::aligned_free(ptr); } + void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); } }; // Allocator for pinned CPU RAM that is made known to CUDA for the diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD index 89710a4654..8ab8712c8c 100644 --- a/tensorflow/core/distributed_runtime/rpc/BUILD +++ b/tensorflow/core/distributed_runtime/rpc/BUILD @@ -275,6 +275,7 @@ cc_library( "//tensorflow/core/distributed_runtime:server_lib", "//tensorflow/core/distributed_runtime:worker_env", "@grpc//:grpc++_unsecure", + "@grpc//:grpc_unsecure", ], alwayslink = 1, ) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc index addf09672a..99309a98ca 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc @@ -21,6 +21,7 @@ limitations under the License. #include "grpc++/grpc++.h" #include "grpc++/security/credentials.h" #include "grpc++/server_builder.h" +#include "grpc/support/alloc.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" @@ -41,6 +42,7 @@ limitations under the License. #include "tensorflow/core/framework/op.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/public/session_options.h" namespace tensorflow { @@ -304,6 +306,11 @@ class GrpcServerFactory : public ServerFactory { class GrpcServerRegistrar { public: GrpcServerRegistrar() { + gpr_allocation_functions alloc_fns; + alloc_fns.malloc_fn = port::Malloc; + alloc_fns.realloc_fn = port::Realloc; + alloc_fns.free_fn = port::Free; + gpr_set_allocation_functions(alloc_fns); ServerFactory::Register("GRPC_SERVER", new GrpcServerFactory()); } }; diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc index 601d87fa55..812ce4bfe7 100644 --- a/tensorflow/core/framework/allocator.cc +++ b/tensorflow/core/framework/allocator.cc @@ -68,7 +68,7 @@ class CPUAllocator : public Allocator { string Name() override { return "cpu"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override { - void* p = port::aligned_malloc(num_bytes, alignment); + void* p = port::AlignedMalloc(num_bytes, alignment); if (cpu_allocator_collect_stats) { const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p); mutex_lock l(mu_); @@ -89,7 +89,7 @@ class CPUAllocator : public Allocator { mutex_lock l(mu_); stats_.bytes_in_use -= alloc_size; } - port::aligned_free(ptr); + port::AlignedFree(ptr); } void GetStats(AllocatorStats* stats) override { diff --git a/tensorflow/core/framework/load_library.cc b/tensorflow/core/framework/load_library.cc index f56e5fae1b..f825335300 100644 --- a/tensorflow/core/framework/load_library.cc +++ b/tensorflow/core/framework/load_library.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/mem.h" namespace tensorflow { @@ -91,7 +92,7 @@ Status LoadLibrary(const char* library_filename, void** result, } string str; library.op_list.SerializeToString(&str); - char* str_buf = reinterpret_cast(malloc(str.length())); + char* str_buf = reinterpret_cast(port::Malloc(str.length())); memcpy(str_buf, str.data(), str.length()); *buf = str_buf; *len = str.length(); diff --git a/tensorflow/core/framework/tracking_allocator_test.cc b/tensorflow/core/framework/tracking_allocator_test.cc index 98134392ef..850cdc3909 100644 --- a/tensorflow/core/framework/tracking_allocator_test.cc +++ b/tensorflow/core/framework/tracking_allocator_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -27,7 +28,7 @@ class TestableSizeTrackingAllocator : public Allocator { public: string Name() override { return "test"; } void* AllocateRaw(size_t /*alignment*/, size_t num_bytes) override { - void* ptr = malloc(num_bytes); + void* ptr = port::Malloc(num_bytes); size_map_[ptr] = num_bytes; return ptr; } @@ -35,7 +36,7 @@ class TestableSizeTrackingAllocator : public Allocator { const auto& iter = size_map_.find(ptr); EXPECT_NE(size_map_.end(), iter); size_map_.erase(iter); - free(ptr); + port::Free(ptr); } bool TracksAllocationSizes() override { return true; } size_t RequestedSize(void* ptr) override { diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h index 897afe7796..60091fc27f 100644 --- a/tensorflow/core/kernels/conv_ops.h +++ b/tensorflow/core/kernels/conv_ops.h @@ -18,6 +18,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/util/tensor_format.h" #if GOOGLE_CUDA @@ -44,9 +45,9 @@ class LaunchConv2DOp { template struct Im2ColBufferResource : public ResourceBase { Im2ColBufferResource() { - data = static_cast(malloc(size * sizeof(T))); + data = static_cast(port::Malloc(size * sizeof(T))); } - ~Im2ColBufferResource() { free(data); } + ~Im2ColBufferResource() { port::Free(data); } // This mutex ensures that only a single operation at a time is able to use // the buffer memory held by this resource. mutex mu; diff --git a/tensorflow/core/lib/core/arena.cc b/tensorflow/core/lib/core/arena.cc index a7148ed1c7..53998a1821 100644 --- a/tensorflow/core/lib/core/arena.cc +++ b/tensorflow/core/lib/core/arena.cc @@ -48,7 +48,8 @@ Arena::Arena(const size_t block_size) overflow_blocks_(NULL) { assert(block_size > kDefaultAlignment); - first_blocks_[0].mem = reinterpret_cast(malloc(block_size_)); + first_blocks_[0].mem = + reinterpret_cast(port::AlignedMalloc(block_size_, sizeof(void*))); first_blocks_[0].size = block_size_; @@ -59,7 +60,9 @@ Arena::~Arena() { FreeBlocks(); assert(overflow_blocks_ == NULL); // FreeBlocks() should do that // The first X blocks stay allocated always by default. Delete them now. 
- for (size_t i = 0; i < blocks_alloced_; ++i) free(first_blocks_[i].mem); + for (size_t i = 0; i < blocks_alloced_; ++i) { + port::AlignedFree(first_blocks_[i].mem); + } } // Returns true iff it advances freestart_ to the first position @@ -162,8 +165,11 @@ Arena::AllocatedBlock* Arena::AllocNewBlock(const size_t block_size, // Must be a multiple of kDefaultAlignment, unless requested // alignment is 1, in which case we don't care at all. - const uint32 adjusted_alignment = + uint32 adjusted_alignment = (alignment > 1 ? LeastCommonMultiple(alignment, kDefaultAlignment) : 1); + // Required minimum alignment for port::AlignedMalloc(). + adjusted_alignment = + std::max(adjusted_alignment, static_cast(sizeof(void*))); CHECK_LE(adjusted_alignment, static_cast(1 << 20)) << "Alignment on boundaries greater than 1MB not supported."; @@ -171,16 +177,12 @@ Arena::AllocatedBlock* Arena::AllocNewBlock(const size_t block_size, // If block_size > alignment we force block_size to be a multiple // of alignment; if block_size < alignment we make no adjustment. size_t adjusted_block_size = block_size; - if (adjusted_alignment > 1) { - if (adjusted_block_size > adjusted_alignment) { - const uint32 excess = adjusted_block_size % adjusted_alignment; - adjusted_block_size += (excess > 0 ? adjusted_alignment - excess : 0); - } - block->mem = reinterpret_cast( - port::aligned_malloc(adjusted_block_size, adjusted_alignment)); - } else { - block->mem = reinterpret_cast(malloc(adjusted_block_size)); + if (adjusted_block_size > adjusted_alignment) { + const uint32 excess = adjusted_block_size % adjusted_alignment; + adjusted_block_size += (excess > 0 ? adjusted_alignment - excess : 0); } + block->mem = reinterpret_cast( + port::AlignedMalloc(adjusted_block_size, adjusted_alignment)); block->size = adjusted_block_size; CHECK(NULL != block->mem) << "block_size=" << block_size << " adjusted_block_size=" << adjusted_block_size @@ -242,7 +244,7 @@ void* Arena::GetMemoryFallback(const size_t size, const int alignment) { void Arena::FreeBlocks() { for (size_t i = 1; i < blocks_alloced_; ++i) { // keep first block alloced - free(first_blocks_[i].mem); + port::AlignedFree(first_blocks_[i].mem); first_blocks_[i].mem = NULL; first_blocks_[i].size = 0; } @@ -250,7 +252,7 @@ void Arena::FreeBlocks() { if (overflow_blocks_ != NULL) { std::vector::iterator it; for (it = overflow_blocks_->begin(); it != overflow_blocks_->end(); ++it) { - free(it->mem); + port::AlignedFree(it->mem); } delete overflow_blocks_; // These should be used very rarely overflow_blocks_ = NULL; diff --git a/tensorflow/core/lib/gtl/inlined_vector.h b/tensorflow/core/lib/gtl/inlined_vector.h index fc439f9eb6..d6e5d9effa 100644 --- a/tensorflow/core/lib/gtl/inlined_vector.h +++ b/tensorflow/core/lib/gtl/inlined_vector.h @@ -45,6 +45,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/manual_constructor.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/types.h" #include // NOLINT(build/include_order) @@ -353,7 +354,7 @@ class InlinedVector { size_t n = size(); Destroy(base, n); if (!is_inline()) { - free(base); + port::Free(base); } } @@ -434,7 +435,7 @@ class InlinedVector { } T* src = data(); - T* dst = static_cast(malloc(target * sizeof(T))); + T* dst = static_cast(port::Malloc(target * sizeof(T))); // Need to copy elem before discarding src since it might alias src. 
InitType{}(dst + s, std::forward(args)...); diff --git a/tensorflow/core/lib/gtl/manual_constructor.h b/tensorflow/core/lib/gtl/manual_constructor.h index 8f041a1353..0a76e0962e 100644 --- a/tensorflow/core/lib/gtl/manual_constructor.h +++ b/tensorflow/core/lib/gtl/manual_constructor.h @@ -30,7 +30,7 @@ limitations under the License. #include #include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/platform/mem.h" // For aligned_malloc/aligned_free +#include "tensorflow/core/platform/mem.h" namespace tensorflow { namespace gtl { @@ -127,9 +127,9 @@ class ManualConstructor { // Support users creating arrays of ManualConstructor<>s. This ensures that // the array itself has the correct alignment. static void* operator new[](size_t size) { - return port::aligned_malloc(size, TF_LIB_GTL_ALIGN_OF(Type)); + return port::AlignedMalloc(size, TF_LIB_GTL_ALIGN_OF(Type)); } - static void operator delete[](void* mem) { port::aligned_free(mem); } + static void operator delete[](void* mem) { port::AlignedFree(mem); } inline Type* get() { return reinterpret_cast(space_); } inline const Type* get() const { diff --git a/tensorflow/core/platform/cloud/http_request_test.cc b/tensorflow/core/platform/cloud/http_request_test.cc index 93c4ec51d9..31ba3e337f 100644 --- a/tensorflow/core/platform/cloud/http_request_test.cc +++ b/tensorflow/core/platform/cloud/http_request_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -172,7 +173,8 @@ class FakeLibCurl : public LibCurl { temp_str.replace(n, victim.size(), encoded); n += encoded.size(); } - char* out_char_str = (char*)malloc(sizeof(char) * temp_str.size() + 1); + char* out_char_str = + (char*)port::Malloc(sizeof(char) * temp_str.size() + 1); std::copy(temp_str.begin(), temp_str.end(), out_char_str); out_char_str[temp_str.size()] = '\0'; return out_char_str; @@ -180,7 +182,7 @@ class FakeLibCurl : public LibCurl { void curl_slist_free_all(curl_slist* list) override { delete reinterpret_cast*>(list); } - void curl_free(void* p) override { free(p); } + void curl_free(void* p) override { port::Free(p); } // Variables defining the behavior of this fake. string response_content; diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 80c23b1df1..168f9df2e8 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -3,10 +3,11 @@ load("@protobuf//:protobuf.bzl", "cc_proto_library") load("@protobuf//:protobuf.bzl", "py_proto_library") -# configure may change the following lines to True +# configure may change the following lines WITH_GCP_SUPPORT = False WITH_HDFS_SUPPORT = False WITH_XLA_SUPPORT = False +WITH_JEMALLOC = True # Appends a suffix to a list of deps. def tf_deps(deps, suffix): @@ -176,7 +177,29 @@ def tf_additional_test_srcs(): def tf_kernel_tests_linkstatic(): return 0 +# jemalloc only enabled on Linux for now. +# TODO(jhseu): Enable on other platforms. 
+def tf_additional_lib_defines(): + defines = [] + if WITH_JEMALLOC: + defines += select({ + "//tensorflow:linux_x86_64": [ + "TENSORFLOW_USE_JEMALLOC" + ], + "//conditions:default": [], + }) + return defines + def tf_additional_lib_deps(): + deps = [] + if WITH_JEMALLOC: + deps += select({ + "//tensorflow:linux_x86_64": ["@jemalloc"], + "//conditions:default": [], + }) + return deps + +def tf_additional_core_deps(): deps = [] if WITH_GCP_SUPPORT: deps.append("//tensorflow/core/platform/cloud:gcs_file_system") diff --git a/tensorflow/core/platform/mem.h b/tensorflow/core/platform/mem.h index 6618145c3d..dc389a8741 100644 --- a/tensorflow/core/platform/mem.h +++ b/tensorflow/core/platform/mem.h @@ -24,9 +24,14 @@ limitations under the License. namespace tensorflow { namespace port { -// Aligned allocation/deallocation -void* aligned_malloc(size_t size, int minimum_alignment); -void aligned_free(void* aligned_memory); +// Aligned allocation/deallocation. `minimum_alignment` must be a power of 2 +// and a multiple of sizeof(void*). +void* AlignedMalloc(size_t size, int minimum_alignment); +void AlignedFree(void* aligned_memory); + +void* Malloc(size_t size); +void* Realloc(void* ptr, size_t size); +void Free(void* ptr); // Tries to release num_bytes of free memory back to the operating // system for reuse. Use this routine with caution -- to get this diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc index 8d98eb25a2..8930e49ff8 100644 --- a/tensorflow/core/platform/port_test.cc +++ b/tensorflow/core/platform/port_test.cc @@ -25,11 +25,11 @@ namespace port { TEST(Port, AlignedMalloc) { for (size_t alignment = 1; alignment <= 1 << 20; alignment <<= 1) { - void* p = aligned_malloc(1, alignment); - ASSERT_TRUE(p != NULL) << "aligned_malloc(1, " << alignment << ")"; + void* p = AlignedMalloc(1, alignment); + ASSERT_TRUE(p != NULL) << "AlignedMalloc(1, " << alignment << ")"; uintptr_t pval = reinterpret_cast(p); EXPECT_EQ(pval % alignment, 0); - aligned_free(p); + AlignedFree(p); } } diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 84bc9492b5..7dce43f0cc 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -13,8 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifdef TENSORFLOW_USE_JEMALLOC +#include "jemalloc/jemalloc.h" +#endif + #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/types.h" #if defined(__linux__) && !defined(__ANDROID__) #include @@ -60,7 +65,7 @@ int NumSchedulableCPUs() { return kDefaultCores; } -void *aligned_malloc(size_t size, int minimum_alignment) { +void *AlignedMalloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) return memalign(minimum_alignment, size); #else // !defined(__ANDROID__) @@ -69,15 +74,45 @@ void *aligned_malloc(size_t size, int minimum_alignment) { // sizeof(void*). In this case, fall back on malloc which should return // memory aligned to at least the size of a pointer. 
const int required_alignment = sizeof(void *); - if (minimum_alignment < required_alignment) return malloc(size); - if (posix_memalign(&ptr, minimum_alignment, size) != 0) + if (minimum_alignment < required_alignment) return Malloc(size); +#ifdef TENSORFLOW_USE_JEMALLOC + int err = jemalloc_posix_memalign(&ptr, minimum_alignment, size); +#else + int err = posix_memalign(&ptr, minimum_alignment, size); +#endif + if (err != 0) { return NULL; - else + } else { return ptr; + } #endif } -void aligned_free(void *aligned_memory) { free(aligned_memory); } +void AlignedFree(void *aligned_memory) { Free(aligned_memory); } + +void *Malloc(size_t size) { +#ifdef TENSORFLOW_USE_JEMALLOC + return jemalloc_malloc(size); +#else + return malloc(size); +#endif +} + +void *Realloc(void *ptr, size_t size) { +#ifdef TENSORFLOW_USE_JEMALLOC + return jemalloc_realloc(ptr, size); +#else + return realloc(ptr, size); +#endif +} + +void Free(void *ptr) { +#ifdef TENSORFLOW_USE_JEMALLOC + jemalloc_free(ptr); +#else + free(ptr); +#endif +} void MallocExtension_ReleaseToSystem(std::size_t num_bytes) { // No-op. diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc index ee5be221cd..b2167081a6 100644 --- a/tensorflow/core/platform/windows/port.cc +++ b/tensorflow/core/platform/windows/port.cc @@ -52,11 +52,17 @@ int NumSchedulableCPUs() { return system_info.dwNumberOfProcessors; } -void* aligned_malloc(size_t size, int minimum_alignment) { +void* AlignedMalloc(size_t size, int minimum_alignment) { return _aligned_malloc(size, minimum_alignment); } -void aligned_free(void* aligned_memory) { _aligned_free(aligned_memory); } +void AlignedFree(void* aligned_memory) { _aligned_free(aligned_memory); } + +void* Malloc(size_t size) { return ::malloc(size); } + +void* Realloc(void* ptr, size_t size) { return ::realloc(ptr, size); } + +void Free(void* ptr) { ::free(ptr); } void MallocExtension_ReleaseToSystem(std::size_t num_bytes) { // No-op. 
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 5570cea32f..62fb9b9176 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -87,6 +87,7 @@ filegroup( "@gif_archive//:COPYING", "@grpc//:LICENSE", "@highwayhash//:LICENSE", + "@jemalloc//:COPYING", "@jpeg//:LICENSE.md", "@libxsmm_archive//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5cea08e2f3..06f9ca88a7 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -376,3 +376,14 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): name = "junit", actual = "@junit_jar//jar", ) + + native.new_http_archive( + name = "jemalloc", + urls = [ + "http://bazel-mirror.storage.googleapis.com/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", + "https://github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", + ], + sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", + strip_prefix = "jemalloc-4.4.0", + build_file = str(Label("//third_party:jemalloc.BUILD")), + ) diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD new file mode 100644 index 0000000000..2496d12627 --- /dev/null +++ b/third_party/jemalloc.BUILD @@ -0,0 +1,321 @@ +# Description: +# jemalloc - a general-purpose scalable concurrent malloc implementation + +licenses(["notice"]) # BSD + +exports_files(["COPYING"]) + +load("@//third_party:common.bzl", "template_rule") + +cc_library( + name = "jemalloc", + srcs = [ + "src/arena.c", + "src/atomic.c", + "src/base.c", + "src/bitmap.c", + "src/chunk.c", + "src/chunk_dss.c", + "src/chunk_mmap.c", + "src/ckh.c", + "src/ctl.c", + "src/extent.c", + "src/hash.c", + "src/huge.c", + "src/jemalloc.c", + "src/mb.c", + "src/mutex.c", + "src/nstime.c", + "src/pages.c", + "src/prng.c", + "src/prof.c", + "src/quarantine.c", + "src/rtree.c", + "src/spin.c", + "src/stats.c", + "src/tcache.c", + "src/tsd.c", + "src/util.c", + "src/witness.c", + ], + hdrs = [ + "include/jemalloc/internal/arena.h", + "include/jemalloc/internal/assert.h", + "include/jemalloc/internal/atomic.h", + "include/jemalloc/internal/base.h", + "include/jemalloc/internal/bitmap.h", + "include/jemalloc/internal/chunk.h", + "include/jemalloc/internal/chunk_dss.h", + "include/jemalloc/internal/chunk_mmap.h", + "include/jemalloc/internal/ckh.h", + "include/jemalloc/internal/ctl.h", + "include/jemalloc/internal/extent.h", + "include/jemalloc/internal/hash.h", + "include/jemalloc/internal/huge.h", + "include/jemalloc/internal/jemalloc_internal.h", + "include/jemalloc/internal/jemalloc_internal_decls.h", + "include/jemalloc/internal/jemalloc_internal_defs.h", + "include/jemalloc/internal/jemalloc_internal_macros.h", + "include/jemalloc/internal/mb.h", + "include/jemalloc/internal/mutex.h", + "include/jemalloc/internal/nstime.h", + "include/jemalloc/internal/pages.h", + "include/jemalloc/internal/ph.h", + "include/jemalloc/internal/private_namespace.h", + "include/jemalloc/internal/prng.h", + "include/jemalloc/internal/prof.h", + "include/jemalloc/internal/ql.h", + "include/jemalloc/internal/qr.h", + "include/jemalloc/internal/quarantine.h", + "include/jemalloc/internal/rb.h", + "include/jemalloc/internal/rtree.h", + "include/jemalloc/internal/size_classes.h", + "include/jemalloc/internal/smoothstep.h", + "include/jemalloc/internal/spin.h", + "include/jemalloc/internal/stats.h", + "include/jemalloc/internal/tcache.h", + "include/jemalloc/internal/ticker.h", + "include/jemalloc/internal/tsd.h", 
+ "include/jemalloc/internal/util.h", + "include/jemalloc/internal/valgrind.h", + "include/jemalloc/internal/witness.h", + "include/jemalloc/jemalloc.h", + ], + # Same flags that jemalloc uses to build. + copts = [ + "-O3", + "-funroll-loops", + "-D_GNU_SOURCE", + "-D_REENTRANT", + ], + includes = ["include"], + visibility = ["//visibility:public"], +) + +sh_binary( + name = "jemalloc_sh", + srcs = ["include/jemalloc/jemalloc.sh"], +) + +genrule( + name = "jemalloc_h", + srcs = [ + ":jemalloc_defs_h", + ":jemalloc_macros_h", + ":jemalloc_mangle_h", + ":jemalloc_protos_h", + ":jemalloc_rename_h", + ":jemalloc_typedefs_h", + ], + outs = ["include/jemalloc/jemalloc.h"], + cmd = "$(location :jemalloc_sh) $$(dirname $(location :jemalloc_defs_h))/../../ >$@", + tools = [":jemalloc_sh"], +) + +# Add to this list if you want to export more symbols from jemalloc. +genrule( + name = "public_symbols_txt", + outs = ["include/jemalloc/internal/public_symbols.txt"], + cmd = "\n".join([ + "cat <<'EOF' > $@", + "free:jemalloc_free", + "malloc:jemalloc_malloc", + "posix_memalign:jemalloc_posix_memalign", + "realloc:jemalloc_realloc", + "EOF", + ]), +) + +sh_binary( + name = "jemalloc_mangle_sh", + srcs = ["include/jemalloc/jemalloc_mangle.sh"], +) + +genrule( + name = "jemalloc_mangle_h", + srcs = [":public_symbols_txt"], + outs = ["include/jemalloc/jemalloc_mangle.h"], + cmd = "$(location :jemalloc_mangle_sh) $(location :public_symbols_txt) je_ >$@", + tools = [":jemalloc_mangle_sh"], +) + +sh_binary( + name = "jemalloc_rename_sh", + srcs = ["include/jemalloc/jemalloc_rename.sh"], +) + +genrule( + name = "jemalloc_rename_h", + srcs = [":public_symbols_txt"], + outs = ["include/jemalloc/jemalloc_rename.h"], + cmd = "$(location :jemalloc_rename_sh) $(location :public_symbols_txt) >$@", + tools = [":jemalloc_rename_sh"], +) + +sh_binary( + name = "private_namespace_sh", + srcs = ["include/jemalloc/internal/private_namespace.sh"], +) + +genrule( + name = "private_namespace_h", + srcs = ["include/jemalloc/internal/private_symbols.txt"], + outs = ["include/jemalloc/internal/private_namespace.h"], + cmd = "$(location :private_namespace_sh) $(location include/jemalloc/internal/private_symbols.txt) >$@", + tools = [":private_namespace_sh"], +) + +sh_binary( + name = "public_namespace_sh", + srcs = ["include/jemalloc/internal/public_namespace.sh"], +) + +genrule( + name = "public_namespace_h", + srcs = [":public_symbols_txt"], + outs = ["include/jemalloc/internal/public_namespace.h"], + cmd = "$(location :public_namespace_sh) $(location :public_symbols_txt) >$@", + tools = [":public_namespace_sh"], +) + +sh_binary( + name = "size_classes_sh", + srcs = ["include/jemalloc/internal/size_classes.sh"], +) + +# Size classes for Linux x86_64. Update if adding builds for other +# architectures. See size_classes.sh for details on the arguments. 
+genrule( + name = "size_classes_h", + outs = ["include/jemalloc/internal/size_classes.h"], + cmd = "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@", + tools = [":size_classes_sh"], +) + +template_rule( + name = "jemalloc_internal_h", + src = "include/jemalloc/internal/jemalloc_internal.h.in", + out = "include/jemalloc/internal/jemalloc_internal.h", + substitutions = { + "@private_namespace@": "je_", + "@install_suffix@": "", + }, +) + +template_rule( + name = "jemalloc_internal_defs_h", + src = "include/jemalloc/internal/jemalloc_internal_defs.h.in", + out = "include/jemalloc/internal/jemalloc_internal_defs.h", + substitutions = { + "#undef JEMALLOC_PREFIX": "#define JEMALLOC_PREFIX \"jemalloc_\"", + "#undef JEMALLOC_CPREFIX": "#define JEMALLOC_CPREFIX \"JEMALLOC_\"", + "#undef JEMALLOC_PRIVATE_NAMESPACE": "#define JEMALLOC_PRIVATE_NAMESPACE je_", + "#undef CPU_SPINWAIT": "#define CPU_SPINWAIT __asm__ volatile(\"pause\")", + "#undef JEMALLOC_HAVE_BUILTIN_CLZ": "#define JEMALLOC_HAVE_BUILTIN_CLZ", + "#undef JEMALLOC_USE_SYSCALL": "#define JEMALLOC_USE_SYSCALL", + "#undef JEMALLOC_HAVE_SECURE_GETENV": "#define JEMALLOC_HAVE_SECURE_GETENV", + "#undef JEMALLOC_HAVE_PTHREAD_ATFORK": "#define JEMALLOC_HAVE_PTHREAD_ATFORK", + "#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE": "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1", + # Newline required because of substitution conflicts. + "#undef JEMALLOC_HAVE_CLOCK_MONOTONIC\n": "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1\n", + "#undef JEMALLOC_THREADED_INIT": "#define JEMALLOC_THREADED_INIT", + "#undef JEMALLOC_TLS_MODEL": "#define JEMALLOC_TLS_MODEL __attribute__((tls_model(\"initial-exec\")))", + "#undef JEMALLOC_CC_SILENCE": "#define JEMALLOC_CC_SILENCE", + "#undef JEMALLOC_STATS": "#define JEMALLOC_STATS", + "#undef JEMALLOC_TCACHE": "#define JEMALLOC_TCACHE", + "#undef JEMALLOC_DSS": "#define JEMALLOC_DSS", + "#undef JEMALLOC_FILL": "#define JEMALLOC_FILL", + "#undef LG_TINY_MIN": "#define LG_TINY_MIN 3", + "#undef LG_PAGE": "#define LG_PAGE 12", + "#undef JEMALLOC_MAPS_COALESCE": "#define JEMALLOC_MAPS_COALESCE", + "#undef JEMALLOC_TLS": "#define JEMALLOC_TLS", + "#undef JEMALLOC_INTERNAL_UNREACHABLE": "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable", + "#undef JEMALLOC_INTERNAL_FFSLL": "#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll", + # Newline required because of substitution conflicts. + "#undef JEMALLOC_INTERNAL_FFSL\n": "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl\n", + "#undef JEMALLOC_INTERNAL_FFS\n": "#define JEMALLOC_INTERNAL_FFS __builtin_ffs\n", + "#undef JEMALLOC_CACHE_OBLIVIOUS": "#define JEMALLOC_CACHE_OBLIVIOUS", + "#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY": "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY", + "#undef JEMALLOC_HAVE_MADVISE": "#define JEMALLOC_HAVE_MADVISE", + "#undef JEMALLOC_PURGE_MADVISE_DONTNEED": "#define JEMALLOC_PURGE_MADVISE_DONTNEED", + "#undef JEMALLOC_THP": "#define JEMALLOC_THP", + "#undef JEMALLOC_HAS_ALLOCA_H": "#define JEMALLOC_HAS_ALLOCA_H 1", + # Newline required because of substitution conflicts. 
+ "#undef LG_SIZEOF_INT\n": "#define LG_SIZEOF_INT 2\n", + "#undef LG_SIZEOF_LONG\n": "#define LG_SIZEOF_LONG 3\n", + "#undef LG_SIZEOF_LONG_LONG": "#define LG_SIZEOF_LONG_LONG 3", + "#undef LG_SIZEOF_INTMAX_T": "#define LG_SIZEOF_INTMAX_T 3", + "#undef JEMALLOC_GLIBC_MALLOC_HOOK": "#define JEMALLOC_GLIBC_MALLOC_HOOK", + "#undef JEMALLOC_GLIBC_MEMALIGN_HOOK": "#define JEMALLOC_GLIBC_MEMALIGN_HOOK", + "#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP": "#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP", + "#undef JEMALLOC_CONFIG_MALLOC_CONF": "#define JEMALLOC_CONFIG_MALLOC_CONF \"\"", + }, +) + +template_rule( + name = "jemalloc_defs_h", + src = "include/jemalloc/jemalloc_defs.h.in", + out = "include/jemalloc/jemalloc_defs.h", + substitutions = { + "#undef JEMALLOC_HAVE_ATTR": "#define JEMALLOC_HAVE_ATTR", + "#undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE": "#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE", + "#undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF": "#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF", + "#undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF": "#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF", + "#undef JEMALLOC_OVERRIDE_MEMALIGN": "#define JEMALLOC_OVERRIDE_MEMALIGN", + "#undef JEMALLOC_OVERRIDE_VALLOC": "#define JEMALLOC_OVERRIDE_VALLOC", + "#undef JEMALLOC_USABLE_SIZE_CONST": "#define JEMALLOC_USABLE_SIZE_CONST", + "#undef JEMALLOC_USE_CXX_THROW": "#define JEMALLOC_USE_CXX_THROW", + "#undef LG_SIZEOF_PTR": "#define LG_SIZEOF_PTR 3", + }, +) + +template_rule( + name = "jemalloc_macros_h", + src = "include/jemalloc/jemalloc_macros.h.in", + out = "include/jemalloc/jemalloc_macros.h", + substitutions = { + "@jemalloc_version@": "0.0.0", + "@jemalloc_version_major@": "0", + "@jemalloc_version_minor@": "0", + "@jemalloc_version_bugfix@": "0", + "@jemalloc_version_nrev@": "0", + "@jemalloc_version_gid@": "0000000000000000000000000000000000000000", + }, +) + +template_rule( + name = "jemalloc_protos_h", + src = "include/jemalloc/jemalloc_protos.h.in", + out = "include/jemalloc/jemalloc_protos.h", + substitutions = { + "@aligned_alloc": "aligned_alloc", + "@calloc": "calloc", + "@cbopaque": "cbopaque", + "@dallocx": "dallocx", + "@free": "free", + "@je": "je", + "@mallctl": "mallctl", + "@mallctlnametomib": "mallctlnametomib", + "@mallctlbymib": "mallctlbymib", + "@malloc_stats_print": "malloc_stats_print", + "@malloc_usable_size": "malloc_usable_size", + "@malloc": "malloc", + "@mallocx": "mallocx", + "@memalign": "memalign", + "@nallocx": "nallocx", + "@posix_memalign": "posix_memalign", + "@rallocx": "rallocx", + "@realloc": "realloc", + "@sallocx": "sallocx", + "@sdallocx": "sdallocx", + "@valloc": "valloc", + "@xallocx": "xallocx", + }, +) + +template_rule( + name = "jemalloc_typedefs_h", + src = "include/jemalloc/jemalloc_typedefs.h.in", + out = "include/jemalloc/jemalloc_typedefs.h", + substitutions = {}, +) -- cgit v1.2.3