diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-07-10 17:27:29 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-07-10 19:31:13 -0700 |
commit | 5c7a6fba35436fcf02826c5df953263dfe1f2340 (patch) | |
tree | 011233bdb1cbfffc75967ccb087d763b8745528b /tensorflow/core/platform | |
parent | 7903d8d3f4097fdf4340638b7e7faec7650d74ee (diff) |
Begin introducing NUMA support for CPU threads and memory
by extending the core/platform API with some basic functionality.
The new functions allow:
1. Determining how many NUMA nodes are available.
2. Setting the executing thread to be bound to a particular node,
or not bound at all.
3. Allocating memory affiliated with a particular node.
This change introduces the API only, there is not yet a real
implementation.
PiperOrigin-RevId: 204042160
Diffstat (limited to 'tensorflow/core/platform')
-rw-r--r-- | tensorflow/core/platform/numa.h | 62 | ||||
-rw-r--r-- | tensorflow/core/platform/numa_test.cc | 61 | ||||
-rw-r--r-- | tensorflow/core/platform/posix/port.cc | 24 |
3 files changed, 147 insertions, 0 deletions
diff --git a/tensorflow/core/platform/numa.h b/tensorflow/core/platform/numa.h new file mode 100644 index 0000000000..b1f08e4c4c --- /dev/null +++ b/tensorflow/core/platform/numa.h @@ -0,0 +1,62 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_NUMA_H_ +#define TENSORFLOW_CORE_PLATFORM_NUMA_H_ + +#include "tensorflow/core/platform/platform.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace port { + +// Returns true iff NUMA functions are supported. +bool NUMAEnabled(); + +// Returns the number of NUMA nodes present with respect to CPU operations. +// Typically this will be the number of sockets where some RAM has greater +// affinity with one socket than another. +int NUMANumNodes(); + +static const int kNUMANoAffinity = -1; + +// If possible sets affinity of the current thread to the specified NUMA node. +// If node == kNUMANoAffinity removes affinity to any particular node. +void NUMASetThreadNodeAffinity(int node); + +// Returns NUMA node affinity of the current thread, kNUMANoAffinity if none. +int NUMAGetThreadNodeAffinity(); + +// Like AlignedMalloc, but allocates memory with affinity to the specified NUMA +// node. +// +// Notes: +// 1. node must be >= 0 and < NUMANumNodes. +// 2. 
minimum_alignment must be a factor of system page size, the memory +// returned will be page-aligned. +// 3. This function is likely significantly slower than AlignedMalloc +// and should not be used for lots of small allocations. It makes more +// sense as a backing allocator for BFCAllocator, PoolAllocator, or similar. +void* NUMAMalloc(int node, size_t size, int minimum_alignment); + +// Memory allocated by NUMAMalloc must be freed via NUMAFree. +void NUMAFree(void* ptr, size_t size); + +// Returns NUMA node affinity of memory address, kNUMANoAffinity if none. +int NUMAGetMemAffinity(const void* ptr); + +} // namespace port +} // namespace tensorflow +#endif  // TENSORFLOW_CORE_PLATFORM_NUMA_H_ diff --git a/tensorflow/core/platform/numa_test.cc b/tensorflow/core/platform/numa_test.cc new file mode 100644 index 0000000000..8b39ecd59c --- /dev/null +++ b/tensorflow/core/platform/numa_test.cc @@ -0,0 +1,61 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/platform/numa.h" + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace internal { + +TEST(Numa, NumNodes) { + if (port::NUMAEnabled()) { + EXPECT_GE(port::NUMANumNodes(), 1); + } +} + +TEST(Numa, Malloc) { + if (port::NUMAEnabled()) { + int num_nodes = port::NUMANumNodes(); + for (int request_node = 0; request_node < num_nodes; ++request_node) { + void* ptr = port::NUMAMalloc(request_node, 8, 0); + EXPECT_NE(ptr, nullptr); + // Affinity cannot be tested until page is touched, so save a value. + *(reinterpret_cast<int*>(ptr)) = 0; + int affinity_node = port::NUMAGetMemAffinity(ptr); + EXPECT_EQ(affinity_node, request_node); + port::NUMAFree(ptr, 8); + } + } +} + +TEST(Numa, SetNodeAffinity) { + // NOTE(tucker): This test is not reliable when executed under tap because + // the virtual machine may not have access to all of the available NUMA + // nodes. Not sure what to do about that. + EXPECT_EQ(-1, port::NUMAGetThreadNodeAffinity()); + if (port::NUMAEnabled()) { + int num_nodes = port::NUMANumNodes(); + for (int request_node = 0; request_node < num_nodes; ++request_node) { + port::NUMASetThreadNodeAffinity(request_node); + int affinity_node = port::NUMAGetThreadNodeAffinity(); + EXPECT_EQ(affinity_node, request_node); + } + } +} + +} // namespace internal +} // namespace tensorflow diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 708f32ba80..1939cf72fb 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mem.h" +#include "tensorflow/core/platform/numa.h" #include "tensorflow/core/platform/snappy.h" #include "tensorflow/core/platform/types.h" @@ -79,6 +80,19 @@ int NumHyperthreadsPerCore() { return (ht_per_core > 0) ? ht_per_core : 1; } +bool NUMAEnabled() { + // Not yet implemented: coming soon. + return false; +} + +int NUMANumNodes() { return 1; } + +void NUMASetThreadNodeAffinity(int node) {} + +int NUMAGetThreadNodeAffinity() { + return kNUMANoAffinity; +} + void* AlignedMalloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) return memalign(minimum_alignment, size); @@ -128,6 +142,16 @@ void Free(void* ptr) { #endif } +void* NUMAMalloc(int node, size_t size, int minimum_alignment) { + return AlignedMalloc(size, minimum_alignment); +} + +void NUMAFree(void* ptr, size_t size) { Free(ptr); } + +int NUMAGetMemAffinity(const void* addr) { + return kNUMANoAffinity; +} + void MallocExtension_ReleaseToSystem(std::size_t num_bytes) { // No-op. } |