aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/platform
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-07-10 17:27:29 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-07-10 19:31:13 -0700
commit5c7a6fba35436fcf02826c5df953263dfe1f2340 (patch)
tree011233bdb1cbfffc75967ccb087d763b8745528b /tensorflow/core/platform
parent7903d8d3f4097fdf4340638b7e7faec7650d74ee (diff)
Begin introducing NUMA support for CPU threads and memory
by extending the core/platform API with some basic functionality. The new functions allow: 1. Determining how many NUMA nodes are available. 2. Setting the executing thread to be bound to a particular node, or not bound at all. 3. Allocating memory affiliated with a particular node. This change introduces the API only, there is not yet a real implementation. PiperOrigin-RevId: 204042160
Diffstat (limited to 'tensorflow/core/platform')
-rw-r--r--tensorflow/core/platform/numa.h62
-rw-r--r--tensorflow/core/platform/numa_test.cc61
-rw-r--r--tensorflow/core/platform/posix/port.cc24
3 files changed, 147 insertions, 0 deletions
diff --git a/tensorflow/core/platform/numa.h b/tensorflow/core/platform/numa.h
new file mode 100644
index 0000000000..b1f08e4c4c
--- /dev/null
+++ b/tensorflow/core/platform/numa.h
@@ -0,0 +1,62 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_PLATFORM_NUMA_H_
+#define TENSORFLOW_CORE_PLATFORM_NUMA_H_
+
+#include "tensorflow/core/platform/platform.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace port {
+
+// Returns true iff NUMA functions are supported.
+bool NUMAEnabled();
+
+// Returns the number of NUMA nodes present with respect to CPU operations.
+// Typically this will be the number of sockets where some RAM has greater
+// affinity with one socket than another.
+int NUMANumNodes();
+
+static const int kNUMANoAffinity = -1;
+
+// If possible sets affinity of the current thread to the specified NUMA node.
+// If node == kNUMANoAffinity removes affinity to any particular node.
+void NUMASetThreadNodeAffinity(int node);
+
+// Returns NUMA node affinity of the current thread, kNUMANoAffinity if none.
+int NUMAGetThreadNodeAffinity();
+
+// Like AlignedMalloc, but allocates memory with affinity to the specified NUMA
+// node.
+//
+// Notes:
+// 1. node must be >= 0 and < NUMANumNodes.
+// 2. minimum_alignment must be a factor of the system page size; the memory
+//    returned will be page-aligned.
+// 3. This function is likely significantly slower than AlignedMalloc
+//    and should not be used for lots of small allocations. It makes more
+//    sense as a backing allocator for BFCAllocator, PoolAllocator, or similar.
+void* NUMAMalloc(int node, size_t size, int minimum_alignment);
+
+// Memory allocated by NUMAMalloc must be freed via NUMAFree.
+void NUMAFree(void* ptr, size_t size);
+
+// Returns NUMA node affinity of memory address, kNUMANoAffinity if none.
+int NUMAGetMemAffinity(const void* ptr);
+
+} // namespace port
+} // namespace tensorflow
+#endif // TENSORFLOW_CORE_PLATFORM_NUMA_H_
diff --git a/tensorflow/core/platform/numa_test.cc b/tensorflow/core/platform/numa_test.cc
new file mode 100644
index 0000000000..8b39ecd59c
--- /dev/null
+++ b/tensorflow/core/platform/numa_test.cc
@@ -0,0 +1,61 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/numa.h"
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace internal {
+
+TEST(Numa, NumNodes) {
+ if (port::NUMAEnabled()) {
+ EXPECT_GE(port::NUMANumNodes(), 1);
+ }
+}
+
+TEST(Numa, Malloc) {
+ if (port::NUMAEnabled()) {
+ int num_nodes = port::NUMANumNodes();
+ for (int request_node = 0; request_node < num_nodes; ++request_node) {
+ void* ptr = port::NUMAMalloc(request_node, 8, 0);
+ EXPECT_NE(ptr, nullptr);
+ // Affinity cannot be tested until page is touched, so save a value.
+ *(reinterpret_cast<int*>(ptr)) = 0;
+ int affinity_node = port::NUMAGetMemAffinity(ptr);
+ EXPECT_EQ(affinity_node, request_node);
+ port::NUMAFree(ptr, 8);
+ }
+ }
+}
+
+TEST(Numa, SetNodeAffinity) {
+  // NOTE(tucker): This test is not reliable when executed under tap because
+  // the virtual machine may not have access to all of the available NUMA
+  // nodes. Not sure what to do about that.
+ EXPECT_EQ(-1, port::NUMAGetThreadNodeAffinity());
+ if (port::NUMAEnabled()) {
+ int num_nodes = port::NUMANumNodes();
+ for (int request_node = 0; request_node < num_nodes; ++request_node) {
+ port::NUMASetThreadNodeAffinity(request_node);
+ int affinity_node = port::NUMAGetThreadNodeAffinity();
+ EXPECT_EQ(affinity_node, request_node);
+ }
+ }
+}
+
+} // namespace internal
+} // namespace tensorflow
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 708f32ba80..1939cf72fb 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mem.h"
+#include "tensorflow/core/platform/numa.h"
#include "tensorflow/core/platform/snappy.h"
#include "tensorflow/core/platform/types.h"
@@ -79,6 +80,19 @@ int NumHyperthreadsPerCore() {
return (ht_per_core > 0) ? ht_per_core : 1;
}
+bool NUMAEnabled() {
+ // Not yet implemented: coming soon.
+ return false;
+}
+
+int NUMANumNodes() { return 1; }
+
+void NUMASetThreadNodeAffinity(int node) {}
+
+int NUMAGetThreadNodeAffinity() {
+ return kNUMANoAffinity;
+}
+
void* AlignedMalloc(size_t size, int minimum_alignment) {
#if defined(__ANDROID__)
return memalign(minimum_alignment, size);
@@ -128,6 +142,16 @@ void Free(void* ptr) {
#endif
}
+void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
+ return AlignedMalloc(size, minimum_alignment);
+}
+
+void NUMAFree(void* ptr, size_t size) { Free(ptr); }
+
+int NUMAGetMemAffinity(const void* addr) {
+ return kNUMANoAffinity;
+}
+
void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
// No-op.
}