// Provenance (from the cgit v1.2.3 patch view this listing was extracted from):
//   Commit:  f41959ccb2d9d4c722fe8fc3351401d53bcf4900
//   Author:  Manjunath Kudlur
//   Date:    Fri, 6 Nov 2015 16:27:58 -0800
//   Subject: TensorFlow: Initial commit of TensorFlow library.
//            TensorFlow is an open source software library for numerical
//            computation using data flow graphs.
//   Base CL: 107276108
//   File:    tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
//            (new file, index 0000000000..5f63906576, 207 insertions)
//
// NOTE(review): the extracted listing had lost every angle-bracketed token
// (system #include targets and template arguments). They are reconstructed
// below from the surrounding declarations and byte counts; the three system
// include targets in particular are a best guess from usage -- confirm
// against the repository before relying on them.
//
// Tests for the GPU debugging allocators:
//   * GPUDebugAllocator    -- header/footer overwrite detection.
//   * GPUNanResetAllocator -- memory reads back as non-finite values right
//                             after allocation and again after deallocation.

#if GOOGLE_CUDA

#include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"

#include <algorithm>
#include <cmath>    // std::isfinite
#include <cstring>  // memset
#include <vector>

#include "tensorflow/core/platform/port.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
#include "tensorflow/stream_executor/multi_platform_manager.h"
#include "tensorflow/stream_executor/stream_executor.h"
#include <gtest/gtest.h>

namespace gpu = ::perftools::gputools;

namespace tensorflow {

namespace {

// Shared body of the NaN-reset tests.
//
// Allocates 1024 floats from `a`, checks that they read back as non-finite,
// writes 1.0 into the first element and reads it back, deallocates, and then
// checks that the same device range reads back as non-finite again.
//
// Uses ASSERT_* and therefore must keep a void return type; a fatal failure
// returns from this helper only, so callers should invoke it as their last
// statement.
void VerifyNanResetRoundTrip(GPUNanResetAllocator* a, int device_id) {
  auto stream_exec =
      GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();

  constexpr size_t kNumFloats = 1024;
  std::vector<float> cpu_array(kNumFloats);
  std::vector<float> cpu_array_result(kNumFloats);

  // Allocate 1024 floats; fresh memory must not contain finite values.
  float* gpu_array = a->Allocate<float>(cpu_array.size());
  gpu::DeviceMemory<float> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}};
  ASSERT_TRUE(stream_exec->SynchronousMemcpy(
      &cpu_array[0], gpu_array_ptr, cpu_array.size() * sizeof(float)));
  for (float f : cpu_array) {
    ASSERT_FALSE(std::isfinite(f));
  }

  // Set one of the fields to 1.0 and push the whole buffer to the device.
  cpu_array[0] = 1.0f;
  ASSERT_TRUE(stream_exec->SynchronousMemcpy(
      &gpu_array_ptr, &cpu_array[0], cpu_array.size() * sizeof(float)));

  // Copy the data back and verify the write landed.
  ASSERT_TRUE(stream_exec->SynchronousMemcpy(
      &cpu_array_result[0], gpu_array_ptr,
      cpu_array_result.size() * sizeof(float)));
  ASSERT_EQ(1.0f, cpu_array_result[0]);

  // Free the array.
  a->Deallocate(gpu_array);

  // Deliberately read the just-freed range: all values should have been
  // reset to non-finite on deallocation.
  ASSERT_TRUE(stream_exec->SynchronousMemcpy(
      &cpu_array_result[0], gpu_array_ptr,
      cpu_array_result.size() * sizeof(float)));
  for (float f : cpu_array_result) {
    ASSERT_FALSE(std::isfinite(f));
  }
}

}  // namespace

// Writing strictly inside the allocated range must leave both guard regions
// intact and must not trigger an error on free.
TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
  const int device_id = 0;
  GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30), device_id);
  auto stream_exec =
      GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();

  for (int s : {8}) {
    std::vector<int64> cpu_array(s);
    memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
    int64* gpu_array = a.Allocate<int64>(cpu_array.size());
    gpu::DeviceMemory<int64> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}};
    ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_array_ptr, &cpu_array[0],
                                               s * sizeof(int64)));
    EXPECT_TRUE(a.CheckHeader(gpu_array));
    EXPECT_TRUE(a.CheckFooter(gpu_array));

    // Confirm no error on free.
    a.DeallocateRaw(gpu_array);
  }
}

// Clobbering the word immediately before the user region must make the
// process die when the buffer is freed.
TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
  for (int s : {8, 211}) {
    EXPECT_DEATH(
        {
          const int device_id = 0;
          GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30),
                              device_id);
          auto stream_exec =
              GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();

          std::vector<int64> cpu_array(s);
          memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
          int64* gpu_array = a.Allocate<int64>(cpu_array.size());

          gpu::DeviceMemory<int64> gpu_array_ptr{
              gpu::DeviceMemoryBase{gpu_array}};
          ASSERT_TRUE(stream_exec->SynchronousMemcpy(
              &gpu_array_ptr, &cpu_array[0], cpu_array.size() * sizeof(int64)));

          // One int64 before the user pointer.
          gpu::DeviceMemory<int64> gpu_hdr_ptr{
              gpu::DeviceMemoryBase{gpu_array - 1}};
          // Clobber first word of the header.
          float pi = 3.1417;
          ASSERT_TRUE(
              stream_exec->SynchronousMemcpy(&gpu_hdr_ptr, &pi, sizeof(float)));

          // Expect error on free.
          a.Deallocate(gpu_array);
        },
        "");
  }
}

// Clobbering the word immediately after the user region must make the
// process die when the buffer is freed.
TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
  for (int s : {8, 22}) {
    EXPECT_DEATH(
        {
          const int device_id = 0;
          GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30),
                              device_id);
          auto stream_exec =
              GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();

          std::vector<int64> cpu_array(s);
          memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
          int64* gpu_array = a.Allocate<int64>(cpu_array.size());

          gpu::DeviceMemory<int64> gpu_array_ptr{
              gpu::DeviceMemoryBase{gpu_array}};
          ASSERT_TRUE(stream_exec->SynchronousMemcpy(
              &gpu_array_ptr, &cpu_array[0], cpu_array.size() * sizeof(int64)));

          // Clobber word of the footer (first int64 past the user region).
          gpu::DeviceMemory<int64> gpu_ftr_ptr{
              gpu::DeviceMemoryBase{gpu_array + s}};
          float pi = 3.1417;
          ASSERT_TRUE(
              stream_exec->SynchronousMemcpy(&gpu_ftr_ptr, &pi, sizeof(float)));

          // Expect error on free.
          a.Deallocate(gpu_array);
        },
        "");
  }
}

// NaN reset layered directly over the BFC allocator.
TEST(GPUDebugAllocatorTest, ResetToNan) {
  const int device_id = 0;
  GPUNanResetAllocator a(new GPUBFCAllocator(device_id, 1 << 30), device_id);
  VerifyNanResetRoundTrip(&a, device_id);
}

// NaN reset layered over the header/footer-checking debug allocator.
TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
  const int device_id = 0;
  // NaN reset must be the outer-most allocator.
  GPUNanResetAllocator a(
      new GPUDebugAllocator(new GPUBFCAllocator(device_id, 1 << 30), device_id),
      device_id);
  VerifyNanResetRoundTrip(&a, device_id);
}

// The debug allocator reports that it tracks allocation sizes.
TEST(GPUDebugAllocatorTest, TracksSizes) {
  GPUDebugAllocator a(new GPUBFCAllocator(0, 1 << 30), 0);
  EXPECT_TRUE(a.TracksAllocationSizes());
}

// A single float is requested as 4 bytes; the stacked allocators report 256
// bytes as the allocated size for it.
TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
  GPUNanResetAllocator a(
      new GPUDebugAllocator(new GPUBFCAllocator(0, 1 << 30), 0), 0);
  float* t1 = a.Allocate<float>(1);
  EXPECT_EQ(4, a.RequestedSize(t1));
  EXPECT_EQ(256, a.AllocatedSize(t1));
  a.Deallocate(t1);
}

}  // namespace tensorflow

#endif  // GOOGLE_CUDA