aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/check_numerics_op_gpu.cu.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/check_numerics_op_gpu.cu.cc')
-rw-r--r--tensorflow/core/kernels/check_numerics_op_gpu.cu.cc62
1 files changed, 62 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/check_numerics_op_gpu.cu.cc b/tensorflow/core/kernels/check_numerics_op_gpu.cu.cc
new file mode 100644
index 0000000000..cb84f98731
--- /dev/null
+++ b/tensorflow/core/kernels/check_numerics_op_gpu.cu.cc
@@ -0,0 +1,62 @@
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <assert.h>
+
+#include <math.h>
+#include <algorithm>
+
+#include "tensorflow/core/platform/port.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+namespace {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+// A Cuda kernel to check if each element is Inf or Nan. If any exists, the
+// relevant elements in abnormal_detected will be set
+template <typename T>
+__global__ void CheckNumericsKernel(const T *data, int size,
+ int abnormal_detected[2]) {
+ const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
+ const int32 total_thread_count = gridDim.x * blockDim.x;
+
+ int32 offset = thread_id;
+
+ while (offset < size) {
+ if (isnan(data[offset])) {
+ abnormal_detected[0] = 1;
+ }
+ if (isinf(data[offset])) {
+ abnormal_detected[1] = 1;
+ }
+ offset += total_thread_count;
+ }
+}
+
+} // namespace
+
+// A simple launch pad to launch the Cuda kernels that checks the numerical
+// abnormality in the given array
+template <typename T>
+struct CheckNumericsLaunch {
+ void Run(const GPUDevice &d, const T *data, int size,
+ int abnormal_detected[2]) {
+ const int32 block_size = d.maxCudaThreadsPerBlock();
+ const int32 num_blocks =
+ (d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor()) /
+ block_size;
+
+ CheckNumericsKernel<T><<<num_blocks, block_size, 0, d.stream()>>>(
+ data, size, abnormal_detected);
+ }
+};
+
+template struct CheckNumericsLaunch<float>;
+template struct CheckNumericsLaunch<double>;
+
+} // namespace tensorflow
+#endif // GOOGLE_CUDA