diff options
author | Manjunath Kudlur <keveman@gmail.com> | 2015-11-06 16:27:58 -0800 |
---|---|---|
committer | Manjunath Kudlur <keveman@gmail.com> | 2015-11-06 16:27:58 -0800 |
commit | f41959ccb2d9d4c722fe8fc3351401d53bcf4900 (patch) | |
tree | ef0ca22cb2a5ac4bdec9d080d8e0788a53ed496d /tensorflow/core/kernels/check_numerics_op.cc |
TensorFlow: Initial commit of TensorFlow library.
TensorFlow is an open source software library for numerical computation
using data flow graphs.
Base CL: 107276108
Diffstat (limited to 'tensorflow/core/kernels/check_numerics_op.cc')
-rw-r--r-- | tensorflow/core/kernels/check_numerics_op.cc | 190 |
1 file changed, 190 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/check_numerics_op.cc b/tensorflow/core/kernels/check_numerics_op.cc new file mode 100644 index 0000000000..65487a303c --- /dev/null +++ b/tensorflow/core/kernels/check_numerics_op.cc @@ -0,0 +1,190 @@ +// See docs in ../ops/array_ops.cc. + +#include <math.h> +#include <algorithm> + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/public/tensor.h" + +#if GOOGLE_CUDA +#include "tensorflow/core/common_runtime/gpu_device_context.h" +#include "tensorflow/stream_executor/stream.h" +#endif // GOOGLE_CUDA +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +#if GOOGLE_CUDA +template <typename T> +struct CheckNumericsLaunch { + void Run(const GPUDevice& d, const T* data, int size, + int abnormal_detected[2]); +}; +#endif + +namespace { + +template <typename Device, typename T> +class CheckNumericsOp; + +// Partial specialization for CPU +template <typename T> +class CheckNumericsOp<CPUDevice, T> : public OpKernel { + public: + explicit CheckNumericsOp(OpKernelConstruction* context) : OpKernel(context) { + // message_ is used as the prefix for the assertion error message. For + // instance, this can be the name of the input op that produced the tensor. + OP_REQUIRES_OK(context, context->GetAttr("message", &message_)); + } + + void Compute(OpKernelContext* context) override { + // pass along the input to the output + context->set_output(0, context->input(0)); + + auto in = context->input(0).flat<T>(); + const T* data = in.data(); + const int size = in.size(); + // Check to see if any element of the tensor is NaN or Inf. 
+ int fp_props = + std::accumulate(data, data + size, 0, [](const int& x, const T& y) { + int prop = std::fpclassify(y); + int result = x; + if (prop == FP_INFINITE) { + result |= kInfBit; + } else if (prop == FP_NAN) { + result |= kNaNBit; + } + return result; + }); + string status; + if ((fp_props & kInfBit) && (fp_props & kNaNBit)) { + status = "Inf and NaN"; + } else { + if (fp_props & kInfBit) { + status = "Inf"; + } + if (fp_props & kNaNBit) { + status = "NaN"; + } + } + if (!status.empty()) { + context->SetStatus(errors::InvalidArgument(message_, " : Tensor had ", + status, " values")); + } + } + + private: + string message_; + static const int kInfBit = 0x01; + static const int kNaNBit = 0x02; +}; + +#if GOOGLE_CUDA +// Partial specialization for GPU +template <typename T> +class CheckNumericsOp<GPUDevice, T> : public OpKernel { + public: + typedef GPUDevice Device; + + explicit CheckNumericsOp(OpKernelConstruction* context) : OpKernel(context) { + // message_ is used as the prefix for the assertion error message. For + // instance, this can be the name of the input op that produced the tensor. 
+ OP_REQUIRES_OK(context, context->GetAttr("message", &message_)); + } + + void Compute(OpKernelContext* context) override { + // pass along the input to the output + context->set_output(0, context->input(0)); + auto input = context->input(0).flat<T>(); + + // Allocate and initialize the elements to hold the check results + const int abnormal_detected_size = 2; + Tensor abnormal_detected; + OP_REQUIRES_OK(context, context->allocate_temp( + DT_INT32, TensorShape({abnormal_detected_size}), + &abnormal_detected)); + + auto* stream = context->op_device_context<GPUDeviceContext>()->stream(); + OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); + + perftools::gputools::DeviceMemoryBase abnormal_detected_ptr( + abnormal_detected.flat<int>().data(), + abnormal_detected.flat<int>().size()); + stream->ThenMemset32(&abnormal_detected_ptr, 0, + abnormal_detected.flat<int>().size() * sizeof(int)); + + // Call the Cuda kernels for the numerical checks + const Device& d = context->eigen_device<Device>(); + CheckNumericsLaunch<T>().Run(d, input.data(), input.size(), + abnormal_detected.flat<int>().data()); + + // Copy the results from device to host + AllocatorAttributes attr; + attr.set_on_host(true); + attr.set_gpu_compatible(true); + Tensor abnormal_detected_out; + OP_REQUIRES_OK(context, context->allocate_temp( + DT_INT32, TensorShape({abnormal_detected_size}), + &abnormal_detected_out, attr)); + int* abnormal_detected_host = abnormal_detected_out.flat<int>().data(); + stream->ThenMemcpy(abnormal_detected_host, abnormal_detected_ptr, + abnormal_detected_size * sizeof(int)); + stream->BlockHostUntilDone(); + OP_REQUIRES(context, stream->ok(), + errors::Internal("cudaMemcpy from device to host failed")); + + int is_nan = abnormal_detected_host[0]; + int is_inf = abnormal_detected_host[1]; + if (is_nan || is_inf) { + string status; + LOG(ERROR) << "abnormal_detected_host @" << abnormal_detected_host + << " = {" << is_nan << ", " << is_inf << "} " << 
message_; + + // Results should always be 1 or 0. If we see anything else then + // there has been some GPU memory corruption. + CHECK_GE(is_nan, 0); + CHECK_GE(is_inf, 0); + CHECK_LE(is_nan, 1); + CHECK_LE(is_inf, 1); + + if (is_nan && is_inf) { + status = "Inf and NaN"; + } else if (is_nan) { + status = "NaN"; + } else if (is_inf) { + status = "Inf"; + } + context->SetStatus(errors::InvalidArgument(message_, " : Tensor had ", + status, " values")); + } + } + + private: + string message_; +}; +#endif // GOOGLE_CUDA + +} // namespace + +REGISTER_KERNEL_BUILDER(Name("CheckNumerics") + .Device(DEVICE_CPU) + .TypeConstraint<float>("T"), + CheckNumericsOp<CPUDevice, float>); +REGISTER_KERNEL_BUILDER(Name("CheckNumerics") + .Device(DEVICE_CPU) + .TypeConstraint<double>("T"), + CheckNumericsOp<CPUDevice, double>); +#if GOOGLE_CUDA +REGISTER_KERNEL_BUILDER(Name("CheckNumerics") + .Device(DEVICE_GPU) + .TypeConstraint<float>("T"), + CheckNumericsOp<GPUDevice, float>); +REGISTER_KERNEL_BUILDER(Name("CheckNumerics") + .Device(DEVICE_GPU) + .TypeConstraint<double>("T"), + CheckNumericsOp<GPUDevice, double>); +#endif // GOOGLE_CUDA + +} // namespace tensorflow |