about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-05-26 15:14:00 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-05-26 16:18:36 -0700
commit: 36357e7e1127873165694a38e3a989df4e0b6ffe (patch)
tree: eaf00810dfaefdc7f308a088829a3798af23e7bc
parent: e1b4934bb59904ee4dd243a34cc8356ff6bd266d (diff)
Added support for half floats to the batch normalization op
Change: 123368006
-rw-r--r-- tensorflow/core/framework/tensor_testutil.h 19
-rw-r--r-- tensorflow/core/kernels/batch_norm_op.cc 42
-rw-r--r-- tensorflow/core/kernels/batch_norm_op.h 4
-rw-r--r-- tensorflow/core/kernels/batch_norm_op_gpu.cu.cc 2
-rw-r--r-- tensorflow/core/kernels/batch_norm_op_test.cc 27
-rw-r--r-- tensorflow/core/kernels/ops_testutil.h 22
6 files changed, 96 insertions, 20 deletions
diff --git a/tensorflow/core/framework/tensor_testutil.h b/tensorflow/core/framework/tensor_testutil.h
index 8d14c25261..4efa7298d1 100644
--- a/tensorflow/core/framework/tensor_testutil.h
+++ b/tensorflow/core/framework/tensor_testutil.h
@@ -60,6 +60,19 @@ void FillValues(Tensor* tensor, gtl::ArraySlice<T> vals) {
}
}
+// Fills in '*tensor' with 'vals', converting the types as needed.
+template <typename T, typename SrcType>
+void FillValues(Tensor* tensor, std::initializer_list<SrcType> vals) {
+ auto flat = tensor->flat<T>();
+ CHECK_EQ(flat.size(), vals.size());
+ if (flat.size() > 0) {
+ size_t i = 0;
+ for (auto itr = vals.begin(); itr != vals.end(); ++itr, ++i) {
+ flat(i) = T(*itr);
+ }
+ }
+}
+
// Fills in '*tensor' with a sequence of value of val, val+1, val+2, ...
// Tensor x(&alloc, DT_FLOAT, TensorShape({2, 2}));
// test::FillIota<float>(&x, 1.0);
@@ -100,7 +113,8 @@ namespace internal {
template <typename T>
struct is_floating_point_type {
- static const bool value = std::is_same<T, float>::value ||
+ static const bool value = std::is_same<T, Eigen::half>::value ||
+ std::is_same<T, float>::value ||
std::is_same<T, double>::value ||
std::is_same<T, std::complex<float> >::value ||
std::is_same<T, std::complex<double> >::value;
@@ -175,7 +189,8 @@ struct Expector<T, true> {
static void Near(const T& a, const T& b, const double abs_err) {
if (a != b) { // Takes care of inf.
- EXPECT_LE(std::abs(a - b), abs_err) << "a = " << a << " b = " << b;
+ EXPECT_LE(double(Eigen::numext::abs(a - b)), abs_err) << "a = " << a
+ << " b = " << b;
}
}
diff --git a/tensorflow/core/kernels/batch_norm_op.cc b/tensorflow/core/kernels/batch_norm_op.cc
index 2389af050a..01936e9fd5 100644
--- a/tensorflow/core/kernels/batch_norm_op.cc
+++ b/tensorflow/core/kernels/batch_norm_op.cc
@@ -33,8 +33,10 @@ template <typename Device, typename T>
class BatchNormOp : public OpKernel {
public:
explicit BatchNormOp(OpKernelConstruction* context) : OpKernel(context) {
+ float variance_epsilon;
OP_REQUIRES_OK(context,
- context->GetAttr("variance_epsilon", &variance_epsilon_));
+ context->GetAttr("variance_epsilon", &variance_epsilon));
+ variance_epsilon_ = T(variance_epsilon);
OP_REQUIRES_OK(context, context->GetAttr("scale_after_normalization",
&scale_after_normalization_));
}
@@ -73,7 +75,7 @@ class BatchNormOp : public OpKernel {
}
private:
- float variance_epsilon_;
+ T variance_epsilon_;
bool scale_after_normalization_;
};
@@ -81,8 +83,10 @@ template <typename Device, typename T>
class BatchNormGradOp : public OpKernel {
public:
explicit BatchNormGradOp(OpKernelConstruction* context) : OpKernel(context) {
+ float variance_epsilon;
OP_REQUIRES_OK(context,
- context->GetAttr("variance_epsilon", &variance_epsilon_));
+ context->GetAttr("variance_epsilon", &variance_epsilon));
+ variance_epsilon_ = T(variance_epsilon);
OP_REQUIRES_OK(context, context->GetAttr("scale_after_normalization",
&scale_after_normalization_));
}
@@ -145,7 +149,7 @@ class BatchNormGradOp : public OpKernel {
}
private:
- float variance_epsilon_;
+ T variance_epsilon_;
bool scale_after_normalization_;
};
@@ -155,6 +159,7 @@ class BatchNormGradOp : public OpKernel {
.TypeConstraint<T>("T"), \
BatchNormOp<CPUDevice, T>);
+REGISTER_KERNEL(Eigen::half);
REGISTER_KERNEL(float);
REGISTER_KERNEL(double);
#undef REGISTER_KERNEL
@@ -168,12 +173,13 @@ namespace functor {
const GPUDevice& d, typename TTypes<T, 4>::ConstTensor input, \
typename TTypes<T>::ConstVec mean, typename TTypes<T>::ConstVec var, \
typename TTypes<T>::ConstVec beta, typename TTypes<T>::ConstVec gamma, \
- float variance_epsilon, bool scale_after_normalization, \
+ T variance_epsilon, bool scale_after_normalization, \
typename TTypes<T, 4>::Tensor output); \
extern template struct BatchNorm<GPUDevice, T>;
#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
+DECLARE_GPU_SPECS(Eigen::half);
DECLARE_GPU_SPECS(float);
#undef DECLARE_GPU_SPEC
} // namespace functor
@@ -185,6 +191,7 @@ DECLARE_GPU_SPECS(float);
.TypeConstraint<T>("T"), \
BatchNormOp<GPUDevice, T>);
+REGISTER_GPU_KERNEL(Eigen::half);
REGISTER_GPU_KERNEL(float);
#undef REGISTER_GPU_KERNEL
@@ -196,6 +203,7 @@ REGISTER_GPU_KERNEL(float);
.TypeConstraint<T>("T"), \
BatchNormGradOp<CPUDevice, T>);
+REGISTER_KERNEL(Eigen::half);
REGISTER_KERNEL(float);
REGISTER_KERNEL(double);
#undef REGISTER_KERNEL
@@ -203,21 +211,22 @@ REGISTER_KERNEL(double);
#if GOOGLE_CUDA
// Forward declarations of the functor specializations for GPU.
namespace functor {
-#define DECLARE_GPU_SPEC(T) \
- template <> \
- void BatchNormGrad<GPUDevice, T>::operator()( \
- const GPUDevice& d, typename TTypes<T, 4>::ConstTensor input, \
- typename TTypes<T>::ConstVec mean, typename TTypes<T>::ConstVec var, \
- typename TTypes<T>::ConstVec gamma, \
- typename TTypes<T, 4>::ConstTensor out_backprop, float variance_epsilon, \
- bool scale_after_normalization, typename TTypes<T, 4>::Tensor dx, \
- typename TTypes<T>::Vec dm, typename TTypes<T>::Vec dv, \
- typename TTypes<T>::Vec db, typename TTypes<T>::Vec dg, \
- typename TTypes<T>::Vec scratch1, typename TTypes<T>::Vec scratch2); \
+#define DECLARE_GPU_SPEC(T) \
+ template <> \
+ void BatchNormGrad<GPUDevice, T>::operator()( \
+ const GPUDevice& d, typename TTypes<T, 4>::ConstTensor input, \
+ typename TTypes<T>::ConstVec mean, typename TTypes<T>::ConstVec var, \
+ typename TTypes<T>::ConstVec gamma, \
+ typename TTypes<T, 4>::ConstTensor out_backprop, T variance_epsilon, \
+ bool scale_after_normalization, typename TTypes<T, 4>::Tensor dx, \
+ typename TTypes<T>::Vec dm, typename TTypes<T>::Vec dv, \
+ typename TTypes<T>::Vec db, typename TTypes<T>::Vec dg, \
+ typename TTypes<T>::Vec scratch1, typename TTypes<T>::Vec scratch2); \
extern template struct BatchNormGrad<GPUDevice, T>;
#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
+DECLARE_GPU_SPECS(Eigen::half);
DECLARE_GPU_SPECS(float);
#undef DECLARE_GPU_SPEC
} // namespace functor
@@ -229,6 +238,7 @@ DECLARE_GPU_SPECS(float);
.TypeConstraint<T>("T"), \
BatchNormGradOp<GPUDevice, T>);
+REGISTER_GPU_KERNEL(Eigen::half);
REGISTER_GPU_KERNEL(float);
#undef REGISTER_GPU_KERNEL
diff --git a/tensorflow/core/kernels/batch_norm_op.h b/tensorflow/core/kernels/batch_norm_op.h
index baef68125e..94707e9be9 100644
--- a/tensorflow/core/kernels/batch_norm_op.h
+++ b/tensorflow/core/kernels/batch_norm_op.h
@@ -29,7 +29,7 @@ struct BatchNorm {
typename TTypes<T>::ConstVec mean,
typename TTypes<T>::ConstVec var,
typename TTypes<T>::ConstVec beta,
- typename TTypes<T>::ConstVec gamma, float variance_epsilon,
+ typename TTypes<T>::ConstVec gamma, T variance_epsilon,
bool scale_after_normalization,
typename TTypes<T, 4>::Tensor output) {
const int depth = mean.dimension(0);
@@ -77,7 +77,7 @@ struct BatchNormGrad {
typename TTypes<T>::ConstVec var,
typename TTypes<T>::ConstVec gamma,
typename TTypes<T, 4>::ConstTensor out_backprop,
- float variance_epsilon, bool scale_after_normalization,
+ T variance_epsilon, bool scale_after_normalization,
typename TTypes<T, 4>::Tensor dx, typename TTypes<T>::Vec dm,
typename TTypes<T>::Vec dv, typename TTypes<T>::Vec db,
typename TTypes<T>::Vec dg, typename TTypes<T>::Vec scratch1,
diff --git a/tensorflow/core/kernels/batch_norm_op_gpu.cu.cc b/tensorflow/core/kernels/batch_norm_op_gpu.cu.cc
index 26433c9d12..2379cb612b 100644
--- a/tensorflow/core/kernels/batch_norm_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/batch_norm_op_gpu.cu.cc
@@ -25,7 +25,9 @@ namespace tensorflow {
typedef Eigen::GpuDevice GPUDevice;
template struct functor::BatchNorm<GPUDevice, float>;
+template struct functor::BatchNorm<GPUDevice, Eigen::half>;
template struct functor::BatchNormGrad<GPUDevice, float>;
+template struct functor::BatchNormGrad<GPUDevice, Eigen::half>;
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/batch_norm_op_test.cc b/tensorflow/core/kernels/batch_norm_op_test.cc
index e70bcc5b4c..9b7bf6d149 100644
--- a/tensorflow/core/kernels/batch_norm_op_test.cc
+++ b/tensorflow/core/kernels/batch_norm_op_test.cc
@@ -59,4 +59,31 @@ TEST_F(BatchNormOpTest, Simple) {
test::ExpectTensorNear<float>(expected, *GetOutput(0), 0.01);
}
+TEST_F(BatchNormOpTest, Fp16) {
+ TF_EXPECT_OK(
+ NodeDefBuilder("batch_norm_op", "BatchNormWithGlobalNormalization")
+ .Input(FakeInput(DT_HALF))
+ .Input(FakeInput(DT_HALF))
+ .Input(FakeInput(DT_HALF))
+ .Input(FakeInput(DT_HALF))
+ .Input(FakeInput(DT_HALF))
+ .Attr("scale_after_normalization", false)
+ .Attr("variance_epsilon", 0.001)
+ .Finalize(node_def()));
+ TF_EXPECT_OK(InitOpWithGraphVersion(8));
+ AddInputFromList<Eigen::half>(TensorShape({1, 1, 6, 2}),
+ {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6});
+ AddInputFromList<Eigen::half>(TensorShape({2}), {10, 20});
+ AddInputFromList<Eigen::half>(TensorShape({2}), {0.25, 0.5});
+ AddInputFromList<Eigen::half>(TensorShape({2}), {0.1, 0.6});
+ AddInputFromList<Eigen::half>(TensorShape({2}), {0.0, 0.0});
+ TF_ASSERT_OK(RunOpKernel());
+
+ Tensor expected(allocator(), DT_HALF, TensorShape({1, 1, 6, 2}));
+ test::FillValues<Eigen::half>(
+ &expected, {-17.86, -22.00, -15.87, -20.59, -13.87, -19.18, -21.86,
+ -33.31, -23.85, -34.72, -25.85, -36.13});
+ test::ExpectTensorNear<Eigen::half>(expected, *GetOutput(0), 0.1);
+}
+
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/ops_testutil.h b/tensorflow/core/kernels/ops_testutil.h
index 521168cb17..b2b926e78d 100644
--- a/tensorflow/core/kernels/ops_testutil.h
+++ b/tensorflow/core/kernels/ops_testutil.h
@@ -150,6 +150,28 @@ class OpsTestBase : public ::testing::Test {
}
}
+ // Convenience function to add an input and populate it with the elements from
+ // an initializer list converting the types as needed.
+ template <typename T, typename SrcType>
+ void AddInputFromList(const TensorShape& shape,
+ std::initializer_list<SrcType> data) {
+ CHECK_GT(input_types_.size(), inputs_.size())
+ << "Adding more inputs than types; perhaps you need to call MakeOp";
+ bool is_ref = IsRefType(input_types_[inputs_.size()]);
+ Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
+ DataTypeToEnum<T>::v(), shape);
+ test::FillValues<T>(input, data);
+ tensors_.push_back(input);
+ if (is_ref) {
+ CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]),
+ DataTypeToEnum<T>::v());
+ inputs_.push_back({&lock_for_refs_, input});
+ } else {
+ CHECK_EQ(input_types_[inputs_.size()], DataTypeToEnum<T>::v());
+ inputs_.push_back({nullptr, input});
+ }
+ }
+
// Runs an operation producing 'num_outputs' outputs.
//
// Returns the context's status after running the operation.