Diffstat (limited to 'tensorflow/core/kernels/training_ops_test.cc')
-rw-r--r-- | tensorflow/core/kernels/training_ops_test.cc | 226
1 file changed, 226 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/training_ops_test.cc b/tensorflow/core/kernels/training_ops_test.cc
new file mode 100644
index 0000000000..3c629badb6
--- /dev/null
+++ b/tensorflow/core/kernels/training_ops_test.cc
@@ -0,0 +1,226 @@
+#include <gtest/gtest.h>
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/public/tensor.h"
+
+namespace tensorflow {
+
+// We focus on the single-threaded performance of training ops.
+static SessionOptions InitSingleThreadedOptions() {
+  SessionOptions opts;
+  opts.config.set_intra_op_parallelism_threads(1);
+  opts.config.set_inter_op_parallelism_threads(1);
+  return opts;
+}
+
+static SessionOptions* GetOptions() {
+  static SessionOptions opts = InitSingleThreadedOptions();
+  return &opts;
+}
+
+static Node* Var(Graph* g, int n) {
+  return test::graph::Var(g, DT_FLOAT, TensorShape({n}));
+}
+
+static Node* Zeros(Graph* g, int n) {
+  Tensor data(DT_FLOAT, TensorShape({n}));
+  data.flat<float>().setZero();
+  return test::graph::Constant(g, data);
+}
+
+static Node* Random(Graph* g, int n) {
+  Tensor data(DT_FLOAT, TensorShape({n}));
+  data.flat<float>().setRandom();
+  return test::graph::Constant(g, data);
+}
+
+static Node* Scalar(Graph* g, float val) {
+  Tensor data(DT_FLOAT, TensorShape({}));
+  data.flat<float>()(0) = val;
+  return test::graph::Constant(g, data);
+}
+
+static void SGD(int32 n, Graph** init_g, Graph** train_g) {
+  RequireDefaultOps();
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    test::graph::Assign(g, var, Zeros(g, n));
+    *init_g = g;
+  }
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto lr = Scalar(g, 0.01);
+    auto grad = Random(g, n);
+    test::graph::Multi(g, "ApplyGradientDescent", {var, lr, grad});
+    *train_g = g;
+  }
+}
+
+static void BM_SGD(int iters, int params) {
+  const int64 tot = static_cast<int64>(iters) * params;
+  testing::ItemsProcessed(tot);
+  testing::BytesProcessed(tot * sizeof(float));
+  Graph* init;
+  Graph* train;
+  SGD(params, &init, &train);
+  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+}
+BENCHMARK(BM_SGD)->Arg(128 << 10)->Arg(256 << 10);
+
+static void Adagrad(int32 n, Graph** init_g, Graph** train_g) {
+  RequireDefaultOps();
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto accum = Var(g, n);
+    auto zero = Zeros(g, n);
+    test::graph::Assign(g, var, zero);
+    test::graph::Assign(g, accum, zero);
+    *init_g = g;
+  }
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto accum = Var(g, n);
+    auto lr = Scalar(g, 0.01);
+    auto grad = Random(g, n);
+    test::graph::Multi(g, "ApplyAdagrad", {var, accum, lr, grad});
+    *train_g = g;
+  }
+}
+
+static void BM_Adagrad(int iters, int params) {
+  const int64 tot = static_cast<int64>(iters) * params;
+  testing::ItemsProcessed(tot);
+  testing::BytesProcessed(tot * sizeof(float));
+  Graph* init;
+  Graph* train;
+  Adagrad(params, &init, &train);
+  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+}
+BENCHMARK(BM_Adagrad)->Arg(128 << 10)->Arg(256 << 10);
+
+static void Momentum(int32 n, Graph** init_g, Graph** train_g) {
+  RequireDefaultOps();
+  TensorShape shape({n});
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto accum = Var(g, n);
+    auto zero = Zeros(g, n);
+    test::graph::Assign(g, var, zero);
+    test::graph::Assign(g, accum, zero);
+    *init_g = g;
+  }
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto accum = Var(g, n);
+    auto lr = Scalar(g, 0.01);
+    auto grad = Random(g, n);
+    auto mom = Scalar(g, 0.01);
+    test::graph::Multi(g, "ApplyMomentum", {var, accum, lr, grad, mom});
+    *train_g = g;
+  }
+}
+
+static void BM_Momentum(int iters, int params) {
+  const int64 tot = static_cast<int64>(iters) * params;
+  testing::ItemsProcessed(tot);
+  testing::BytesProcessed(tot * sizeof(float));
+  Graph* init;
+  Graph* train;
+  Momentum(params, &init, &train);
+  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+}
+BENCHMARK(BM_Momentum)->Arg(128 << 10)->Arg(256 << 10);
+
+static void Adam(int32 n, Graph** init_g, Graph** train_g) {
+  RequireDefaultOps();
+  TensorShape shape({n});
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto m = Var(g, n);
+    auto v = Var(g, n);
+    auto zero = Zeros(g, n);
+    test::graph::Assign(g, var, zero);
+    test::graph::Assign(g, m, zero);
+    test::graph::Assign(g, v, zero);
+    *init_g = g;
+  }
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto m = Var(g, n);
+    auto v = Var(g, n);
+    auto beta1_power = Scalar(g, 0.9);
+    auto beta2_power = Scalar(g, 0.99);
+    auto lr = Scalar(g, 0.01);
+    auto beta1 = Scalar(g, 0.9);
+    auto beta2 = Scalar(g, 0.99);
+    auto epsilon = Scalar(g, 1e-8);
+    auto grad = Random(g, n);
+    test::graph::Multi(g, "ApplyAdam", {var, m, v, beta1_power, beta2_power, lr,
+                                        beta1, beta2, epsilon, grad});
+    *train_g = g;
+  }
+}
+
+static void BM_Adam(int iters, int params) {
+  const int64 tot = static_cast<int64>(iters) * params;
+  testing::ItemsProcessed(tot);
+  testing::BytesProcessed(tot * sizeof(float));
+  Graph* init;
+  Graph* train;
+  Adam(params, &init, &train);
+  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+}
+BENCHMARK(BM_Adam)->Arg(128 << 10)->Arg(256 << 10);
+
+static void RMSProp(int32 n, Graph** init_g, Graph** train_g) {
+  RequireDefaultOps();
+  TensorShape shape({n});
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto ms = Var(g, n);
+    auto mom = Var(g, n);
+    auto zero = Zeros(g, n);
+    test::graph::Assign(g, var, zero);
+    test::graph::Assign(g, ms, zero);
+    test::graph::Assign(g, mom, zero);
+    *init_g = g;
+  }
+  {
+    Graph* g = new Graph(OpRegistry::Global());
+    auto var = Var(g, n);
+    auto ms = Var(g, n);
+    auto mom = Var(g, n);
+    auto lr = Scalar(g, 0.01);
+    auto rho = Scalar(g, 0.9);
+    auto momentum = Scalar(g, 0.9);
+    auto epsilon = Scalar(g, 1e-8);
+    auto grad = Random(g, n);
+    test::graph::Multi(g, "ApplyRMSProp",
+                       {var, ms, mom, lr, rho, momentum, epsilon, grad});
+    *train_g = g;
+  }
+}
+
+static void BM_RMSProp(int iters, int params) {
+  const int64 tot = static_cast<int64>(iters) * params;
+  testing::ItemsProcessed(tot);
+  testing::BytesProcessed(tot * sizeof(float));
+  Graph* init;
+  Graph* train;
+  RMSProp(params, &init, &train);
+  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+}
+BENCHMARK(BM_RMSProp)->Arg(128 << 10)->Arg(256 << 10);
+
+}  // end namespace tensorflow
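For reference, these are the dense update rules the five Apply* kernels compute, written with variable θ, gradient g, learning rate α, and hyperparameters matching the Scalar inputs above. This is a standard summary of the ops' semantics, not part of the diff itself:

\[
\begin{aligned}
\textbf{SGD:}\quad & \theta \leftarrow \theta - \alpha\, g \\
\textbf{Adagrad:}\quad & a \leftarrow a + g^2, \qquad \theta \leftarrow \theta - \alpha\, g / \sqrt{a} \\
\textbf{Momentum:}\quad & m \leftarrow \mu\, m + g, \qquad \theta \leftarrow \theta - \alpha\, m \\
\textbf{Adam:}\quad & m \leftarrow \beta_1 m + (1-\beta_1)\, g, \qquad v \leftarrow \beta_2 v + (1-\beta_2)\, g^2, \\
& \theta \leftarrow \theta - \alpha\, \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \cdot \frac{m}{\sqrt{v} + \epsilon} \\
\textbf{RMSProp:}\quad & s \leftarrow \rho\, s + (1-\rho)\, g^2, \qquad m \leftarrow \mu\, m + \alpha\, g / \sqrt{s + \epsilon}, \qquad \theta \leftarrow \theta - m
\end{aligned}
\]

In ApplyAdam, the beta1_power and beta2_power inputs carry β₁ᵗ and β₂ᵗ, so the kernel folds the bias correction into its effective step size rather than correcting m and v explicitly.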
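Assuming the usual TensorFlow kernel-benchmark setup of this era (the build target and flag below are assumptions, not shown in this diff), the suite would be run along the lines of

  bazel run -c opt //tensorflow/core/kernels:training_ops_test -- --benchmarks=all

Each benchmark is registered at two problem sizes, 128K and 256K parameters (128 << 10 and 256 << 10), and reports ItemsProcessed/BytesProcessed, so throughput is directly comparable across the five optimizers at a fixed working-set size.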