/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <vector>

#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/session_options.h"

namespace tensorflow {
namespace {

// Number of sparse entries generated per example. The example-index,
// feature-index and value tensors of a sparse feature group must all have
// num_examples * kSparseEntriesPerExample elements, so the factor lives in
// one place.
constexpr int kSparseEntriesPerExample = 4;

// Returns session options that request exactly one intra-op thread, one
// inter-op thread and a single-threaded session inter-op pool. The object is
// built on first use and intentionally never deleted.
const SessionOptions* GetSingleThreadedOptions() {
  static const SessionOptions* const kSessionOptions = []() {
    SessionOptions* const result = new SessionOptions();
    result->config.set_intra_op_parallelism_threads(1);
    result->config.set_inter_op_parallelism_threads(1);
    result->config.add_session_inter_op_thread_pool()->set_num_threads(1);
    return result;
  }();
  return kSessionOptions;
}

// Returns session options whose thread counts are all 0, i.e. left for the
// runtime to configure automatically. The object is built on first use and
// intentionally never deleted.
const SessionOptions* GetMultiThreadedOptions() {
  static const SessionOptions* const kSessionOptions = []() {
    SessionOptions* const result = new SessionOptions();
    result->config.set_intra_op_parallelism_threads(0);  // Auto-configured.
    result->config.set_inter_op_parallelism_threads(0);  // Auto-configured.
    result->config.add_session_inter_op_thread_pool()->set_num_threads(
        0);  // Auto-configured.
    return result;
  }();
  return kSessionOptions;
}

// Adds a float variable node of shape [n] to 'g'.
Node* Var(Graph* const g, const int n) {
  return test::graph::Var(g, DT_FLOAT, TensorShape({n}));
}

// Returns a vector of size 'nodes' with each node being of size 'node_size'.
std::vector<Node*> VarVector(Graph* const g, const int nodes,
                             const int node_size) {
  std::vector<Node*> result;
  result.reserve(nodes);
  for (int i = 0; i < nodes; ++i) {
    result.push_back(Var(g, node_size));
  }
  return result;
}

// Adds a float constant of the given 'shape', filled with zeros, to 'g'.
Node* Zeros(Graph* const g, const TensorShape& shape) {
  Tensor data(DT_FLOAT, shape);
  data.flat<float>().setZero();
  return test::graph::Constant(g, data);
}

// Adds a float constant of shape [n], filled with zeros, to 'g'.
Node* Zeros(Graph* const g, const int n) { return Zeros(g, TensorShape({n})); }

// Adds a float constant of shape [n], filled with ones, to 'g'.
Node* Ones(Graph* const g, const int n) {
  Tensor data(DT_FLOAT, TensorShape({n}));
  test::FillFn<float>(&data, [](const int i) { return 1.0f; });
  return test::graph::Constant(g, data);
}

// Adds an int64 constant of shape [sparse_features_per_group] holding
// 0, 1, ..., sparse_features_per_group - 1 to 'g'.
Node* SparseIndices(Graph* const g, const int sparse_features_per_group) {
  Tensor data(DT_INT64, TensorShape({sparse_features_per_group}));
  test::FillFn<int64>(&data, [](const int i) { return i; });
  return test::graph::Constant(g, data);
}

// Adds an int64 constant of num_examples * kSparseEntriesPerExample elements
// to 'g'. Element i holds i / kSparseEntriesPerExample, so every example id
// in [0, num_examples) appears kSparseEntriesPerExample consecutive times.
// 'sparse_features_per_group' is not used by this helper.
Node* SparseExampleIndices(Graph* const g, const int sparse_features_per_group,
                           const int num_examples) {
  const int x_size = num_examples * kSparseEntriesPerExample;
  Tensor data(DT_INT64, TensorShape({x_size}));
  test::FillFn<int64>(
      &data, [](const int i) { return i / kSparseEntriesPerExample; });
  return test::graph::Constant(g, data);
}

// Adds an int64 constant of num_examples * kSparseEntriesPerExample elements
// to 'g'. Element i holds i % sparse_features_per_group, so
// 'sparse_features_per_group' must be non-zero whenever the tensor is
// non-empty.
Node* SparseFeatureIndices(Graph* const g, const int sparse_features_per_group,
                           const int num_examples) {
  const int x_size = num_examples * kSparseEntriesPerExample;
  Tensor data(DT_INT64, TensorShape({x_size}));
  test::FillFn<int64>(
      &data, [&](const int i) { return i % sparse_features_per_group; });
  return test::graph::Constant(g, data);
}

// Adds a float constant of shape [n] to 'g' whose elements are each 0.0 or
// 1.0, chosen at random.
Node* RandomZeroOrOne(Graph* const g, const int n) {
  Tensor data(DT_FLOAT, TensorShape({n}));
  test::FillFn<float>(&data, [](const int i) {
    // Fill with 0.0 or 1.0 at random.
    return (random::New64() % 2) == 0 ? 0.0f : 1.0f;
  });
  return test::graph::Constant(g, data);
}

// Adds a float constant of shape [n, d] to 'g' whose elements are each 0.0 or
// 1.0, chosen at random.
Node* RandomZeroOrOneMatrix(Graph* const g, const int n, const int d) {
  Tensor data(DT_FLOAT, TensorShape({n, d}));
  test::FillFn<float>(&data, [](const int i) {
    // Fill with 0.0 or 1.0 at random.
    return (random::New64() % 2) == 0 ? 0.0f : 1.0f;
  });
  return test::graph::Constant(g, data);
}

// Builds the two graphs used by the SDCA benchmarks and stores them in
// '*init_g' and '*train_g'.
//
//   *init_g  - creates the sparse and dense weight variables and assigns zeros
//              to each of them.
//   *train_g - creates the same variables, synthetic sparse/dense features,
//              example weights, labels and example state, and feeds them into
//              a single "SdcaOptimizer" node.
//
// Both graphs are allocated with 'new'; this function does not free them.
void GetGraphs(const int32 num_examples, const int32 num_sparse_feature_groups,
               const int32 sparse_features_per_group,
               const int32 num_dense_feature_groups,
               const int32 dense_features_per_group, Graph** const init_g,
               Graph** train_g) {
  {
    // Build initialization graph
    Graph* g = new Graph(OpRegistry::Global());

    // These nodes have to be created first, and in the same way as the
    // nodes in the graph below.
    std::vector<Node*> sparse_weight_nodes =
        VarVector(g, num_sparse_feature_groups, sparse_features_per_group);
    std::vector<Node*> dense_weight_nodes =
        VarVector(g, num_dense_feature_groups, dense_features_per_group);

    Node* const multi_zero = Zeros(g, sparse_features_per_group);
    for (Node* n : sparse_weight_nodes) {
      test::graph::Assign(g, n, multi_zero);
    }
    Node* const zero = Zeros(g, dense_features_per_group);
    for (Node* n : dense_weight_nodes) {
      test::graph::Assign(g, n, zero);
    }

    *init_g = g;
  }

  {
    // Build execution graph
    Graph* g = new Graph(OpRegistry::Global());

    // These nodes have to be created first, and in the same way as the
    // nodes in the graph above.
    std::vector<Node*> sparse_weight_nodes =
        VarVector(g, num_sparse_feature_groups, sparse_features_per_group);
    std::vector<Node*> dense_weight_nodes =
        VarVector(g, num_dense_feature_groups, dense_features_per_group);

    // One index tensor and one weight variable per sparse feature group.
    std::vector<NodeBuilder::NodeOut> sparse_indices;
    std::vector<NodeBuilder::NodeOut> sparse_weights;
    sparse_indices.reserve(sparse_weight_nodes.size());
    sparse_weights.reserve(sparse_weight_nodes.size());
    for (Node* n : sparse_weight_nodes) {
      sparse_indices.push_back(
          NodeBuilder::NodeOut(SparseIndices(g, sparse_features_per_group)));
      sparse_weights.push_back(NodeBuilder::NodeOut(n));
    }

    std::vector<NodeBuilder::NodeOut> dense_weights;
    dense_weights.reserve(dense_weight_nodes.size());
    for (Node* n : dense_weight_nodes) {
      dense_weights.push_back(NodeBuilder::NodeOut(n));
    }

    // The three loops below are kept separate so that constants are added to
    // the graph in the same order as before.
    std::vector<NodeBuilder::NodeOut> sparse_example_indices;
    std::vector<NodeBuilder::NodeOut> sparse_feature_indices;
    std::vector<NodeBuilder::NodeOut> sparse_values;
    sparse_example_indices.reserve(num_sparse_feature_groups);
    for (int i = 0; i < num_sparse_feature_groups; ++i) {
      sparse_example_indices.push_back(NodeBuilder::NodeOut(
          SparseExampleIndices(g, sparse_features_per_group, num_examples)));
    }
    sparse_feature_indices.reserve(num_sparse_feature_groups);
    for (int i = 0; i < num_sparse_feature_groups; ++i) {
      sparse_feature_indices.push_back(NodeBuilder::NodeOut(
          SparseFeatureIndices(g, sparse_features_per_group, num_examples)));
    }
    sparse_values.reserve(num_sparse_feature_groups);
    for (int i = 0; i < num_sparse_feature_groups; ++i) {
      sparse_values.push_back(NodeBuilder::NodeOut(
          RandomZeroOrOne(g, num_examples * kSparseEntriesPerExample)));
    }

    std::vector<NodeBuilder::NodeOut> dense_features;
    dense_features.reserve(num_dense_feature_groups);
    for (int i = 0; i < num_dense_feature_groups; ++i) {
      dense_features.push_back(NodeBuilder::NodeOut(
          RandomZeroOrOneMatrix(g, num_examples, dense_features_per_group)));
    }

    Node* const weights = Ones(g, num_examples);
    Node* const labels = RandomZeroOrOne(g, num_examples);
    // NOTE(review): the width of 4 is presumably the per-example state layout
    // expected by the SdcaOptimizer op - confirm against the op definition.
    Node* const example_state_data = Zeros(g, TensorShape({num_examples, 4}));

    Node* sdca = nullptr;
    TF_CHECK_OK(
        NodeBuilder(g->NewName("sdca"), "SdcaOptimizer")
            .Attr("loss_type", "logistic_loss")
            .Attr("num_sparse_features", num_sparse_feature_groups)
            .Attr("num_sparse_features_with_values", num_sparse_feature_groups)
            .Attr("num_dense_features", num_dense_feature_groups)
            .Attr("l1", 0.0)
            .Attr("l2", 1.0)
            .Attr("num_loss_partitions", 1)
            .Attr("num_inner_iterations", 2)
            .Input(sparse_example_indices)
            .Input(sparse_feature_indices)
            .Input(sparse_values)
            .Input(dense_features)
            .Input(weights)
            .Input(labels)
            .Input(sparse_indices)
            .Input(sparse_weights)
            .Input(dense_weights)
            .Input(example_state_data)
            .Finalize(g, &sdca));

    *train_g = g;
  }
}

// Benchmarks SDCA on a small mixed problem (20 sparse groups of 5 features,
// 1 dense group of 20 features) with single-threaded session options. Graph
// construction happens while the benchmark timer is stopped.
void BM_SDCA(const int iters, const int num_examples) {
  testing::StopTiming();
  Graph* init = nullptr;
  Graph* train = nullptr;
  GetGraphs(num_examples, 20 /* sparse feature groups */,
            5 /* sparse features per group */, 1 /* dense feature groups*/,
            20 /* dense features per group */, &init, &train);
  testing::StartTiming();
  test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters);
}

// Benchmarks SDCA on a dense-only problem (5 dense groups of 200000 features)
// with single-threaded session options. Graph construction happens while the
// benchmark timer is stopped.
void BM_SDCA_LARGE_DENSE(const int iters, const int num_examples) {
  testing::StopTiming();
  Graph* init = nullptr;
  Graph* train = nullptr;
  GetGraphs(num_examples, 0 /* sparse feature groups */,
            0 /* sparse features per group */, 5 /* dense feature groups*/,
            200000 /* dense features per group */, &init, &train);
  testing::StartTiming();
  test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters);
}

// Benchmarks SDCA on a sparse-only problem (65 sparse groups of one million
// features) with auto-configured (multi-threaded) session options. Graph
// construction happens while the benchmark timer is stopped.
void BM_SDCA_LARGE_SPARSE(const int iters, const int num_examples) {
  testing::StopTiming();
  Graph* init = nullptr;
  Graph* train = nullptr;
  GetGraphs(num_examples, 65 /* sparse feature groups */,
            1000000 /* sparse features per group */,
            0 /* dense feature groups*/, 0 /* dense features per group */,
            &init, &train);
  testing::StartTiming();
  test::Benchmark("cpu", train, GetMultiThreadedOptions(), init).Run(iters);
}

}  // namespace

BENCHMARK(BM_SDCA)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
BENCHMARK(BM_SDCA_LARGE_DENSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
BENCHMARK(BM_SDCA_LARGE_SPARSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

}  // namespace tensorflow