aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <bsteiner@google.com>2016-09-21 17:08:11 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-09-21 18:17:34 -0700
commitca69f697538808174928d340b0e2a7d409f56fdb (patch)
treec361e091cab71cbb74b6cb8d1f5ae8063376f6f0 /tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
parent75b07290907abf759f05d8ea087f4623de316db0 (diff)
Use hardware stats to build the cost model for operations that run on GPU
whenever possible. Also delay the creation of the cost model until all the operations for a given step are run: this should minimize the impact on the overall execution, and therefore result in more accurate statistics. Change: 133901706
Diffstat (limited to 'tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc')
-rw-r--r--tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc82
1 files changed, 82 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
index 99100ed39c..6f0f12496f 100644
--- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
@@ -109,5 +109,87 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) {
ASSERT_EQ(2, graph_cnt);
}
+// Builds a small cross-device graph (constants and a Neg on cpu:0, a MatMul
+// and its input on gpu:0), runs it once through a DirectSession with cost
+// modeling enabled, and sanity-checks the exported cost models' memory and
+// timing figures. `enableHWTrace` turns on RunOptions::FULL_TRACE so that
+// hardware (GPU) stats can feed the cost model when a GPU is present.
+static void TestHWAccelerator(bool enableHWTrace) {
+  // Track full allocator stats so per-node memory usage is recorded.
+  EnableCPUAllocatorFullStats(true);
+
+  Graph graph(OpRegistry::Global());
+
+  Tensor a_tensor(DT_FLOAT, TensorShape({2, 2}));
+  test::FillValues<float>(&a_tensor, {3, 2, -1, 0});
+  Node* a = test::graph::Constant(&graph, a_tensor);
+  a->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0");
+
+  Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
+  test::FillValues<float>(&x_tensor, {1, 1});
+  Node* x = test::graph::Constant(&graph, x_tensor);
+  x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+
+  // y = A * x
+  Node* y = test::graph::Matmul(&graph, a, x, false, false);
+  y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+
+  Node* y_neg = test::graph::Unary(&graph, "Neg", y);
+  y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0");
+
+  GraphDef def;
+  test::graph::ToGraphDef(&graph, &def);
+
+  SessionOptions options;
+  (*options.config.mutable_device_count())["CPU"] = 1;
+  (*options.config.mutable_device_count())["GPU"] = 1;
+  // Soft placement lets everything fall back to CPU when no GPU exists,
+  // so this test can run on CPU-only machines as well.
+  options.config.set_allow_soft_placement(true);
+  options.config.mutable_graph_options()->set_build_cost_model(true);
+  std::unique_ptr<Session> session(NewSession(options));
+  TF_ASSERT_OK(session->Create(def));
+  std::vector<std::pair<string, Tensor>> inputs;
+
+  // Request two targets: one fetched output (y) and one node run only for
+  // its side effects (y_neg), so both devices' cost models get populated.
+  std::vector<string> output_names = {y->name() + ":0"};
+  std::vector<string> target_nodes = {y_neg->name()};
+  std::vector<Tensor> outputs;
+  const int64 start_micros = Env::Default()->NowMicros();
+
+  RunOptions run_options;
+  if (enableHWTrace) {
+    // FULL_TRACE enables device (hardware) tracing in addition to the
+    // default software timings.
+    run_options.set_trace_level(RunOptions::FULL_TRACE);
+  }
+  RunMetadata run_metadata;
+  Status s = session->Run(run_options, inputs, output_names, target_nodes,
+                          &outputs, &run_metadata);
+  // Wall-clock duration of the whole step; used below as an upper bound on
+  // any single node's recorded execution time.
+  const int64 run_duration_micros = Env::Default()->NowMicros() - start_micros;
+  TF_ASSERT_OK(s);
+
+  DirectSession* ds = static_cast<DirectSession*>(session.get());
+  int graph_cnt = 0;
+  CostModelManager::CostModelMap cost_models;
+  ds->ExportCostModels(&cost_models);
+  for (auto& it : cost_models) {
+    const Graph* g = (it).first;
+    const CostModel* cm = (it).second;
+    for (Node* node : g->nodes()) {
+      // y and y_neg are both 2x1 float tensors, i.e. at least 8 bytes, so
+      // their recorded peak memory must be at least that large.
+      if (node->name() == y->name()) {
+        EXPECT_LE(8, cm->MaxMemorySize(node, 0));
+      } else if (node->name() == y_neg->name()) {
+        EXPECT_LE(8, cm->MaxMemorySize(node, 0));
+      }
+      // Per-node execution time must be non-negative and cannot exceed the
+      // measured wall-clock time of the entire Run call.
+      EXPECT_LE(0, cm->MaxExecutionTime(node));
+      EXPECT_GE(run_duration_micros, cm->MaxExecutionTime(node));
+    }
+    graph_cnt++;
+  }
+  // We should have 2 cost models since we requested 1 cpu and 1 gpu. However
+  // since the placement is soft, we might end up placing everything on cpu.
+  ASSERT_GE(2, graph_cnt);
+  ASSERT_LE(1, graph_cnt);
+}
+
+// Cost model built without hardware tracing: only the default software
+// timings feed the statistics.
+TEST(DirectSessionWithTrackingAllocTest, CostModelForAccelerator) {
+  TestHWAccelerator(false);
+}
+
+// Same scenario with RunOptions::FULL_TRACE enabled, so hardware (GPU)
+// stats can feed the cost model when a GPU device is actually present.
+TEST(DirectSessionWithTrackingAllocTest, CostModelWithHardwareStats) {
+  TestHWAccelerator(true);
+}
+
} // namespace
} // namespace tensorflow