aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/grappler/optimizers/auto_parallel.h
blob: 63f6fe5b9db87017c1a76a56c697a3c85555c232 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_AUTO_PARALLEL_H_
#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_AUTO_PARALLEL_H_

#include "tensorflow/core/framework/variable.pb.h"
#include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {
namespace grappler {

// Automatically parallelize a graph by splitting in the batch dimension.
class AutoParallel : public GraphOptimizer {
 public:
  AutoParallel(int num_replicas) : num_replicas_(num_replicas) {
    CHECK(num_replicas_ >= 2);
  }
  ~AutoParallel() override {}

  string name() const override { return "autoparallel"; };

  Status Optimize(Cluster* cluster, const GrapplerItem& item,
                  GraphDef* output) override;

  void Feedback(Cluster* cluster, const GrapplerItem& item,
                const GraphDef& optimize_output, double result) override;

 private:
  GraphDef graph_;
  std::map<string, NodeDef*> all_nodes_;
  std::set<string> apply_gradients_nodes_;
  std::set<string> replica_nodes_;
  std::set<string> shared_nodes_;
  const GrapplerItem* item_;
  int num_replicas_;
  int num_gpus_;
  Status Initialize(const GrapplerItem& item);
  NodeDef* AddNodeDivConst();
  NodeDef* AddNodeDiv(const string& name, const string& input_a,
                      const string& input_b);
  NodeDef* AddNodeControl(const string& name, const std::set<string>& deps,
                          GraphDef* graph);
  bool NotSharedNode(const string& name);
  void AddSharedNodes(GraphDef* graph);
  void AddOneReplica(GraphDef* graph, int number);
  void BuildGraph(GraphDef* graph);
};

}  // end namespace grappler
}  // end namespace tensorflow

#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_AUTO_PARALLEL_H_