about summary refs log tree commit diff homepage
path: root/tensorflow/core/grappler/optimizers/layout_optimizer.h
blob: 49b697bb75b6b86870f633f24494bfb9d3d45c72 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_LAYOUT_OPTIMIZER_H_
#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_LAYOUT_OPTIMIZER_H_

#include "tensorflow/core/grappler/costs/graph_properties.h"
#include "tensorflow/core/grappler/costs/virtual_placer.h"
#include "tensorflow/core/grappler/optimizers/graph_optimizer.h"

namespace tensorflow {
namespace grappler {
// Convert the NHWC layout to NCHW for Conv-related ops on GPUs.
//
// Implements the GraphOptimizer interface. Only name() is defined inline;
// Optimize(), Feedback() and Tune() are declared here and defined elsewhere.
class LayoutOptimizer : public GraphOptimizer {
 public:
  // No custom construction or teardown logic is needed: the members clean up
  // after themselves, so the compiler-generated versions are used.
  LayoutOptimizer() = default;
  ~LayoutOptimizer() override = default;

  // Returns the identifier of this optimizer, "layout".
  string name() const override { return "layout"; }

  // Options handed to Tune() to select between layout-conversion strategies.
  struct TuningConfig {
    // If true, do not use the NHWC GEMM implementation. When filter size is
    // one or filter size is equal to input image size,
    // the NHWC implementation of Conv2D, Conv2DBackpropInput, and
    // Conv2DBackpropFilter will use a specialized GEMM implementation, which is
    // usually faster than the NCHW implementation. The downside is that this
    // might result in more non-cancellable layout conversion nodes (implemented
    // by the Transpose op).
    //
    // NOTE: this field has no default member initializer, so a
    // default-initialized TuningConfig leaves it indeterminate. Assign it
    // explicitly before passing the config anywhere that reads it.
    bool no_gemm;
  };

  // GraphOptimizer override. Takes the cluster and the item to optimize and
  // reports success or failure through the returned Status.
  // NOTE(review): presumably the layout-converted graph is written to
  // `output` -- confirm against the definition in the .cc file.
  Status Optimize(Cluster* cluster, const GrapplerItem& item,
                  GraphDef* output) override;

  // GraphOptimizer override. Receives the graph previously produced for
  // `item` (`optimize_output`) together with a numeric `result`.
  // NOTE(review): the meaning/units of `result` are not visible from this
  // header -- see the GraphOptimizer interface.
  void Feedback(Cluster* cluster, const GrapplerItem& item,
                const GraphDef& optimize_output, double result) override;

 private:
  // Owned placer instance.
  // NOTE(review): presumably used to decide which nodes run on GPU -- confirm
  // in the .cc file.
  std::unique_ptr<VirtualPlacer> virtual_placer_;

  // Names of nodes that, judging by the identifier, must be left intact by
  // the optimizer -- TODO confirm how this set is populated and consumed.
  std::unordered_set<string> nodes_to_preserve_;

  // Internal helper parameterized by a TuningConfig; takes the item, its
  // inferred graph properties and an output graph, and returns a Status.
  // NOTE(review): presumably performs one layout-conversion attempt under
  // `config` -- confirm in the .cc file.
  Status Tune(const GrapplerItem& item, const GraphProperties& graph_properties,
              const TuningConfig& config, GraphDef* output);
};

}  // end namespace grappler
}  // end namespace tensorflow

#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_LAYOUT_OPTIMIZER_H_