aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/dnn.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor/dnn.cc')
-rw-r--r--tensorflow/stream_executor/dnn.cc297
1 files changed, 297 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
new file mode 100644
index 0000000000..020de7f7bb
--- /dev/null
+++ b/tensorflow/stream_executor/dnn.cc
@@ -0,0 +1,297 @@
+#include "tensorflow/stream_executor/dnn.h"
+
+#include "tensorflow/stream_executor/lib/strcat.h"
+#include "tensorflow/stream_executor/lib/stringprintf.h"
+
+namespace perftools {
+namespace gputools {
+namespace dnn {
+
+string ActivationModeString(ActivationMode mode) {
+ switch (mode) {
+ case ActivationMode::kSigmoid:
+ return "sigmoid";
+ case ActivationMode::kRelu:
+ return "relu";
+ case ActivationMode::kRelu6:
+ return "relu6";
+ case ActivationMode::kReluX:
+ return "reluX";
+ case ActivationMode::kTanh:
+ return "tanh";
+ default:
+ LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode);
+ }
+}
+
+string ElementwiseOperationString(ElementwiseOperation op) {
+ switch (op) {
+ case ElementwiseOperation::kAdd:
+ return "add";
+ case ElementwiseOperation::kMultiply:
+ return "multiply";
+ default:
+ LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op);
+ }
+}
+
+string DataLayoutString(DataLayout layout) {
+ switch (layout) {
+ case DataLayout::kYXDepthBatch:
+ return "YXDepthBatch";
+ case DataLayout::kYXBatchDepth:
+ return "YXBatchDepth";
+ case DataLayout::kBatchYXDepth:
+ return "BatchYXDepth";
+ case DataLayout::kBatchDepthYX:
+ return "BatchDepthYX";
+ default:
+ LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout);
+ }
+}
+
+string FilterLayoutString(FilterLayout layout) {
+ switch (layout) {
+ case FilterLayout::kOutputInputYX:
+ return "OutputInputYX";
+ case FilterLayout::kInputYXOutput:
+ return "InputYXOutput";
+ case FilterLayout::kYXInputOutput:
+ return "YXInputOutput";
+ default:
+ LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout);
+ }
+}
+
+// -- BatchDescriptor
+
+BatchDescriptor::BatchDescriptor()
+ : count_(0),
+ feature_map_count_(0),
+ height_(0),
+ width_(0),
+ value_max_(0.0),
+ value_min_(0.0),
+ layout_(DataLayout::kYXDepthBatch),
+ quantized_activation_mode_(QuantizedActivationMode::k8Bit) {}
+
+void BatchDescriptor::CloneFrom(const BatchDescriptor& other) {
+ count_ = other.count_;
+ feature_map_count_ = other.feature_map_count_;
+ height_ = other.height_;
+ width_ = other.width_;
+ value_max_ = other.value_max_;
+ value_min_ = other.value_min_;
+ layout_ = other.layout_;
+ quantized_activation_mode_ = other.quantized_activation_mode_;
+}
+
+string BatchDescriptor::ToString() const {
+ return port::Printf(
+ "{count: %lld feature_map_count: %lld height: %lld width: %lld "
+ "value_min: %f value_max: %f layout: %s}",
+ count_, feature_map_count_, height_, width_, value_min_, value_max_,
+ DataLayoutString(layout_).c_str());
+}
+
+string BatchDescriptor::ToShortString() const {
+ // All the constituent strings are less than 15 characters, so the
+ // small string optimization ensures that there will be at most one
+ // heap memory allocation.
+ string x = port::StrCat("x", width());
+ string y = port::StrCat("y", height());
+ string depth = port::StrCat("d", feature_map_count());
+ string batch = port::StrCat("b", count());
+
+ string suffix;
+ if (value_min() != value_max()) {
+ port::StrAppend(&suffix, "[", value_min(), ";", value_max(), "]");
+ }
+ if (quantized_activation_mode() == QuantizedActivationMode::k16Bit) {
+ suffix += "_16bit";
+ }
+
+ switch (layout()) {
+ case DataLayout::kYXDepthBatch:
+ return port::StrCat(y, x, depth, batch, suffix);
+ case DataLayout::kYXBatchDepth:
+ return port::StrCat(y, x, batch, depth, suffix);
+ case DataLayout::kBatchYXDepth:
+ return port::StrCat(batch, y, x, depth, suffix);
+ case DataLayout::kBatchDepthYX:
+ return port::StrCat(batch, depth, y, x, suffix);
+ default:
+ LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout());
+ }
+}
+
+int64 BatchDescriptor::NodesPerFeatureMap() const { return width_ * height_; }
+
+int64 BatchDescriptor::NodesAcrossFeatureMaps() const {
+ return NodesPerFeatureMap() * feature_map_count_;
+}
+
+int64 BatchDescriptor::ElementCount() const {
+ return count_ * feature_map_count_ * height_ * width_;
+}
+
+int64 BatchDescriptor::FullyConnectedWeightCount(
+ const BatchDescriptor& input, const BatchDescriptor& output) {
+ return input.NodesAcrossFeatureMaps() * output.NodesAcrossFeatureMaps();
+}
+
+int64 BatchDescriptor::FullyConnectedBiasCount(const BatchDescriptor& output) {
+ return output.NodesAcrossFeatureMaps();
+}
+
+// -- FilterDescriptor
+
+FilterDescriptor::FilterDescriptor()
+ : output_feature_map_count_(0),
+ input_feature_map_count_(0),
+ input_filter_height_(0),
+ input_filter_width_(0),
+ layout_(FilterLayout::kOutputInputYX) {}
+
+FilterDescriptor::~FilterDescriptor() {}
+
+void FilterDescriptor::CloneFrom(const FilterDescriptor& other) {
+ set_output_feature_map_count(other.output_feature_map_count())
+ .set_input_feature_map_count(other.input_feature_map_count())
+ .set_input_filter_height(other.input_filter_height())
+ .set_input_filter_width(other.input_filter_width())
+ .set_layout(other.layout());
+}
+
+string FilterDescriptor::ToString() const {
+ return port::Printf(
+ "{output_feature_map_count: %lld input_feature_map_count: %lld "
+ "input_filter_height: %lld input_filter_width: %lld layout: %s}",
+ output_feature_map_count_, input_feature_map_count_, input_filter_height_,
+ input_filter_width_, FilterLayoutString(layout_).c_str());
+}
+
+string FilterDescriptor::ToShortString() const {
+ // All the constituent strings are less than 15 characters, so the
+ // small string optimization ensures that there will be at most one
+ // heap memory allocation.
+ string od = port::StrCat("od", output_feature_map_count_);
+ string id = port::StrCat("id", input_feature_map_count_);
+ string y = port::StrCat("y", input_filter_height_);
+ string x = port::StrCat("x", input_filter_width_);
+
+ switch (layout_) {
+ case FilterLayout::kOutputInputYX:
+ return port::StrCat(od, id, y, x);
+ case FilterLayout::kInputYXOutput:
+ return port::StrCat(id, y, x, od);
+ case FilterLayout::kYXInputOutput:
+ return port::StrCat(y, x, id, od);
+ default:
+ LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout_);
+ }
+}
+
+int64 FilterDescriptor::ComputeWeightCount() const {
+ return output_feature_map_count_ * input_feature_map_count_ *
+ input_filter_height_ * input_filter_width_;
+}
+
+// -- ConvolutionDescriptor
+
+ConvolutionDescriptor::ConvolutionDescriptor()
+ : zero_padding_height_(0),
+ zero_padding_width_(0),
+ vertical_filter_stride_(1),
+ horizontal_filter_stride_(1) {}
+
+ConvolutionDescriptor::~ConvolutionDescriptor() {}
+
+string ConvolutionDescriptor::ToString() const {
+ return port::Printf(
+ "{zero_padding_height: %lld zero_padding_width: %lld "
+ "vertical_filter_stride: %lld horizontal_filter_stride: %lld}",
+ zero_padding_height_, zero_padding_width_, vertical_filter_stride_,
+ horizontal_filter_stride_);
+}
+
+string ConvolutionDescriptor::ToShortString() const {
+ return port::StrCat("py:", zero_padding_height_, "_px:", zero_padding_width_,
+ "_sy:", vertical_filter_stride_, "_sx:",
+ horizontal_filter_stride_);
+}
+
+// -- PoolingDescriptor
+
+PoolingDescriptor::PoolingDescriptor()
+ : mode_(dnn::PoolingMode::kMaximum),
+ window_height_(0),
+ window_width_(0),
+ vertical_padding_(0),
+ horizontal_padding_(0),
+ vertical_stride_(0),
+ horizontal_stride_(0) {}
+
+void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) {
+ mode_ = other.mode_;
+ window_height_ = other.window_height_;
+ window_width_ = other.window_width_;
+ vertical_padding_ = other.vertical_padding_;
+ horizontal_padding_ = other.horizontal_padding_;
+ vertical_stride_ = other.vertical_stride_;
+ horizontal_stride_ = other.horizontal_stride_;
+}
+
+string PoolingDescriptor::ToString() const {
+ const char* mode_string =
+ mode_ == dnn::PoolingMode::kMaximum ? "kMaximum" : "kAverage";
+ return port::Printf(
+ "{mode: %s window_height: %lld window_width: %lld vertical_stride: %lld "
+ "horizontal_stride: %lld vertical padding: %lld horizontal padding: "
+ "%lld}",
+ mode_string, window_height_, window_width_, vertical_stride_,
+ horizontal_stride_, vertical_padding_, horizontal_padding_);
+}
+
+string PoolingDescriptor::ToShortString() const {
+ return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg",
+ "_y:", window_height_, "_x:", window_width_, "_py:",
+ vertical_padding_, "_px:", horizontal_padding_, "_sy:",
+ vertical_stride_, "_sx:", horizontal_stride_);
+}
+
+// -- NormalizeDescriptor
+
+NormalizeDescriptor::NormalizeDescriptor()
+ : bias_(0.0),
+ range_(0),
+ alpha_(0.0),
+ beta_(0.0),
+ wrap_around_(false),
+ segment_size_(0) {}
+
+void NormalizeDescriptor::CloneFrom(const NormalizeDescriptor& other) {
+ bias_ = other.bias_;
+ range_ = other.range_;
+ alpha_ = other.alpha_;
+ beta_ = other.beta_;
+ wrap_around_ = other.wrap_around_;
+ segment_size_ = other.segment_size_;
+}
+
+string NormalizeDescriptor::ToString() const {
+ return port::Printf(
+ "{bias: %f range: %d alpha: %f beta: %f wrap_around: %d "
+ "segment_size: %d}",
+ bias_, range_, alpha_, beta_, wrap_around_, segment_size_);
+}
+
+string NormalizeDescriptor::ToShortString() const {
+ return port::StrCat("bias:", bias_, "_range:", range_, "_alpha:", alpha_,
+ "_beta:", beta_, "_wrap:", wrap_around_, "_size:",
+ segment_size_);
+}
+
+} // namespace dnn
+} // namespace gputools
+} // namespace perftools