path: root/tensorflow/stream_executor/dnn.h
author     Yangzihao Wang <yangzihao@google.com>  2017-06-01 17:50:43 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>  2017-06-01 17:54:26 -0700
commit     69075f3546dfc29dbef8b7c5d990f3af094cbd5f (patch)
tree       2494878da9ce92431152d74419a1f984ed197d62 /tensorflow/stream_executor/dnn.h
parent     7d7a40309693f01359537dce97fd6ff82e19755d (diff)
Add functional support for cudnnConvolutionBiasActivationForward().
PiperOrigin-RevId: 157788425
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r--  tensorflow/stream_executor/dnn.h  70
1 file changed, 62 insertions(+), 8 deletions(-)
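
Note on the pattern used in this change: the new fused convolution + bias add + activation entry points are ordinary virtuals whose default bodies return false, so existing DnnSupport backends keep compiling unchanged and callers can fall back to the unfused path when fusion is unavailable. The following self-contained C++ sketch illustrates that pattern only; the names (DnnBackend, ConvolveBiasActivate, Run) are hypothetical stand-ins, not the actual StreamExecutor API.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for dnn::ActivationMode; illustration only.
enum class ActivationMode { kNone, kRelu };

class DnnBackend {
 public:
  virtual ~DnnBackend() = default;

  // Unfused convolution (identity here, for brevity): always available.
  virtual bool Convolve(const std::vector<float>& in, std::vector<float>* out) {
    *out = in;
    return true;
  }

  // Fused convolution + bias add + activation. The default body returns
  // false, mirroring the `return false;` stubs added in this commit, so
  // backends without fused support need not override it.
  virtual bool ConvolveBiasActivate(const std::vector<float>& in,
                                    const std::vector<float>& bias,
                                    ActivationMode mode,
                                    std::vector<float>* out) {
    return false;
  }
};

// A backend that does implement the fused path.
class FusedBackend : public DnnBackend {
 public:
  bool ConvolveBiasActivate(const std::vector<float>& in,
                            const std::vector<float>& bias,
                            ActivationMode mode,
                            std::vector<float>* out) override {
    out->resize(in.size());
    for (std::size_t i = 0; i < in.size(); ++i) {
      float v = in[i] + bias[i % bias.size()];
      (*out)[i] = (mode == ActivationMode::kRelu) ? std::max(v, 0.0f) : v;
    }
    return true;
  }
};

// Caller: try the fused op first; if the backend reports "unsupported",
// fall back to the unfused convolution plus a manual bias add and ReLU.
bool Run(DnnBackend* dnn, const std::vector<float>& in,
         const std::vector<float>& bias, std::vector<float>* out) {
  if (dnn->ConvolveBiasActivate(in, bias, ActivationMode::kRelu, out)) {
    return true;
  }
  if (!dnn->Convolve(in, out)) return false;
  for (std::size_t i = 0; i < out->size(); ++i) {
    (*out)[i] = std::max((*out)[i] + bias[i % bias.size()], 0.0f);
  }
  return true;
}

int main() {
  FusedBackend fused;
  DnnBackend plain;
  std::vector<float> in = {-1.0f, 2.0f, -3.0f, 4.0f};
  std::vector<float> bias = {0.5f};
  std::vector<float> out;
  Run(&fused, in, bias, &out);  // Uses the fused override.
  Run(&plain, in, bias, &out);  // Falls back to the unfused path.
  for (float v : out) std::cout << v << " ";
  std::cout << "\n";
  return 0;
}

A concrete DnnSupport implementation (for example the cuDNN backend this commit targets) would override the fused overloads and dispatch to cudnnConvolutionBiasActivationForward(); every other backend simply inherits the `return false;` behaviour.
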
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 8e56933ba3..e8b5bbf5b1 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -796,6 +796,7 @@ class NormalizeDescriptor {
// Describes a kind of non-linearity (threshold-like mathematical function).
enum class ActivationMode {
+ kNone,
kSigmoid,
// Rectified linear activation: f(x) = x < 0 ? 0 : x
kRelu,
@@ -910,9 +911,11 @@ class DnnSupport {
// input_data: un-owned device memory region which contains the
// convolution input.
// filter_descriptor: dimensions of the convolution filter.
- // weights: coefficients for the convolution filter, these are multiplied
- // against values in the input that the filter convolves over.
// convolution_descriptor: stride of the convolution filter.
+ // biases: un-owned device memory region containing biases to add to the
+ // input. This can be DeviceMemory pointing to NULL only when activation_mode
+ // is kNone.
+ // activation_mode: Type of activation to perform.
// output_descriptor: dimensions of the output layer.
// output_data: un-owned device memory region in which to place the
// convolution result.
@@ -945,16 +948,62 @@ class DnnSupport {
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
- ProfileResult* output_profile_result) = 0;
+ ProfileResult* output_profile_result) {
+ return false;
+ }
- // Return a list of algorithms supported by the forward convolution pass.
- virtual bool GetConvolveAlgorithms(
- bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms);
+ // Enqueues a double-precision fused convolution, bias add, and activation
+ // operation onto the stream. See DoConvolve above for argument details.
+ virtual bool DoConvolve(
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
+ const DeviceMemory<double>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<double>& filter_data,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<double>* output_data) {
+ return false;
+ }
+
+ // Enqueues a half-precision fused convolution, bias add, and activation
+ // operation onto the stream. See DoConvolve above for argument details.
+ virtual bool DoConvolve(
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
+ const DeviceMemory<Eigen::half>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<Eigen::half>& filter_data,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const DeviceMemory<Eigen::half>& biases,
+ dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<Eigen::half>* output_data,
+ ScratchAllocator* scratch_allocator,
+ const dnn::AlgorithmConfig& algorithm_config,
+ ProfileResult* output_profile_result) {
+ return false;
+ }
- // Enqueues a double-precision convolution operation onto the stream.
+ // Enqueues a single-precision convolution operation (without bias add
+ // or activation) onto the stream.
+ // See DoConvolve above for argument details.
+ virtual bool DoConvolve(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<float>& filter_data,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
+ const dnn::AlgorithmConfig& algorithm_config,
+ ProfileResult* output_profile_result) = 0;
+
+ // Enqueues a double-precision convolution operation (without bias add
+ // or activation) onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
@@ -965,7 +1014,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double>* output_data) = 0;
- // Enqueues a half-precision convolution operation onto the stream.
+ // Enqueues a half-precision convolution operation (without bias add
+ // or activation) onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
@@ -979,6 +1029,10 @@ class DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) = 0;
+ // Return a list of algorithms supported by the forward convolution pass.
+ virtual bool GetConvolveAlgorithms(
+ bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms);
+
// Version of DoConvolve that uses pre-quantized 8 bit coefficients.
// coefficient_scales specifies the scaling of each column of coefficients:
// original float coefficient[row * num_columns + column] =
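
For reference, the activation modes visible above (kNone, kSigmoid, kRelu) name the element-wise function that a fused kernel applies after the bias add, i.e. output = f(conv(input, filter) + bias). Below is a minimal sketch of those reference semantics, assuming the standard sigmoid and the ReLU definition quoted in the enum comment (f(x) = x < 0 ? 0 : x); the per-channel bias indexing is an assumption made for illustration, not the layout used by StreamExecutor, and this is not the cuDNN implementation.

#include <cmath>
#include <cstddef>
#include <vector>

enum class ActivationMode { kNone, kSigmoid, kRelu };

// Element-wise activation: kNone is the identity, kRelu is
// f(x) = x < 0 ? 0 : x, kSigmoid is f(x) = 1 / (1 + exp(-x)).
float Activate(ActivationMode mode, float x) {
  switch (mode) {
    case ActivationMode::kSigmoid:
      return 1.0f / (1.0f + std::exp(-x));
    case ActivationMode::kRelu:
      return x < 0.0f ? 0.0f : x;
    case ActivationMode::kNone:
    default:
      return x;
  }
}

// Tail of the fused operation: add a per-channel bias to the raw
// convolution output and apply the activation in place. The channel-minor
// indexing (i % channels) is assumed purely for this example.
void BiasActivate(ActivationMode mode, const std::vector<float>& bias,
                  std::size_t channels, std::vector<float>* conv_out) {
  for (std::size_t i = 0; i < conv_out->size(); ++i) {
    float v = (*conv_out)[i] + bias[i % channels];
    (*conv_out)[i] = Activate(mode, v);
  }
}
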