author    A. Unique TensorFlower <gardener@tensorflow.org>  2017-01-17 12:46:28 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2017-01-17 13:07:02 -0800
commit    b398f90411ddb7caee1be4b73de271beb73a07fe (patch)
tree      8683b6f48bd4719d10b1f6df92b76437d09ca93f /tensorflow/stream_executor/dnn.h
parent    21c0fa9e2dd966c242a4e89f1cac9e3e0f146ea8 (diff)
Add several operations to the StreamExecutor API
No implementations are yet provided for these operations.

Change: 144743665
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r--  tensorflow/stream_executor/dnn.h | 223 ++++++++++++++++++++++++++
1 file changed, 223 insertions(+), 0 deletions(-)
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index d83d3042d5..5db86cefc3 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -38,6 +38,7 @@ limitations under the License.
namespace perftools {
namespace gputools {
+class HostBuffer;
class Stream;
class ScratchAllocator;
@@ -125,6 +126,15 @@ enum class RnnDirectionMode {
kRnnBidirectional = 1,
};
+// Relevant to DepthToSpace and SpaceToDepth. This is the write layout when
+// performing depth to space and the read layout when performing space to
+// depth. It's specified with the most-major dimension first and the
+// most-minor dimension last. In DepthToSpace, the D*M² values are read in
+// and then, for DepthHeightWidth, written out to the output patch by varying
+// first width, then height, then depth. In C array format, this looks like
+// [depth][height][width]. See the DepthToSpace comment for more information.
+enum class DepthToSpaceLayout { DepthHeightWidth };
+
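Concretely, for DepthHeightWidth the flat input channel index decomposes most-major-first into depth, height, and width offsets. A minimal sketch of that decomposition, using a hypothetical helper name that is not part of this header:

// Hypothetical illustration only: with sqrt-depth-reduction M and output
// depth D, an input channel c in [0, D*M*M) splits as [depth][height][width],
// i.e. c = (d*M + i)*M + j, with width (j) varying fastest.
inline void DecomposeDepthHeightWidth(int c, int m, int* d, int* i, int* j) {
  *j = c % m;        // width offset within the M x M patch (most-minor)
  *i = (c / m) % m;  // height offset within the patch
  *d = c / (m * m);  // output depth (most-major)
}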
// Specifies the descriptor for an RNN model.
//
// An example use case:
@@ -530,6 +540,13 @@ enum class PoolingMode : int64 {
kAverage,
};
+// Specifies the dimension in which to concatenate inputs in space.
+// The underlying type is int64 so there's no padding in SpaceConcatenateMode.
+enum class SpaceConcatenateMode : int64 {
+ XDirection,
+ YDirection,
+};
+
// Returns a short name for the pooling mode, e.g. "Avg".
string ShortPoolingModeString(PoolingMode mode);
@@ -1319,6 +1336,129 @@ class DnnSupport {
port::ArraySlice<const DeviceMemory<float>*> input_data,
DeviceMemory<float>* output_data) = 0;
+ // Concatenates several layers into one, along either the x-dimension or the
+ // y-dimension, depending on a user-specified flag.
+ // For x-concatenation, layers are aligned at matching y and depth
+ // coordinates, and for y-concatenation, they are aligned at matching x and
+ // depth coordinates. The inputs must all have the same depth and batch size.
+ // For x-concatenation, the inputs must have the same height (y-size), and the
+ // output will have the same depth and height as the inputs and its width (x-
+ // size) will be the sum of the input widths. For y-concatenation, the inputs
+ // must have the same width, and the output will have the same depth and width
+ // as the inputs, and its height will be the sum of the input heights.
+ //
+ // Arguments:
+ // stream: borrowed pointer to the stream that the 'space concatenate'
+ // operation should be enqueued onto.
+ // input_dimensions: the dimensions of each input.
+ // input_data: un-owned device memory region which contains the input data
+ // for each input layer.
+ // output_data: un-owned device memory region in which to place the space
+ // concatenate result.
+ // concat_direction: either dnn::SpaceConcatenateMode::XDirection or
+ // dnn::SpaceConcatenateMode::YDirection.
+ virtual bool DoSpaceConcatenate(
+ Stream* stream, port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
+ port::ArraySlice<const DeviceMemory<float>*> input_data,
+ DeviceMemory<float>* output_data,
+ dnn::SpaceConcatenateMode concat_direction) {
+ return false;
+ }
+
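For reference, here is a minimal CPU sketch of the x-direction semantics described above, assuming dense row-major (y, x, depth) tensors as in the DoReshape example below; the helper name and flattened-vector representation are hypothetical, not part of this API:

#include <cstdint>
#include <vector>

// Hypothetical CPU reference for x-direction space concatenation: inputs
// share height and depth; the output width is the sum of the input widths.
std::vector<float> SpaceConcatenateXRef(
    const std::vector<std::vector<float>>& inputs,
    int64_t height, int64_t depth, const std::vector<int64_t>& widths) {
  int64_t out_width = 0;
  for (int64_t w : widths) out_width += w;
  std::vector<float> out(height * out_width * depth);
  int64_t x_offset = 0;  // running x position of the current input
  for (size_t k = 0; k < inputs.size(); ++k) {
    for (int64_t y = 0; y < height; ++y)
      for (int64_t x = 0; x < widths[k]; ++x)
        for (int64_t d = 0; d < depth; ++d)
          out[(y * out_width + (x_offset + x)) * depth + d] =
              inputs[k][(y * widths[k] + x) * depth + d];
    x_offset += widths[k];
  }
  return out;
}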
+ // Change the layout of the data by shrinking one dimension (or set of
+ // dimensions) and growing another dimension (or set of dimensions), while
+ // keeping the total number of data elements constant, and maintaining the
+ // current data ordering.
+ //
+ // Currently, the only supported operation is depth into space by a power of
+ // 2. E.g. (y, x, z) -> (y*2, x*2, z/4)
+ //
+ // Note that Reshape may not be a no-op, depending on the platform and which
+ // dimensions are being changed.
+ //
+ // Example: forgetting about batch for the moment, let's take a tensor that's
+ // 2x1x8 (y by x by z) and reshape to a tensor that's 4x2x2. The memory layout
+ // is row-major order: y,x,z. I.e. z changes the fastest, then x, then y. The
+ // elements of the tensor range from 0 to 15. The x,y,z indices are below each
+ // element.
+ //
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ // y0 y0 y0 y0 y0 y0 y0 y0 y1 y1 y1 y1 y1 y1 y1 y1
+ // x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0
+ // z0 z1 z2 z3 z4 z5 z6 z7 z0 z1 z2 z3 z4 z5 z6 z7
+ //
+ // reshape to 4x2x2
+ //
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ // y0 y0 y0 y0 y1 y1 y1 y1 y2 y2 y2 y2 y3 y3 y3 y3
+ // x0 x0 x1 x1 x0 x0 x1 x1 x0 x0 x1 x1 x0 x0 x1 x1
+ // z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1
+ virtual bool DoReshape(Stream* stream,
+ const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<float>& input_data,
+ const dnn::BatchDescriptor& output_dimensions,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
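Because the layout stays row-major, the reshape only re-decodes each element's linear index into new (y, x, z) coordinates. A small standalone check of the 2x1x8 -> 4x2x2 example above (hypothetical, not part of this API):

#include <cstdint>
#include <cstdio>

// Re-decode linear indices 0..15 under the new 4x2x2 (y by x by z) shape.
// Element 5 prints y1 x0 z1, matching the second table above.
int main() {
  const int64_t x2 = 2, z2 = 2;  // new x and z extents
  for (int64_t n = 0; n < 16; ++n) {
    const int64_t z = n % z2;
    const int64_t x = (n / z2) % x2;
    const int64_t y = n / (z2 * x2);
    std::printf("%2lld: y%lld x%lld z%lld\n", (long long)n, (long long)y,
                (long long)x, (long long)z);
  }
}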
+ // Depth to space takes an X by Y image with depth D*M² and changes it to an
+ // MX x MY image with depth D. Each input location (x,y) with depth D*M² in
+ // the input image becomes an MxM contiguous area in the output image, with
+ // the values laid out in raster order as specified by DepthToSpaceLayout,
+ // at a new depth of D.
+ //
+ // Example.
+ // M=2, Din=8, Xin=2, Yin=2; Xout=4, Yout=4, Dout=2
+ // DepthHeightWidth layout
+ // Values within a 'cell' are at different depths and same x & y.
+ // Input:
+ // abcdefgh ijklmnop
+ // qrstuvwx yz012345
+ // Output:
+ // ae bf im jn
+ // cg dh ko lp
+ // qu rv y2 z3
+ // sw tx 04 15
+ //
+ // sqrt_depth_reduction: 'M' in the comment above
+ virtual bool DoDepthToSpace(Stream* stream,
+ const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<float>& input_data,
+ const DepthToSpaceLayout& depth_to_space_layout,
+ const int& sqrt_depth_reduction,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
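A minimal CPU sketch of these semantics for the DepthHeightWidth layout, assuming dense row-major (y, x, depth) tensors; the helper name is hypothetical, not part of this API:

#include <cstdint>
#include <vector>

// Hypothetical CPU reference for depth to space (DepthHeightWidth layout).
// The input is (yin, xin, din) with din = D*M*M; the output is
// (yin*M, xin*M, D). Input channel c splits as c = (d*M + i)*M + j.
std::vector<float> DepthToSpaceRef(const std::vector<float>& in,
                                   int64_t yin, int64_t xin, int64_t din,
                                   int64_t m) {
  const int64_t dout = din / (m * m);  // din must equal D * M^2
  std::vector<float> out(yin * m * xin * m * dout);
  for (int64_t y = 0; y < yin; ++y)
    for (int64_t x = 0; x < xin; ++x)
      for (int64_t d = 0; d < dout; ++d)       // [depth]...
        for (int64_t i = 0; i < m; ++i)        // ...[height]...
          for (int64_t j = 0; j < m; ++j) {    // ...[width]
            const int64_t c = (d * m + i) * m + j;  // input channel
            const int64_t src = (y * xin + x) * din + c;
            const int64_t dst =
                ((y * m + i) * (xin * m) + (x * m + j)) * dout + d;
            out[dst] = in[src];
          }
  return out;
}

On the example above (yin=2, xin=2, din=8, m=2), input channel 0 of cell (0,0) lands at output (0,0) depth 0 and channel 4 at output (0,0) depth 1, reproducing the "ae" cell.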
+ // Space to depth is the inverse of depth to space. Space to depth takes each
+ // non-overlapping M by M patch (in the X and Y dimensions) with depth D of
+ // the input, and transforms it to a 1 by 1 patch with depth D*M². If the
+ // input has size (MX, MY, D), the output has size (X, Y, D*M²). The number of
+ // data elements is not changed.
+ //
+ // Example.
+ // M=2, Din=2, Xin=4, Yin=4; Xout=2, Yout=2, Dout=8
+ // DepthHeightWidth layout
+ // Values within a 'cell' are at different depths and same x & y.
+ // Input:
+ // ae bf im jn
+ // cg dh ko lp
+ // qu rv y2 z3
+ // sw tx 04 15
+ // Output:
+ // abcdefgh ijklmnop
+ // qrstuvwx yz012345
+ //
+ // sqrt_depth_increase: 'M' in the comment above
+ virtual bool DoSpaceToDepth(Stream* stream,
+ const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<float>& input_data,
+ const DepthToSpaceLayout& space_to_depth_layout,
+ const int& sqrt_depth_increase,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
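Since space to depth inverts the mapping above, a hypothetical CPU sketch needs only the same index math with source and destination exchanged:

#include <cstdint>
#include <vector>

// Hypothetical CPU reference for space to depth: identical index math to
// DepthToSpaceRef above, with the roles of src and dst swapped. The input is
// (yout*M, xout*M, dout); the output is (yout, xout, dout*M*M).
std::vector<float> SpaceToDepthRef(const std::vector<float>& in,
                                   int64_t yout, int64_t xout, int64_t dout,
                                   int64_t m) {
  const int64_t din = dout * m * m;
  std::vector<float> out(yout * xout * din);
  for (int64_t y = 0; y < yout; ++y)
    for (int64_t x = 0; x < xout; ++x)
      for (int64_t d = 0; d < dout; ++d)
        for (int64_t i = 0; i < m; ++i)
          for (int64_t j = 0; j < m; ++j) {
            const int64_t c = (d * m + i) * m + j;  // output channel
            out[(y * xout + x) * din + c] =
                in[((y * m + i) * (xout * m) + (x * m + j)) * dout + d];
          }
  return out;
}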
// Computes the specified operation (e.g. addition or multiplication)
// between corresponding elements in the inputs and stores the result in the
// output element.
@@ -1342,6 +1482,37 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
DeviceMemory<float>* output_data) = 0;
+ // Computes the specified operation (e.g. addition or multiplication)
+ // between corresponding elements in the inputs and stores the result in the
+ // output element. Each input is multiplied by a scalar constant and the
+ // result is divided by a scalar constant.
+ // e.g. To perform Z = 0.9*X + 1.1*Y, set the input multiplicands to 9 and 11
+ // and the output divisor to 10.
+ // The inputs and output must all have the same dimensions, but may have
+ // different quantization parameters (min_value and max_value).
+ //
+ // Arguments (all borrowed):
+ // stream: borrowed pointer to the stream that the 'elementwise operation'
+ // should be enqueued onto.
+ // operation: The operation to perform.
+ // input_multiplicands: Amount to scale each input.
+ // output_divisor: Amount to divide the output.
+ // input_dimensions: The dimensions of each input.
+ // input_data: un-owned device memory region which contains the
+ // input data for each input layer.
+ // output_dimensions: The dimensions of the output.
+ // output_data: un-owned device memory region in which to place the
+ // operation result.
+ virtual bool DoElementwiseOperateScaledQuantized(
+ Stream* stream, ElementwiseOperation operation,
+ port::ArraySlice<int> input_multiplicands, int output_divisor,
+ port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
+ port::ArraySlice<const DeviceMemory<float>*> input_data,
+ const dnn::BatchDescriptor& output_dimensions,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
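For the addition case, the scaling scheme above amounts to a fixed-point weighted sum evaluated elementwise. A hypothetical scalar sketch (the helper name is not part of this API):

#include <vector>

// Hypothetical scalar semantics for addition under the scaled-quantized
// scheme: out = (sum_k mult[k] * in_k) / divisor, applied elementwise.
// E.g. Z = 0.9*X + 1.1*Y uses multiplicands {9, 11} and divisor 10.
float ScaledAdd(const std::vector<float>& elems,
                const std::vector<int>& multiplicands, int divisor) {
  float acc = 0.0f;
  for (size_t k = 0; k < elems.size(); ++k)
    acc += static_cast<float>(multiplicands[k]) * elems[k];
  return acc / static_cast<float>(divisor);
}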
// Pads the input with zeros in the X and Y dimensions. The feature_map
// dimension is unchanged.
//
@@ -1382,6 +1553,43 @@ class DnnSupport {
int64 left_trim, int64 right_trim, int64 top_trim,
int64 bottom_trim, DeviceMemory<float> *output_data) = 0;
+ // Grows the input tensor by replicating the X and Y dimensions. The batch and
+ // depth/feature_map dimensions are unchanged. Currently, the input tensor is
+ // limited to X=1 and Y=1; the example below illustrates the general
+ // semantics.
+ //
+ // For example, suppose the input has dimensions x=2, y=3, replicate_x=3, and
+ // replicate_y=2. The diagonal elements of the output would be: [x0y0, x1y1,
+ // x0y2, x1y0, x0y1, x1y2].
+ // Here is the example as a picture. Input:
+ // AB
+ // CD
+ // EF
+ // broadcast result:
+ // ABABAB
+ // CDCDCD
+ // EFEFEF
+ // ABABAB
+ // CDCDCD
+ // EFEFEF
+ //
+ // Arguments (all borrowed):
+ // stream: borrowed pointer to the stream that the 'xy broadcast' operation
+ // should be enqueued onto.
+ // dimensions: The dimensions of the input.
+ // input_data: un-owned device memory region which contains the
+ // input data for the input layer.
+ // replicate_x: Amount to replicate the input's X dimension.
+ // replicate_y: Amount to replicate the input's Y dimension.
+ // output_data: un-owned device memory region in which to place the
+ // broadcast result.
+ virtual bool DoXYBroadcast(Stream* stream,
+ const dnn::BatchDescriptor& dimensions,
+ const DeviceMemory<float>& input_data,
+ int64 replicate_x, int64 replicate_y,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
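The broadcast described above reduces to tiling the input modulo its X and Y extents. A minimal CPU sketch, assuming dense row-major (y, x, depth) tensors and a hypothetical helper name:

#include <cstdint>
#include <vector>

// Hypothetical CPU reference for XY broadcast: each output (y, x) reads
// input (y % yin, x % xin); batch handling is omitted for brevity.
std::vector<float> XYBroadcastRef(const std::vector<float>& in, int64_t yin,
                                  int64_t xin, int64_t depth,
                                  int64_t replicate_y, int64_t replicate_x) {
  const int64_t yout = yin * replicate_y, xout = xin * replicate_x;
  std::vector<float> out(yout * xout * depth);
  for (int64_t y = 0; y < yout; ++y)
    for (int64_t x = 0; x < xout; ++x)
      for (int64_t d = 0; d < depth; ++d)
        out[(y * xout + x) * depth + d] =
            in[((y % yin) * xin + (x % xin)) * depth + d];
  return out;
}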
// Enqueues an asynchronous memcpy of the *quantized* output of a layer (that
// is, bytes instead of scaled floats) into 'host_dst' if they are available
// for the underlying DNN implementation. If this quantized output is not
@@ -1425,6 +1633,21 @@ class DnnSupport {
QuantizedActivationMode mode,
DeviceMemory<float>* gpu_unquantized_dst) = 0;
+ // Enqueues an asynchronous copy of the contents of buffer_src to
+ // gpu_unquantized_dst.
+ virtual bool DoCopyHostBuffer2Device(
+ Stream* stream, HostBuffer* buffer_src,
+ DeviceMemory<float>* gpu_unquantized_dst) {
+ return false;
+ }
+
+ // Enqueues an asynchronous copy of the contents of gpu_unquantized_src to
+ // buffer_dst.
+ virtual bool DoCopyDevice2HostBuffer(
+ Stream* stream, const DeviceMemory<float>& gpu_unquantized_src,
+ HostBuffer* buffer_dst) {
+ return false;
+ }
+
// Create an RNN descriptor based on model shapes and configurations.
// The caller retains the ownership of the descriptor.