aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/stream_executor_internal.h
diff options
context:
space:
mode:
author: A. Unique TensorFlower <nobody@tensorflow.org> 2016-05-10 10:50:45 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-05-10 12:01:18 -0700
commit: aec09b6cb61c63a28e01f6b413499602e224da2f (patch)
tree: c6a4175f95e319c44a65f5535172e764dd6e0e31 /tensorflow/stream_executor/stream_executor_internal.h
parent: 885cc6bf55745142b8ecc578c61c5f03ff45e6ce (diff)
fp16 support for BiasAdd. Includes support for atomic adds for Eigen::half,
although beware, they are going to be very slow.

Also:
- Remove the testGradientBias() test, since it was a fp64-only duplication of tests we already had in other subtests.
- Extend gradient microbenchmarks to measure NCHW and NHWC, not just the default in a two-dimensional tensor.
- Fix cuda_builtin::__ldg() definition; seemingly calling ::__ldg() on gcudacc returns only zero, not the __ldg() function we defined a few lines further up.

Change: 121970776
Diffstat (limited to 'tensorflow/stream_executor/stream_executor_internal.h')
-rw-r--r-- tensorflow/stream_executor/stream_executor_internal.h | 2
1 files changed, 2 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h
index dff756c8fc..8ff9532a55 100644
--- a/tensorflow/stream_executor/stream_executor_internal.h
+++ b/tensorflow/stream_executor/stream_executor_internal.h
@@ -209,6 +209,8 @@ class StreamExecutorInterface {
uint64 size) = 0;
virtual bool MemZero(Stream *stream, DeviceMemoryBase *location,
uint64 size) = 0;
+ virtual bool Memset(Stream *stream, DeviceMemoryBase *location,
+ uint8 pattern, uint64 size) = 0;
virtual bool Memset32(Stream *stream, DeviceMemoryBase *location,
uint32 pattern, uint64 size) = 0;
virtual bool Memcpy(Stream *stream, void *host_dst,