author    Xiaoqiang Zheng <zhengxq@google.com>    2017-08-30 14:37:32 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>    2017-08-30 14:41:16 -0700
commit    bd64cddd4773708fc95cfab33d16e7785967d1e3 (patch)
tree      efd76ed13280a23c23726f17d38c1de7dbba5fb7
parent    7453a0fe7e31af6763748458aed21750bc1b4000 (diff)
Fix bias_add size calculation for half.
PiperOrigin-RevId: 167051419
-rw-r--r--  tensorflow/core/kernels/bias_op_gpu.cu.cc  6
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc
index e07ca5e0c4..ddc2d457b0 100644
--- a/tensorflow/core/kernels/bias_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc
@@ -142,9 +142,9 @@ __global__ void BiasGradNCHW_SharedAtomics(const T* output_backprop,
int group_size) {
// Initialize the shared memory.
typedef typename AccumulatorType<T>::type AccT;
- __shared__ AccT s_data[32];
- int32 s_data_size = sizeof(s_data) / sizeof(T);
- for (int32 index = threadIdx.x; index < s_data_size; index += blockDim.x) {
+ const int32 kSDataSize = 32;
+ __shared__ AccT s_data[kSDataSize];
+ for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) {
s_data[index] = AccT(0);
}
__syncthreads();
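
Context for the fix: in this kernel T can be Eigen::half while the accumulator type AccT is a wider type (float), so sizeof(s_data) / sizeof(T) no longer matches the 32 AccT elements actually allocated. For half it evaluates to 64, and the zero-initialization loop writes past the end of the shared array; using the named constant kSDataSize for both the declaration and the loop bound removes the mismatch. A minimal standalone sketch of the arithmetic (plain C++, not the TensorFlow source; it assumes AccumulatorType<Eigen::half>::type is float and that Eigen::half is 2 bytes):

#include <cstdio>

int main() {
  const int kSDataSize = 32;                                // AccT slots actually allocated
  const size_t buffer_bytes = kSDataSize * sizeof(float);   // AccT assumed to be float (128 bytes)

  // Old computation: sizeof(s_data) / sizeof(T)
  const int count_when_T_is_float = buffer_bytes / sizeof(float);  // 32 -> in bounds
  const int count_when_T_is_half  = buffer_bytes / 2;              // sizeof(half) == 2 -> 64, out of bounds

  printf("T=float: %d, T=half: %d, actual slots: %d\n",
         count_when_T_is_float, count_when_T_is_half, kSDataSize);
  return 0;
}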