diff options
author | Xiaoqiang Zheng <zhengxq@google.com> | 2017-08-30 14:37:32 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-08-30 14:41:16 -0700 |
commit | bd64cddd4773708fc95cfab33d16e7785967d1e3 (patch) | |
tree | efd76ed13280a23c23726f17d38c1de7dbba5fb7 | |
parent | 7453a0fe7e31af6763748458aed21750bc1b4000 (diff) |
Fix bias_add size calculation for half.
PiperOrigin-RevId: 167051419
-rw-r--r-- | tensorflow/core/kernels/bias_op_gpu.cu.cc | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc
index e07ca5e0c4..ddc2d457b0 100644
--- a/tensorflow/core/kernels/bias_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc
@@ -142,9 +142,9 @@ __global__ void BiasGradNCHW_SharedAtomics(const T* output_backprop,
                                            int group_size) {
   // Initialize the shared memory.
   typedef typename AccumulatorType<T>::type AccT;
-  __shared__ AccT s_data[32];
-  int32 s_data_size = sizeof(s_data) / sizeof(T);
-  for (int32 index = threadIdx.x; index < s_data_size; index += blockDim.x) {
+  const int32 kSDataSize = 32;
+  __shared__ AccT s_data[kSDataSize];
+  for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) {
     s_data[index] = AccT(0);
   }
   __syncthreads();