diff options
author | 2018-06-01 00:30:10 -0700 | |
---|---|---|
committer | 2018-06-01 00:32:18 -0700 | |
commit | 73e5438b725b46e745e6e910c6557b51a321c70f (patch) | |
tree | 1752bf4a844619ccc1b1dcdc99a7185300311697 /tensorflow/core/kernels/conv_ops_gpu_3.cu.cc | |
parent | 961a39346d8be33cff473f1e81498b887c155070 (diff) |
Remove the constructor in shared memory.
PiperOrigin-RevId: 198837256
Diffstat (limited to 'tensorflow/core/kernels/conv_ops_gpu_3.cu.cc')
-rw-r--r-- | tensorflow/core/kernels/conv_ops_gpu_3.cu.cc | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc index a2e7342b04..a5fa48f85e 100644 --- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc +++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc @@ -247,7 +247,13 @@ __global__ void SwapDimension1And2InTensor3UsingTiles( constexpr int ReadRowPerPass = NumThreads / TileSizeJ; constexpr int WriteRowPerPass = NumThreads / TileSizeI; // One extra line in the inner dimension to avoid share memory bank conflict. - __shared__ T shared_memory_tile[TileSizeI][TileSizeJ + 1]; + // This is to mimic the following, but no constructor of T can be invoked. + // __shared__ T shared_memory_tile[TileSizeI][TileSizeJ + 1]; + __shared__ __align__( + alignof(T)) char shared_mem_raw[TileSizeI * (TileSizeJ + 1) * sizeof(T)]; + typedef T(*SharedMemoryTile)[TileSizeJ + 1]; + SharedMemoryTile shared_memory_tile = + reinterpret_cast<SharedMemoryTile>(shared_mem_raw); int x = threadIdx.x; |