about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h 10
1 files changed, 6 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 25131600d..1ec5819a7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -307,6 +307,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
enum {
IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess,
+ BlockAccess = false,
Layout = TensorEvaluator<InputArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
@@ -577,11 +578,11 @@ __global__ void EigenConvolutionKernel1D(
const float* __restrict kernel, const int numPlanes, const int numX,
const int maxX, const int kernelSize, float* buffer) {
#if defined(EIGEN_HIPCC)
- HIP_DYNAMIC_SHARED(float, s)
+ HIP_DYNAMIC_SHARED(float, s)
#else
extern __shared__ float s[];
#endif
-
+
const int first_x = blockIdx.x * maxX;
const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSize>()(kernelSize);
@@ -630,7 +631,7 @@ __global__ void EigenConvolutionKernel2D(
const int maxX, const int numY, const int maxY, const int kernelSizeX,
const int kernelSizeY, float* buffer) {
#if defined(EIGEN_HIPCC)
- HIP_DYNAMIC_SHARED(float, s)
+ HIP_DYNAMIC_SHARED(float, s)
#else
extern __shared__ float s[];
#endif
@@ -702,7 +703,7 @@ __global__ void EigenConvolutionKernel3D(
const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY,
const size_t kernelSizeZ, float* buffer) {
#if defined(EIGEN_HIPCC)
- HIP_DYNAMIC_SHARED(float, s)
+ HIP_DYNAMIC_SHARED(float, s)
#else
extern __shared__ float s[];
#endif
@@ -778,6 +779,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
enum {
IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
PacketAccess = false,
+ BlockAccess = false,
Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false