about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported')
-rw-r--r-- unsupported/Eigen/CXX11/Tensor | 2
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | 13
3 files changed, 8 insertions, 9 deletions
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 7ec60044e..47447f446 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -55,7 +55,7 @@
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h"
-//#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 1e6f276e0..cd992daab 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -766,7 +766,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef typename internal::gemm_blocking_space<ColMajor, LhsScalar, RhsScalar, Dynamic, Dynamic, Dynamic> BlockingType;
// Sizes of the blocks to load in cache. See the Goto paper for details.
- BlockingType blocking(m, n, k, true);
+ BlockingType blocking(m, n, k, 1, true);
const Index kc = blocking.kc();
const Index mc = (std::min)(m, blocking.mc());
const Index nc = (std::min)(n, blocking.nc());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index dc0513305..8e4c7c11d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -152,7 +152,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
Index mc = m;
Index nc = n;
Index kc = k;
- internal::computeProductBlockingSizes<LhsScalar,RhsScalar,1>(kc, mc, nc/*, num_threads*/);
+ internal::computeProductBlockingSizes<LhsScalar,RhsScalar,1>(kc, mc, nc, num_threads);
eigen_assert(mc <= m);
eigen_assert(nc <= n);
eigen_assert(kc <= k);
@@ -197,9 +197,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// this should really be numBlockAs * n_blocks;
const Index num_kernel_promises = num_threads * n_blocks;
- Promise p;
- p.set_value();
- std::vector<Promise> kernel_promises(num_kernel_promises, p);
+ std::vector<Promise> kernel_promises(num_kernel_promises);
+ for (int i = 0; i < kernel_promises.size(); ++i) {
+ kernel_promises[i].set_value();
+ }
for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) {
const Index k_start = k_block_idx * kc;
@@ -275,8 +276,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
need_to_pack, // need_to_pack
};
- typedef decltype(Self::packRhsAndKernel<packRKArg, RhsPacker, GebpKernel>) Func;
- this->m_device.enqueueNoFuture<Func, packRKArg>(&Self::packRhsAndKernel<packRKArg, RhsPacker, GebpKernel>, arg);
+ this->m_device.enqueueNoFuture(&Self::packRhsAndKernel<packRKArg, RhsPacker, GebpKernel>, arg);
}
}
}
@@ -338,7 +338,6 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
actual_mc, arg.kc, arg.nc, 1.0, -1, -1, 0, 0);
const Index set_idx = blockAId * arg.n_blocks + arg.n_block_idx;
- eigen_assert(!(*arg.kernel_promises)[set_idx].ready());
(*arg.kernel_promises)[set_idx].set_value();
}
}