about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- tensorflow/compiler/aot/codegen_test_h.golden 4
-rw-r--r-- tensorflow/compiler/aot/runtime.h 4
-rw-r--r-- tensorflow/compiler/aot/runtime_test.cc 16
-rw-r--r-- tensorflow/core/framework/allocator.h 5
-rw-r--r-- tensorflow/core/framework/tensor_test.cc 24
-rw-r--r-- tensorflow/core/kernels/scoped_allocator_ops_test.cc 9
-rw-r--r-- third_party/eigen.BUILD 1
7 files changed, 32 insertions, 31 deletions
diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
index 6e050cf564..6641d45e83 100644
--- a/tensorflow/compiler/aot/codegen_test_h.golden
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -56,9 +56,9 @@ namespace bar {
//
// Memory stats:
// arg bytes total: 104
-// arg bytes aligned: 128
+// arg bytes aligned: 192
// temp bytes total: 126
-// temp bytes aligned: 224
+// temp bytes aligned: 320
class MyClass : public tensorflow::XlaCompiledCpuFunction {
public:
// Number of input arguments for the compiled computation.
diff --git a/tensorflow/compiler/aot/runtime.h b/tensorflow/compiler/aot/runtime.h
index d085864f00..d1a669ceb1 100644
--- a/tensorflow/compiler/aot/runtime.h
+++ b/tensorflow/compiler/aot/runtime.h
@@ -25,8 +25,8 @@ namespace tensorflow {
namespace tfcompile {
namespace runtime {
-// Align to 32-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment.
-static constexpr size_t kAlign = 32;
+// Align to 64-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment.
+static constexpr size_t kAlign = 64;
// aligned_buffer_bytes returns the sum of each size in `sizes`, skipping -1
// values. There are `n` entries in `sizes`. Each buffer is aligned to kAlign
diff --git a/tensorflow/compiler/aot/runtime_test.cc b/tensorflow/compiler/aot/runtime_test.cc
index 6d603a02eb..06ec623eb2 100644
--- a/tensorflow/compiler/aot/runtime_test.cc
+++ b/tensorflow/compiler/aot/runtime_test.cc
@@ -24,7 +24,7 @@ namespace runtime {
namespace {
TEST(Runtime, AlignmentValue) {
- // We've chosen 32 byte alignment for the tfcompile runtime to mimic the
+ // We've chosen 64 byte alignment for the tfcompile runtime to mimic the
// regular tensorflow allocator, which was chosen to play nicely with Eigen.
// The tfcompile runtime also has a requirement that comes from the xla
// generated code, on the relation: buffer_size >= 16 ? 2 * sizeof(void*) : 8
@@ -39,13 +39,13 @@ TEST(Runtime, AlignedBufferBytes) {
EXPECT_EQ(aligned_buffer_bytes(sizesA, 1), 0);
static constexpr intptr_t sizesB[1] = {3};
- EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 32);
+ EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 64);
static constexpr intptr_t sizesC[1] = {32};
- EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 32);
+ EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 64);
static constexpr intptr_t sizesD[7] = {1, -1, 32, -1, 64, 2, 3};
- EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 192);
+ EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 320);
}
void* add_ptr(void* base, uintptr_t delta) {
@@ -101,11 +101,11 @@ TEST(Runtime, MallocFreeContiguousBuffers) {
EXPECT_NE(base, nullptr);
EXPECT_EQ(bufD[0], add_ptr(base, 0));
EXPECT_EQ(bufD[1], nullptr);
- EXPECT_EQ(bufD[2], add_ptr(base, 32));
+ EXPECT_EQ(bufD[2], add_ptr(base, 64));
EXPECT_EQ(bufD[3], nullptr);
- EXPECT_EQ(bufD[4], add_ptr(base, 64));
- EXPECT_EQ(bufD[5], add_ptr(base, 128));
- EXPECT_EQ(bufD[6], add_ptr(base, 160));
+ EXPECT_EQ(bufD[4], add_ptr(base, 128));
+ EXPECT_EQ(bufD[5], add_ptr(base, 192));
+ EXPECT_EQ(bufD[6], add_ptr(base, 256));
for (int i = 0; i < 7; ++i) {
const intptr_t size = sizesD[i];
if (size != -1) {
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 2c87156dca..2bb4d32d57 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -67,13 +67,8 @@ struct AllocatorStats {
// device memory.
class Allocator {
public:
-#ifdef EIGEN_VECTORIZE_AVX512
// Align to 64 byte boundary.
static constexpr size_t kAllocatorAlignment = 64;
-#else
- // Align to 32 byte boundary.
- static constexpr size_t kAllocatorAlignment = 32;
-#endif
virtual ~Allocator();
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index b613effd18..80e168df97 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -1147,29 +1147,29 @@ TEST(Tensor, FailureToAllocate) {
// On the alignment.
//
-// As of 2015/8, tensorflow::Tensor allocates its buffer with 32-byte
+// As of 2018/5, tensorflow::Tensor allocates its buffer with 64-byte
// alignment. Tensor::tensor/flat/vec/matrix methods requires the
// buffer satisfies Eigen::Aligned (e.g., 16-bytes aligned usually,
-// and 32-bytes for AVX). Tensor::Slice requires the caller to ensure
-// its result is aligned if the caller intends to use those methods.
-// In this test case, we simply make sure each slice is 32-byte
-// aligned: sizeof(float) * 4 * 2 = 32.
+// 32-bytes for AVX, and 64-bytes for AVX512). Tensor::Slice requires
+// the caller to ensure its result is aligned if the caller intends
+// to use those methods. In this test case, we simply make sure each
+// slice is 64-byte aligned: sizeof(float) * 4 * 36 = 576. 576 % 64 = 0.
TEST(Tensor, Slice_Basic) {
Tensor saved;
{ // General
- Tensor x(DT_FLOAT, TensorShape({10, 4, 34}));
+ Tensor x(DT_FLOAT, TensorShape({10, 4, 36}));
// Fills in known values.
for (int i = 0; i < 10; ++i) {
x.Slice(i, i + 1).flat<float>().setConstant(i * 1.f);
}
// A simple slice along dim0.
Tensor y = x.Slice(4, 8);
- EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 34})));
+ EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 36})));
auto tx = x.tensor<float, 3>();
auto ty = y.tensor<float, 3>();
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
- for (int k = 0; k < 34; ++k) {
+ for (int k = 0; k < 36; ++k) {
EXPECT_EQ(ty(i, j, k), 4.0 + i);
EXPECT_EQ(&tx(4 + i, j, k), &ty(i, j, k));
}
@@ -1186,7 +1186,7 @@ TEST(Tensor, Slice_Basic) {
auto tz = z.tensor<float, 3>();
EXPECT_EQ(1, z.dim_size(0));
for (int j = 0; j < 4; ++j) {
- for (int k = 0; k < 34; ++k) {
+ for (int k = 0; k < 36; ++k) {
EXPECT_EQ(tz(0, j, k), 6.0);
}
}
@@ -1198,16 +1198,16 @@ TEST(Tensor, Slice_Basic) {
EXPECT_EQ(1, saved.dim_size(0));
auto tsaved = saved.tensor<float, 3>();
for (int j = 0; j < 4; ++j) {
- for (int k = 0; k < 34; ++k) {
+ for (int k = 0; k < 36; ++k) {
EXPECT_EQ(tsaved(0, j, k), 6.0);
}
}
}
{ // Empty
- Tensor x(DT_FLOAT, TensorShape({10, 0, 34}));
+ Tensor x(DT_FLOAT, TensorShape({10, 0, 36}));
x.flat<float>().setRandom();
Tensor y = x.Slice(4, 8);
- EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 34})));
+ EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 36})));
}
{
diff --git a/tensorflow/core/kernels/scoped_allocator_ops_test.cc b/tensorflow/core/kernels/scoped_allocator_ops_test.cc
index 019c6619ee..d2918d2042 100644
--- a/tensorflow/core/kernels/scoped_allocator_ops_test.cc
+++ b/tensorflow/core/kernels/scoped_allocator_ops_test.cc
@@ -212,8 +212,13 @@ TEST_F(ScopedAllocatorConcatOpTest, Success3) {
}
TEST_F(ScopedAllocatorConcatOpTest, Reshape) {
- MakeOp({2, 2, 2}, DT_DOUBLE, true, "test", 120, 2);
- ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}});
+ MakeOp({2, 2, 4}, DT_DOUBLE, true, "test", 120, 2);
+
+ // The elements of the third parameter to ExecOp must be multiples of
+ // Allocator::kAllocatorAlignment in size. If they are not, the backing
+ // tensor allocated by PrepOp will have too many elements and reshaping
+ // will fail.
+ ExecOp(DT_DOUBLE, 120, {{2, 4}, {2, 4}});
}
TEST_F(ScopedAllocatorConcatOpTest, NoReshapeAttr) {
diff --git a/third_party/eigen.BUILD b/third_party/eigen.BUILD
index 07bb6645eb..e54c1a4501 100644
--- a/third_party/eigen.BUILD
+++ b/third_party/eigen.BUILD
@@ -64,6 +64,7 @@ cc_library(
# This define (mostly) guarantees we don't link any problematic
# code. We use it, but we do not rely on it, as evidenced above.
"EIGEN_MPL2_ONLY",
+ "EIGEN_MAX_ALIGN_BYTES=64",
],
includes = ["."],
visibility = ["//visibility:public"],