about summary refs log tree commit diff homepage
path: root/tensorflow
diff options
context:
space:
mode:
author A. Unique TensorFlower <nobody@tensorflow.org> 2016-06-06 17:04:25 -0800
committer TensorFlower Gardener <gardener@tensorflow.org> 2016-06-06 18:18:01 -0700
commit db769cc4f04bac93221cdbe16cf4ed2e9785163e (patch)
tree a2a1d92d4ab7ee410d97ec543ac7bef0df3d6828 /tensorflow
parent 9eab455050438eb2f5b1df79e145236ee38c42ad (diff)
Fix MacOS failures with -mavx in Interleave Packet Test
Change: 124200069
Diffstat (limited to 'tensorflow')
-rw-r--r-- tensorflow/core/kernels/sparse_matmul_op.h 13
-rw-r--r-- tensorflow/core/kernels/sparse_matmul_op_test.cc 32
2 files changed, 25 insertions, 20 deletions
diff --git a/tensorflow/core/kernels/sparse_matmul_op.h b/tensorflow/core/kernels/sparse_matmul_op.h
index d10cbad1d2..613c6a15c5 100644
--- a/tensorflow/core/kernels/sparse_matmul_op.h
+++ b/tensorflow/core/kernels/sparse_matmul_op.h
@@ -157,10 +157,15 @@ EIGEN_STRONG_INLINE Packet8f pinterleave4x64<Packet8f>(const Packet8f& from) {
return _mm256_castsi256_ps(_mm256_permute4x64_epi64(_mm256_castps_si256(from),
_MM_SHUFFLE(3, 1, 2, 0)));
#else
- __int64_t tmp1 = _mm256_extract_epi64(_mm256_castps_si256(from), 1);
- __int64_t tmp2 = _mm256_extract_epi64(_mm256_castps_si256(from), 2);
- __m256i tmp3 = _mm256_insert_epi64(_mm256_castps_si256(from), tmp1, 2);
- return _mm256_castsi256_ps(_mm256_insert_epi64(tmp3, tmp2, 1));
+ auto tmp1 = _mm256_extract_epi32(_mm256_castps_si256(from), 2);
+ auto tmp2 = _mm256_extract_epi32(_mm256_castps_si256(from), 3);
+ auto tmp3 = _mm256_extract_epi32(_mm256_castps_si256(from), 4);
+ auto tmp4 = _mm256_extract_epi32(_mm256_castps_si256(from), 5);
+ auto tmp5 = _mm256_insert_epi32(_mm256_castps_si256(from), tmp1, 4);
+ tmp5 = _mm256_insert_epi32(tmp5, tmp2, 5);
+ tmp5 = _mm256_insert_epi32(tmp5, tmp3, 2);
+ tmp5 = _mm256_insert_epi32(tmp5, tmp4, 3);
+ return _mm256_castsi256_ps(tmp5);
#endif
}
// Return a Packet with 4 floats loaded from 4 bfloat16 values
diff --git a/tensorflow/core/kernels/sparse_matmul_op_test.cc b/tensorflow/core/kernels/sparse_matmul_op_test.cc
index cb88580800..45cad2e23b 100644
--- a/tensorflow/core/kernels/sparse_matmul_op_test.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op_test.cc
@@ -238,25 +238,25 @@ class SparseMatmulOpTest : public ::testing::Test {
TEST_F(SparseMatmulOpTest, BroadcastPacketTest) {
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[0];
- internal::pstore(data2, internal::pbroadcast_first<Packet>(
- internal::pload<Packet>(data1)));
+ internal::pstoreu(data2, internal::pbroadcast_first<Packet>(
+ internal::ploadu<Packet>(data1)));
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
if (PacketSize > 1) {
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[1];
- internal::pstore(data2, internal::pbroadcast_second<Packet>(
- internal::pload<Packet>(data1)));
+ internal::pstoreu(data2, internal::pbroadcast_second<Packet>(
+ internal::ploadu<Packet>(data1)));
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
if (PacketSize > 2) {
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[2];
- internal::pstore(data2, internal::pbroadcast_third<Packet>(
- internal::pload<Packet>(data1)));
+ internal::pstoreu(data2, internal::pbroadcast_third<Packet>(
+ internal::ploadu<Packet>(data1)));
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
if (PacketSize > 3) {
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[3];
- internal::pstore(data2, internal::pbroadcast_fourth<Packet>(
- internal::pload<Packet>(data1)));
+ internal::pstoreu(data2, internal::pbroadcast_fourth<Packet>(
+ internal::ploadu<Packet>(data1)));
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
}
}
@@ -276,8 +276,8 @@ TEST_F(SparseMatmulOpTest, InterleavePacketTest) {
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[i];
}
- internal::pstore(
- data2, internal::pinterleave4x64<Packet>(internal::pload<Packet>(data1)));
+ internal::pstoreu(data2, internal::pinterleave4x64<Packet>(
+ internal::ploadu<Packet>(data1)));
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
}
@@ -294,8 +294,8 @@ TEST_F(SparseMatmulOpTest, Bfloat16ExpandTest) {
ref[i] = data3[i];
}
}
- internal::pstore(data2, internal::pexpand_bf16_l<Packet>(
- internal::pload<Packet>(data3_bfloat16)));
+ internal::pstoreu(data2, internal::pexpand_bf16_l<Packet>(
+ internal::ploadu<Packet>(data3_bfloat16)));
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
if (PacketSize == 8) { // AVX
@@ -311,18 +311,18 @@ TEST_F(SparseMatmulOpTest, Bfloat16ExpandTest) {
}
}
- internal::pstore(data2, internal::pexpand_bf16_u<Packet>(
- internal::pload<Packet>(data3_bfloat16)));
+ internal::pstoreu(data2, internal::pexpand_bf16_u<Packet>(
+ internal::ploadu<Packet>(data3_bfloat16)));
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
}
TEST_F(SparseMatmulOpTest, Bfloat16LoadTest) {
if (PacketSize >= 4) {
for (int i = 0; i < 4; ++i) ref[i] = data3[i];
- internal::pstore(data2, internal::pload4bf16<Packet>(data3_bfloat16));
+ internal::pstoreu(data2, internal::pload4bf16<Packet>(data3_bfloat16));
ASSERT_TRUE(areApprox(ref, data2, 4));
- internal::pstore(data2, internal::pload2bf16<Packet>(data3_bfloat16));
+ internal::pstoreu(data2, internal::pload2bf16<Packet>(data3_bfloat16));
ASSERT_TRUE(areApprox(ref, data2, 2));
}
}