diff options
author | 2016-06-06 17:04:25 -0800 | |
---|---|---|
committer | 2016-06-06 18:18:01 -0700 | |
commit | db769cc4f04bac93221cdbe16cf4ed2e9785163e (patch) | |
tree | a2a1d92d4ab7ee410d97ec543ac7bef0df3d6828 /tensorflow | |
parent | 9eab455050438eb2f5b1df79e145236ee38c42ad (diff) |
Fix MacOS failures with -mavx in Interleave Packet Test
Change: 124200069
Diffstat (limited to 'tensorflow')
-rw-r--r-- | tensorflow/core/kernels/sparse_matmul_op.h | 13 | ||||
-rw-r--r-- | tensorflow/core/kernels/sparse_matmul_op_test.cc | 32 |
2 files changed, 25 insertions, 20 deletions
```diff
diff --git a/tensorflow/core/kernels/sparse_matmul_op.h b/tensorflow/core/kernels/sparse_matmul_op.h
index d10cbad1d2..613c6a15c5 100644
--- a/tensorflow/core/kernels/sparse_matmul_op.h
+++ b/tensorflow/core/kernels/sparse_matmul_op.h
@@ -157,10 +157,15 @@ EIGEN_STRONG_INLINE Packet8f pinterleave4x64<Packet8f>(const Packet8f& from) {
   return _mm256_castsi256_ps(_mm256_permute4x64_epi64(_mm256_castps_si256(from),
                                                       _MM_SHUFFLE(3, 1, 2, 0)));
 #else
-  __int64_t tmp1 = _mm256_extract_epi64(_mm256_castps_si256(from), 1);
-  __int64_t tmp2 = _mm256_extract_epi64(_mm256_castps_si256(from), 2);
-  __m256i tmp3 = _mm256_insert_epi64(_mm256_castps_si256(from), tmp1, 2);
-  return _mm256_castsi256_ps(_mm256_insert_epi64(tmp3, tmp2, 1));
+  auto tmp1 = _mm256_extract_epi32(_mm256_castps_si256(from), 2);
+  auto tmp2 = _mm256_extract_epi32(_mm256_castps_si256(from), 3);
+  auto tmp3 = _mm256_extract_epi32(_mm256_castps_si256(from), 4);
+  auto tmp4 = _mm256_extract_epi32(_mm256_castps_si256(from), 5);
+  auto tmp5 = _mm256_insert_epi32(_mm256_castps_si256(from), tmp1, 4);
+  tmp5 = _mm256_insert_epi32(tmp5, tmp2, 5);
+  tmp5 = _mm256_insert_epi32(tmp5, tmp3, 2);
+  tmp5 = _mm256_insert_epi32(tmp5, tmp4, 3);
+  return _mm256_castsi256_ps(tmp5);
 #endif
 }
 // Return a Packet with 4 floats loaded from 4 bfloat16 values
diff --git a/tensorflow/core/kernels/sparse_matmul_op_test.cc b/tensorflow/core/kernels/sparse_matmul_op_test.cc
index cb88580800..45cad2e23b 100644
--- a/tensorflow/core/kernels/sparse_matmul_op_test.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op_test.cc
@@ -238,25 +238,25 @@ class SparseMatmulOpTest : public ::testing::Test {
 TEST_F(SparseMatmulOpTest, BroadcastPacketTest) {
   for (int i = 0; i < PacketSize; ++i) ref[i] = data1[0];
-  internal::pstore(data2, internal::pbroadcast_first<Packet>(
-                              internal::pload<Packet>(data1)));
+  internal::pstoreu(data2, internal::pbroadcast_first<Packet>(
+                               internal::ploadu<Packet>(data1)));
   ASSERT_TRUE(areApprox(ref, data2, PacketSize));
   if (PacketSize > 1) {
     for (int i = 0; i < PacketSize; ++i) ref[i] = data1[1];
-    internal::pstore(data2, internal::pbroadcast_second<Packet>(
-                                internal::pload<Packet>(data1)));
+    internal::pstoreu(data2, internal::pbroadcast_second<Packet>(
+                                 internal::ploadu<Packet>(data1)));
     ASSERT_TRUE(areApprox(ref, data2, PacketSize));
     if (PacketSize > 2) {
       for (int i = 0; i < PacketSize; ++i) ref[i] = data1[2];
-      internal::pstore(data2, internal::pbroadcast_third<Packet>(
-                                  internal::pload<Packet>(data1)));
+      internal::pstoreu(data2, internal::pbroadcast_third<Packet>(
+                                   internal::ploadu<Packet>(data1)));
       ASSERT_TRUE(areApprox(ref, data2, PacketSize));
       if (PacketSize > 3) {
         for (int i = 0; i < PacketSize; ++i) ref[i] = data1[3];
-        internal::pstore(data2, internal::pbroadcast_fourth<Packet>(
-                                    internal::pload<Packet>(data1)));
+        internal::pstoreu(data2, internal::pbroadcast_fourth<Packet>(
+                                     internal::ploadu<Packet>(data1)));
         ASSERT_TRUE(areApprox(ref, data2, PacketSize));
       }
     }
@@ -276,8 +276,8 @@ TEST_F(SparseMatmulOpTest, InterleavePacketTest) {
     for (int i = 0; i < PacketSize; ++i) ref[i] = data1[i];
   }
-  internal::pstore(
-      data2, internal::pinterleave4x64<Packet>(internal::pload<Packet>(data1)));
+  internal::pstoreu(data2, internal::pinterleave4x64<Packet>(
+                               internal::ploadu<Packet>(data1)));
   ASSERT_TRUE(areApprox(ref, data2, PacketSize));
 }
@@ -294,8 +294,8 @@ TEST_F(SparseMatmulOpTest, Bfloat16ExpandTest) {
       ref[i] = data3[i];
     }
   }
-  internal::pstore(data2, internal::pexpand_bf16_l<Packet>(
-                              internal::pload<Packet>(data3_bfloat16)));
+  internal::pstoreu(data2, internal::pexpand_bf16_l<Packet>(
+                               internal::ploadu<Packet>(data3_bfloat16)));
   ASSERT_TRUE(areApprox(ref, data2, PacketSize));
   if (PacketSize == 8) {  // AVX
@@ -311,18 +311,18 @@ TEST_F(SparseMatmulOpTest, Bfloat16ExpandTest) {
     }
   }
-  internal::pstore(data2, internal::pexpand_bf16_u<Packet>(
-                              internal::pload<Packet>(data3_bfloat16)));
+  internal::pstoreu(data2, internal::pexpand_bf16_u<Packet>(
+                               internal::ploadu<Packet>(data3_bfloat16)));
   ASSERT_TRUE(areApprox(ref, data2, PacketSize));
 }
 TEST_F(SparseMatmulOpTest, Bfloat16LoadTest) {
   if (PacketSize >= 4) {
     for (int i = 0; i < 4; ++i) ref[i] = data3[i];
-    internal::pstore(data2, internal::pload4bf16<Packet>(data3_bfloat16));
+    internal::pstoreu(data2, internal::pload4bf16<Packet>(data3_bfloat16));
     ASSERT_TRUE(areApprox(ref, data2, 4));
-    internal::pstore(data2, internal::pload2bf16<Packet>(data3_bfloat16));
+    internal::pstoreu(data2, internal::pload2bf16<Packet>(data3_bfloat16));
     ASSERT_TRUE(areApprox(ref, data2, 2));
   }
 }
```