From dba753a986b527a17c8cc62474d0487aec7c2b36 Mon Sep 17 00:00:00 2001
From: Antonio Sanchez
Date: Mon, 24 May 2021 21:34:35 -0700
Subject: Add missing NEON ptranspose implementations.

Unified implementation using only `vzip`.
---
 test/packetmath.cpp | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'test')

diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index c81ca63c4..121ec7283 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -546,22 +546,24 @@ void packetmath() {
     }
   }
 
-  const int m_size = PacketSize < 4 ? 1 : 4;
-  internal::PacketBlock kernel2;
-  for (int i = 0; i < m_size; ++i) {
-    kernel2.packet[i] = internal::pload(data1 + i * PacketSize);
-  }
-  ptranspose(kernel2);
-  int data_counter = 0;
-  for (int i = 0; i < PacketSize; ++i) {
-    for (int j = 0; j < m_size; ++j) {
-      data2[data_counter++] = data1[j*PacketSize + i];
+  // GeneralBlockPanelKernel also checks PacketBlock;
+  if (PacketSize > 4 && PacketSize % 4 == 0) {
+    internal::PacketBlock kernel2;
+    for (int i = 0; i < 4; ++i) {
+      kernel2.packet[i] = internal::pload(data1 + i * PacketSize);
     }
-  }
-  for (int i = 0; i < m_size; ++i) {
-    internal::pstore(data3, kernel2.packet[i]);
-    for (int j = 0; j < PacketSize; ++j) {
-      VERIFY(test::isApproxAbs(data3[j], data2[i*PacketSize + j], refvalue) && "ptranspose");
+    ptranspose(kernel2);
+    int data_counter = 0;
+    for (int i = 0; i < PacketSize; ++i) {
+      for (int j = 0; j < 4; ++j) {
+        data2[data_counter++] = data1[j*PacketSize + i];
+      }
+    }
+    for (int i = 0; i < 4; ++i) {
+      internal::pstore(data3, kernel2.packet[i]);
+      for (int j = 0; j < PacketSize; ++j) {
+        VERIFY(test::isApproxAbs(data3[j], data2[i*PacketSize + j], refvalue) && "ptranspose");
+      }
     }
   }
 
--
cgit v1.2.3