diff options
Diffstat (limited to 'src/google/protobuf/wire_format_lite.cc')
-rw-r--r-- | src/google/protobuf/wire_format_lite.cc | 233 |
1 files changed, 233 insertions, 0 deletions
diff --git a/src/google/protobuf/wire_format_lite.cc b/src/google/protobuf/wire_format_lite.cc
index 5a8857ed..e46ac400 100644
--- a/src/google/protobuf/wire_format_lite.cc
+++ b/src/google/protobuf/wire_format_lite.cc
@@ -34,6 +34,9 @@
 
 #include <google/protobuf/wire_format_lite_inl.h>
 
+#ifdef __SSE4_1__
+#include <immintrin.h>
+#endif
 #include <stack>
 #include <string>
 #include <vector>
@@ -337,6 +340,90 @@ bool WireFormatLite::ReadPackedEnumPreserveUnknowns(
   return true;
 }
 
+namespace {
+void EncodeFixedSizeValue(float v, uint8* dest) {
+  WireFormatLite::WriteFloatNoTagToArray(v, dest);
+}
+
+void EncodeFixedSizeValue(double v, uint8* dest) {
+  WireFormatLite::WriteDoubleNoTagToArray(v, dest);
+}
+
+void EncodeFixedSizeValue(uint32 v, uint8* dest) {
+  WireFormatLite::WriteFixed32NoTagToArray(v, dest);
+}
+
+void EncodeFixedSizeValue(uint64 v, uint8* dest) {
+  WireFormatLite::WriteFixed64NoTagToArray(v, dest);
+}
+
+void EncodeFixedSizeValue(int32 v, uint8* dest) {
+  WireFormatLite::WriteSFixed32NoTagToArray(v, dest);
+}
+
+void EncodeFixedSizeValue(int64 v, uint8* dest) {
+  WireFormatLite::WriteSFixed64NoTagToArray(v, dest);
+}
+
+void EncodeFixedSizeValue(bool v, uint8* dest) {
+  WireFormatLite::WriteBoolNoTagToArray(v, dest);
+}
+}  // anonymous namespace
+
+template <typename CType>
+static void WriteArray(const CType* a, int n, io::CodedOutputStream* output) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+  output->WriteRaw(reinterpret_cast<const char*>(a), n * sizeof(a[0]));
+#else
+  const int kAtATime = 128;
+  uint8 buf[sizeof(CType) * kAtATime];
+  for (int i = 0; i < n; i += kAtATime) {
+    int to_do = min(kAtATime, n - i);
+    uint8* ptr = buf;
+    for (int j = 0; j < to_do; j++) {
+      EncodeFixedSizeValue(a[i+j], ptr);
+      ptr += sizeof(a[0]);
+    }
+    output->WriteRaw(buf, to_do * sizeof(a[0]));
+  }
+#endif
+}
+
+void WireFormatLite::WriteFloatArray(const float* a, int n,
+                                     io::CodedOutputStream* output) {
+  WriteArray<float>(a, n, output);
+}
+
+void WireFormatLite::WriteDoubleArray(const double* a, int n,
+                                      io::CodedOutputStream* output) {
+  WriteArray<double>(a, n, output);
+}
+
+void WireFormatLite::WriteFixed32Array(const uint32* a, int n,
+                                       io::CodedOutputStream* output) {
+  WriteArray<uint32>(a, n, output);
+}
+
+void WireFormatLite::WriteFixed64Array(const uint64* a, int n,
+                                       io::CodedOutputStream* output) {
+  WriteArray<uint64>(a, n, output);
+}
+
+void WireFormatLite::WriteSFixed32Array(const int32* a, int n,
+                                        io::CodedOutputStream* output) {
+  WriteArray<int32>(a, n, output);
+}
+
+void WireFormatLite::WriteSFixed64Array(const int64* a, int n,
+                                        io::CodedOutputStream* output) {
+  WriteArray<int64>(a, n, output);
+}
+
+void WireFormatLite::WriteBoolArray(const bool* a, int n,
+                                    io::CodedOutputStream* output) {
+  WriteArray<bool>(a, n, output);
+}
+
 void WireFormatLite::WriteInt32(int field_number, int32 value,
                                 io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
@@ -540,6 +627,152 @@ bool WireFormatLite::VerifyUtf8String(const char* data,
   return true;
 }
 
+#ifdef __SSE4_1__
+template<typename T, bool ZigZag, bool SignExtended>
+static size_t VarintSize(
+    const T* data, const int n,
+    const typename internal::enable_if<sizeof(T) == 4>::type* = NULL) {
+#if __cplusplus >= 201103L
+  // is_unsigned<T> => !ZigZag
+  static_assert((std::is_unsigned<T>::value ^ ZigZag) ||
+                std::is_signed<T>::value,
+                "Cannot ZigZag encode unsigned types");
+  // is_unsigned<T> => !SignExtended
+  static_assert((std::is_unsigned<T>::value ^ SignExtended) ||
+                std::is_signed<T>::value,
+                "Cannot SignExtended unsigned types");
+#endif
+
+  union vus32 {
+    uint32 u[4];
+    int32 s[4];
+    __m128i v;
+  };
+
+  static const vus32 ones = {{1, 1, 1, 1}};
+
+  // CodedOutputStream::VarintSize32SignExtended returns 10 for negative
+  // numbers. We can apply the UInt32Size algorithm, and simultaneously logical
+  // shift the MSB into the LSB to determine if it is negative.
+  static const vus32 fives = {{5, 5, 5, 5}};
+
+  // sum is the vectorized-output of calling CodedOutputStream::VarintSize32 on
+  // the processed elements.
+  //
+  // msb_sum is the count of set most-significant bits. When computing the
+  // vectorized CodedOutputStream::VarintSize32SignExtended, negative values
+  // have the most significant bit set. VarintSize32SignExtended returns 10 and
+  // VarintSize32 returns 5. msb_sum allows us to compute:
+  //   VarintSize32SignExtended = msb_sum * 5 + VarintSize32
+  vus32 sum, v, msb_sum;
+  sum.v = _mm_setzero_si128();
+  msb_sum.v = _mm_setzero_si128();
+
+  int rounded = n & ~(3);
+  int i;
+  for (i = 0; i < rounded; i += 4) {
+    v.v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&data[i]));
+
+    if (ZigZag) {
+      // Note: the right-shift must be arithmetic
+      v.v = _mm_xor_si128(_mm_slli_epi32(v.v, 1), _mm_srai_epi32(v.v, 31));
+    }
+
+    sum.v = _mm_add_epi32(sum.v, ones.v);
+    if (SignExtended && !ZigZag) {  // ZigZag output is never sign-extended.
+      msb_sum.v = _mm_add_epi32(msb_sum.v, _mm_srli_epi32(v.v, 31));
+    }
+
+    v.v = _mm_srli_epi32(v.v, 7);
+
+    for (int j = 0; j < 4; j++) {
+      __m128i min = _mm_min_epi32(v.v, ones.v);
+
+      sum.v = _mm_add_epi32(sum.v, min);
+      v.v = _mm_srli_epi32(v.v, 7);
+    }
+  }
+
+  if (SignExtended) {
+    vus32 extensions;
+    extensions.v = _mm_mullo_epi32(msb_sum.v, fives.v);
+
+    sum.v = _mm_add_epi32(sum.v, extensions.v);
+  }
+
+  // The lane sums are unsigned; cast so a large total is not sign-extended.
+  size_t out = static_cast<uint32>(_mm_cvtsi128_si32(
+      _mm_hadd_epi32(_mm_hadd_epi32(sum.v, ones.v), ones.v)));
+
+  // Finish tail.
+  for (; i < n; i++) {
+    if (ZigZag) {
+      out += WireFormatLite::SInt32Size(data[i]);
+    } else if (SignExtended) {
+      out += WireFormatLite::Int32Size(data[i]);
+    } else {
+      out += WireFormatLite::UInt32Size(data[i]);
+    }
+  }
+
+  return out;
+}
+
+size_t WireFormatLite::Int32Size(const RepeatedField<int32>& value) {
+  return VarintSize<int32, false, true>(value.data(), value.size());
+}
+
+size_t WireFormatLite::UInt32Size(const RepeatedField<uint32>& value) {
+  return VarintSize<uint32, false, false>(value.data(), value.size());
+}
+
+size_t WireFormatLite::SInt32Size(const RepeatedField<int32>& value) {
+  return VarintSize<int32, true, false>(value.data(), value.size());
+}
+
+size_t WireFormatLite::EnumSize(const RepeatedField<int>& value) {
+  // On ILP64, sizeof(int) == 8, which would require a different template.
+  return VarintSize<int, false, true>(value.data(), value.size());
+}
+
+#else  // !__SSE4_1__
+size_t WireFormatLite::Int32Size(const RepeatedField<int32>& value) {
+  size_t out = 0;
+  const int n = value.size();
+  for (int i = 0; i < n; i++) {
+    out += Int32Size(value.Get(i));
+  }
+  return out;
+}
+
+size_t WireFormatLite::UInt32Size(const RepeatedField<uint32>& value) {
+  size_t out = 0;
+  const int n = value.size();
+  for (int i = 0; i < n; i++) {
+    out += UInt32Size(value.Get(i));
+  }
+  return out;
+}
+
+size_t WireFormatLite::SInt32Size(const RepeatedField<int32>& value) {
+  size_t out = 0;
+  const int n = value.size();
+  for (int i = 0; i < n; i++) {
+    out += SInt32Size(value.Get(i));
+  }
+  return out;
+}
+
+size_t WireFormatLite::EnumSize(const RepeatedField<int>& value) {
+  size_t out = 0;
+  const int n = value.size();
+  for (int i = 0; i < n; i++) {
+    out += EnumSize(value.Get(i));
+  }
+  return out;
+}
+#endif
+
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google