From af6976a907b0d2a9fadbb14d7258bab44f075364 Mon Sep 17 00:00:00 2001 From: zxu Date: Sat, 27 Jan 2018 17:53:27 -0500 Subject: normalize and port the rest of Firebase/Port code (#713) * normalize bits * normalize ordered_code --- .../src/firebase/firestore/util/CMakeLists.txt | 4 + Firestore/core/src/firebase/firestore/util/bits.cc | 43 ++ Firestore/core/src/firebase/firestore/util/bits.h | 166 ++++++ .../src/firebase/firestore/util/ordered_code.cc | 622 +++++++++++++++++++++ .../src/firebase/firestore/util/ordered_code.h | 129 +++++ .../src/firebase/firestore/util/secure_random.h | 16 + 6 files changed, 980 insertions(+) create mode 100644 Firestore/core/src/firebase/firestore/util/bits.cc create mode 100644 Firestore/core/src/firebase/firestore/util/bits.h create mode 100644 Firestore/core/src/firebase/firestore/util/ordered_code.cc create mode 100644 Firestore/core/src/firebase/firestore/util/ordered_code.h (limited to 'Firestore/core/src/firebase/firestore/util') diff --git a/Firestore/core/src/firebase/firestore/util/CMakeLists.txt b/Firestore/core/src/firebase/firestore/util/CMakeLists.txt index 51621a0..b763a63 100644 --- a/Firestore/core/src/firebase/firestore/util/CMakeLists.txt +++ b/Firestore/core/src/firebase/firestore/util/CMakeLists.txt @@ -107,11 +107,15 @@ cc_library( SOURCES autoid.cc autoid.h + bits.cc + bits.h comparison.cc comparison.h config.h firebase_assert.h log.h + ordered_code.cc + ordered_code.h secure_random.h string_util.cc string_util.h diff --git a/Firestore/core/src/firebase/firestore/util/bits.cc b/Firestore/core/src/firebase/firestore/util/bits.cc new file mode 100644 index 0000000..0bfe4c3 --- /dev/null +++ b/Firestore/core/src/firebase/firestore/util/bits.cc @@ -0,0 +1,43 @@ +/* + * Copyright 2017 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Firestore/core/src/firebase/firestore/util/bits.h" + +#include "Firestore/core/src/firebase/firestore/util/firebase_assert.h" + +namespace firebase { +namespace firestore { +namespace util { + +int Bits::Log2Floor_Portable(uint32_t n) { + if (n == 0) return -1; + int log = 0; + uint32_t value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint32_t x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + FIREBASE_ASSERT(value == 1); + return log; +} + +} // namespace util +} // namespace firestore +} // namespace firebase diff --git a/Firestore/core/src/firebase/firestore/util/bits.h b/Firestore/core/src/firebase/firestore/util/bits.h new file mode 100644 index 0000000..185273f --- /dev/null +++ b/Firestore/core/src/firebase/firestore/util/bits.h @@ -0,0 +1,166 @@ +/* + * Copyright 2017 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_UTIL_BITS_H_ +#define FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_UTIL_BITS_H_ + +// Various bit-twiddling functions, all of which are static members of the Bits +// class (making it effectively a namespace). Operands are unsigned integers. +// Munging bits in _signed_ integers is fraught with peril! For example, +// -5 << n has undefined behavior (for some values of n). + +#include + +class Bits_Port32_Test; +class Bits_Port64_Test; + +namespace firebase { +namespace firestore { +namespace util { + +class Bits { + public: + /** Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. */ + static int Log2Floor(uint32_t n); + static int Log2Floor64(uint64_t n); + + /** + * Potentially faster version of Log2Floor() that returns an + * undefined value if n == 0. + */ + static int Log2FloorNonZero(uint32_t n); + static int Log2FloorNonZero64(uint64_t n); + + private: + // Portable implementations. + static int Log2Floor_Portable(uint32_t n); + static int Log2Floor64_Portable(uint64_t n); + static int Log2FloorNonZero_Portable(uint32_t n); + static int Log2FloorNonZero64_Portable(uint64_t n); + + Bits(Bits const&) = delete; + void operator=(Bits const&) = delete; + + // Allow tests to call _Portable variants directly. + friend class Bits_Port32_Test; + friend class Bits_Port64_Test; +}; + +// ------------------------------------------------------------------------ +// Implementation details follow +// ------------------------------------------------------------------------ + +#if defined(__GNUC__) + +inline int Bits::Log2Floor(uint32_t n) { + return n == 0 ? -1 : 31 ^ __builtin_clz(n); +} + +inline int Bits::Log2FloorNonZero(uint32_t n) { + return 31 ^ __builtin_clz(n); +} + +inline int Bits::Log2Floor64(uint64_t n) { + return n == 0 ? -1 : 63 ^ __builtin_clzll(n); +} + +inline int Bits::Log2FloorNonZero64(uint64_t n) { + return 63 ^ __builtin_clzll(n); +} + +#elif defined(COMPILER_MSVC) + +inline int Bits::Log2FloorNonZero(uint32 n) { +#ifdef _M_IX86 + _asm { + bsr ebx, n + mov n, ebx + } + return n; +#else + return Bits::Log2FloorNonZero_Portable(n); +#endif +} + +inline int Bits::Log2Floor(uint32 n) { +#ifdef _M_IX86 + _asm { + xor ebx, ebx + mov eax, n + and eax, eax + jz return_ebx + bsr ebx, eax + return_ebx: + mov n, ebx + } + return n; +#else + return Bits::Log2Floor_Portable(n); +#endif +} + +inline int Bits::Log2Floor64(uint64_t n) { + return Bits::Log2Floor64_Portable(n); +} + +inline int Bits::Log2FloorNonZero64(uint64_t n) { + return Bits::Log2FloorNonZero64_Portable(n); +} + +#else // !__GNUC__ && !COMPILER_MSVC + +inline int Bits::Log2Floor64(uint64_t n) { + return Bits::Log2Floor64_Portable(n); +} + +inline int Bits::Log2FloorNonZero64(uint64_t n) { + return Bits::Log2FloorNonZero64_Portable(n); +} + +#endif + +inline int Bits::Log2FloorNonZero_Portable(uint32_t n) { + // Just use the common routine + return Log2Floor(n); +} + +// Log2Floor64() is defined in terms of Log2Floor32(), Log2FloorNonZero32() +inline int Bits::Log2Floor64_Portable(uint64_t n) { + const uint32_t topbits = static_cast(n >> 32); + if (topbits == 0) { + // Top bits are zero, so scan in bottom bits + return Log2Floor(static_cast(n)); + } else { + return 32 + Log2FloorNonZero(topbits); + } +} + +// Log2FloorNonZero64() is defined in terms of Log2FloorNonZero32() +inline int Bits::Log2FloorNonZero64_Portable(uint64_t n) { + const uint32_t topbits = static_cast(n >> 32); + if (topbits == 0) { + // Top bits are zero, so scan in bottom bits + return Log2FloorNonZero(static_cast(n)); + } else { + return 32 + Log2FloorNonZero(topbits); + } +} + +} // namespace util +} // namespace firestore +} // namespace firebase + +#endif // FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_UTIL_BITS_H_ diff --git a/Firestore/core/src/firebase/firestore/util/ordered_code.cc b/Firestore/core/src/firebase/firestore/util/ordered_code.cc new file mode 100644 index 0000000..688f15c --- /dev/null +++ b/Firestore/core/src/firebase/firestore/util/ordered_code.cc @@ -0,0 +1,622 @@ +/* + * Copyright 2017 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Firestore/core/src/firebase/firestore/util/ordered_code.h" + +#include +#include +#include + +#include "Firestore/core/src/firebase/firestore/util/bits.h" +#include "Firestore/core/src/firebase/firestore/util/firebase_assert.h" + +#define UNALIGNED_LOAD32 ABSL_INTERNAL_UNALIGNED_LOAD32 +#define UNALIGNED_LOAD64 ABSL_INTERNAL_UNALIGNED_LOAD64 +#define UNALIGNED_STORE32 ABSL_INTERNAL_UNALIGNED_STORE32 +#define UNALIGNED_STORE64 ABSL_INTERNAL_UNALIGNED_STORE64 + +// We encode a string in different ways depending on whether the item +// should be in lexicographically increasing or decreasing order. +// +// +// Lexicographically increasing order +// +// We want a string-to-string mapping F(x) such that for any two strings +// +// x < y => F(x) < F(y) +// +// In addition to the normal characters '\x00' through '\xff', we want to +// encode a few extra symbols in strings: +// +// Separator between items +// Infinite string +// +// Therefore we need an alphabet with at least 258 symbols. Each +// character '\1' through '\xfe' is mapped to itself. The other four are +// encoded into two-letter sequences starting with '\0' and '\xff': +// +// encoded as => \0\1 +// \0 encoded as => \0\xff +// \xff encoded as => \xff\x00 +// encoded as => \xff\xff +// +// The remaining two-letter sequences starting with '\0' and '\xff' are +// currently unused. +// +// F() is defined above. For any finite string x, F(x) is the +// the encodings of x's characters followed by the encoding for . The +// ordering of two finite strings is the same as the ordering of the +// respective characters at the first position where they differ, which in +// turn is the same as the ordering of the encodings of those two +// characters. Moreover, for every finite string x, F(x) < F(). + +namespace firebase { +namespace firestore { +namespace util { + +static const char kEscape1 = '\000'; +static const char kNullCharacter = '\xff'; // Combined with kEscape1 +static const char kSeparator = '\001'; // Combined with kEscape1 + +static const char kEscape2 = '\xff'; +static const char kInfinity = '\xff'; // Combined with kEscape2 +static const char kFFCharacter = '\000'; // Combined with kEscape2 + +static const char kEscape1_Separator[2] = {kEscape1, kSeparator}; + +/** Append to "*dest" the "len" bytes starting from "*src". */ +inline static void AppendBytes(std::string* dest, const char* src, size_t len) { + dest->append(src, len); +} + +inline static bool IsSpecialByte(char c) { + return ((unsigned char)(c + 1)) < 2; +} + +/** + * Returns 0 if one or more of the bytes in the specified uint32 value + * are the special values 0 or 255, and returns 4 otherwise. The + * result of this routine can be added to "p" to either advance past + * the next 4 bytes if they do not contain a special byte, or to + * remain on this set of four bytes if they contain the next special + * byte occurrence. + * + * REQUIRES: v_32 is the value of loading the next 4 bytes from "*p" (we + * pass in v_32 rather than loading it because in some cases, the client + * may already have the value in a register: "p" is just used for + * assertion checking). + */ +inline static int AdvanceIfNoSpecialBytes(uint32_t v_32, const char* p) { + FIREBASE_ASSERT(UNALIGNED_LOAD32(p) == v_32); + // See comments in SkipToNextSpecialByte if you wish to + // understand this expression (which checks for the occurrence + // of the special byte values 0 or 255 in any of the bytes of v_32). + if ((v_32 - 0x01010101u) & ~(v_32 + 0x01010101u) & 0x80808080u) { + // Special byte is in p[0..3] + FIREBASE_ASSERT(IsSpecialByte(p[0]) || IsSpecialByte(p[1]) || + IsSpecialByte(p[2]) || IsSpecialByte(p[3])); + return 0; + } else { + FIREBASE_ASSERT(!IsSpecialByte(p[0])); + FIREBASE_ASSERT(!IsSpecialByte(p[1])); + FIREBASE_ASSERT(!IsSpecialByte(p[2])); + FIREBASE_ASSERT(!IsSpecialByte(p[3])); + return 4; + } +} + +/** + * Return a pointer to the first byte in the range "[start..limit)" + * whose value is 0 or 255 (kEscape1 or kEscape2). If no such byte + * exists in the range, returns "limit". + */ +inline static const char* SkipToNextSpecialByte(const char* start, + const char* limit) { + // If these constants were ever changed, this routine needs to change + FIREBASE_ASSERT(kEscape1 == 0); + FIREBASE_ASSERT((kEscape2 & 0xff) == 255); + const char* p = start; + while (p + 8 <= limit) { + // Find out if any of the next 8 bytes are either 0 or 255 (our + // two characters that require special handling). We do this using + // the technique described in: + // + // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + // + // We use the test (x + 1) < 2 to check x = 0 or -1(255) + // + // If x is a byte value (0x00..0xff): + // (x - 0x01) & 0x80 is true only when x = 0x81..0xff, 0x00 + // ~(x + 0x01) & 0x80 is true only when x = 0x00..0x7e, 0xff + // The intersection of the above two sets is x = 0x00 or 0xff. + // We can ignore carry between bytes because only x = 0x00 or 0xff + // can cause carry in the expression -- and such x already makes the + // result value non-zero. + uint64_t v = UNALIGNED_LOAD64(p); + bool hasZeroOr255Byte = (v - 0x0101010101010101ull) & + ~(v + 0x0101010101010101ull) & + 0x8080808080808080ull; + if (!hasZeroOr255Byte) { + // No special values in the next 8 bytes + p += 8; + } else { +// We know the next 8 bytes have a special byte: find it +#ifdef IS_LITTLE_ENDIAN + uint32_t v_32 = static_cast(v); // Low 32 bits of v +#else + uint32_t v_32 = UNALIGNED_LOAD32(p); +#endif + // Test 32 bits at once to see if special byte is in next 4 bytes + // or the following 4 bytes + p += AdvanceIfNoSpecialBytes(v_32, p); + if (IsSpecialByte(p[0])) return p; + if (IsSpecialByte(p[1])) return p + 1; + if (IsSpecialByte(p[2])) return p + 2; + FIREBASE_ASSERT( + IsSpecialByte(p[3])); // Last byte must be the special one + return p + 3; + } + } + if (p + 4 <= limit) { + uint32_t v_32 = UNALIGNED_LOAD32(p); + p += AdvanceIfNoSpecialBytes(v_32, p); + } + while (p < limit && !IsSpecialByte(*p)) { + p++; + } + return p; +} + +// Expose SkipToNextSpecialByte for testing purposes +const char* OrderedCode::TEST_SkipToNextSpecialByte(const char* start, + const char* limit) { + return SkipToNextSpecialByte(start, limit); +} + +/** + * Helper routine to encode "s" and append to "*dest", escaping special + * characters. + */ +inline static void EncodeStringFragment(std::string* dest, + absl::string_view s) { + const char* p = s.data(); + const char* limit = p + s.size(); + const char* copy_start = p; + while (true) { + p = SkipToNextSpecialByte(p, limit); + if (p >= limit) break; // No more special characters that need escaping + char c = *(p++); + FIREBASE_ASSERT(IsSpecialByte(c)); + if (c == kEscape1) { + AppendBytes(dest, copy_start, static_cast(p - copy_start) - 1); + dest->push_back(kEscape1); + dest->push_back(kNullCharacter); + copy_start = p; + } else { + FIREBASE_ASSERT(c == kEscape2); + AppendBytes(dest, copy_start, static_cast(p - copy_start) - 1); + dest->push_back(kEscape2); + dest->push_back(kFFCharacter); + copy_start = p; + } + } + if (p > copy_start) { + AppendBytes(dest, copy_start, static_cast(p - copy_start)); + } +} + +void OrderedCode::WriteString(std::string* dest, absl::string_view s) { + EncodeStringFragment(dest, s); + AppendBytes(dest, kEscape1_Separator, 2); +} + +/** + * Return number of bytes needed to encode the non-length portion + * of val in ordered coding. Returns number in range [0,8]. + */ +static inline unsigned int OrderedNumLength(uint64_t val) { + const int lg = Bits::Log2Floor64(val); // -1 if val==0 + return static_cast(lg + 1 + 7) / 8; +} + +/** + * Append n bytes from src to *dst. + * REQUIRES: n <= 9 + * REQUIRES: src[0..8] are readable bytes (even if n is smaller) + * + * If we use string::append() instead of this routine, it increases the + * runtime of WriteNumIncreasing from ~9ns to ~13ns. + */ +static inline void AppendUpto9(std::string* dst, + const char* src, + unsigned int n) { + dst->append(src, 9); // Fixed-length append + const size_t extra = 9 - n; // How many extra bytes we added + dst->erase(dst->size() - extra, extra); +} + +void OrderedCode::WriteNumIncreasing(std::string* dest, uint64_t val) { + // Values are encoded with a single byte length prefix, followed + // by the actual value in big-endian format with leading 0 bytes + // dropped. + + // 8 bytes for value plus one byte for length. In addition, we have + // 8 extra bytes at the end so that we can have a fixed-length append + // call on *dest. + char buf[17]; + + UNALIGNED_STORE64(buf + 1, + absl::ghtonll(val)); // buf[0] may be needed for length + const unsigned int length = OrderedNumLength(val); + char* start = buf + 9 - length - 1; + *start = static_cast(length); + AppendUpto9(dest, start, length + 1); +} + +inline static void WriteInfinityInternal(std::string* dest) { + // Make an array so that we can just do one string operation for performance + static const char buf[2] = {kEscape2, kInfinity}; + dest->append(buf, 2); +} + +void OrderedCode::WriteInfinity(std::string* dest) { + WriteInfinityInternal(dest); +} + +void OrderedCode::WriteTrailingString(std::string* dest, + absl::string_view str) { + dest->append(str.data(), str.size()); +} + +/** + * Parse the encoding of a string previously encoded with or without + * inversion. If parse succeeds, return true, consume encoding from + * "*src", and if result != NULL append the decoded string to "*result". + * Otherwise, return false and leave both undefined. + */ +inline static bool ReadStringInternal(absl::string_view* src, + std::string* result) { + const char* start = src->data(); + const char* string_limit = src->data() + src->size(); + + // We only scan up to "limit-2" since a valid string must end with + // a two character terminator: 'kEscape1 kSeparator' + const char* limit = string_limit - 1; + const char* copy_start = start; + while (true) { + start = SkipToNextSpecialByte(start, limit); + if (start >= limit) break; // No terminator sequence found + const char c = *(start++); + // If inversion is required, instead of inverting 'c', we invert the + // character constants to which 'c' is compared. We get the same + // behavior but save the runtime cost of inverting 'c'. + FIREBASE_ASSERT(IsSpecialByte(c)); + if (c == kEscape1) { + if (result) { + AppendBytes(result, copy_start, + static_cast(start - copy_start) - 1); + } + // kEscape1 kSeparator ends component + // kEscape1 kNullCharacter represents '\0' + const char next = *(start++); + if (next == kSeparator) { + src->remove_prefix(static_cast(start - src->data())); + return true; + } else if (next == kNullCharacter) { + if (result) { + *result += '\0'; + } + } else { + return false; + } + copy_start = start; + } else { + FIREBASE_ASSERT(c == kEscape2); + if (result) { + AppendBytes(result, copy_start, + static_cast(start - copy_start) - 1); + } + // kEscape2 kFFCharacter represents '\xff' + // kEscape2 kInfinity is an error + const char next = *(start++); + if (next == kFFCharacter) { + if (result) { + *result += '\xff'; + } + } else { + return false; + } + copy_start = start; + } + } + return false; +} + +bool OrderedCode::ReadString(absl::string_view* src, std::string* result) { + return ReadStringInternal(src, result); +} + +bool OrderedCode::ReadNumIncreasing(absl::string_view* src, uint64_t* result) { + if (src->empty()) { + return false; // Not enough bytes + } + + // Decode length byte + const size_t len = static_cast((*src)[0]); + + // If len > 0 and src is longer than 1, the first byte of "payload" + // must be non-zero (otherwise the encoding is not minimal). + // In opt mode, we don't enforce that encodings must be minimal. + FIREBASE_ASSERT(0 == len || src->size() == 1 || (*src)[1] != '\0'); + + if (len + 1 > src->size() || len > 8) { + return false; // Not enough bytes or too many bytes + } + + if (result) { + uint64_t tmp = 0; + for (size_t i = 0; i < len; i++) { + tmp <<= 8; + tmp |= static_cast((*src)[1 + i]); + } + *result = tmp; + } + src->remove_prefix(len + 1); + return true; +} + +inline static bool ReadInfinityInternal(absl::string_view* src) { + if (src->size() >= 2 && ((*src)[0] == kEscape2) && ((*src)[1] == kInfinity)) { + src->remove_prefix(2); + return true; + } else { + return false; + } +} + +bool OrderedCode::ReadInfinity(absl::string_view* src) { + return ReadInfinityInternal(src); +} + +inline static bool ReadStringOrInfinityInternal(absl::string_view* src, + std::string* result, + bool* inf) { + if (ReadInfinityInternal(src)) { + if (inf) *inf = true; + return true; + } + + // We don't use ReadStringInternal here because that would inline + // the whole encoded string parsing code here. Depending on INVERT, only + // one of the following two calls will be generated at compile time. + bool success = OrderedCode::ReadString(src, result); + if (success) { + if (inf) *inf = false; + return true; + } else { + return false; + } +} + +bool OrderedCode::ReadStringOrInfinity(absl::string_view* src, + std::string* result, + bool* inf) { + return ReadStringOrInfinityInternal(src, result, inf); +} + +bool OrderedCode::ReadTrailingString(absl::string_view* src, + std::string* result) { + if (result) result->assign(src->data(), src->size()); + src->remove_prefix(src->size()); + return true; +} + +void OrderedCode::TEST_Corrupt(std::string* str, int k) { + int seen_seps = 0; + for (size_t i = 0; i < str->size() - 1; i++) { + if ((*str)[i] == kEscape1 && (*str)[i + 1] == kSeparator) { + seen_seps++; + if (seen_seps == k) { + (*str)[i + 1] = kSeparator + 1; + return; + } + } + } +} + +// Signed number encoding/decoding ///////////////////////////////////// +// +// The format is as follows: +// +// The first bit (the most significant bit of the first byte) +// represents the sign, 0 if the number is negative and +// 1 if the number is >= 0. +// +// Any unbroken sequence of successive bits with the same value as the sign +// bit, up to 9 (the 8th and 9th are the most significant bits of the next +// byte), are size bits that count the number of bytes after the first byte. +// That is, the total length is between 1 and 10 bytes. +// +// The value occupies the bits after the sign bit and the "size bits" +// till the end of the string, in network byte order. If the number +// is negative, the bits are in 2-complement. +// +// +// Example 1: number 0x424242 -> 4 byte big-endian hex string 0xf0424242: +// +// +---------------+---------------+---------------+---------------+ +// 1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 +// +---------------+---------------+---------------+---------------+ +// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ +// | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +// | | | | payload: the remaining bits after the sign and size bits +// | | | | and the delimiter bit, the value is 0x424242 +// | | | | +// | size bits: 3 successive bits with the same value as the sign bit +// | (followed by a delimiter bit with the opposite value) +// | mean that there are 3 bytes after the first byte, 4 total +// | +// sign bit: 1 means that the number is non-negative +// +// Example 2: negative number -0x800 -> 2 byte big-endian hex string 0x3800: +// +// +---------------+---------------+ +// 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 +// +---------------+---------------+ +// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ +// | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +// | | payload: the remaining bits after the sign and size bits and the +// | | delimiter bit, 2-complement because of the negative sign, +// | | value is ~0x7ff, represents the value -0x800 +// | | +// | size bits: 1 bit with the same value as the sign bit +// | (followed by a delimiter bit with the opposite value) +// | means that there is 1 byte after the first byte, 2 total +// | +// sign bit: 0 means that the number is negative +// +// +// Compared with the simpler unsigned format used for uint64_t numbers, +// this format is more compact for small numbers, namely one byte encodes +// numbers in the range [-64,64), two bytes cover the range [-2^13,2^13), etc. +// In general, n bytes encode numbers in the range [-2^(n*7-1),2^(n*7-1)). +// (The cross-over point for compactness of representation is 8 bytes, +// where this format only covers the range [-2^55,2^55), +// whereas an encoding with sign bit and length in the first byte and +// payload in all following bytes would cover [-2^56,2^56).) + +static const int kMaxSigned64Length = 10; + +// This array maps encoding length to header bits in the first two bytes. +static const char kLengthToHeaderBits[1 + kMaxSigned64Length][2] = { + {0, 0}, {'\x80', 0}, {'\xc0', 0}, {'\xe0', 0}, + {'\xf0', 0}, {'\xf8', 0}, {'\xfc', 0}, {'\xfe', 0}, + {'\xff', 0}, {'\xff', '\x80'}, {'\xff', '\xc0'}}; + +// This array maps encoding lengths to the header bits that overlap with +// the payload and need fixing when reading. +static const uint64_t kLengthToMask[1 + kMaxSigned64Length] = { + 0ULL, + 0x80ULL, + 0xc000ULL, + 0xe00000ULL, + 0xf0000000ULL, + 0xf800000000ULL, + 0xfc0000000000ULL, + 0xfe000000000000ULL, + 0xff00000000000000ULL, + 0x8000000000000000ULL, + 0ULL}; + +// This array maps the number of bits in a number to the encoding +// length produced by WriteSignedNumIncreasing. +// For positive numbers, the number of bits is 1 plus the most significant +// bit position (the highest bit position in a positive int64_t is 63). +// For a negative number n, we count the bits in ~n. +// That is, length = kBitsToLength[Bits::Log2Floor64(n < 0 ? ~n : n) + 1]. +static const int8_t kBitsToLength[1 + 63] = { + 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, + 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 7, + 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10}; + +/** Calculates the encoding length in bytes of the signed number n. */ +static inline int SignedEncodingLength(int64_t n) { + return kBitsToLength[Bits::Log2Floor64( + static_cast(n < 0 ? ~n : n)) + + 1]; +} + +/** Slightly faster version for n > 0. */ +static inline int SignedEncodingLengthPositive(int64_t n) { + return kBitsToLength[Bits::Log2FloorNonZero64(static_cast(n)) + 1]; +} + +void OrderedCode::WriteSignedNumIncreasing(std::string* dest, int64_t val) { + const int64_t x = val < 0 ? ~val : val; + if (x < 64) { // fast path for encoding length == 1 + *dest += static_cast(kLengthToHeaderBits[1][0] ^ val); + return; + } + // buf = val in network byte order, sign extended to 10 bytes + const char sign_byte = val < 0 ? '\xff' : '\0'; + char buf[10] = { + sign_byte, + sign_byte, + }; + UNALIGNED_STORE64(buf + 2, absl::ghtonll(static_cast(val))); + + FIREBASE_ASSERT_MESSAGE_WITH_EXPRESSION(sizeof(buf) == kMaxSigned64Length, + sizeof(buf) == kMaxSigned64Length, + "max length size mismatch"); + const size_t len = static_cast(SignedEncodingLengthPositive(x)); + FIREBASE_ASSERT(len >= 2); + char* const begin = buf + sizeof(buf) - len; + begin[0] ^= kLengthToHeaderBits[len][0]; + begin[1] ^= kLengthToHeaderBits[len][1]; // ok because len >= 2 + dest->append(begin, len); +} + +bool OrderedCode::ReadSignedNumIncreasing(absl::string_view* src, + int64_t* result) { + if (src->empty()) return false; + const uint64_t xor_mask = (!((*src)[0] & 0x80)) ? ~0ULL : 0ULL; + const unsigned char first_byte = static_cast( + static_cast((*src)[0]) ^ (xor_mask & 0xff)); + + // now calculate and test length, and set x to raw (unmasked) result + size_t len; + uint64_t x; + if (first_byte != 0xff) { + len = static_cast(7 - Bits::Log2FloorNonZero(first_byte ^ 0xff)); + if (src->size() < len) return false; + x = xor_mask; // sign extend using xor_mask + for (size_t i = 0; i < len; ++i) + x = (x << 8) | static_cast((*src)[i]); + } else { + len = 8; + if (src->size() < len) return false; + const unsigned char second_byte = static_cast( + static_cast((*src)[1]) ^ (xor_mask & 0xff)); + if (second_byte >= 0x80) { + if (second_byte < 0xc0) { + len = 9; + } else { + const unsigned char third_byte = static_cast( + static_cast((*src)[2]) ^ (xor_mask & 0xff)); + if (second_byte == 0xc0 && third_byte < 0x80) { + len = 10; + } else { + return false; // either len > 10 or len == 10 and #bits > 63 + } + } + if (src->size() < len) return false; + } + x = absl::gntohll(UNALIGNED_LOAD64(src->data() + len - 8)); + } + + x ^= kLengthToMask[len]; // remove spurious header bits + + FIREBASE_ASSERT(len == static_cast( + SignedEncodingLength(static_cast(x)))); + + if (result) *result = static_cast(x); + src->remove_prefix(static_cast(len)); + return true; +} + +} // namespace util +} // namespace firestore +} // namespace firebase diff --git a/Firestore/core/src/firebase/firestore/util/ordered_code.h b/Firestore/core/src/firebase/firestore/util/ordered_code.h new file mode 100644 index 0000000..57b84bd --- /dev/null +++ b/Firestore/core/src/firebase/firestore/util/ordered_code.h @@ -0,0 +1,129 @@ +/* + * Copyright 2017 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This module provides routines for encoding a sequence of typed +// entities into a string. The resulting strings can be +// lexicographically compared to yield the same comparison value that +// would have been generated if the encoded items had been compared +// one by one according to their type. +// +// More precisely, suppose: +// 1. string A is generated by encoding the sequence of items [A_1..A_n] +// 2. string B is generated by encoding the sequence of items [B_1..B_n] +// 3. The types match; i.e., for all i: A_i was encoded using +// the same routine as B_i +// Then: +// Comparing A vs. B lexicographically is the same as comparing +// the vectors [A_1..A_n] and [B_1..B_n] lexicographically. +// +// Furthermore, if n < m, the encoding of [A_1..A_n] is a strict prefix of +// [A_1..A_m] (unless m = n+1 and A_m is the empty string encoded with +// WriteTrailingString, in which case the encodings are equal). +// +// This module is often useful when generating multi-part sstable +// keys that have to be ordered in a particular fashion. + +#ifndef FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_UTIL_ORDERED_CODE_H_ +#define FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_UTIL_ORDERED_CODE_H_ + +#include + +#include "absl/strings/string_view.h" + +namespace firebase { +namespace firestore { +namespace util { + +class OrderedCode { + public: + // ------------------------------------------------------------------- + // Encoding routines: each one of the following routines append + // one item to "*dest". The Write(Signed)NumIncreasing() and + // Write(Signed)NumDecreasing() routines differ in whether the resulting + // encoding is ordered by increasing number or decreasing number. + // Similarly, WriteString() and WriteStringDecreasing() differ in whether + // the resulting encoding is ordered by the original string in + // lexicographically increasing or decreasing order. WriteString() + // is not called WriteStringIncreasing() for convenience and backward + // compatibility. + + static void WriteString(std::string* dest, absl::string_view str); + static void WriteNumIncreasing(std::string* dest, uint64_t num); + static void WriteSignedNumIncreasing(std::string* dest, int64_t num); + + /** + * Creates an encoding for the "infinite string", a value considered to + * be lexicographically after any real string. Note that in the case of + * WriteInfinityDecreasing(), this would come before any real string as + * the ordering puts lexicographically greater values first. + */ + static void WriteInfinity(std::string* dest); + + /** + * Special string append that can only be used at the tail end of + * an encoded string -- blindly appends "str" to "*dest". + */ + static void WriteTrailingString(std::string* dest, absl::string_view str); + + // ------------------------------------------------------------------- + // Decoding routines: these extract an item earlier encoded using + // the corresponding WriteXXX() routines above. The item is read + // from "*src"; "*src" is modified to point past the decoded item; + // and if "result" is non-NULL, "*result" is modified to contain the + // result. In case of string result, the decoded string is appended to + // "*result". Returns true if the next item was read successfully, false + // otherwise. + + static bool ReadString(absl::string_view* src, std::string* result); + static bool ReadNumIncreasing(absl::string_view* src, uint64_t* result); + static bool ReadSignedNumIncreasing(absl::string_view* src, int64_t* result); + + static bool ReadInfinity(absl::string_view* src); + static bool ReadTrailingString(absl::string_view* src, std::string* result); + + /** REQUIRES: next item was encoded by WriteInfinity() or WriteString(). */ + static bool ReadStringOrInfinity(absl::string_view* src, + std::string* result, + bool* inf); + + /** + * Helper for testing: corrupt "*str" by changing the kth item separator + * in the string. + */ + static void TEST_Corrupt(std::string* str, int k); + + /** + * Helper for testing. + * SkipToNextSpecialByte is an internal routine defined in the .cc file + * with the following semantics. Return a pointer to the first byte + * in the range "[start..limit)" whose value is 0 or 255. If no such + * byte exists in the range, returns "limit". + */ + static const char* TEST_SkipToNextSpecialByte(const char* start, + const char* limit); + + // Not an instantiable class, but the class exists to make it easy to + // use with a single using statement. + OrderedCode() = delete; + OrderedCode(const OrderedCode&) = delete; + OrderedCode& operator=(const OrderedCode&) = delete; +}; + +} // namespace util +} // namespace firestore +} // namespace firebase + +#endif // FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_UTIL_ORDERED_CODE_H_ diff --git a/Firestore/core/src/firebase/firestore/util/secure_random.h b/Firestore/core/src/firebase/firestore/util/secure_random.h index 72be1bd..95b41e1 100644 --- a/Firestore/core/src/firebase/firestore/util/secure_random.h +++ b/Firestore/core/src/firebase/firestore/util/secure_random.h @@ -50,6 +50,22 @@ class SecureRandom { } result_type operator()(); + + /** Returns a uniformly distributed pseudorandom integer in [0, n). */ + inline result_type Uniform(result_type n) { + // Divides the range into buckets of size n plus leftovers. + const result_type rem = (max() - min()) % n + min() + 1; + result_type rnd; + // Generates random number until the number falls into a bucket. + do { + rnd = (*this)(); + } while (rnd < rem); + return rnd % n; + } + + inline bool OneIn(result_type n) { + return Uniform(n) == 0; + } }; } // namespace util -- cgit v1.2.3