aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/lib/strings
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/lib/strings')
-rw-r--r--tensorflow/core/lib/strings/numbers.cc260
-rw-r--r--tensorflow/core/lib/strings/numbers.h92
-rw-r--r--tensorflow/core/lib/strings/numbers_test.cc113
-rw-r--r--tensorflow/core/lib/strings/ordered_code.cc515
-rw-r--r--tensorflow/core/lib/strings/ordered_code.h77
-rw-r--r--tensorflow/core/lib/strings/ordered_code_test.cc1183
-rw-r--r--tensorflow/core/lib/strings/str_util.cc312
-rw-r--r--tensorflow/core/lib/strings/str_util.h149
-rw-r--r--tensorflow/core/lib/strings/str_util_test.cc258
-rw-r--r--tensorflow/core/lib/strings/strcat.cc194
-rw-r--r--tensorflow/core/lib/strings/strcat.h229
-rw-r--r--tensorflow/core/lib/strings/strcat_test.cc324
-rw-r--r--tensorflow/core/lib/strings/stringprintf.cc85
-rw-r--r--tensorflow/core/lib/strings/stringprintf.h37
-rw-r--r--tensorflow/core/lib/strings/stringprintf_test.cc113
15 files changed, 3941 insertions, 0 deletions
diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
new file mode 100644
index 0000000000..d61129fb3f
--- /dev/null
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -0,0 +1,260 @@
+#include "tensorflow/core/lib/strings/numbers.h"
+
+#include <float.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <cmath>
+
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace strings {
+
+char* FastInt32ToBufferLeft(int32 i, char* buffer) {
+ uint32 u = i;
+ if (i < 0) {
+ *buffer++ = '-';
+ // We need to do the negation in modular (i.e., "unsigned")
+ // arithmetic; MSVC++ apprently warns for plain "-u", so
+ // we write the equivalent expression "0 - u" instead.
+ u = 0 - u;
+ }
+ return FastUInt32ToBufferLeft(u, buffer);
+}
+
+char* FastUInt32ToBufferLeft(uint32 i, char* buffer) {
+ char* start = buffer;
+ do {
+ *buffer++ = ((i % 10) + '0');
+ i /= 10;
+ } while (i > 0);
+ *buffer = 0;
+ std::reverse(start, buffer);
+ return buffer;
+}
+
+char* FastInt64ToBufferLeft(int64 i, char* buffer) {
+ uint64 u = i;
+ if (i < 0) {
+ *buffer++ = '-';
+ u = 0 - u;
+ }
+ return FastUInt64ToBufferLeft(u, buffer);
+}
+
+char* FastUInt64ToBufferLeft(uint64 i, char* buffer) {
+ char* start = buffer;
+ do {
+ *buffer++ = ((i % 10) + '0');
+ i /= 10;
+ } while (i > 0);
+ *buffer = 0;
+ std::reverse(start, buffer);
+ return buffer;
+}
+
+static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001;
+
+char* DoubleToBuffer(double value, char* buffer) {
+ // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
+ // platforms these days. Just in case some system exists where DBL_DIG
+ // is significantly larger -- and risks overflowing our buffer -- we have
+ // this assert.
+ static_assert(DBL_DIG < 20, "DBL_DIG is too big");
+
+ bool full_precision_needed = true;
+ if (std::abs(value) <= kDoublePrecisionCheckMax) {
+ int snprintf_result =
+ snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG, value);
+
+ // The snprintf should never overflow because the buffer is significantly
+ // larger than the precision we asked for.
+ DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
+
+ full_precision_needed = strtod(buffer, NULL) != value;
+ }
+
+ if (full_precision_needed) {
+ int snprintf_result =
+ snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG + 2, value);
+
+ // Should never overflow; see above.
+ DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
+ }
+ return buffer;
+}
+
+bool safe_strto64(const char* str, int64* value) {
+ if (!str) return false;
+
+ // Skip leading space.
+ while (isspace(*str)) ++str;
+
+ int64 vlimit = kint64max;
+ int sign = 1;
+ if (*str == '-') {
+ sign = -1;
+ ++str;
+ // Different limit for positive and negative integers.
+ vlimit = kint64min;
+ }
+
+ if (!isdigit(*str)) return false;
+
+ int64 result = 0;
+ if (sign == 1) {
+ do {
+ int digit = *str - '0';
+ if ((vlimit - digit) / 10 < result) {
+ return false;
+ }
+ result = result * 10 + digit;
+ ++str;
+ } while (isdigit(*str));
+ } else {
+ do {
+ int digit = *str - '0';
+ if ((vlimit + digit) / 10 > result) {
+ return false;
+ }
+ result = result * 10 - digit;
+ ++str;
+ } while (isdigit(*str));
+ }
+
+ // Skip trailing space.
+ while (isspace(*str)) ++str;
+
+ if (*str) return false;
+
+ *value = result;
+ return true;
+}
+
+bool safe_strto32(const char* str, int32* value) {
+ if (!str) return false;
+
+ // Skip leading space.
+ while (isspace(*str)) ++str;
+
+ int64 vmax = kint32max;
+ int sign = 1;
+ if (*str == '-') {
+ sign = -1;
+ ++str;
+ // Different max for positive and negative integers.
+ ++vmax;
+ }
+
+ if (!isdigit(*str)) return false;
+
+ int64 result = 0;
+ do {
+ result = result * 10 + *str - '0';
+ if (result > vmax) {
+ return false;
+ }
+ ++str;
+ } while (isdigit(*str));
+
+ // Skip trailing space.
+ while (isspace(*str)) ++str;
+
+ if (*str) return false;
+
+ *value = result * sign;
+ return true;
+}
+
+bool safe_strtof(const char* str, float* value) {
+ char* endptr;
+ *value = strtof(str, &endptr);
+ while (isspace(*endptr)) ++endptr;
+ // Ignore range errors from strtod/strtof.
+ // The values it returns on underflow and
+ // overflow are the right fallback in a
+ // robust setting.
+ return *str != '\0' && *endptr == '\0';
+}
+
+char* FloatToBuffer(float value, char* buffer) {
+ // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
+ // platforms these days. Just in case some system exists where FLT_DIG
+ // is significantly larger -- and risks overflowing our buffer -- we have
+ // this assert.
+ static_assert(FLT_DIG < 10, "FLT_DIG is too big");
+
+ int snprintf_result =
+ snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG, value);
+
+ // The snprintf should never overflow because the buffer is significantly
+ // larger than the precision we asked for.
+ DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
+
+ float parsed_value;
+ if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
+ snprintf_result =
+ snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG + 2, value);
+
+ // Should never overflow; see above.
+ DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
+ }
+ return buffer;
+}
+
+string FpToString(Fprint fp) {
+ char buf[17];
+ snprintf(buf, sizeof(buf), "%016llx", static_cast<uint64>(fp));
+ return string(buf);
+}
+
+bool StringToFp(const string& s, Fprint* fp) {
+ char junk;
+ uint64 result;
+ if (sscanf(s.c_str(), "%llx%c", &result, &junk) == 1) {
+ *fp = result;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+string HumanReadableNumBytes(int64 num_bytes) {
+ if (num_bytes == kint64min) {
+ // Special case for number with not representable negation.
+ return "-8E";
+ }
+
+ const char* neg_str = (num_bytes < 0) ? "-" : "";
+ if (num_bytes < 0) {
+ num_bytes = -num_bytes;
+ }
+
+ // Special case for bytes.
+ if (num_bytes < 1024) {
+ // No fractions for bytes.
+ char buf[8]; // Longest possible string is '-XXXXB'
+ snprintf(buf, sizeof(buf), "%s%lldB", neg_str,
+ static_cast<int64>(num_bytes));
+ return string(buf);
+ }
+
+ static const char units[] = "KMGTPE"; // int64 only goes up to E.
+ const char* unit = units;
+ while (num_bytes >= static_cast<int64>(1024) * 1024) {
+ num_bytes /= 1024;
+ ++unit;
+ CHECK(unit < units + TF_ARRAYSIZE(units));
+ }
+
+ // We use SI prefixes.
+ char buf[16];
+ snprintf(buf, sizeof(buf), ((*unit == 'K') ? "%s%.1f%ciB" : "%s%.2f%ciB"),
+ neg_str, num_bytes / 1024.0, *unit);
+ return string(buf);
+}
+
+} // namespace strings
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/numbers.h b/tensorflow/core/lib/strings/numbers.h
new file mode 100644
index 0000000000..a30a862279
--- /dev/null
+++ b/tensorflow/core/lib/strings/numbers.h
@@ -0,0 +1,92 @@
+#ifndef TENSORFLOW_LIB_STRINGS_NUMBERS_H_
+#define TENSORFLOW_LIB_STRINGS_NUMBERS_H_
+
+#include <string>
+
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+namespace strings {
+
+// ----------------------------------------------------------------------
+// FastIntToBufferLeft()
+// These are intended for speed.
+//
+// All functions take the output buffer as an arg. FastInt() uses
+// at most 22 bytes, FastTime() uses exactly 30 bytes. They all
+// return a pointer to the beginning of the output, which is the same as
+// the beginning of the input buffer.
+//
+// NOTE: In 64-bit land, sizeof(time_t) is 8, so it is possible
+// to pass to FastTimeToBuffer() a time whose year cannot be
+// represented in 4 digits. In this case, the output buffer
+// will contain the string "Invalid:<value>"
+// ----------------------------------------------------------------------
+
+// Previously documented minimums -- the buffers provided must be at least this
+// long, though these numbers are subject to change:
+// Int32, UInt32: 12 bytes
+// Int64, UInt64, Int, Uint: 22 bytes
+// Time: 30 bytes
+// Use kFastToBufferSize rather than hardcoding constants.
+static const int kFastToBufferSize = 32;
+
+// ----------------------------------------------------------------------
+// FastInt32ToBufferLeft()
+// FastUInt32ToBufferLeft()
+// FastInt64ToBufferLeft()
+// FastUInt64ToBufferLeft()
+//
+// These functions convert their numeric argument to an ASCII
+// representation of the numeric value in base 10, with the
+// representation being left-aligned in the buffer. The caller is
+// responsible for ensuring that the buffer has enough space to hold
+// the output. The buffer should typically be at least kFastToBufferSize
+// bytes.
+//
+// Returns a pointer to the end of the string (i.e. the null character
+// terminating the string).
+// ----------------------------------------------------------------------
+
+char* FastInt32ToBufferLeft(int32 i, char* buffer); // at least 12 bytes
+char* FastUInt32ToBufferLeft(uint32 i, char* buffer); // at least 12 bytes
+char* FastInt64ToBufferLeft(int64 i, char* buffer); // at least 22 bytes
+char* FastUInt64ToBufferLeft(uint64 i, char* buffer); // at least 22 bytes
+
+// Required buffer size for DoubleToBuffer is kFastToBufferSize.
+// Required buffer size for FloatToBuffer is kFastToBufferSize.
+char* DoubleToBuffer(double i, char* buffer);
+char* FloatToBuffer(float i, char* buffer);
+
+// Convert a 64-bit fingerprint value to an ASCII representation.
+string FpToString(Fprint fp);
+
+// Attempt to parse a fingerprint in the form encoded by FpToString. If
+// successsful, stores the fingerprint in *fp and returns true. Otherwise,
+// returns false.
+bool StringToFp(const string& s, Fprint* fp);
+
+// Convert strings to 32bit integer values.
+// Leading and trailing spaces are allowed.
+// Return false with overflow or invalid input.
+bool safe_strto32(const char* str, int32* value);
+
+// Convert strings to 64bit integer values.
+// Leading and trailing spaces are allowed.
+// Return false with overflow or invalid input.
+bool safe_strto64(const char* str, int64* value);
+
+// Convert strings to floating point values.
+// Leading and trailing spaces are allowed.
+// Values may be rounded on over- and underflow.
+bool safe_strtof(const char* str, float* value);
+
+// Converts from an int64 representing a number of bytes to a
+// human readable string representing the same number.
+// e.g. 12345678 -> "11.77MiB".
+string HumanReadableNumBytes(int64 num_bytes);
+
+} // namespace strings
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_STRINGS_NUMBERS_H_
diff --git a/tensorflow/core/lib/strings/numbers_test.cc b/tensorflow/core/lib/strings/numbers_test.cc
new file mode 100644
index 0000000000..b178e6af53
--- /dev/null
+++ b/tensorflow/core/lib/strings/numbers_test.cc
@@ -0,0 +1,113 @@
+#include "tensorflow/core/lib/strings/numbers.h"
+
+#include <string>
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace strings {
+
+// NOTE: most of the routines in numbers.h are tested indirectly through
+// strcat_test.cc in this directory.
+
+// Test StrCat of ints and longs of various sizes and signdedness.
+TEST(FpToString, Ints) {
+ for (int s = 0; s < 64; s++) {
+ for (int delta = -1; delta <= 1; delta++) {
+ uint64 fp = (1ull << s) + delta;
+ string s = FpToString(fp);
+ uint64 fp2;
+ EXPECT_TRUE(StringToFp(s, &fp2));
+ EXPECT_EQ(fp, fp2);
+ }
+ }
+ Fprint dummy;
+ EXPECT_FALSE(StringToFp("", &dummy));
+ EXPECT_FALSE(StringToFp("xyz", &dummy));
+ EXPECT_FALSE(StringToFp("0000000000000000xyz", &dummy));
+}
+
+TEST(HumanReadableNumBytes, Bytes) {
+ EXPECT_EQ("0B", HumanReadableNumBytes(0));
+ EXPECT_EQ("4B", HumanReadableNumBytes(4));
+ EXPECT_EQ("1023B", HumanReadableNumBytes(1023));
+
+ EXPECT_EQ("1.0KiB", HumanReadableNumBytes(1024));
+ EXPECT_EQ("1.0KiB", HumanReadableNumBytes(1025));
+ EXPECT_EQ("1.5KiB", HumanReadableNumBytes(1500));
+ EXPECT_EQ("1.9KiB", HumanReadableNumBytes(1927));
+
+ EXPECT_EQ("2.0KiB", HumanReadableNumBytes(2048));
+ EXPECT_EQ("1.00MiB", HumanReadableNumBytes(1 << 20));
+ EXPECT_EQ("11.77MiB", HumanReadableNumBytes(12345678));
+ EXPECT_EQ("1.00GiB", HumanReadableNumBytes(1 << 30));
+
+ EXPECT_EQ("1.00TiB", HumanReadableNumBytes(1LL << 40));
+ EXPECT_EQ("1.00PiB", HumanReadableNumBytes(1LL << 50));
+ EXPECT_EQ("1.00EiB", HumanReadableNumBytes(1LL << 60));
+
+ // Try a few negative numbers
+ EXPECT_EQ("-1B", HumanReadableNumBytes(-1));
+ EXPECT_EQ("-4B", HumanReadableNumBytes(-4));
+ EXPECT_EQ("-1000B", HumanReadableNumBytes(-1000));
+ EXPECT_EQ("-11.77MiB", HumanReadableNumBytes(-12345678));
+ EXPECT_EQ("-8E", HumanReadableNumBytes(kint64min));
+}
+
+TEST(safe_strto32, Int32s) {
+ int32 result;
+
+ EXPECT_EQ(true, safe_strto32("1", &result));
+ EXPECT_EQ(1, result);
+ EXPECT_EQ(true, safe_strto32("123", &result));
+ EXPECT_EQ(123, result);
+ EXPECT_EQ(true, safe_strto32(" -123 ", &result));
+ EXPECT_EQ(-123, result);
+ EXPECT_EQ(true, safe_strto32("2147483647", &result));
+ EXPECT_EQ(2147483647, result);
+ EXPECT_EQ(true, safe_strto32("-2147483648", &result));
+ EXPECT_EQ(-2147483648, result);
+
+ // Invalid argument
+ EXPECT_EQ(false, safe_strto32(" 132as ", &result));
+ EXPECT_EQ(false, safe_strto32(" 132.2 ", &result));
+ EXPECT_EQ(false, safe_strto32(" -", &result));
+ EXPECT_EQ(false, safe_strto32("", &result));
+ EXPECT_EQ(false, safe_strto32(" ", &result));
+ EXPECT_EQ(false, safe_strto32("123 a", &result));
+
+ // Overflow
+ EXPECT_EQ(false, safe_strto32("2147483648", &result));
+ EXPECT_EQ(false, safe_strto32("-2147483649", &result));
+}
+
+TEST(safe_strto64, Int64s) {
+ int64 result;
+
+ EXPECT_EQ(true, safe_strto64("1", &result));
+ EXPECT_EQ(1, result);
+ EXPECT_EQ(true, safe_strto64("123", &result));
+ EXPECT_EQ(123, result);
+ EXPECT_EQ(true, safe_strto64(" -123 ", &result));
+ EXPECT_EQ(-123, result);
+ EXPECT_EQ(true, safe_strto64("9223372036854775807", &result));
+ EXPECT_EQ(9223372036854775807, result);
+ EXPECT_EQ(true, safe_strto64("-9223372036854775808", &result));
+ // kint64min == -9223372036854775808
+ // Use -9223372036854775808 directly results in out of range error
+ EXPECT_EQ(kint64min, result);
+
+ // Invalid argument
+ EXPECT_EQ(false, safe_strto64(" 132as ", &result));
+ EXPECT_EQ(false, safe_strto64(" 132.2 ", &result));
+ EXPECT_EQ(false, safe_strto64(" -", &result));
+ EXPECT_EQ(false, safe_strto64("", &result));
+ EXPECT_EQ(false, safe_strto64(" ", &result));
+ EXPECT_EQ(false, safe_strto64("123 a", &result));
+
+ // Overflow
+ EXPECT_EQ(false, safe_strto64("9223372036854775808", &result));
+ EXPECT_EQ(false, safe_strto64("-9223372036854775809", &result));
+}
+
+} // namespace strings
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/ordered_code.cc b/tensorflow/core/lib/strings/ordered_code.cc
new file mode 100644
index 0000000000..ec67595ebb
--- /dev/null
+++ b/tensorflow/core/lib/strings/ordered_code.cc
@@ -0,0 +1,515 @@
+#include "tensorflow/core/lib/strings/ordered_code.h"
+
+#include <assert.h>
+#include <stddef.h>
+
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace strings {
+
+// We encode a string in different ways depending on whether the item
+// should be in lexicographically increasing or decreasing order.
+//
+//
+// Lexicographically increasing order
+//
+// We want a string-to-string mapping F(x) such that for any two strings
+//
+// x < y => F(x) < F(y)
+//
+// In addition to the normal characters '\x00' through '\xff', we want to
+// encode a few extra symbols in strings:
+//
+// <sep> Separator between items
+// <infinity> Infinite string
+//
+// Therefore we need an alphabet with at least 258 symbols. Each
+// character '\1' through '\xfe' is mapped to itself. The other four are
+// encoded into two-letter sequences starting with '\0' and '\xff':
+//
+// <sep> encoded as => \0\1
+// \0 encoded as => \0\xff
+// \xff encoded as => \xff\x00
+// <infinity> encoded as => \xff\xff
+//
+// The remaining two-letter sequences starting with '\0' and '\xff' are
+// currently unused.
+//
+// F(<infinity>) is defined above. For any finite string x, F(x) is the
+// the encodings of x's characters followed by the encoding for <sep>. The
+// ordering of two finite strings is the same as the ordering of the
+// respective characters at the first position where they differ, which in
+// turn is the same as the ordering of the encodings of those two
+// characters. Moreover, for every finite string x, F(x) < F(<infinity>).
+//
+//
+// Lexicographically decreasing order
+//
+// We want a string-to-string mapping G(x) such that for any two strings,
+// whether finite or not,
+//
+// x < y => G(x) > G(y)
+//
+// To achieve this, define G(x) to be the inversion of F(x): I(F(x)). In
+// other words, invert every bit in F(x) to get G(x). For example,
+//
+// x = \x00\x13\xff
+// F(x) = \x00\xff\x13\xff\x00\x00\x01 escape \0, \xff, append F(<sep>)
+// G(x) = \xff\x00\xec\x00\xff\xff\xfe invert every bit in F(x)
+//
+// x = <infinity>
+// F(x) = \xff\xff
+// G(x) = \x00\x00
+//
+// Another example is
+//
+// x F(x) G(x) = I(F(x))
+// - ---- --------------
+// <infinity> \xff\xff \x00\x00
+// "foo" foo\0\1 \x99\x90\x90\xff\xfe
+// "aaa" aaa\0\1 \x9e\x9e\x9e\xff\xfe
+// "aa" aa\0\1 \x9e\x9e\xff\xfe
+// "" \0\1 \xff\xfe
+//
+// More generally and rigorously, if for any two strings x and y
+//
+// F(x) < F(y) => I(F(x)) > I(F(y)) (1)
+//
+// it would follow that x < y => G(x) > G(y) because
+//
+// x < y => F(x) < F(y) => G(x) = I(F(x)) > I(F(y)) = G(y)
+//
+// We now show why (1) is true, in two parts. Notice that for any two
+// strings x < y, F(x) is *not* a proper prefix of F(y). Suppose x is a
+// proper prefix of y (say, x="abc" < y="abcd"). F(x) and F(y) diverge at
+// the F(<sep>) in F(x) (v. F('d') in the example). Suppose x is not a
+// proper prefix of y (say, x="abce" < y="abd"), F(x) and F(y) diverge at
+// their respective encodings of the characters where x and y diverge
+// (F('c') v. F('d')). Finally, if y=<infinity>, we can see that
+// F(y)=\xff\xff is not the prefix of F(x) for any finite string x, simply
+// by considering all the possible first characters of F(x).
+//
+// Given that F(x) is not a proper prefix F(y), the order of F(x) and F(y)
+// is determined by the byte where F(x) and F(y) diverge. For example, the
+// order of F(x)="eefh" and F(y)="eeg" is determined by their third
+// characters. I(p) inverts each byte in p, which effectively subtracts
+// each byte from 0xff. So, in this example, I('f') > I('g'), and thus
+// I(F(x)) > I(F(y)).
+//
+//
+// Implementation
+//
+// To implement G(x) efficiently, we use C++ template to instantiate two
+// versions of the code to produce F(x), one for normal encoding (giving us
+// F(x)) and one for inverted encoding (giving us G(x) = I(F(x))).
+
+static const char kEscape1 = '\000';
+static const char kNullCharacter = '\xff'; // Combined with kEscape1
+static const char kSeparator = '\001'; // Combined with kEscape1
+
+static const char kEscape2 = '\xff';
+static const char kInfinity = '\xff'; // Combined with kEscape2
+static const char kFFCharacter = '\000'; // Combined with kEscape2
+
+static const char kEscape1_Separator[2] = {kEscape1, kSeparator};
+
+// Append to "*dest" the "len" bytes starting from "*src".
+inline static void AppendBytes(string* dest, const char* src, int len) {
+ dest->append(src, len);
+}
+
+inline bool IsSpecialByte(char c) { return ((unsigned char)(c + 1)) < 2; }
+
+// Return a pointer to the first byte in the range "[start..limit)"
+// whose value is 0 or 255 (kEscape1 or kEscape2). If no such byte
+// exists in the range, returns "limit".
+inline const char* SkipToNextSpecialByte(const char* start, const char* limit) {
+ // If these constants were ever changed, this routine needs to change
+ DCHECK_EQ(kEscape1, 0);
+ DCHECK_EQ(kEscape2 & 0xffu, 255u);
+ const char* p = start;
+ while (p < limit && !IsSpecialByte(*p)) {
+ p++;
+ }
+ return p;
+}
+
+// Expose SkipToNextSpecialByte for testing purposes
+const char* OrderedCode::TEST_SkipToNextSpecialByte(const char* start,
+ const char* limit) {
+ return SkipToNextSpecialByte(start, limit);
+}
+
+// Helper routine to encode "s" and append to "*dest", escaping special
+// characters.
+inline static void EncodeStringFragment(string* dest, StringPiece s) {
+ const char* p = s.data();
+ const char* limit = p + s.size();
+ const char* copy_start = p;
+ while (true) {
+ p = SkipToNextSpecialByte(p, limit);
+ if (p >= limit) break; // No more special characters that need escaping
+ char c = *(p++);
+ DCHECK(IsSpecialByte(c));
+ if (c == kEscape1) {
+ AppendBytes(dest, copy_start, p - copy_start - 1);
+ dest->push_back(kEscape1);
+ dest->push_back(kNullCharacter);
+ copy_start = p;
+ } else {
+ assert(c == kEscape2);
+ AppendBytes(dest, copy_start, p - copy_start - 1);
+ dest->push_back(kEscape2);
+ dest->push_back(kFFCharacter);
+ copy_start = p;
+ }
+ }
+ if (p > copy_start) {
+ AppendBytes(dest, copy_start, p - copy_start);
+ }
+}
+
+void OrderedCode::WriteString(string* dest, StringPiece s) {
+ EncodeStringFragment(dest, s);
+ AppendBytes(dest, kEscape1_Separator, 2);
+}
+
+void OrderedCode::WriteNumIncreasing(string* dest, uint64 val) {
+ // Values are encoded with a single byte length prefix, followed
+ // by the actual value in big-endian format with leading 0 bytes
+ // dropped.
+ unsigned char buf[9]; // 8 bytes for value plus one byte for length
+ int len = 0;
+ while (val > 0) {
+ len++;
+ buf[9 - len] = (val & 0xff);
+ val >>= 8;
+ }
+ buf[9 - len - 1] = (unsigned char)len;
+ len++;
+ AppendBytes(dest, reinterpret_cast<const char*>(buf + 9 - len), len);
+}
+
+// Parse the encoding of a previously encoded string.
+// If parse succeeds, return true, consume encoding from
+// "*src", and if result != NULL append the decoded string to "*result".
+// Otherwise, return false and leave both undefined.
+inline static bool ReadStringInternal(StringPiece* src, string* result) {
+ const char* start = src->data();
+ const char* string_limit = src->data() + src->size();
+
+ // We only scan up to "limit-2" since a valid string must end with
+ // a two character terminator: 'kEscape1 kSeparator'
+ const char* limit = string_limit - 1;
+ const char* copy_start = start;
+ while (true) {
+ start = SkipToNextSpecialByte(start, limit);
+ if (start >= limit) break; // No terminator sequence found
+ const char c = *(start++);
+ // If inversion is required, instead of inverting 'c', we invert the
+ // character constants to which 'c' is compared. We get the same
+ // behavior but save the runtime cost of inverting 'c'.
+ DCHECK(IsSpecialByte(c));
+ if (c == kEscape1) {
+ if (result) {
+ AppendBytes(result, copy_start, start - copy_start - 1);
+ }
+ // kEscape1 kSeparator ends component
+ // kEscape1 kNullCharacter represents '\0'
+ const char next = *(start++);
+ if (next == kSeparator) {
+ src->remove_prefix(start - src->data());
+ return true;
+ } else if (next == kNullCharacter) {
+ if (result) {
+ *result += '\0';
+ }
+ } else {
+ return false;
+ }
+ copy_start = start;
+ } else {
+ assert(c == kEscape2);
+ if (result) {
+ AppendBytes(result, copy_start, start - copy_start - 1);
+ }
+ // kEscape2 kFFCharacter represents '\xff'
+ // kEscape2 kInfinity is an error
+ const char next = *(start++);
+ if (next == kFFCharacter) {
+ if (result) {
+ *result += '\xff';
+ }
+ } else {
+ return false;
+ }
+ copy_start = start;
+ }
+ }
+ return false;
+}
+
+bool OrderedCode::ReadString(StringPiece* src, string* result) {
+ return ReadStringInternal(src, result);
+}
+
+bool OrderedCode::ReadNumIncreasing(StringPiece* src, uint64* result) {
+ if (src->empty()) {
+ return false; // Not enough bytes
+ }
+
+ // Decode length byte
+ const size_t len = static_cast<unsigned char>((*src)[0]);
+
+ // If len > 0 and src is longer than 1, the first byte of "payload"
+ // must be non-zero (otherwise the encoding is not minimal).
+ // In opt mode, we don't enforce that encodings must be minimal.
+ DCHECK(0 == len || src->size() == 1 || (*src)[1] != '\0')
+ << "invalid encoding";
+
+ if (len + 1 > src->size() || len > 8) {
+ return false; // Not enough bytes or too many bytes
+ }
+
+ if (result) {
+ uint64 tmp = 0;
+ for (size_t i = 0; i < len; i++) {
+ tmp <<= 8;
+ tmp |= static_cast<unsigned char>((*src)[1 + i]);
+ }
+ *result = tmp;
+ }
+ src->remove_prefix(len + 1);
+ return true;
+}
+
+void OrderedCode::TEST_Corrupt(string* str, int k) {
+ int seen_seps = 0;
+ for (size_t i = 0; i + 1 < str->size(); i++) {
+ if ((*str)[i] == kEscape1 && (*str)[i + 1] == kSeparator) {
+ seen_seps++;
+ if (seen_seps == k) {
+ (*str)[i + 1] = kSeparator + 1;
+ return;
+ }
+ }
+ }
+}
+
+// Signed number encoding/decoding /////////////////////////////////////
+//
+// The format is as follows:
+//
+// The first bit (the most significant bit of the first byte)
+// represents the sign, 0 if the number is negative and
+// 1 if the number is >= 0.
+//
+// Any unbroken sequence of successive bits with the same value as the sign
+// bit, up to 9 (the 8th and 9th are the most significant bits of the next
+// byte), are size bits that count the number of bytes after the first byte.
+// That is, the total length is between 1 and 10 bytes.
+//
+// The value occupies the bits after the sign bit and the "size bits"
+// till the end of the string, in network byte order. If the number
+// is negative, the bits are in 2-complement.
+//
+//
+// Example 1: number 0x424242 -> 4 byte big-endian hex string 0xf0424242:
+//
+// +---------------+---------------+---------------+---------------+
+// 1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0
+// +---------------+---------------+---------------+---------------+
+// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+// | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+// | | | | payload: the remaining bits after the sign and size bits
+// | | | | and the delimiter bit, the value is 0x424242
+// | | | |
+// | size bits: 3 successive bits with the same value as the sign bit
+// | (followed by a delimiter bit with the opposite value)
+// | mean that there are 3 bytes after the first byte, 4 total
+// |
+// sign bit: 1 means that the number is non-negative
+//
+// Example 2: negative number -0x800 -> 2 byte big-endian hex string 0x3800:
+//
+// +---------------+---------------+
+// 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0
+// +---------------+---------------+
+// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+// | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+// | | payload: the remaining bits after the sign and size bits and the
+// | | delimiter bit, 2-complement because of the negative sign,
+// | | value is ~0x7ff, represents the value -0x800
+// | |
+// | size bits: 1 bit with the same value as the sign bit
+// | (followed by a delimiter bit with the opposite value)
+// | means that there is 1 byte after the first byte, 2 total
+// |
+// sign bit: 0 means that the number is negative
+//
+//
+// Compared with the simpler unsigned format used for uint64 numbers,
+// this format is more compact for small numbers, namely one byte encodes
+// numbers in the range [-64,64), two bytes cover the range [-2^13,2^13), etc.
+// In general, n bytes encode numbers in the range [-2^(n*7-1),2^(n*7-1)).
+// (The cross-over point for compactness of representation is 8 bytes,
+// where this format only covers the range [-2^55,2^55),
+// whereas an encoding with sign bit and length in the first byte and
+// payload in all following bytes would cover [-2^56,2^56).)
+
+static const int kMaxSigned64Length = 10;
+
+// This array maps encoding length to header bits in the first two bytes.
+static const char kLengthToHeaderBits[1 + kMaxSigned64Length][2] = {
+ {0, 0}, {'\x80', 0}, {'\xc0', 0}, {'\xe0', 0},
+ {'\xf0', 0}, {'\xf8', 0}, {'\xfc', 0}, {'\xfe', 0},
+ {'\xff', 0}, {'\xff', '\x80'}, {'\xff', '\xc0'}};
+
+// This array maps encoding lengths to the header bits that overlap with
+// the payload and need fixing when reading.
+static const uint64 kLengthToMask[1 + kMaxSigned64Length] = {
+ 0ULL,
+ 0x80ULL,
+ 0xc000ULL,
+ 0xe00000ULL,
+ 0xf0000000ULL,
+ 0xf800000000ULL,
+ 0xfc0000000000ULL,
+ 0xfe000000000000ULL,
+ 0xff00000000000000ULL,
+ 0x8000000000000000ULL,
+ 0ULL};
+
+// This array maps the number of bits in a number to the encoding
+// length produced by WriteSignedNumIncreasing.
+// For positive numbers, the number of bits is 1 plus the most significant
+// bit position (the highest bit position in a positive int64 is 63).
+// For a negative number n, we count the bits in ~n.
+// That is, length = kBitsToLength[Bits::Log2Floor64(n < 0 ? ~n : n) + 1].
+static const int8 kBitsToLength[1 + 63] = {
+ 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
+ 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 7,
+ 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10};
+
+#if defined(__GNUC__)
+// Returns floor(lg(n)). Returns -1 if n == 0.
+static int Log2Floor64(uint64 n) {
+ return n == 0 ? -1 : 63 ^ __builtin_clzll(n);
+}
+#else
+// Portable slow version
+static int Log2Floor32_Portable(uint32 n) {
+ if (n == 0) return -1;
+ int log = 0;
+ uint32 value = n;
+ for (int i = 4; i >= 0; --i) {
+ int shift = (1 << i);
+ uint32 x = value >> shift;
+ if (x != 0) {
+ value = x;
+ log += shift;
+ }
+ }
+ assert(value == 1);
+ return log;
+}
+// Returns floor(lg(n)). Returns -1 if n == 0.
+static int Log2Floor64(uint64 n) {
+ const uint32 topbits = static_cast<uint32>(n >> 32);
+ if (topbits == 0) {
+ // Top bits are zero, so scan in bottom bits
+ return Log2Floor32_Portable(static_cast<uint32>(n));
+ } else {
+ return 32 + Log2Floor32_Portable(topbits);
+ }
+}
+#endif
+
+// Calculates the encoding length in bytes of the signed number n.
+static inline int SignedEncodingLength(int64 n) {
+ return kBitsToLength[Log2Floor64(n < 0 ? ~n : n) + 1];
+}
+
+static void StoreBigEndian64(char* dst, uint64 v) {
+ for (int i = 0; i < 8; i++) {
+ dst[i] = (v >> (56 - 8 * i)) & 0xff;
+ }
+}
+
+static uint64 LoadBigEndian64(const char* src) {
+ uint64 result = 0;
+ for (int i = 0; i < 8; i++) {
+ unsigned char c = static_cast<unsigned char>(src[i]);
+ result |= static_cast<uint64>(c) << (56 - 8 * i);
+ }
+ return result;
+}
+
+void OrderedCode::WriteSignedNumIncreasing(string* dest, int64 val) {
+ const uint64 x = val < 0 ? ~val : val;
+ if (x < 64) { // fast path for encoding length == 1
+ *dest += kLengthToHeaderBits[1][0] ^ val;
+ return;
+ }
+ // buf = val in network byte order, sign extended to 10 bytes
+ const char sign_byte = val < 0 ? '\xff' : '\0';
+ char buf[10] = {
+ sign_byte, sign_byte,
+ };
+ StoreBigEndian64(buf + 2, val);
+ static_assert(sizeof(buf) == kMaxSigned64Length, "max length size mismatch");
+ const int len = SignedEncodingLength(x);
+ DCHECK_GE(len, 2);
+ char* const begin = buf + sizeof(buf) - len;
+ begin[0] ^= kLengthToHeaderBits[len][0];
+ begin[1] ^= kLengthToHeaderBits[len][1]; // ok because len >= 2
+ dest->append(begin, len);
+}
+
+bool OrderedCode::ReadSignedNumIncreasing(StringPiece* src, int64* result) {
+ if (src->empty()) return false;
+ const uint64 xor_mask = (!((*src)[0] & 0x80)) ? ~0ULL : 0ULL;
+ const unsigned char first_byte = (*src)[0] ^ (xor_mask & 0xff);
+
+ // now calculate and test length, and set x to raw (unmasked) result
+ int len;
+ uint64 x;
+ if (first_byte != 0xff) {
+ len = 7 - Log2Floor64(first_byte ^ 0xff);
+ if (src->size() < static_cast<size_t>(len)) return false;
+ x = xor_mask; // sign extend using xor_mask
+ for (int i = 0; i < len; ++i)
+ x = (x << 8) | static_cast<unsigned char>((*src)[i]);
+ } else {
+ len = 8;
+ if (src->size() < static_cast<size_t>(len)) return false;
+ const unsigned char second_byte = (*src)[1] ^ (xor_mask & 0xff);
+ if (second_byte >= 0x80) {
+ if (second_byte < 0xc0) {
+ len = 9;
+ } else {
+ const unsigned char third_byte = (*src)[2] ^ (xor_mask & 0xff);
+ if (second_byte == 0xc0 && third_byte < 0x80) {
+ len = 10;
+ } else {
+ return false; // either len > 10 or len == 10 and #bits > 63
+ }
+ }
+ if (src->size() < static_cast<size_t>(len)) return false;
+ }
+ x = LoadBigEndian64(src->data() + len - 8);
+ }
+
+ x ^= kLengthToMask[len]; // remove spurious header bits
+
+ DCHECK_EQ(len, SignedEncodingLength(x)) << "invalid encoding";
+
+ if (result) *result = x;
+ src->remove_prefix(len);
+ return true;
+}
+
+} // namespace strings
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/ordered_code.h b/tensorflow/core/lib/strings/ordered_code.h
new file mode 100644
index 0000000000..39f1df9a94
--- /dev/null
+++ b/tensorflow/core/lib/strings/ordered_code.h
@@ -0,0 +1,77 @@
+// This module provides routines for encoding a sequence of typed
+// entities into a string. The resulting strings can be
+// lexicographically compared to yield the same comparison value that
+// would have been generated if the encoded items had been compared
+// one by one according to their type.
+//
+// More precisely, suppose:
+// 1. string A is generated by encoding the sequence of items [A_1..A_n]
+// 2. string B is generated by encoding the sequence of items [B_1..B_n]
+// 3. The types match; i.e., for all i: A_i was encoded using
+// the same routine as B_i
+// Then:
+// Comparing A vs. B lexicographically is the same as comparing
+// the vectors [A_1..A_n] and [B_1..B_n] lexicographically.
+//
+// Furthermore, if n < m, the encoding of [A_1..A_n] is a strict prefix of
+// [A_1..A_m] (unless m = n+1 and A_m is the empty string encoded with
+// WriteTrailingString, in which case the encodings are equal).
+//
+// This module is often useful when generating multi-part sstable
+// keys that have to be ordered in a particular fashion.
+
+#ifndef TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
+#define TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
+
+#include <string>
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+class StringPiece;
+
+namespace strings {
+
+class OrderedCode {
+ public:
+ // -------------------------------------------------------------------
+ // Encoding routines: each one of the following routines append
+ // one item to "*dest" in an encoding where larger values are
+ // ordered lexicographically after smaller values.
+ static void WriteString(string* dest, StringPiece str);
+ static void WriteNumIncreasing(string* dest, uint64 num);
+ static void WriteSignedNumIncreasing(string* dest, int64 num);
+
+ // -------------------------------------------------------------------
+ // Decoding routines: these extract an item earlier encoded using
+ // the corresponding WriteXXX() routines above. The item is read
+ // from "*src"; "*src" is modified to point past the decoded item;
+ // and if "result" is non-NULL, "*result" is modified to contain the
+ // result. In case of string result, the decoded string is appended to
+ // "*result". Returns true if the next item was read successfully, false
+ // otherwise.
+ static bool ReadString(StringPiece* src, string* result);
+ static bool ReadNumIncreasing(StringPiece* src, uint64* result);
+ static bool ReadSignedNumIncreasing(StringPiece* src, int64* result);
+
+ // Helper for testing: corrupt "*str" by changing the kth item separator
+ // in the string.
+ static void TEST_Corrupt(string* str, int k);
+
+ // Helper for testing.
+ // SkipToNextSpecialByte is an internal routine defined in the .cc file
+ // with the following semantics. Return a pointer to the first byte
+ // in the range "[start..limit)" whose value is 0 or 255. If no such
+ // byte exists in the range, returns "limit".
+ static const char* TEST_SkipToNextSpecialByte(const char* start,
+ const char* limit);
+
+ private:
+ // This has only static methods, so disallow construction entirely
+ OrderedCode();
+ TF_DISALLOW_COPY_AND_ASSIGN(OrderedCode);
+};
+
+} // namespace strings
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
diff --git a/tensorflow/core/lib/strings/ordered_code_test.cc b/tensorflow/core/lib/strings/ordered_code_test.cc
new file mode 100644
index 0000000000..d517d14f4a
--- /dev/null
+++ b/tensorflow/core/lib/strings/ordered_code_test.cc
@@ -0,0 +1,1183 @@
+#include "tensorflow/core/lib/strings/ordered_code.h"
+
+#include <float.h>
+#include <stddef.h>
+#include <limits>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace tensorflow {
+namespace strings {
+
+static string RandomString(random::SimplePhilox* rnd, int len) {
+ string x;
+ for (int i = 0; i < len; i++) {
+ x += rnd->Uniform(256);
+ }
+ return x;
+}
+
+// ---------------------------------------------------------------------
+// Utility template functions (they help templatize the tests below)
+
+// Read/WriteIncreasing are defined for string, uint64, int64 below.
+template <typename T>
+static void OCWriteIncreasing(string* dest, const T& val);
+template <typename T>
+static bool OCReadIncreasing(StringPiece* src, T* result);
+
+// Read/WriteIncreasing<string>
+template <>
+void OCWriteIncreasing<string>(string* dest, const string& val) {
+ OrderedCode::WriteString(dest, val);
+}
+template <>
+bool OCReadIncreasing<string>(StringPiece* src, string* result) {
+ return OrderedCode::ReadString(src, result);
+}
+
+// Read/WriteIncreasing<uint64>
+template <>
+void OCWriteIncreasing<uint64>(string* dest, const uint64& val) {
+ OrderedCode::WriteNumIncreasing(dest, val);
+}
+template <>
+bool OCReadIncreasing<uint64>(StringPiece* src, uint64* result) {
+ return OrderedCode::ReadNumIncreasing(src, result);
+}
+
+// Read/WriteIncreasing<int64>
+template <>
+void OCWriteIncreasing<int64>(string* dest, const int64& val) {
+ OrderedCode::WriteSignedNumIncreasing(dest, val);
+}
+template <>
+bool OCReadIncreasing<int64>(StringPiece* src, int64* result) {
+ return OrderedCode::ReadSignedNumIncreasing(src, result);
+}
+
+template <typename T>
+string OCWrite(T val) {
+ string result;
+ OCWriteIncreasing<T>(&result, val);
+ return result;
+}
+
+template <typename T>
+void OCWriteToString(string* result, T val) {
+ OCWriteIncreasing<T>(result, val);
+}
+
+template <typename T>
+bool OCRead(StringPiece* s, T* val) {
+ return OCReadIncreasing<T>(s, val);
+}
+
+// ---------------------------------------------------------------------
+// Numbers
+
+template <typename T>
+static T TestRead(const string& a) {
+ // gracefully reject any proper prefix of an encoding
+ for (int i = 0; i < a.size() - 1; ++i) {
+ StringPiece s(a.data(), i);
+ CHECK(!OCRead<T>(&s, NULL));
+ CHECK_EQ(s, a.substr(0, i));
+ }
+
+ StringPiece s(a);
+ T v;
+ CHECK(OCRead<T>(&s, &v));
+ CHECK(s.empty());
+ return v;
+}
+
+template <typename T>
+static void TestWriteRead(T expected) {
+ EXPECT_EQ(expected, TestRead<T>(OCWrite<T>(expected)));
+}
+
+// Verifies that the second Write* call appends a non-empty string to its
+// output.
+template <typename T, typename U>
+static void TestWriteAppends(T first, U second) {
+ string encoded;
+ OCWriteToString<T>(&encoded, first);
+ string encoded_first_only = encoded;
+ OCWriteToString<U>(&encoded, second);
+ EXPECT_NE(encoded, encoded_first_only);
+ EXPECT_TRUE(StringPiece(encoded).starts_with(encoded_first_only));
+}
+
+template <typename T>
+static void TestNumbers(T multiplier) {
+ // first test powers of 2 (and nearby numbers)
+ for (T x = std::numeric_limits<T>().max(); x != 0; x /= 2) {
+ TestWriteRead(multiplier * (x - 1));
+ TestWriteRead(multiplier * x);
+ if (x != std::numeric_limits<T>::max()) {
+ TestWriteRead(multiplier * (x + 1));
+ } else if (multiplier < 0 && multiplier == -1) {
+ TestWriteRead(-x - 1);
+ }
+ }
+
+ random::PhiloxRandom philox(301, 17);
+ random::SimplePhilox rnd(&philox);
+ for (int bits = 1; bits <= std::numeric_limits<T>().digits; ++bits) {
+ // test random non-negative numbers with given number of significant bits
+ const uint64 mask = (~0ULL) >> (64 - bits);
+ for (int i = 0; i < 1000; i++) {
+ T x = rnd.Rand64() & mask;
+ TestWriteRead(multiplier * x);
+ T y = rnd.Rand64() & mask;
+ TestWriteAppends(multiplier * x, multiplier * y);
+ }
+ }
+}
+
+// Return true iff 'a' is "before" 'b'
+static bool CompareStrings(const string& a, const string& b) { return (a < b); }
+
+template <typename T>
+static void TestNumberOrdering() {
+ // first the negative numbers (if T is signed, otherwise no-op)
+ string laststr = OCWrite<T>(std::numeric_limits<T>().min());
+ for (T num = std::numeric_limits<T>().min() / 2; num != 0; num /= 2) {
+ string strminus1 = OCWrite<T>(num - 1);
+ string str = OCWrite<T>(num);
+ string strplus1 = OCWrite<T>(num + 1);
+
+ CHECK(CompareStrings(strminus1, str));
+ CHECK(CompareStrings(str, strplus1));
+
+ // Compare 'str' with 'laststr'. When we approach 0, 'laststr' is
+ // not necessarily before 'strminus1'.
+ CHECK(CompareStrings(laststr, str));
+ laststr = str;
+ }
+
+ // then the positive numbers
+ laststr = OCWrite<T>(0);
+ T num = 1;
+ while (num < std::numeric_limits<T>().max() / 2) {
+ num *= 2;
+ string strminus1 = OCWrite<T>(num - 1);
+ string str = OCWrite<T>(num);
+ string strplus1 = OCWrite<T>(num + 1);
+
+ CHECK(CompareStrings(strminus1, str));
+ CHECK(CompareStrings(str, strplus1));
+
+ // Compare 'str' with 'laststr'.
+ CHECK(CompareStrings(laststr, str));
+ laststr = str;
+ }
+}
+
+// Helper routine for testing TEST_SkipToNextSpecialByte
+static int FindSpecial(const string& x) {
+ const char* p = x.data();
+ const char* limit = p + x.size();
+ const char* result = OrderedCode::TEST_SkipToNextSpecialByte(p, limit);
+ return result - p;
+}
+
+TEST(OrderedCode, SkipToNextSpecialByte) {
+ for (size_t len = 0; len < 256; len++) {
+ random::PhiloxRandom philox(301, 17);
+ random::SimplePhilox rnd(&philox);
+ string x;
+ while (x.size() < len) {
+ char c = 1 + rnd.Uniform(254);
+ ASSERT_NE(c, 0);
+ ASSERT_NE(c, 255);
+ x += c; // No 0 bytes, no 255 bytes
+ }
+ EXPECT_EQ(FindSpecial(x), x.size());
+ for (size_t special_pos = 0; special_pos < len; special_pos++) {
+ for (size_t special_test = 0; special_test < 2; special_test++) {
+ const char special_byte = (special_test == 0) ? 0 : 255;
+ string y = x;
+ y[special_pos] = special_byte;
+ EXPECT_EQ(FindSpecial(y), special_pos);
+ if (special_pos < 16) {
+ // Add some special bytes after the one at special_pos to make sure
+ // we still return the earliest special byte in the string
+ for (size_t rest = special_pos + 1; rest < len; rest++) {
+ if (rnd.OneIn(3)) {
+ y[rest] = rnd.OneIn(2) ? 0 : 255;
+ EXPECT_EQ(FindSpecial(y), special_pos);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(OrderedCode, ExhaustiveFindSpecial) {
+ char buf[16];
+ char* limit = buf + sizeof(buf);
+ int count = 0;
+ for (int start_offset = 0; start_offset <= 5; start_offset += 5) {
+ // We test exhaustively with all combinations of 3 bytes starting
+ // at offset 0 and offset 5 (so as to test with the bytes at both
+ // ends of a 64-bit word).
+ for (size_t i = 0; i < sizeof(buf); i++) {
+ buf[i] = 'a'; // Not a special byte
+ }
+ for (int b0 = 0; b0 < 256; b0++) {
+ for (int b1 = 0; b1 < 256; b1++) {
+ for (int b2 = 0; b2 < 256; b2++) {
+ buf[start_offset + 0] = b0;
+ buf[start_offset + 1] = b1;
+ buf[start_offset + 2] = b2;
+ char* expected;
+ if (b0 == 0 || b0 == 255) {
+ expected = &buf[start_offset];
+ } else if (b1 == 0 || b1 == 255) {
+ expected = &buf[start_offset + 1];
+ } else if (b2 == 0 || b2 == 255) {
+ expected = &buf[start_offset + 2];
+ } else {
+ expected = limit;
+ }
+ count++;
+ EXPECT_EQ(expected,
+ OrderedCode::TEST_SkipToNextSpecialByte(buf, limit));
+ }
+ }
+ }
+ }
+ EXPECT_EQ(count, 256 * 256 * 256 * 2);
+}
+
+TEST(Uint64, EncodeDecode) { TestNumbers<uint64>(1); }
+
+TEST(Uint64, Ordering) { TestNumberOrdering<uint64>(); }
+
+TEST(Int64, EncodeDecode) {
+ TestNumbers<int64>(1);
+ TestNumbers<int64>(-1);
+}
+
+TEST(Int64, Ordering) { TestNumberOrdering<int64>(); }
+
+// Returns the bitwise complement of s.
+static inline string StrNot(const string& s) {
+ string result;
+ for (string::const_iterator it = s.begin(); it != s.end(); ++it)
+ result.push_back(~*it);
+ return result;
+}
+
+template <typename T>
+static void TestInvalidEncoding(const string& s) {
+ StringPiece p(s);
+ EXPECT_FALSE(OCRead<T>(&p, static_cast<T*>(NULL)));
+ EXPECT_EQ(s, p);
+}
+
+TEST(OrderedCodeInvalidEncodingsTest, Overflow) {
+ // 1U << 64, increasing and decreasing
+ const string k2xx64U = "\x09\x01" + string(8, 0);
+ TestInvalidEncoding<uint64>(k2xx64U);
+
+ // 1 << 63 and ~(1 << 63), increasing and decreasing
+ const string k2xx63 = "\xff\xc0\x80" + string(7, 0);
+ TestInvalidEncoding<int64>(k2xx63);
+ TestInvalidEncoding<int64>(StrNot(k2xx63));
+}
+
+TEST(OrderedCodeInvalidEncodingsDeathTest, NonCanonical) {
+ // Test "ambiguous"/"non-canonical" encodings.
+ // These are non-minimal (but otherwise "valid") encodings that
+ // differ from the minimal encoding chosen by OrderedCode::WriteXXX
+ // and thus should be avoided to not mess up the string ordering of
+ // encodings.
+
+ random::PhiloxRandom philox(301, 17);
+ random::SimplePhilox rnd(&philox);
+
+ for (int n = 2; n <= 9; ++n) {
+ // The zero in non_minimal[1] is "redundant".
+ string non_minimal =
+ string(1, n - 1) + string(1, 0) + RandomString(&rnd, n - 2);
+ EXPECT_EQ(n, non_minimal.length());
+
+ EXPECT_NE(OCWrite<uint64>(0), non_minimal);
+#ifndef NDEBUG
+ StringPiece s(non_minimal);
+ EXPECT_DEATH(OrderedCode::ReadNumIncreasing(&s, NULL), "invalid encoding");
+#else
+ TestRead<uint64>(non_minimal);
+#endif
+ }
+
+ for (int n = 2; n <= 10; ++n) {
+ // Header with 1 sign bit and n-1 size bits.
+ string header = string(n / 8, 0xff) + string(1, 0xff << (8 - (n % 8)));
+ // There are more than 7 zero bits between header bits and "payload".
+ string non_minimal = header +
+ string(1, rnd.Uniform(256) & ~*header.rbegin()) +
+ RandomString(&rnd, n - header.length() - 1);
+ EXPECT_EQ(n, non_minimal.length());
+
+ EXPECT_NE(OCWrite<int64>(0), non_minimal);
+#ifndef NDEBUG
+ StringPiece s(non_minimal);
+ EXPECT_DEATH(OrderedCode::ReadSignedNumIncreasing(&s, NULL),
+ "invalid encoding")
+ << n;
+#else
+ TestRead<int64>(non_minimal);
+#endif
+ }
+}
+
+// Returns random number with specified number of bits,
+// i.e., in the range [2^(bits-1),2^bits).
+static uint64 NextBits(random::SimplePhilox* rnd, int bits) {
+ return (bits != 0)
+ ? (rnd->Rand64() % (1LL << (bits - 1))) + (1LL << (bits - 1))
+ : 0;
+}
+
+template <typename T>
+static void BM_WriteNum(int n, T multiplier) {
+ static const int kValues = 64;
+ T values[kValues];
+ random::PhiloxRandom philox(301, 17);
+ random::SimplePhilox rnd(&philox);
+ // Use enough distinct values to confuse the branch predictor
+ for (int i = 0; i < kValues; i++) {
+ values[i] = NextBits(&rnd, n % 64) * multiplier;
+ }
+ string result;
+ int index = 0;
+ while (n-- > 0) {
+ result.clear();
+ OCWriteToString<T>(&result, values[index % kValues]);
+ index++;
+ }
+}
+
+template <typename T>
+static void BM_ReadNum(int n, T multiplier) {
+ string x;
+ random::PhiloxRandom philox(301, 17);
+ random::SimplePhilox rnd(&philox);
+ // Use enough distinct values to confuse the branch predictor
+ static const int kValues = 64;
+ string values[kValues];
+ for (int i = 0; i < kValues; i++) {
+ T val = NextBits(&rnd, i % 64) * multiplier;
+ values[i] = OCWrite<T>(val);
+ }
+ uint32 index = 0;
+ while (n-- > 0) {
+ T val;
+ StringPiece s = values[index++ % kValues];
+ OCRead<T>(&s, &val);
+ }
+}
+
+#define BENCHMARK_NUM(name, T, multiplier) \
+ static void BM_Write##name(int n) { BM_WriteNum<T>(n, multiplier); } \
+ BENCHMARK(BM_Write##name); \
+ static void BM_Read##name(int n) { BM_ReadNum<T>(n, multiplier); } \
+ BENCHMARK(BM_Read##name)
+
+BENCHMARK_NUM(NumIncreasing, uint64, 1);
+BENCHMARK_NUM(SignedNum, int64, 1);
+BENCHMARK_NUM(SignedNumNegative, int64, -1);
+
+#undef BENCHMARK_NUM
+
+// ---------------------------------------------------------------------
+// Strings
+
+TEST(String, EncodeDecode) {
+ random::PhiloxRandom philox(301, 17);
+ random::SimplePhilox rnd(&philox);
+
+ for (int len = 0; len < 256; len++) {
+ const string a = RandomString(&rnd, len);
+ TestWriteRead(a);
+ for (int len2 = 0; len2 < 64; len2++) {
+ const string b = RandomString(&rnd, len2);
+
+ TestWriteAppends(a, b);
+
+ string out;
+ OCWriteToString<string>(&out, a);
+ OCWriteToString<string>(&out, b);
+
+ string a2, b2, dummy;
+ StringPiece s = out;
+ StringPiece s2 = out;
+ CHECK(OCRead<string>(&s, &a2));
+ CHECK(OCRead<string>(&s2, NULL));
+ CHECK_EQ(s, s2);
+
+ CHECK(OCRead<string>(&s, &b2));
+ CHECK(OCRead<string>(&s2, NULL));
+ CHECK_EQ(s, s2);
+
+ CHECK(!OCRead<string>(&s, &dummy));
+ CHECK(!OCRead<string>(&s2, NULL));
+ CHECK_EQ(a, a2);
+ CHECK_EQ(b, b2);
+ CHECK(s.empty());
+ CHECK(s2.empty());
+ }
+ }
+}
+
+// 'str' is a static C-style string that may contain '\0'
+#define STATIC_STR(str) StringPiece((str), sizeof(str) - 1)
+
+static string EncodeStringIncreasing(StringPiece value) {
+ string encoded;
+ OrderedCode::WriteString(&encoded, value);
+ return encoded;
+}
+
+TEST(String, Increasing) {
+ // Here are a series of strings in non-decreasing order, including
+ // consecutive strings such that the second one is equal to, a proper
+ // prefix of, or has the same length as the first one. Most also contain
+ // the special escaping characters '\x00' and '\xff'.
+ ASSERT_EQ(EncodeStringIncreasing(STATIC_STR("")),
+ EncodeStringIncreasing(STATIC_STR("")));
+
+ ASSERT_LT(EncodeStringIncreasing(STATIC_STR("")),
+ EncodeStringIncreasing(STATIC_STR("\x00")));
+
+ ASSERT_EQ(EncodeStringIncreasing(STATIC_STR("\x00")),
+ EncodeStringIncreasing(STATIC_STR("\x00")));
+
+ ASSERT_LT(EncodeStringIncreasing(STATIC_STR("\x00")),
+ EncodeStringIncreasing(STATIC_STR("\x01")));
+
+ ASSERT_LT(EncodeStringIncreasing(STATIC_STR("\x01")),
+ EncodeStringIncreasing(STATIC_STR("a")));
+
+ ASSERT_EQ(EncodeStringIncreasing(STATIC_STR("a")),
+ EncodeStringIncreasing(STATIC_STR("a")));
+
+ ASSERT_LT(EncodeStringIncreasing(STATIC_STR("a")),
+ EncodeStringIncreasing(STATIC_STR("aa")));
+
+ ASSERT_LT(EncodeStringIncreasing(STATIC_STR("aa")),
+ EncodeStringIncreasing(STATIC_STR("\xff")));
+
+ ASSERT_LT(EncodeStringIncreasing(STATIC_STR("\xff")),
+ EncodeStringIncreasing(STATIC_STR("\xff\x00")));
+
+ ASSERT_LT(EncodeStringIncreasing(STATIC_STR("\xff\x00")),
+ EncodeStringIncreasing(STATIC_STR("\xff\x01")));
+}
+
+TEST(EncodingIsExpected, String) {
+ std::vector<std::pair<string, string>> data = {
+ {"", string("\x00\x01", 2)},
+ {"foo", string("foo\x00\x01", 5)},
+ {"hello", string("hello\x00\x01", 7)},
+ {string("\x00\x01\xff", 3), string("\x00\xff\x01\xff\x00\x00\x01", 7)},
+ };
+ for (const auto& t : data) {
+ string result;
+ OrderedCode::WriteString(&result, t.first);
+ EXPECT_EQ(t.second, result);
+
+ StringPiece in = result;
+ string decoded;
+ EXPECT_TRUE(OrderedCode::ReadString(&in, &decoded));
+ EXPECT_EQ(t.first, decoded);
+ EXPECT_EQ("", in);
+ }
+}
+
+TEST(EncodingIsExpected, Unsigned) {
+ std::vector<std::pair<uint64, string>> data = {
+ {0x0ull, string("\000", 1)},
+ {0x1ull, string("\001\001", 2)},
+ {0x2ull, string("\001\002", 2)},
+ {0x1ull, string("\001\001", 2)},
+ {0x2ull, string("\001\002", 2)},
+ {0x3ull, string("\001\003", 2)},
+ {0x3ull, string("\001\003", 2)},
+ {0x4ull, string("\001\004", 2)},
+ {0x5ull, string("\001\005", 2)},
+ {0x7ull, string("\001\007", 2)},
+ {0x8ull, string("\001\010", 2)},
+ {0x9ull, string("\001\t", 2)},
+ {0xfull, string("\001\017", 2)},
+ {0x10ull, string("\001\020", 2)},
+ {0x11ull, string("\001\021", 2)},
+ {0x1full, string("\001\037", 2)},
+ {0x20ull, string("\001 ", 2)},
+ {0x21ull, string("\001!", 2)},
+ {0x3full, string("\001?", 2)},
+ {0x40ull, string("\001@", 2)},
+ {0x41ull, string("\001A", 2)},
+ {0x7full, string("\001\177", 2)},
+ {0x80ull, string("\001\200", 2)},
+ {0x81ull, string("\001\201", 2)},
+ {0xffull, string("\001\377", 2)},
+ {0x100ull, string("\002\001\000", 3)},
+ {0x101ull, string("\002\001\001", 3)},
+ {0x1ffull, string("\002\001\377", 3)},
+ {0x200ull, string("\002\002\000", 3)},
+ {0x201ull, string("\002\002\001", 3)},
+ {0x3ffull, string("\002\003\377", 3)},
+ {0x400ull, string("\002\004\000", 3)},
+ {0x401ull, string("\002\004\001", 3)},
+ {0x7ffull, string("\002\007\377", 3)},
+ {0x800ull, string("\002\010\000", 3)},
+ {0x801ull, string("\002\010\001", 3)},
+ {0xfffull, string("\002\017\377", 3)},
+ {0x1000ull, string("\002\020\000", 3)},
+ {0x1001ull, string("\002\020\001", 3)},
+ {0x1fffull, string("\002\037\377", 3)},
+ {0x2000ull, string("\002 \000", 3)},
+ {0x2001ull, string("\002 \001", 3)},
+ {0x3fffull, string("\002?\377", 3)},
+ {0x4000ull, string("\002@\000", 3)},
+ {0x4001ull, string("\002@\001", 3)},
+ {0x7fffull, string("\002\177\377", 3)},
+ {0x8000ull, string("\002\200\000", 3)},
+ {0x8001ull, string("\002\200\001", 3)},
+ {0xffffull, string("\002\377\377", 3)},
+ {0x10000ull, string("\003\001\000\000", 4)},
+ {0x10001ull, string("\003\001\000\001", 4)},
+ {0x1ffffull, string("\003\001\377\377", 4)},
+ {0x20000ull, string("\003\002\000\000", 4)},
+ {0x20001ull, string("\003\002\000\001", 4)},
+ {0x3ffffull, string("\003\003\377\377", 4)},
+ {0x40000ull, string("\003\004\000\000", 4)},
+ {0x40001ull, string("\003\004\000\001", 4)},
+ {0x7ffffull, string("\003\007\377\377", 4)},
+ {0x80000ull, string("\003\010\000\000", 4)},
+ {0x80001ull, string("\003\010\000\001", 4)},
+ {0xfffffull, string("\003\017\377\377", 4)},
+ {0x100000ull, string("\003\020\000\000", 4)},
+ {0x100001ull, string("\003\020\000\001", 4)},
+ {0x1fffffull, string("\003\037\377\377", 4)},
+ {0x200000ull, string("\003 \000\000", 4)},
+ {0x200001ull, string("\003 \000\001", 4)},
+ {0x3fffffull, string("\003?\377\377", 4)},
+ {0x400000ull, string("\003@\000\000", 4)},
+ {0x400001ull, string("\003@\000\001", 4)},
+ {0x7fffffull, string("\003\177\377\377", 4)},
+ {0x800000ull, string("\003\200\000\000", 4)},
+ {0x800001ull, string("\003\200\000\001", 4)},
+ {0xffffffull, string("\003\377\377\377", 4)},
+ {0x1000000ull, string("\004\001\000\000\000", 5)},
+ {0x1000001ull, string("\004\001\000\000\001", 5)},
+ {0x1ffffffull, string("\004\001\377\377\377", 5)},
+ {0x2000000ull, string("\004\002\000\000\000", 5)},
+ {0x2000001ull, string("\004\002\000\000\001", 5)},
+ {0x3ffffffull, string("\004\003\377\377\377", 5)},
+ {0x4000000ull, string("\004\004\000\000\000", 5)},
+ {0x4000001ull, string("\004\004\000\000\001", 5)},
+ {0x7ffffffull, string("\004\007\377\377\377", 5)},
+ {0x8000000ull, string("\004\010\000\000\000", 5)},
+ {0x8000001ull, string("\004\010\000\000\001", 5)},
+ {0xfffffffull, string("\004\017\377\377\377", 5)},
+ {0x10000000ull, string("\004\020\000\000\000", 5)},
+ {0x10000001ull, string("\004\020\000\000\001", 5)},
+ {0x1fffffffull, string("\004\037\377\377\377", 5)},
+ {0x20000000ull, string("\004 \000\000\000", 5)},
+ {0x20000001ull, string("\004 \000\000\001", 5)},
+ {0x3fffffffull, string("\004?\377\377\377", 5)},
+ {0x40000000ull, string("\004@\000\000\000", 5)},
+ {0x40000001ull, string("\004@\000\000\001", 5)},
+ {0x7fffffffull, string("\004\177\377\377\377", 5)},
+ {0x80000000ull, string("\004\200\000\000\000", 5)},
+ {0x80000001ull, string("\004\200\000\000\001", 5)},
+ {0xffffffffull, string("\004\377\377\377\377", 5)},
+ {0x100000000ull, string("\005\001\000\000\000\000", 6)},
+ {0x100000001ull, string("\005\001\000\000\000\001", 6)},
+ {0x1ffffffffull, string("\005\001\377\377\377\377", 6)},
+ {0x200000000ull, string("\005\002\000\000\000\000", 6)},
+ {0x200000001ull, string("\005\002\000\000\000\001", 6)},
+ {0x3ffffffffull, string("\005\003\377\377\377\377", 6)},
+ {0x400000000ull, string("\005\004\000\000\000\000", 6)},
+ {0x400000001ull, string("\005\004\000\000\000\001", 6)},
+ {0x7ffffffffull, string("\005\007\377\377\377\377", 6)},
+ {0x800000000ull, string("\005\010\000\000\000\000", 6)},
+ {0x800000001ull, string("\005\010\000\000\000\001", 6)},
+ {0xfffffffffull, string("\005\017\377\377\377\377", 6)},
+ {0x1000000000ull, string("\005\020\000\000\000\000", 6)},
+ {0x1000000001ull, string("\005\020\000\000\000\001", 6)},
+ {0x1fffffffffull, string("\005\037\377\377\377\377", 6)},
+ {0x2000000000ull, string("\005 \000\000\000\000", 6)},
+ {0x2000000001ull, string("\005 \000\000\000\001", 6)},
+ {0x3fffffffffull, string("\005?\377\377\377\377", 6)},
+ {0x4000000000ull, string("\005@\000\000\000\000", 6)},
+ {0x4000000001ull, string("\005@\000\000\000\001", 6)},
+ {0x7fffffffffull, string("\005\177\377\377\377\377", 6)},
+ {0x8000000000ull, string("\005\200\000\000\000\000", 6)},
+ {0x8000000001ull, string("\005\200\000\000\000\001", 6)},
+ {0xffffffffffull, string("\005\377\377\377\377\377", 6)},
+ {0x10000000000ull, string("\006\001\000\000\000\000\000", 7)},
+ {0x10000000001ull, string("\006\001\000\000\000\000\001", 7)},
+ {0x1ffffffffffull, string("\006\001\377\377\377\377\377", 7)},
+ {0x20000000000ull, string("\006\002\000\000\000\000\000", 7)},
+ {0x20000000001ull, string("\006\002\000\000\000\000\001", 7)},
+ {0x3ffffffffffull, string("\006\003\377\377\377\377\377", 7)},
+ {0x40000000000ull, string("\006\004\000\000\000\000\000", 7)},
+ {0x40000000001ull, string("\006\004\000\000\000\000\001", 7)},
+ {0x7ffffffffffull, string("\006\007\377\377\377\377\377", 7)},
+ {0x80000000000ull, string("\006\010\000\000\000\000\000", 7)},
+ {0x80000000001ull, string("\006\010\000\000\000\000\001", 7)},
+ {0xfffffffffffull, string("\006\017\377\377\377\377\377", 7)},
+ {0x100000000000ull, string("\006\020\000\000\000\000\000", 7)},
+ {0x100000000001ull, string("\006\020\000\000\000\000\001", 7)},
+ {0x1fffffffffffull, string("\006\037\377\377\377\377\377", 7)},
+ {0x200000000000ull, string("\006 \000\000\000\000\000", 7)},
+ {0x200000000001ull, string("\006 \000\000\000\000\001", 7)},
+ {0x3fffffffffffull, string("\006?\377\377\377\377\377", 7)},
+ {0x400000000000ull, string("\006@\000\000\000\000\000", 7)},
+ {0x400000000001ull, string("\006@\000\000\000\000\001", 7)},
+ {0x7fffffffffffull, string("\006\177\377\377\377\377\377", 7)},
+ {0x800000000000ull, string("\006\200\000\000\000\000\000", 7)},
+ {0x800000000001ull, string("\006\200\000\000\000\000\001", 7)},
+ {0xffffffffffffull, string("\006\377\377\377\377\377\377", 7)},
+ {0x1000000000000ull, string("\007\001\000\000\000\000\000\000", 8)},
+ {0x1000000000001ull, string("\007\001\000\000\000\000\000\001", 8)},
+ {0x1ffffffffffffull, string("\007\001\377\377\377\377\377\377", 8)},
+ {0x2000000000000ull, string("\007\002\000\000\000\000\000\000", 8)},
+ {0x2000000000001ull, string("\007\002\000\000\000\000\000\001", 8)},
+ {0x3ffffffffffffull, string("\007\003\377\377\377\377\377\377", 8)},
+ {0x4000000000000ull, string("\007\004\000\000\000\000\000\000", 8)},
+ {0x4000000000001ull, string("\007\004\000\000\000\000\000\001", 8)},
+ {0x7ffffffffffffull, string("\007\007\377\377\377\377\377\377", 8)},
+ {0x8000000000000ull, string("\007\010\000\000\000\000\000\000", 8)},
+ {0x8000000000001ull, string("\007\010\000\000\000\000\000\001", 8)},
+ {0xfffffffffffffull, string("\007\017\377\377\377\377\377\377", 8)},
+ {0x10000000000000ull, string("\007\020\000\000\000\000\000\000", 8)},
+ {0x10000000000001ull, string("\007\020\000\000\000\000\000\001", 8)},
+ {0x1fffffffffffffull, string("\007\037\377\377\377\377\377\377", 8)},
+ {0x20000000000000ull, string("\007 \000\000\000\000\000\000", 8)},
+ {0x20000000000001ull, string("\007 \000\000\000\000\000\001", 8)},
+ {0x3fffffffffffffull, string("\007?\377\377\377\377\377\377", 8)},
+ {0x40000000000000ull, string("\007@\000\000\000\000\000\000", 8)},
+ {0x40000000000001ull, string("\007@\000\000\000\000\000\001", 8)},
+ {0x7fffffffffffffull, string("\007\177\377\377\377\377\377\377", 8)},
+ {0x80000000000000ull, string("\007\200\000\000\000\000\000\000", 8)},
+ {0x80000000000001ull, string("\007\200\000\000\000\000\000\001", 8)},
+ {0xffffffffffffffull, string("\007\377\377\377\377\377\377\377", 8)},
+ {0x100000000000000ull, string("\010\001\000\000\000\000\000\000\000", 9)},
+ {0x100000000000001ull, string("\010\001\000\000\000\000\000\000\001", 9)},
+ {0x1ffffffffffffffull, string("\010\001\377\377\377\377\377\377\377", 9)},
+ {0x200000000000000ull, string("\010\002\000\000\000\000\000\000\000", 9)},
+ {0x200000000000001ull, string("\010\002\000\000\000\000\000\000\001", 9)},
+ {0x3ffffffffffffffull, string("\010\003\377\377\377\377\377\377\377", 9)},
+ {0x400000000000000ull, string("\010\004\000\000\000\000\000\000\000", 9)},
+ {0x400000000000001ull, string("\010\004\000\000\000\000\000\000\001", 9)},
+ {0x7ffffffffffffffull, string("\010\007\377\377\377\377\377\377\377", 9)},
+ {0x800000000000000ull, string("\010\010\000\000\000\000\000\000\000", 9)},
+ {0x800000000000001ull, string("\010\010\000\000\000\000\000\000\001", 9)},
+ {0xfffffffffffffffull, string("\010\017\377\377\377\377\377\377\377", 9)},
+ {0x1000000000000000ull,
+ string("\010\020\000\000\000\000\000\000\000", 9)},
+ {0x1000000000000001ull,
+ string("\010\020\000\000\000\000\000\000\001", 9)},
+ {0x1fffffffffffffffull,
+ string("\010\037\377\377\377\377\377\377\377", 9)},
+ {0x2000000000000000ull, string("\010 \000\000\000\000\000\000\000", 9)},
+ {0x2000000000000001ull, string("\010 \000\000\000\000\000\000\001", 9)},
+ {0x3fffffffffffffffull, string("\010?\377\377\377\377\377\377\377", 9)},
+ {0x4000000000000000ull, string("\010@\000\000\000\000\000\000\000", 9)},
+ {0x4000000000000001ull, string("\010@\000\000\000\000\000\000\001", 9)},
+ {0x7fffffffffffffffull,
+ string("\010\177\377\377\377\377\377\377\377", 9)},
+ {0x8000000000000000ull,
+ string("\010\200\000\000\000\000\000\000\000", 9)},
+ {0x8000000000000001ull,
+ string("\010\200\000\000\000\000\000\000\001", 9)},
+ };
+ for (const auto& t : data) {
+ uint64 num = t.first;
+ string result;
+ OrderedCode::WriteNumIncreasing(&result, num);
+ EXPECT_EQ(t.second, result) << std::hex << num;
+
+ StringPiece in = result;
+ uint64 decoded;
+ EXPECT_TRUE(OrderedCode::ReadNumIncreasing(&in, &decoded));
+ EXPECT_EQ(num, decoded);
+ EXPECT_EQ("", in);
+ }
+}
+
+TEST(EncodingIsExpected, Signed) {
+ std::vector<std::pair<int64, string>> data = {
+ {0ll, string("\200", 1)},
+ {1ll, string("\201", 1)},
+ {2ll, string("\202", 1)},
+ {1ll, string("\201", 1)},
+ {2ll, string("\202", 1)},
+ {3ll, string("\203", 1)},
+ {3ll, string("\203", 1)},
+ {4ll, string("\204", 1)},
+ {5ll, string("\205", 1)},
+ {7ll, string("\207", 1)},
+ {8ll, string("\210", 1)},
+ {9ll, string("\211", 1)},
+ {15ll, string("\217", 1)},
+ {16ll, string("\220", 1)},
+ {17ll, string("\221", 1)},
+ {31ll, string("\237", 1)},
+ {32ll, string("\240", 1)},
+ {33ll, string("\241", 1)},
+ {63ll, string("\277", 1)},
+ {64ll, string("\300@", 2)},
+ {65ll, string("\300A", 2)},
+ {127ll, string("\300\177", 2)},
+ {128ll, string("\300\200", 2)},
+ {129ll, string("\300\201", 2)},
+ {255ll, string("\300\377", 2)},
+ {256ll, string("\301\000", 2)},
+ {257ll, string("\301\001", 2)},
+ {511ll, string("\301\377", 2)},
+ {512ll, string("\302\000", 2)},
+ {513ll, string("\302\001", 2)},
+ {1023ll, string("\303\377", 2)},
+ {1024ll, string("\304\000", 2)},
+ {1025ll, string("\304\001", 2)},
+ {2047ll, string("\307\377", 2)},
+ {2048ll, string("\310\000", 2)},
+ {2049ll, string("\310\001", 2)},
+ {4095ll, string("\317\377", 2)},
+ {4096ll, string("\320\000", 2)},
+ {4097ll, string("\320\001", 2)},
+ {8191ll, string("\337\377", 2)},
+ {8192ll, string("\340 \000", 3)},
+ {8193ll, string("\340 \001", 3)},
+ {16383ll, string("\340?\377", 3)},
+ {16384ll, string("\340@\000", 3)},
+ {16385ll, string("\340@\001", 3)},
+ {32767ll, string("\340\177\377", 3)},
+ {32768ll, string("\340\200\000", 3)},
+ {32769ll, string("\340\200\001", 3)},
+ {65535ll, string("\340\377\377", 3)},
+ {65536ll, string("\341\000\000", 3)},
+ {65537ll, string("\341\000\001", 3)},
+ {131071ll, string("\341\377\377", 3)},
+ {131072ll, string("\342\000\000", 3)},
+ {131073ll, string("\342\000\001", 3)},
+ {262143ll, string("\343\377\377", 3)},
+ {262144ll, string("\344\000\000", 3)},
+ {262145ll, string("\344\000\001", 3)},
+ {524287ll, string("\347\377\377", 3)},
+ {524288ll, string("\350\000\000", 3)},
+ {524289ll, string("\350\000\001", 3)},
+ {1048575ll, string("\357\377\377", 3)},
+ {1048576ll, string("\360\020\000\000", 4)},
+ {1048577ll, string("\360\020\000\001", 4)},
+ {2097151ll, string("\360\037\377\377", 4)},
+ {2097152ll, string("\360 \000\000", 4)},
+ {2097153ll, string("\360 \000\001", 4)},
+ {4194303ll, string("\360?\377\377", 4)},
+ {4194304ll, string("\360@\000\000", 4)},
+ {4194305ll, string("\360@\000\001", 4)},
+ {8388607ll, string("\360\177\377\377", 4)},
+ {8388608ll, string("\360\200\000\000", 4)},
+ {8388609ll, string("\360\200\000\001", 4)},
+ {16777215ll, string("\360\377\377\377", 4)},
+ {16777216ll, string("\361\000\000\000", 4)},
+ {16777217ll, string("\361\000\000\001", 4)},
+ {33554431ll, string("\361\377\377\377", 4)},
+ {33554432ll, string("\362\000\000\000", 4)},
+ {33554433ll, string("\362\000\000\001", 4)},
+ {67108863ll, string("\363\377\377\377", 4)},
+ {67108864ll, string("\364\000\000\000", 4)},
+ {67108865ll, string("\364\000\000\001", 4)},
+ {134217727ll, string("\367\377\377\377", 4)},
+ {134217728ll, string("\370\010\000\000\000", 5)},
+ {134217729ll, string("\370\010\000\000\001", 5)},
+ {268435455ll, string("\370\017\377\377\377", 5)},
+ {268435456ll, string("\370\020\000\000\000", 5)},
+ {268435457ll, string("\370\020\000\000\001", 5)},
+ {536870911ll, string("\370\037\377\377\377", 5)},
+ {536870912ll, string("\370 \000\000\000", 5)},
+ {536870913ll, string("\370 \000\000\001", 5)},
+ {1073741823ll, string("\370?\377\377\377", 5)},
+ {1073741824ll, string("\370@\000\000\000", 5)},
+ {1073741825ll, string("\370@\000\000\001", 5)},
+ {2147483647ll, string("\370\177\377\377\377", 5)},
+ {2147483648ll, string("\370\200\000\000\000", 5)},
+ {2147483649ll, string("\370\200\000\000\001", 5)},
+ {4294967295ll, string("\370\377\377\377\377", 5)},
+ {4294967296ll, string("\371\000\000\000\000", 5)},
+ {4294967297ll, string("\371\000\000\000\001", 5)},
+ {8589934591ll, string("\371\377\377\377\377", 5)},
+ {8589934592ll, string("\372\000\000\000\000", 5)},
+ {8589934593ll, string("\372\000\000\000\001", 5)},
+ {17179869183ll, string("\373\377\377\377\377", 5)},
+ {17179869184ll, string("\374\004\000\000\000\000", 6)},
+ {17179869185ll, string("\374\004\000\000\000\001", 6)},
+ {34359738367ll, string("\374\007\377\377\377\377", 6)},
+ {34359738368ll, string("\374\010\000\000\000\000", 6)},
+ {34359738369ll, string("\374\010\000\000\000\001", 6)},
+ {68719476735ll, string("\374\017\377\377\377\377", 6)},
+ {68719476736ll, string("\374\020\000\000\000\000", 6)},
+ {68719476737ll, string("\374\020\000\000\000\001", 6)},
+ {137438953471ll, string("\374\037\377\377\377\377", 6)},
+ {137438953472ll, string("\374 \000\000\000\000", 6)},
+ {137438953473ll, string("\374 \000\000\000\001", 6)},
+ {274877906943ll, string("\374?\377\377\377\377", 6)},
+ {274877906944ll, string("\374@\000\000\000\000", 6)},
+ {274877906945ll, string("\374@\000\000\000\001", 6)},
+ {549755813887ll, string("\374\177\377\377\377\377", 6)},
+ {549755813888ll, string("\374\200\000\000\000\000", 6)},
+ {549755813889ll, string("\374\200\000\000\000\001", 6)},
+ {1099511627775ll, string("\374\377\377\377\377\377", 6)},
+ {1099511627776ll, string("\375\000\000\000\000\000", 6)},
+ {1099511627777ll, string("\375\000\000\000\000\001", 6)},
+ {2199023255551ll, string("\375\377\377\377\377\377", 6)},
+ {2199023255552ll, string("\376\002\000\000\000\000\000", 7)},
+ {2199023255553ll, string("\376\002\000\000\000\000\001", 7)},
+ {4398046511103ll, string("\376\003\377\377\377\377\377", 7)},
+ {4398046511104ll, string("\376\004\000\000\000\000\000", 7)},
+ {4398046511105ll, string("\376\004\000\000\000\000\001", 7)},
+ {8796093022207ll, string("\376\007\377\377\377\377\377", 7)},
+ {8796093022208ll, string("\376\010\000\000\000\000\000", 7)},
+ {8796093022209ll, string("\376\010\000\000\000\000\001", 7)},
+ {17592186044415ll, string("\376\017\377\377\377\377\377", 7)},
+ {17592186044416ll, string("\376\020\000\000\000\000\000", 7)},
+ {17592186044417ll, string("\376\020\000\000\000\000\001", 7)},
+ {35184372088831ll, string("\376\037\377\377\377\377\377", 7)},
+ {35184372088832ll, string("\376 \000\000\000\000\000", 7)},
+ {35184372088833ll, string("\376 \000\000\000\000\001", 7)},
+ {70368744177663ll, string("\376?\377\377\377\377\377", 7)},
+ {70368744177664ll, string("\376@\000\000\000\000\000", 7)},
+ {70368744177665ll, string("\376@\000\000\000\000\001", 7)},
+ {140737488355327ll, string("\376\177\377\377\377\377\377", 7)},
+ {140737488355328ll, string("\376\200\000\000\000\000\000", 7)},
+ {140737488355329ll, string("\376\200\000\000\000\000\001", 7)},
+ {281474976710655ll, string("\376\377\377\377\377\377\377", 7)},
+ {281474976710656ll, string("\377\001\000\000\000\000\000\000", 8)},
+ {281474976710657ll, string("\377\001\000\000\000\000\000\001", 8)},
+ {562949953421311ll, string("\377\001\377\377\377\377\377\377", 8)},
+ {562949953421312ll, string("\377\002\000\000\000\000\000\000", 8)},
+ {562949953421313ll, string("\377\002\000\000\000\000\000\001", 8)},
+ {1125899906842623ll, string("\377\003\377\377\377\377\377\377", 8)},
+ {1125899906842624ll, string("\377\004\000\000\000\000\000\000", 8)},
+ {1125899906842625ll, string("\377\004\000\000\000\000\000\001", 8)},
+ {2251799813685247ll, string("\377\007\377\377\377\377\377\377", 8)},
+ {2251799813685248ll, string("\377\010\000\000\000\000\000\000", 8)},
+ {2251799813685249ll, string("\377\010\000\000\000\000\000\001", 8)},
+ {4503599627370495ll, string("\377\017\377\377\377\377\377\377", 8)},
+ {4503599627370496ll, string("\377\020\000\000\000\000\000\000", 8)},
+ {4503599627370497ll, string("\377\020\000\000\000\000\000\001", 8)},
+ {9007199254740991ll, string("\377\037\377\377\377\377\377\377", 8)},
+ {9007199254740992ll, string("\377 \000\000\000\000\000\000", 8)},
+ {9007199254740993ll, string("\377 \000\000\000\000\000\001", 8)},
+ {18014398509481983ll, string("\377?\377\377\377\377\377\377", 8)},
+ {18014398509481984ll, string("\377@\000\000\000\000\000\000", 8)},
+ {18014398509481985ll, string("\377@\000\000\000\000\000\001", 8)},
+ {36028797018963967ll, string("\377\177\377\377\377\377\377\377", 8)},
+ {36028797018963968ll, string("\377\200\200\000\000\000\000\000\000", 9)},
+ {36028797018963969ll, string("\377\200\200\000\000\000\000\000\001", 9)},
+ {72057594037927935ll, string("\377\200\377\377\377\377\377\377\377", 9)},
+ {72057594037927936ll, string("\377\201\000\000\000\000\000\000\000", 9)},
+ {72057594037927937ll, string("\377\201\000\000\000\000\000\000\001", 9)},
+ {144115188075855871ll, string("\377\201\377\377\377\377\377\377\377", 9)},
+ {144115188075855872ll, string("\377\202\000\000\000\000\000\000\000", 9)},
+ {144115188075855873ll, string("\377\202\000\000\000\000\000\000\001", 9)},
+ {288230376151711743ll, string("\377\203\377\377\377\377\377\377\377", 9)},
+ {288230376151711744ll, string("\377\204\000\000\000\000\000\000\000", 9)},
+ {288230376151711745ll, string("\377\204\000\000\000\000\000\000\001", 9)},
+ {576460752303423487ll, string("\377\207\377\377\377\377\377\377\377", 9)},
+ {576460752303423488ll, string("\377\210\000\000\000\000\000\000\000", 9)},
+ {576460752303423489ll, string("\377\210\000\000\000\000\000\000\001", 9)},
+ {1152921504606846975ll,
+ string("\377\217\377\377\377\377\377\377\377", 9)},
+ {1152921504606846976ll,
+ string("\377\220\000\000\000\000\000\000\000", 9)},
+ {1152921504606846977ll,
+ string("\377\220\000\000\000\000\000\000\001", 9)},
+ {2305843009213693951ll,
+ string("\377\237\377\377\377\377\377\377\377", 9)},
+ {2305843009213693952ll,
+ string("\377\240\000\000\000\000\000\000\000", 9)},
+ {2305843009213693953ll,
+ string("\377\240\000\000\000\000\000\000\001", 9)},
+ {4611686018427387903ll,
+ string("\377\277\377\377\377\377\377\377\377", 9)},
+ {4611686018427387904ll,
+ string("\377\300@\000\000\000\000\000\000\000", 10)},
+ {4611686018427387905ll,
+ string("\377\300@\000\000\000\000\000\000\001", 10)},
+ {9223372036854775807ll,
+ string("\377\300\177\377\377\377\377\377\377\377", 10)},
+ {-9223372036854775807ll,
+ string("\000?\200\000\000\000\000\000\000\001", 10)},
+ {0ll, string("\200", 1)},
+ {-1ll, string("\177", 1)},
+ {-2ll, string("~", 1)},
+ {-1ll, string("\177", 1)},
+ {-2ll, string("~", 1)},
+ {-3ll, string("}", 1)},
+ {-3ll, string("}", 1)},
+ {-4ll, string("|", 1)},
+ {-5ll, string("{", 1)},
+ {-7ll, string("y", 1)},
+ {-8ll, string("x", 1)},
+ {-9ll, string("w", 1)},
+ {-15ll, string("q", 1)},
+ {-16ll, string("p", 1)},
+ {-17ll, string("o", 1)},
+ {-31ll, string("a", 1)},
+ {-32ll, string("`", 1)},
+ {-33ll, string("_", 1)},
+ {-63ll, string("A", 1)},
+ {-64ll, string("@", 1)},
+ {-65ll, string("?\277", 2)},
+ {-127ll, string("?\201", 2)},
+ {-128ll, string("?\200", 2)},
+ {-129ll, string("?\177", 2)},
+ {-255ll, string("?\001", 2)},
+ {-256ll, string("?\000", 2)},
+ {-257ll, string(">\377", 2)},
+ {-511ll, string(">\001", 2)},
+ {-512ll, string(">\000", 2)},
+ {-513ll, string("=\377", 2)},
+ {-1023ll, string("<\001", 2)},
+ {-1024ll, string("<\000", 2)},
+ {-1025ll, string(";\377", 2)},
+ {-2047ll, string("8\001", 2)},
+ {-2048ll, string("8\000", 2)},
+ {-2049ll, string("7\377", 2)},
+ {-4095ll, string("0\001", 2)},
+ {-4096ll, string("0\000", 2)},
+ {-4097ll, string("/\377", 2)},
+ {-8191ll, string(" \001", 2)},
+ {-8192ll, string(" \000", 2)},
+ {-8193ll, string("\037\337\377", 3)},
+ {-16383ll, string("\037\300\001", 3)},
+ {-16384ll, string("\037\300\000", 3)},
+ {-16385ll, string("\037\277\377", 3)},
+ {-32767ll, string("\037\200\001", 3)},
+ {-32768ll, string("\037\200\000", 3)},
+ {-32769ll, string("\037\177\377", 3)},
+ {-65535ll, string("\037\000\001", 3)},
+ {-65536ll, string("\037\000\000", 3)},
+ {-65537ll, string("\036\377\377", 3)},
+ {-131071ll, string("\036\000\001", 3)},
+ {-131072ll, string("\036\000\000", 3)},
+ {-131073ll, string("\035\377\377", 3)},
+ {-262143ll, string("\034\000\001", 3)},
+ {-262144ll, string("\034\000\000", 3)},
+ {-262145ll, string("\033\377\377", 3)},
+ {-524287ll, string("\030\000\001", 3)},
+ {-524288ll, string("\030\000\000", 3)},
+ {-524289ll, string("\027\377\377", 3)},
+ {-1048575ll, string("\020\000\001", 3)},
+ {-1048576ll, string("\020\000\000", 3)},
+ {-1048577ll, string("\017\357\377\377", 4)},
+ {-2097151ll, string("\017\340\000\001", 4)},
+ {-2097152ll, string("\017\340\000\000", 4)},
+ {-2097153ll, string("\017\337\377\377", 4)},
+ {-4194303ll, string("\017\300\000\001", 4)},
+ {-4194304ll, string("\017\300\000\000", 4)},
+ {-4194305ll, string("\017\277\377\377", 4)},
+ {-8388607ll, string("\017\200\000\001", 4)},
+ {-8388608ll, string("\017\200\000\000", 4)},
+ {-8388609ll, string("\017\177\377\377", 4)},
+ {-16777215ll, string("\017\000\000\001", 4)},
+ {-16777216ll, string("\017\000\000\000", 4)},
+ {-16777217ll, string("\016\377\377\377", 4)},
+ {-33554431ll, string("\016\000\000\001", 4)},
+ {-33554432ll, string("\016\000\000\000", 4)},
+ {-33554433ll, string("\r\377\377\377", 4)},
+ {-67108863ll, string("\014\000\000\001", 4)},
+ {-67108864ll, string("\014\000\000\000", 4)},
+ {-67108865ll, string("\013\377\377\377", 4)},
+ {-134217727ll, string("\010\000\000\001", 4)},
+ {-134217728ll, string("\010\000\000\000", 4)},
+ {-134217729ll, string("\007\367\377\377\377", 5)},
+ {-268435455ll, string("\007\360\000\000\001", 5)},
+ {-268435456ll, string("\007\360\000\000\000", 5)},
+ {-268435457ll, string("\007\357\377\377\377", 5)},
+ {-536870911ll, string("\007\340\000\000\001", 5)},
+ {-536870912ll, string("\007\340\000\000\000", 5)},
+ {-536870913ll, string("\007\337\377\377\377", 5)},
+ {-1073741823ll, string("\007\300\000\000\001", 5)},
+ {-1073741824ll, string("\007\300\000\000\000", 5)},
+ {-1073741825ll, string("\007\277\377\377\377", 5)},
+ {-2147483647ll, string("\007\200\000\000\001", 5)},
+ {-2147483648ll, string("\007\200\000\000\000", 5)},
+ {-2147483649ll, string("\007\177\377\377\377", 5)},
+ {-4294967295ll, string("\007\000\000\000\001", 5)},
+ {-4294967296ll, string("\007\000\000\000\000", 5)},
+ {-4294967297ll, string("\006\377\377\377\377", 5)},
+ {-8589934591ll, string("\006\000\000\000\001", 5)},
+ {-8589934592ll, string("\006\000\000\000\000", 5)},
+ {-8589934593ll, string("\005\377\377\377\377", 5)},
+ {-17179869183ll, string("\004\000\000\000\001", 5)},
+ {-17179869184ll, string("\004\000\000\000\000", 5)},
+ {-17179869185ll, string("\003\373\377\377\377\377", 6)},
+ {-34359738367ll, string("\003\370\000\000\000\001", 6)},
+ {-34359738368ll, string("\003\370\000\000\000\000", 6)},
+ {-34359738369ll, string("\003\367\377\377\377\377", 6)},
+ {-68719476735ll, string("\003\360\000\000\000\001", 6)},
+ {-68719476736ll, string("\003\360\000\000\000\000", 6)},
+ {-68719476737ll, string("\003\357\377\377\377\377", 6)},
+ {-137438953471ll, string("\003\340\000\000\000\001", 6)},
+ {-137438953472ll, string("\003\340\000\000\000\000", 6)},
+ {-137438953473ll, string("\003\337\377\377\377\377", 6)},
+ {-274877906943ll, string("\003\300\000\000\000\001", 6)},
+ {-274877906944ll, string("\003\300\000\000\000\000", 6)},
+ {-274877906945ll, string("\003\277\377\377\377\377", 6)},
+ {-549755813887ll, string("\003\200\000\000\000\001", 6)},
+ {-549755813888ll, string("\003\200\000\000\000\000", 6)},
+ {-549755813889ll, string("\003\177\377\377\377\377", 6)},
+ {-1099511627775ll, string("\003\000\000\000\000\001", 6)},
+ {-1099511627776ll, string("\003\000\000\000\000\000", 6)},
+ {-1099511627777ll, string("\002\377\377\377\377\377", 6)},
+ {-2199023255551ll, string("\002\000\000\000\000\001", 6)},
+ {-2199023255552ll, string("\002\000\000\000\000\000", 6)},
+ {-2199023255553ll, string("\001\375\377\377\377\377\377", 7)},
+ {-4398046511103ll, string("\001\374\000\000\000\000\001", 7)},
+ {-4398046511104ll, string("\001\374\000\000\000\000\000", 7)},
+ {-4398046511105ll, string("\001\373\377\377\377\377\377", 7)},
+ {-8796093022207ll, string("\001\370\000\000\000\000\001", 7)},
+ {-8796093022208ll, string("\001\370\000\000\000\000\000", 7)},
+ {-8796093022209ll, string("\001\367\377\377\377\377\377", 7)},
+ {-17592186044415ll, string("\001\360\000\000\000\000\001", 7)},
+ {-17592186044416ll, string("\001\360\000\000\000\000\000", 7)},
+ {-17592186044417ll, string("\001\357\377\377\377\377\377", 7)},
+ {-35184372088831ll, string("\001\340\000\000\000\000\001", 7)},
+ {-35184372088832ll, string("\001\340\000\000\000\000\000", 7)},
+ {-35184372088833ll, string("\001\337\377\377\377\377\377", 7)},
+ {-70368744177663ll, string("\001\300\000\000\000\000\001", 7)},
+ {-70368744177664ll, string("\001\300\000\000\000\000\000", 7)},
+ {-70368744177665ll, string("\001\277\377\377\377\377\377", 7)},
+ {-140737488355327ll, string("\001\200\000\000\000\000\001", 7)},
+ {-140737488355328ll, string("\001\200\000\000\000\000\000", 7)},
+ {-140737488355329ll, string("\001\177\377\377\377\377\377", 7)},
+ {-281474976710655ll, string("\001\000\000\000\000\000\001", 7)},
+ {-281474976710656ll, string("\001\000\000\000\000\000\000", 7)},
+ {-281474976710657ll, string("\000\376\377\377\377\377\377\377", 8)},
+ {-562949953421311ll, string("\000\376\000\000\000\000\000\001", 8)},
+ {-562949953421312ll, string("\000\376\000\000\000\000\000\000", 8)},
+ {-562949953421313ll, string("\000\375\377\377\377\377\377\377", 8)},
+ {-1125899906842623ll, string("\000\374\000\000\000\000\000\001", 8)},
+ {-1125899906842624ll, string("\000\374\000\000\000\000\000\000", 8)},
+ {-1125899906842625ll, string("\000\373\377\377\377\377\377\377", 8)},
+ {-2251799813685247ll, string("\000\370\000\000\000\000\000\001", 8)},
+ {-2251799813685248ll, string("\000\370\000\000\000\000\000\000", 8)},
+ {-2251799813685249ll, string("\000\367\377\377\377\377\377\377", 8)},
+ {-4503599627370495ll, string("\000\360\000\000\000\000\000\001", 8)},
+ {-4503599627370496ll, string("\000\360\000\000\000\000\000\000", 8)},
+ {-4503599627370497ll, string("\000\357\377\377\377\377\377\377", 8)},
+ {-9007199254740991ll, string("\000\340\000\000\000\000\000\001", 8)},
+ {-9007199254740992ll, string("\000\340\000\000\000\000\000\000", 8)},
+ {-9007199254740993ll, string("\000\337\377\377\377\377\377\377", 8)},
+ {-18014398509481983ll, string("\000\300\000\000\000\000\000\001", 8)},
+ {-18014398509481984ll, string("\000\300\000\000\000\000\000\000", 8)},
+ {-18014398509481985ll, string("\000\277\377\377\377\377\377\377", 8)},
+ {-36028797018963967ll, string("\000\200\000\000\000\000\000\001", 8)},
+ {-36028797018963968ll, string("\000\200\000\000\000\000\000\000", 8)},
+ {-36028797018963969ll, string("\000\177\177\377\377\377\377\377\377", 9)},
+ {-72057594037927935ll, string("\000\177\000\000\000\000\000\000\001", 9)},
+ {-72057594037927936ll, string("\000\177\000\000\000\000\000\000\000", 9)},
+ {-72057594037927937ll, string("\000~\377\377\377\377\377\377\377", 9)},
+ {-144115188075855871ll, string("\000~\000\000\000\000\000\000\001", 9)},
+ {-144115188075855872ll, string("\000~\000\000\000\000\000\000\000", 9)},
+ {-144115188075855873ll, string("\000}\377\377\377\377\377\377\377", 9)},
+ {-288230376151711743ll, string("\000|\000\000\000\000\000\000\001", 9)},
+ {-288230376151711744ll, string("\000|\000\000\000\000\000\000\000", 9)},
+ {-288230376151711745ll, string("\000{\377\377\377\377\377\377\377", 9)},
+ {-576460752303423487ll, string("\000x\000\000\000\000\000\000\001", 9)},
+ {-576460752303423488ll, string("\000x\000\000\000\000\000\000\000", 9)},
+ {-576460752303423489ll, string("\000w\377\377\377\377\377\377\377", 9)},
+ {-1152921504606846975ll, string("\000p\000\000\000\000\000\000\001", 9)},
+ {-1152921504606846976ll, string("\000p\000\000\000\000\000\000\000", 9)},
+ {-1152921504606846977ll, string("\000o\377\377\377\377\377\377\377", 9)},
+ {-2305843009213693951ll, string("\000`\000\000\000\000\000\000\001", 9)},
+ {-2305843009213693952ll, string("\000`\000\000\000\000\000\000\000", 9)},
+ {-2305843009213693953ll, string("\000_\377\377\377\377\377\377\377", 9)},
+ {-4611686018427387903ll, string("\000@\000\000\000\000\000\000\001", 9)},
+ {-4611686018427387904ll, string("\000@\000\000\000\000\000\000\000", 9)},
+ {-4611686018427387905ll,
+ string("\000?\277\377\377\377\377\377\377\377", 10)},
+ {-9223372036854775807ll,
+ string("\000?\200\000\000\000\000\000\000\001", 10)},
+ {9223372036854775807ll,
+ string("\377\300\177\377\377\377\377\377\377\377", 10)},
+ };
+ for (const auto& t : data) {
+ int64 num = t.first;
+ string result;
+ OrderedCode::WriteSignedNumIncreasing(&result, num);
+ EXPECT_EQ(t.second, result) << std::hex << num;
+
+ StringPiece in = result;
+ int64 decoded;
+ EXPECT_TRUE(OrderedCode::ReadSignedNumIncreasing(&in, &decoded));
+ EXPECT_EQ(num, decoded);
+ EXPECT_EQ("", in);
+ }
+}
+
+static void BM_WriteString(int n, int len) {
+ testing::StopTiming();
+ random::PhiloxRandom philox(301, 17);
+ random::SimplePhilox rnd(&philox);
+ string x;
+ for (int i = 0; i < len; i++) {
+ x += rnd.Uniform(256);
+ }
+ string y;
+
+ testing::BytesProcessed(n * len);
+ testing::StartTiming();
+ while (n-- > 0) {
+ y.clear();
+ OCWriteToString<string>(&y, x);
+ }
+}
+
+static void BM_ReadString(int n, int len) {
+ testing::StopTiming();
+ random::PhiloxRandom philox(301, 17);
+ random::SimplePhilox rnd(&philox);
+ string x;
+ for (int i = 0; i < len; i++) {
+ x += rnd.Uniform(256);
+ }
+ string data;
+ OCWriteToString<string>(&data, x);
+ string result;
+
+ testing::BytesProcessed(n * len);
+ testing::StartTiming();
+ while (n-- > 0) {
+ result.clear();
+ StringPiece s = data;
+ OCRead<string>(&s, &result);
+ }
+}
+
+static void BM_WriteStringIncreasing(int n, int len) { BM_WriteString(n, len); }
+static void BM_ReadStringIncreasing(int n, int len) { BM_ReadString(n, len); }
+
+BENCHMARK(BM_WriteStringIncreasing)->Range(0, 1024);
+BENCHMARK(BM_ReadStringIncreasing)->Range(0, 1024);
+
+} // namespace strings
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
new file mode 100644
index 0000000000..cccd50c7ff
--- /dev/null
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -0,0 +1,312 @@
+#include "tensorflow/core/lib/strings/str_util.h"
+#include <ctype.h>
+
+namespace tensorflow {
+namespace str_util {
+
+static char hex_char[] = "0123456789abcdef";
+
+string CEscape(const string& src) {
+ string dest;
+
+ for (unsigned char c : src) {
+ switch (c) {
+ case '\n':
+ dest.append("\\n");
+ break;
+ case '\r':
+ dest.append("\\r");
+ break;
+ case '\t':
+ dest.append("\\t");
+ break;
+ case '\"':
+ dest.append("\\\"");
+ break;
+ case '\'':
+ dest.append("\\'");
+ break;
+ case '\\':
+ dest.append("\\\\");
+ break;
+ default:
+ // Note that if we emit \xNN and the src character after that is a hex
+ // digit then that digit must be escaped too to prevent it being
+ // interpreted as part of the character code by C.
+ if ((c >= 0x80) || !isprint(c)) {
+ dest.append("\\");
+ dest.push_back(hex_char[c / 64]);
+ dest.push_back(hex_char[(c % 64) / 8]);
+ dest.push_back(hex_char[c % 8]);
+ } else {
+ dest.push_back(c);
+ break;
+ }
+ }
+ }
+
+ return dest;
+}
+
+namespace { // Private helpers for CUnescape().
+
+inline bool is_octal_digit(unsigned char c) { return c >= '0' && c <= '7'; }
+
+inline bool ascii_isxdigit(unsigned char c) {
+ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
+ (c >= 'A' && c <= 'F');
+}
+
+inline int hex_digit_to_int(char c) {
+ int x = static_cast<unsigned char>(c);
+ if (x > '9') {
+ x += 9;
+ }
+ return x & 0xf;
+}
+
+bool CUnescapeInternal(StringPiece source, char* dest, int* dest_len,
+ string* error) {
+ char* d = dest;
+ const char* p = source.data();
+ const char* end = source.end();
+ const char* last_byte = end - 1;
+
+ // Small optimization for case where source = dest and there's no escaping
+ while (p == d && p < end && *p != '\\') p++, d++;
+
+ while (p < end) {
+ if (*p != '\\') {
+ *d++ = *p++;
+ } else {
+ if (++p > last_byte) { // skip past the '\\'
+ if (error) *error = "String cannot end with \\";
+ return false;
+ }
+ switch (*p) {
+ case 'a':
+ *d++ = '\a';
+ break;
+ case 'b':
+ *d++ = '\b';
+ break;
+ case 'f':
+ *d++ = '\f';
+ break;
+ case 'n':
+ *d++ = '\n';
+ break;
+ case 'r':
+ *d++ = '\r';
+ break;
+ case 't':
+ *d++ = '\t';
+ break;
+ case 'v':
+ *d++ = '\v';
+ break;
+ case '\\':
+ *d++ = '\\';
+ break;
+ case '?':
+ *d++ = '\?';
+ break; // \? Who knew?
+ case '\'':
+ *d++ = '\'';
+ break;
+ case '"':
+ *d++ = '\"';
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3': // octal digit: 1 to 3 digits
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ const char* octal_start = p;
+ unsigned int ch = *p - '0';
+ if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0';
+ if (p < last_byte && is_octal_digit(p[1]))
+ ch = ch * 8 + *++p - '0'; // now points at last digit
+ if (ch > 0xff) {
+ if (error) {
+ *error = "Value of \\" +
+ string(octal_start, p + 1 - octal_start) +
+ " exceeds 0xff";
+ }
+ return false;
+ }
+ *d++ = ch;
+ break;
+ }
+ case 'x':
+ case 'X': {
+ if (p >= last_byte) {
+ if (error) *error = "String cannot end with \\x";
+ return false;
+ } else if (!ascii_isxdigit(p[1])) {
+ if (error) *error = "\\x cannot be followed by a non-hex digit";
+ return false;
+ }
+ unsigned int ch = 0;
+ const char* hex_start = p;
+ while (p < last_byte && ascii_isxdigit(p[1]))
+ // Arbitrarily many hex digits
+ ch = (ch << 4) + hex_digit_to_int(*++p);
+ if (ch > 0xFF) {
+ if (error) {
+ *error = "Value of \\" + string(hex_start, p + 1 - hex_start) +
+ " exceeds 0xff";
+ }
+ return false;
+ }
+ *d++ = ch;
+ break;
+ }
+ default: {
+ if (error) *error = string("Unknown escape sequence: \\") + *p;
+ return false;
+ }
+ }
+ p++; // read past letter we escaped
+ }
+ }
+ *dest_len = d - dest;
+ return true;
+}
+
+} // namespace
+
+bool CUnescape(StringPiece source, string* dest, string* error) {
+ dest->resize(source.size());
+ int dest_size;
+ if (!CUnescapeInternal(source, const_cast<char*>(dest->data()), &dest_size,
+ error)) {
+ return false;
+ }
+ dest->erase(dest_size);
+ return true;
+}
+
+bool NumericParse32(const string& text, int32* val) {
+ // Slow, but this code is not performance critical, and this
+ // doesn't bring in any new dependencies
+ char junk;
+ if (sscanf(text.c_str(), "%d%c", val, &junk) == 1) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void StripTrailingWhitespace(string* s) {
+ string::size_type i;
+ for (i = s->size(); i > 0 && isspace((*s)[i - 1]); --i) {
+ }
+ s->resize(i);
+}
+
+// Return lower-cased version of s.
+string Lowercase(StringPiece s) {
+ string result(s.data(), s.size());
+ for (char& c : result) {
+ c = tolower(c);
+ }
+ return result;
+}
+
+// Return upper-cased version of s.
+string Uppercase(StringPiece s) {
+ string result(s.data(), s.size());
+ for (char& c : result) {
+ c = toupper(c);
+ }
+ return result;
+}
+
+void TitlecaseString(string* s, StringPiece delimiters) {
+ bool upper = true;
+ for (string::iterator ss = s->begin(); ss != s->end(); ++ss) {
+ if (upper) {
+ *ss = toupper(*ss);
+ }
+ upper = (delimiters.find(*ss) != StringPiece::npos);
+ }
+}
+
+size_t RemoveLeadingWhitespace(StringPiece* text) {
+ size_t count = 0;
+ const char* ptr = text->data();
+ while (count < text->size() && isspace(*ptr)) {
+ count++;
+ ptr++;
+ }
+ text->remove_prefix(count);
+ return count;
+}
+
+size_t RemoveTrailingWhitespace(StringPiece* text) {
+ size_t count = 0;
+ const char* ptr = text->data() + text->size() - 1;
+ while (count < text->size() && isspace(*ptr)) {
+ ++count;
+ --ptr;
+ }
+ text->remove_suffix(count);
+ return count;
+}
+
+size_t RemoveWhitespaceContext(StringPiece* text) {
+ // use RemoveLeadingWhitespace() and RemoveTrailingWhitespace() to do the job
+ return (RemoveLeadingWhitespace(text) + RemoveTrailingWhitespace(text));
+}
+
+bool ConsumePrefix(StringPiece* s, StringPiece expected) {
+ if (s->starts_with(expected)) {
+ s->remove_prefix(expected.size());
+ return true;
+ }
+ return false;
+}
+
+bool ConsumeLeadingDigits(StringPiece* s, uint64* val) {
+ const char* p = s->data();
+ const char* limit = p + s->size();
+ uint64 v = 0;
+ while (p < limit) {
+ const char c = *p;
+ if (c < '0' || c > '9') break;
+ uint64 new_v = (v * 10) + (c - '0');
+ if (new_v < v) {
+ // Overflow occurred
+ return false;
+ }
+ v = new_v;
+ p++;
+ }
+ if (p > s->data()) {
+ // Consume some digits
+ s->remove_prefix(p - s->data());
+ *val = v;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool SplitAndParseAsInts(StringPiece text, char delim,
+ std::vector<int32>* result) {
+ result->clear();
+ std::vector<string> num_strings = Split(text, delim);
+ for (const auto& s : num_strings) {
+ int32 num;
+ if (!NumericParse32(s, &num)) return false;
+ result->push_back(num);
+ }
+ return true;
+}
+
+} // namespace str_util
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h
new file mode 100644
index 0000000000..34ea462b2d
--- /dev/null
+++ b/tensorflow/core/lib/strings/str_util.h
@@ -0,0 +1,149 @@
+#ifndef TENSORFLOW_LIB_STRINGS_STR_UTIL_H_
+#define TENSORFLOW_LIB_STRINGS_STR_UTIL_H_
+
+#include <string>
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+
+// Basic string utility routines
+namespace tensorflow {
+namespace str_util {
+
+// Returns a version of 'src' where unprintable characters have been
+// escaped using C-style escape sequences.
+string CEscape(const string& src);
+
+// Copies "source" to "dest", rewriting C-style escape sequences --
+// '\n', '\r', '\\', '\ooo', etc -- to their ASCII equivalents.
+//
+// Errors: Sets the description of the first encountered error in
+// 'error'. To disable error reporting, set 'error' to NULL.
+//
+// NOTE: Does not support \u or \U!
+bool CUnescape(StringPiece source, string* dest, string* error);
+
+// If "text" can be successfully parsed as the ASCII representation of
+// an integer, sets "*val" to the value and returns true. Otherwise,
+// returns false.
+bool NumericParse32(const string& text, int32* val);
+
+// Removes any trailing whitespace from "*s".
+void StripTrailingWhitespace(string* s);
+
+// Removes leading ascii_isspace() characters.
+// Returns number of characters removed.
+size_t RemoveLeadingWhitespace(StringPiece* text);
+
+// Removes trailing ascii_isspace() characters.
+// Returns number of characters removed.
+size_t RemoveTrailingWhitespace(StringPiece* text);
+
+// Removes leading and trailing ascii_isspace() chars.
+// Returns number of chars removed.
+size_t RemoveWhitespaceContext(StringPiece* text);
+
+// Consume a leading positive integer value. If any digits were
+// found, store the value of the leading unsigned number in "*val",
+// advance "*s" past the consumed number, and return true. If
+// overflow occurred, returns false. Otherwise, returns false.
+bool ConsumeLeadingDigits(StringPiece* s, uint64* val);
+
+// If "*s" starts with "expected", consume it and return true.
+// Otherwise, return false.
+bool ConsumePrefix(StringPiece* s, StringPiece expected);
+
+// Return lower-cased version of s.
+string Lowercase(StringPiece s);
+
+// Return upper-cased version of s.
+string Uppercase(StringPiece s);
+
+// Capitalize first character of each word in "*s". "delimiters" is a
+// set of characters that can be used as word boundaries.
+void TitlecaseString(string* s, StringPiece delimiters);
+
+// Join functionality
+template <typename T>
+string Join(const std::vector<T>& s, const char* sep);
+template <typename T>
+string Join(const gtl::ArraySlice<T>& s, const char* sep);
+
+struct AllowEmpty {
+ bool operator()(StringPiece sp) const { return true; }
+};
+struct SkipEmpty {
+ bool operator()(StringPiece sp) const { return !sp.empty(); }
+};
+struct SkipWhitespace {
+ bool operator()(StringPiece sp) const {
+ RemoveTrailingWhitespace(&sp);
+ return !sp.empty();
+ }
+};
+
+std::vector<string> Split(StringPiece text, char delim);
+template <typename Predicate>
+std::vector<string> Split(StringPiece text, char delim, Predicate p);
+
+// Split "text" at "delim" characters, and parse each component as
+// an integer. If successful, adds the individual numbers in order
+// to "*result" and returns true. Otherwise returns false.
+bool SplitAndParseAsInts(StringPiece text, char delim,
+ std::vector<int32>* result);
+
+// ------------------------------------------------------------------
+// Implementation details below
+namespace internal {
+template <typename T>
+string JoinHelper(typename gtl::ArraySlice<T>::const_iterator begin,
+ typename gtl::ArraySlice<T>::const_iterator end,
+ const char* sep) {
+ string result;
+ bool first = true;
+ for (typename gtl::ArraySlice<T>::const_iterator it = begin; it != end;
+ ++it) {
+ tensorflow::strings::StrAppend(&result, (first ? "" : sep), *it);
+ first = false;
+ }
+ return result;
+}
+} // namespace internal
+
+template <typename T>
+string Join(const std::vector<T>& s, const char* sep) {
+ return Join<T>(gtl::ArraySlice<T>(s), sep);
+}
+
+template <typename T>
+string Join(const gtl::ArraySlice<T>& s, const char* sep) {
+ return internal::JoinHelper<T>(s.begin(), s.end(), sep);
+}
+
+inline std::vector<string> Split(StringPiece text, char delim) {
+ return Split(text, delim, AllowEmpty());
+}
+
+template <typename Predicate>
+std::vector<string> Split(StringPiece text, char delim, Predicate p) {
+ std::vector<string> result;
+ int token_start = 0;
+ if (!text.empty()) {
+ for (int i = 0; i < text.size() + 1; i++) {
+ if ((i == text.size()) || (text[i] == delim)) {
+ StringPiece token(text.data() + token_start, i - token_start);
+ if (p(token)) {
+ result.push_back(token.ToString());
+ }
+ token_start = i + 1;
+ }
+ }
+ }
+ return result;
+}
+
+} // namespace str_util
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_STRINGS_STR_UTIL_H_
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
new file mode 100644
index 0000000000..f71cc6c609
--- /dev/null
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -0,0 +1,258 @@
+#include "tensorflow/core/lib/strings/str_util.h"
+
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+
+TEST(CEscape, Basic) {
+ EXPECT_EQ(str_util::CEscape("hello"), "hello");
+ EXPECT_EQ(str_util::CEscape("hello\n"), "hello\\n");
+ EXPECT_EQ(str_util::CEscape("hello\r"), "hello\\r");
+ EXPECT_EQ(str_util::CEscape("\t\r\"'"), "\\t\\r\\\"\\'");
+ EXPECT_EQ(str_util::CEscape("\320hi\200"), "\\320hi\\200");
+}
+
+string ExpectCUnescapeSuccess(StringPiece source) {
+ string dest;
+ string error;
+ EXPECT_TRUE(str_util::CUnescape(source, &dest, &error)) << error;
+ return dest;
+}
+
+TEST(CUnescape, Basic) {
+ EXPECT_EQ("hello", ExpectCUnescapeSuccess("hello"));
+ EXPECT_EQ("hello\n", ExpectCUnescapeSuccess("hello\\n"));
+ EXPECT_EQ("hello\r", ExpectCUnescapeSuccess("hello\\r"));
+ EXPECT_EQ("\t\r\"'", ExpectCUnescapeSuccess("\\t\\r\\\"\\'"));
+ EXPECT_EQ("\320hi\200", ExpectCUnescapeSuccess("\\320hi\\200"));
+}
+
+TEST(NumericParse32, Basic) {
+ int32 val = -1234;
+ EXPECT_TRUE(str_util::NumericParse32("0", &val) && val == 0);
+ EXPECT_TRUE(str_util::NumericParse32("123", &val) && val == 123);
+ EXPECT_TRUE(str_util::NumericParse32("-375", &val) && val == -375);
+ EXPECT_FALSE(str_util::NumericParse32("123hello", &val));
+ EXPECT_FALSE(str_util::NumericParse32("hello123", &val));
+}
+
+TEST(StripTrailingWhitespace, Basic) {
+ string test;
+ test = "hello";
+ str_util::StripTrailingWhitespace(&test);
+ EXPECT_EQ(test, "hello");
+
+ test = "foo ";
+ str_util::StripTrailingWhitespace(&test);
+ EXPECT_EQ(test, "foo");
+
+ test = " ";
+ str_util::StripTrailingWhitespace(&test);
+ EXPECT_EQ(test, "");
+
+ test = "";
+ str_util::StripTrailingWhitespace(&test);
+ EXPECT_EQ(test, "");
+
+ test = " abc\t";
+ str_util::StripTrailingWhitespace(&test);
+ EXPECT_EQ(test, " abc");
+}
+
+TEST(RemoveLeadingWhitespace, Basic) {
+ string text = " \t \n \r Quick\t";
+ StringPiece data(text);
+ // check that all whitespace is removed
+ EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 11);
+ EXPECT_EQ(data, StringPiece("Quick\t"));
+ // check that non-whitespace is not removed
+ EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 0);
+ EXPECT_EQ(data, StringPiece("Quick\t"));
+}
+
+TEST(RemoveLeadingWhitespace, TerminationHandling) {
+ // check termination handling
+ string text = "\t";
+ StringPiece data(text);
+ EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 1);
+ EXPECT_EQ(data, StringPiece(""));
+
+ // check termination handling again
+ EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 0);
+ EXPECT_EQ(data, StringPiece(""));
+}
+
+TEST(RemoveTrailingWhitespace, Basic) {
+ string text = " \t \n \r Quick \t";
+ StringPiece data(text);
+ // check that all whitespace is removed
+ EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 2);
+ EXPECT_EQ(data, StringPiece(" \t \n \r Quick"));
+ // check that non-whitespace is not removed
+ EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 0);
+ EXPECT_EQ(data, StringPiece(" \t \n \r Quick"));
+}
+
+TEST(RemoveTrailingWhitespace, TerminationHandling) {
+ // check termination handling
+ string text = "\t";
+ StringPiece data(text);
+ EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 1);
+ EXPECT_EQ(data, StringPiece(""));
+
+ // check termination handling again
+ EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 0);
+ EXPECT_EQ(data, StringPiece(""));
+}
+
+TEST(RemoveWhitespaceContext, Basic) {
+ string text = " \t \n \r Quick \t";
+ StringPiece data(text);
+ // check that all whitespace is removed
+ EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 13);
+ EXPECT_EQ(data, StringPiece("Quick"));
+ // check that non-whitespace is not removed
+ EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 0);
+ EXPECT_EQ(data, StringPiece("Quick"));
+
+ // Test empty string
+ text = "";
+ data = text;
+ EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 0);
+ EXPECT_EQ(data, StringPiece(""));
+}
+
+void TestConsumeLeadingDigits(StringPiece s, int64 expected,
+ StringPiece remaining) {
+ uint64 v;
+ StringPiece input(s);
+ if (str_util::ConsumeLeadingDigits(&input, &v)) {
+ EXPECT_EQ(v, static_cast<uint64>(expected));
+ EXPECT_EQ(input, remaining);
+ } else {
+ EXPECT_LT(expected, 0);
+ EXPECT_EQ(input, remaining);
+ }
+}
+
+TEST(ConsumeLeadingDigits, Basic) {
+ TestConsumeLeadingDigits("123", 123, "");
+ TestConsumeLeadingDigits("a123", -1, "a123");
+ TestConsumeLeadingDigits("9_", 9, "_");
+ TestConsumeLeadingDigits("11111111111xyz", 11111111111ll, "xyz");
+
+ // Overflow case
+ TestConsumeLeadingDigits("1111111111111111111111111111111xyz", -1,
+ "1111111111111111111111111111111xyz");
+
+ // 2^64
+ TestConsumeLeadingDigits("18446744073709551616xyz", -1,
+ "18446744073709551616xyz");
+ // 2^64-1
+ TestConsumeLeadingDigits("18446744073709551615xyz", 18446744073709551615ull,
+ "xyz");
+}
+
+TEST(ConsumePrefix, Basic) {
+ string s("abcdef");
+ StringPiece input(s);
+ EXPECT_FALSE(str_util::ConsumePrefix(&input, "abcdefg"));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_FALSE(str_util::ConsumePrefix(&input, "abce"));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_TRUE(str_util::ConsumePrefix(&input, ""));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_FALSE(str_util::ConsumePrefix(&input, "abcdeg"));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_TRUE(str_util::ConsumePrefix(&input, "abcdef"));
+ EXPECT_EQ(input, "");
+
+ input = s;
+ EXPECT_TRUE(str_util::ConsumePrefix(&input, "abcde"));
+ EXPECT_EQ(input, "f");
+}
+
+TEST(JoinStrings, Basic) {
+ std::vector<string> s;
+ s = {"hi"};
+ EXPECT_EQ(str_util::Join(s, " "), "hi");
+ s = {"hi", "there", "strings"};
+ EXPECT_EQ(str_util::Join(s, " "), "hi there strings");
+
+ std::vector<StringPiece> sp;
+ sp = {"hi"};
+ EXPECT_EQ(str_util::Join(sp, ",,"), "hi");
+ sp = {"hi", "there", "strings"};
+ EXPECT_EQ(str_util::Join(sp, "--"), "hi--there--strings");
+}
+
+TEST(Split, Basic) {
+ EXPECT_TRUE(str_util::Split("", ',').empty());
+ EXPECT_EQ(str_util::Join(str_util::Split("a", ','), "|"), "a");
+ EXPECT_EQ(str_util::Join(str_util::Split(",", ','), "|"), "|");
+ EXPECT_EQ(str_util::Join(str_util::Split("a,b,c", ','), "|"), "a|b|c");
+ EXPECT_EQ(str_util::Join(str_util::Split("a,,,b,,c,", ','), "|"),
+ "a|||b||c|");
+ EXPECT_EQ(str_util::Join(
+ str_util::Split("a,,,b,,c,", ',', str_util::SkipEmpty()), "|"),
+ "a|b|c");
+ EXPECT_EQ(
+ str_util::Join(
+ str_util::Split("a, ,b,,c,", ',', str_util::SkipWhitespace()), "|"),
+ "a|b|c");
+}
+
+TEST(SplitAndParseAsInts, Basic) {
+ std::vector<int32> nums;
+ EXPECT_TRUE(str_util::SplitAndParseAsInts("", ',', &nums));
+ EXPECT_EQ(nums.size(), 0);
+
+ EXPECT_TRUE(str_util::SplitAndParseAsInts("134", ',', &nums));
+ EXPECT_EQ(nums.size(), 1);
+ EXPECT_EQ(nums[0], 134);
+
+ EXPECT_TRUE(str_util::SplitAndParseAsInts("134,2,13,-5", ',', &nums));
+ EXPECT_EQ(nums.size(), 4);
+ EXPECT_EQ(nums[0], 134);
+ EXPECT_EQ(nums[1], 2);
+ EXPECT_EQ(nums[2], 13);
+ EXPECT_EQ(nums[3], -5);
+
+ EXPECT_FALSE(str_util::SplitAndParseAsInts("abc", ',', &nums));
+
+ EXPECT_FALSE(str_util::SplitAndParseAsInts("-13,abc", ',', &nums));
+
+ EXPECT_FALSE(str_util::SplitAndParseAsInts("13,abc,5", ',', &nums));
+}
+
+TEST(Lowercase, Basic) {
+ EXPECT_EQ("", str_util::Lowercase(""));
+ EXPECT_EQ("hello", str_util::Lowercase("hello"));
+ EXPECT_EQ("hello world", str_util::Lowercase("Hello World"));
+}
+
+TEST(Uppercase, Basic) {
+ EXPECT_EQ("", str_util::Uppercase(""));
+ EXPECT_EQ("HELLO", str_util::Uppercase("hello"));
+ EXPECT_EQ("HELLO WORLD", str_util::Uppercase("Hello World"));
+}
+
+TEST(TitlecaseString, Basic) {
+ string s = "sparse_lookup";
+ str_util::TitlecaseString(&s, "_");
+ ASSERT_EQ(s, "Sparse_Lookup");
+
+ s = "sparse_lookup";
+ str_util::TitlecaseString(&s, " ");
+ ASSERT_EQ(s, "Sparse_lookup");
+
+ s = "dense";
+ str_util::TitlecaseString(&s, " ");
+ ASSERT_EQ(s, "Dense");
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/strcat.cc b/tensorflow/core/lib/strings/strcat.cc
new file mode 100644
index 0000000000..e564b9eb73
--- /dev/null
+++ b/tensorflow/core/lib/strings/strcat.cc
@@ -0,0 +1,194 @@
+#include "tensorflow/core/lib/strings/strcat.h"
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/lib/gtl/stl_util.h"
+
+namespace tensorflow {
+namespace strings {
+
+AlphaNum gEmptyAlphaNum("");
+
+AlphaNum::AlphaNum(Hex hex) {
+ char *const end = &digits_[kFastToBufferSize];
+ char *writer = end;
+ uint64 value = hex.value;
+ uint64 width = hex.spec;
+ // We accomplish minimum width by OR'ing in 0x10000 to the user's value,
+ // where 0x10000 is the smallest hex number that is as wide as the user
+ // asked for.
+ uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) | value;
+ static const char hexdigits[] = "0123456789abcdef";
+ do {
+ *--writer = hexdigits[value & 0xF];
+ value >>= 4;
+ mask >>= 4;
+ } while (mask != 0);
+ piece_.set(writer, end - writer);
+}
+
+// ----------------------------------------------------------------------
+// StrCat()
+// This merges the given strings or integers, with no delimiter. This
+// is designed to be the fastest possible way to construct a string out
+// of a mix of raw C strings, StringPieces, strings, and integer values.
+// ----------------------------------------------------------------------
+
+// Append is merely a version of memcpy that returns the address of the byte
+// after the area just overwritten. It comes in multiple flavors to minimize
+// call overhead.
+static char *Append1(char *out, const AlphaNum &x) {
+ memcpy(out, x.data(), x.size());
+ return out + x.size();
+}
+
+static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) {
+ memcpy(out, x1.data(), x1.size());
+ out += x1.size();
+
+ memcpy(out, x2.data(), x2.size());
+ return out + x2.size();
+}
+
+static char *Append4(char *out, const AlphaNum &x1, const AlphaNum &x2,
+ const AlphaNum &x3, const AlphaNum &x4) {
+ memcpy(out, x1.data(), x1.size());
+ out += x1.size();
+
+ memcpy(out, x2.data(), x2.size());
+ out += x2.size();
+
+ memcpy(out, x3.data(), x3.size());
+ out += x3.size();
+
+ memcpy(out, x4.data(), x4.size());
+ return out + x4.size();
+}
+
+string StrCat(const AlphaNum &a, const AlphaNum &b) {
+ string result;
+ gtl::STLStringResizeUninitialized(&result, a.size() + b.size());
+ char *const begin = &*result.begin();
+ char *out = Append2(begin, a, b);
+ DCHECK_EQ(out, begin + result.size());
+ return result;
+}
+
+string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
+ string result;
+ gtl::STLStringResizeUninitialized(&result, a.size() + b.size() + c.size());
+ char *const begin = &*result.begin();
+ char *out = Append2(begin, a, b);
+ out = Append1(out, c);
+ DCHECK_EQ(out, begin + result.size());
+ return result;
+}
+
+string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
+ const AlphaNum &d) {
+ string result;
+ gtl::STLStringResizeUninitialized(&result,
+ a.size() + b.size() + c.size() + d.size());
+ char *const begin = &*result.begin();
+ char *out = Append4(begin, a, b, c, d);
+ DCHECK_EQ(out, begin + result.size());
+ return result;
+}
+
+namespace internal {
+
+// Do not call directly - these are not part of the public API.
+string CatPieces(std::initializer_list<StringPiece> pieces) {
+ string result;
+ size_t total_size = 0;
+ for (const StringPiece piece : pieces) total_size += piece.size();
+ gtl::STLStringResizeUninitialized(&result, total_size);
+
+ char *const begin = &*result.begin();
+ char *out = begin;
+ for (const StringPiece piece : pieces) {
+ const size_t this_size = piece.size();
+ memcpy(out, piece.data(), this_size);
+ out += this_size;
+ }
+ DCHECK_EQ(out, begin + result.size());
+ return result;
+}
+
+// It's possible to call StrAppend with a StringPiece that is itself a fragment
+// of the string we're appending to. However the results of this are random.
+// Therefore, check for this in debug mode. Use unsigned math so we only have
+// to do one comparison.
+#define DCHECK_NO_OVERLAP(dest, src) \
+ DCHECK_GE(uintptr_t((src).data() - (dest).data()), uintptr_t((dest).size()))
+
+void AppendPieces(string *result, std::initializer_list<StringPiece> pieces) {
+ size_t old_size = result->size();
+ size_t total_size = old_size;
+ for (const StringPiece piece : pieces) {
+ DCHECK_NO_OVERLAP(*result, piece);
+ total_size += piece.size();
+ }
+ gtl::STLStringResizeUninitialized(result, total_size);
+
+ char *const begin = &*result->begin();
+ char *out = begin + old_size;
+ for (const StringPiece piece : pieces) {
+ const size_t this_size = piece.size();
+ memcpy(out, piece.data(), this_size);
+ out += this_size;
+ }
+ DCHECK_EQ(out, begin + result->size());
+}
+
+} // namespace internal
+
+void StrAppend(string *result, const AlphaNum &a) {
+ DCHECK_NO_OVERLAP(*result, a);
+ result->append(a.data(), a.size());
+}
+
+void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) {
+ DCHECK_NO_OVERLAP(*result, a);
+ DCHECK_NO_OVERLAP(*result, b);
+ string::size_type old_size = result->size();
+ gtl::STLStringResizeUninitialized(result, old_size + a.size() + b.size());
+ char *const begin = &*result->begin();
+ char *out = Append2(begin + old_size, a, b);
+ DCHECK_EQ(out, begin + result->size());
+}
+
+void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b,
+ const AlphaNum &c) {
+ DCHECK_NO_OVERLAP(*result, a);
+ DCHECK_NO_OVERLAP(*result, b);
+ DCHECK_NO_OVERLAP(*result, c);
+ string::size_type old_size = result->size();
+ gtl::STLStringResizeUninitialized(result,
+ old_size + a.size() + b.size() + c.size());
+ char *const begin = &*result->begin();
+ char *out = Append2(begin + old_size, a, b);
+ out = Append1(out, c);
+ DCHECK_EQ(out, begin + result->size());
+}
+
+void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b,
+ const AlphaNum &c, const AlphaNum &d) {
+ DCHECK_NO_OVERLAP(*result, a);
+ DCHECK_NO_OVERLAP(*result, b);
+ DCHECK_NO_OVERLAP(*result, c);
+ DCHECK_NO_OVERLAP(*result, d);
+ string::size_type old_size = result->size();
+ gtl::STLStringResizeUninitialized(
+ result, old_size + a.size() + b.size() + c.size() + d.size());
+ char *const begin = &*result->begin();
+ char *out = Append4(begin + old_size, a, b, c, d);
+ DCHECK_EQ(out, begin + result->size());
+}
+
+} // namespace strings
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/strcat.h b/tensorflow/core/lib/strings/strcat.h
new file mode 100644
index 0000000000..763ad8368a
--- /dev/null
+++ b/tensorflow/core/lib/strings/strcat.h
@@ -0,0 +1,229 @@
+// #status: RECOMMENDED
+// #category: operations on strings
+// #summary: Merges strings or numbers with no delimiter.
+//
+#ifndef TENSORFLOW_LIB_STRINGS_STRCAT_H_
+#define TENSORFLOW_LIB_STRINGS_STRCAT_H_
+
+#include <string>
+
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/platform/port.h"
+
+// The AlphaNum type was designed to be used as the parameter type for StrCat().
+// Any routine accepting either a string or a number may accept it.
+// The basic idea is that by accepting a "const AlphaNum &" as an argument
+// to your function, your callers will automagically convert bools, integers,
+// and floating point values to strings for you.
+//
+// NOTE: Use of AlphaNum outside of the //strings package is unsupported except
+// for the specific case of function parameters of type "AlphaNum" or "const
+// AlphaNum &". In particular, instantiating AlphaNum directly as a stack
+// variable is not supported.
+//
+// Conversion from 8-bit values is not accepted because if it were, then an
+// attempt to pass ':' instead of ":" might result in a 58 ending up in your
+// result.
+//
+// Bools convert to "0" or "1".
+//
+// Floating point values are converted to a string which, if passed to strtod(),
+// would produce the exact same original double (except in case of NaN; all NaNs
+// are considered the same value). We try to keep the string short but it's not
+// guaranteed to be as short as possible.
+//
+// You can convert to Hexadecimal output rather than Decimal output using Hex.
+// To do this, pass strings::Hex(my_int) as a parameter to StrCat. You may
+// specify a minimum field width using a separate parameter, so the equivalent
+// of Printf("%04x", my_int) is StrCat(Hex(my_int, strings::ZERO_PAD_4))
+//
+// This class has implicit constructors.
+namespace tensorflow {
+namespace strings {
+
+enum PadSpec {
+ NO_PAD = 1,
+ ZERO_PAD_2,
+ ZERO_PAD_3,
+ ZERO_PAD_4,
+ ZERO_PAD_5,
+ ZERO_PAD_6,
+ ZERO_PAD_7,
+ ZERO_PAD_8,
+ ZERO_PAD_9,
+ ZERO_PAD_10,
+ ZERO_PAD_11,
+ ZERO_PAD_12,
+ ZERO_PAD_13,
+ ZERO_PAD_14,
+ ZERO_PAD_15,
+ ZERO_PAD_16,
+};
+
+struct Hex {
+ uint64 value;
+ enum PadSpec spec;
+ template <class Int>
+ explicit Hex(Int v, PadSpec s = NO_PAD)
+ : spec(s) {
+ // Prevent sign-extension by casting integers to
+ // their unsigned counterparts.
+ static_assert(
+ sizeof(v) == 1 || sizeof(v) == 2 || sizeof(v) == 4 || sizeof(v) == 8,
+ "Unknown integer type");
+ value = sizeof(v) == 1
+ ? static_cast<uint8>(v)
+ : sizeof(v) == 2 ? static_cast<uint16>(v)
+ : sizeof(v) == 4 ? static_cast<uint32>(v)
+ : static_cast<uint64>(v);
+ }
+};
+
+class AlphaNum {
+ public:
+ // No bool ctor -- bools convert to an integral type.
+ // A bool ctor would also convert incoming pointers (bletch).
+
+ AlphaNum(int i32) // NOLINT(runtime/explicit)
+ : piece_(digits_, FastInt32ToBufferLeft(i32, digits_) - &digits_[0]) {}
+ AlphaNum(unsigned int u32) // NOLINT(runtime/explicit)
+ : piece_(digits_, FastUInt32ToBufferLeft(u32, digits_) - &digits_[0]) {}
+ AlphaNum(long x) // NOLINT(runtime/explicit)
+ : piece_(digits_, FastInt64ToBufferLeft(x, digits_) - &digits_[0]) {}
+ AlphaNum(unsigned long x) // NOLINT(runtime/explicit)
+ : piece_(digits_, FastUInt64ToBufferLeft(x, digits_) - &digits_[0]) {}
+ AlphaNum(long long int i64) // NOLINT(runtime/explicit)
+ : piece_(digits_, FastInt64ToBufferLeft(i64, digits_) - &digits_[0]) {}
+ AlphaNum(unsigned long long int u64) // NOLINT(runtime/explicit)
+ : piece_(digits_, FastUInt64ToBufferLeft(u64, digits_) - &digits_[0]) {}
+
+ AlphaNum(float f) // NOLINT(runtime/explicit)
+ : piece_(digits_, strlen(FloatToBuffer(f, digits_))) {}
+ AlphaNum(double f) // NOLINT(runtime/explicit)
+ : piece_(digits_, strlen(DoubleToBuffer(f, digits_))) {}
+
+ AlphaNum(Hex hex); // NOLINT(runtime/explicit)
+
+ AlphaNum(const char *c_str) : piece_(c_str) {} // NOLINT(runtime/explicit)
+ AlphaNum(const StringPiece &pc) : piece_(pc) {} // NOLINT(runtime/explicit)
+ AlphaNum(const tensorflow::string &str) // NOLINT(runtime/explicit)
+ : piece_(str) {}
+
+ StringPiece::size_type size() const { return piece_.size(); }
+ const char *data() const { return piece_.data(); }
+ StringPiece Piece() const { return piece_; }
+
+ private:
+ StringPiece piece_;
+ char digits_[kFastToBufferSize];
+
+ // Use ":" not ':'
+ AlphaNum(char c); // NOLINT(runtime/explicit)
+
+ TF_DISALLOW_COPY_AND_ASSIGN(AlphaNum);
+};
+
+extern AlphaNum gEmptyAlphaNum;
+
+using strings::AlphaNum;
+using strings::gEmptyAlphaNum;
+
+// ----------------------------------------------------------------------
+// StrCat()
+// This merges the given strings or numbers, with no delimiter. This
+// is designed to be the fastest possible way to construct a string out
+// of a mix of raw C strings, StringPieces, strings, bool values,
+// and numeric values.
+//
+// Don't use this for user-visible strings. The localization process
+// works poorly on strings built up out of fragments.
+//
+// For clarity and performance, don't use StrCat when appending to a
+// string. In particular, avoid using any of these (anti-)patterns:
+// str.append(StrCat(...))
+// str += StrCat(...)
+// str = StrCat(str, ...)
+// where the last is the worse, with the potential to change a loop
+// from a linear time operation with O(1) dynamic allocations into a
+// quadratic time operation with O(n) dynamic allocations. StrAppend
+// is a better choice than any of the above, subject to the restriction
+// of StrAppend(&str, a, b, c, ...) that none of the a, b, c, ... may
+// be a reference into str.
+// ----------------------------------------------------------------------
+
+// For performance reasons, we have specializations for <= 4 args.
+string StrCat(const AlphaNum &a) TF_MUST_USE_RESULT;
+string StrCat(const AlphaNum &a, const AlphaNum &b) TF_MUST_USE_RESULT;
+string StrCat(const AlphaNum &a, const AlphaNum &b,
+ const AlphaNum &c) TF_MUST_USE_RESULT;
+string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
+ const AlphaNum &d) TF_MUST_USE_RESULT;
+
+// inline definitions must be duplicated due to TF_MUST_USE_RESULT
+inline string StrCat(const AlphaNum &a) { return string(a.data(), a.size()); }
+
+namespace internal {
+
+// Do not call directly - this is not part of the public API.
+string CatPieces(std::initializer_list<StringPiece> pieces);
+void AppendPieces(string *dest, std::initializer_list<StringPiece> pieces);
+
+} // namespace internal
+
+// Support 5 or more arguments
+template <typename... AV>
+string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
+ const AlphaNum &d, const AlphaNum &e,
+ const AV &... args) TF_MUST_USE_RESULT;
+
+template <typename... AV>
+inline string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
+ const AlphaNum &d, const AlphaNum &e, const AV &... args) {
+ return internal::CatPieces({a.Piece(), b.Piece(), c.Piece(), d.Piece(),
+ e.Piece(),
+ static_cast<const AlphaNum &>(args).Piece()...});
+}
+
+// ----------------------------------------------------------------------
+// StrAppend()
+// Same as above, but adds the output to the given string.
+// WARNING: For speed, StrAppend does not try to check each of its input
+// arguments to be sure that they are not a subset of the string being
+// appended to. That is, while this will work:
+//
+// string s = "foo";
+// s += s;
+//
+// This will not (necessarily) work:
+//
+// string s = "foo";
+// StrAppend(&s, s);
+//
+// Note: while StrCat supports appending up to 26 arguments, StrAppend
+// is currently limited to 9. That's rarely an issue except when
+// automatically transforming StrCat to StrAppend, and can easily be
+// worked around as consecutive calls to StrAppend are quite efficient.
+// ----------------------------------------------------------------------
+
+void StrAppend(string *dest, const AlphaNum &a);
+void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b);
+void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b,
+ const AlphaNum &c);
+void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b,
+ const AlphaNum &c, const AlphaNum &d);
+
+// Support 5 or more arguments
+template <typename... AV>
+inline void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b,
+ const AlphaNum &c, const AlphaNum &d, const AlphaNum &e,
+ const AV &... args) {
+ internal::AppendPieces(dest,
+ {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
+ static_cast<const AlphaNum &>(args).Piece()...});
+}
+
+} // namespace strings
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_STRINGS_STRCAT_H_
diff --git a/tensorflow/core/lib/strings/strcat_test.cc b/tensorflow/core/lib/strings/strcat_test.cc
new file mode 100644
index 0000000000..9ff7d81af9
--- /dev/null
+++ b/tensorflow/core/lib/strings/strcat_test.cc
@@ -0,0 +1,324 @@
+#include "tensorflow/core/lib/strings/strcat.h"
+
+#include <string>
+
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/port.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace strings {
+
+// Test StrCat of ints and longs of various sizes and signdedness.
+TEST(StrCat, Ints) {
+ const int16 s = -1;
+ const uint16 us = 2;
+ const int i = -3;
+ const unsigned int ui = 4;
+ const int32 l = -5;
+ const uint32 ul = 6;
+ const int64 ll = -7;
+ const uint64 ull = 8;
+ const ptrdiff_t ptrdiff = -9;
+ const size_t size = 10;
+ const ssize_t ssize = -11;
+ const intptr_t intptr = -12;
+ const uintptr_t uintptr = 13;
+ string answer;
+ answer = StrCat(s, us);
+ EXPECT_EQ(answer, "-12");
+ answer = StrCat(i, ui);
+ EXPECT_EQ(answer, "-34");
+ answer = StrCat(l, ul);
+ EXPECT_EQ(answer, "-56");
+ answer = StrCat(ll, ull);
+ EXPECT_EQ(answer, "-78");
+ answer = StrCat(ptrdiff, size);
+ EXPECT_EQ(answer, "-910");
+ answer = StrCat(ssize, intptr);
+ EXPECT_EQ(answer, "-11-12");
+ answer = StrCat(uintptr, 0);
+ EXPECT_EQ(answer, "130");
+}
+
+TEST(StrCat, Basics) {
+ string result;
+
+ string strs[] = {"Hello", "Cruel", "World"};
+
+ StringPiece pieces[] = {"Hello", "Cruel", "World"};
+
+ const char *c_strs[] = {"Hello", "Cruel", "World"};
+
+ int32 i32s[] = {'H', 'C', 'W'};
+ uint64 ui64s[] = {12345678910LL, 10987654321LL};
+
+ result = StrCat(false, true, 2, 3);
+ EXPECT_EQ(result, "0123");
+
+ result = StrCat(-1);
+ EXPECT_EQ(result, "-1");
+
+ result = StrCat(0.5);
+ EXPECT_EQ(result, "0.5");
+
+ result = StrCat(strs[1], pieces[2]);
+ EXPECT_EQ(result, "CruelWorld");
+
+ result = StrCat(strs[0], ", ", pieces[2]);
+ EXPECT_EQ(result, "Hello, World");
+
+ result = StrCat(strs[0], ", ", strs[1], " ", strs[2], "!");
+ EXPECT_EQ(result, "Hello, Cruel World!");
+
+ result = StrCat(pieces[0], ", ", pieces[1], " ", pieces[2]);
+ EXPECT_EQ(result, "Hello, Cruel World");
+
+ result = StrCat(c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
+ EXPECT_EQ(result, "Hello, Cruel World");
+
+ result = StrCat("ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
+ EXPECT_EQ(result, "ASCII 72, 67 87!");
+
+ result = StrCat(ui64s[0], ", ", ui64s[1], "!");
+ EXPECT_EQ(result, "12345678910, 10987654321!");
+
+ string one = "1"; // Actually, it's the size of this string that we want; a
+ // 64-bit build distinguishes between size_t and uint64,
+ // even though they're both unsigned 64-bit values.
+ result = StrCat("And a ", one.size(), " and a ", &result[2] - &result[0],
+ " and a ", one, " 2 3 4", "!");
+ EXPECT_EQ(result, "And a 1 and a 2 and a 1 2 3 4!");
+
+ // result = StrCat("Single chars won't compile", '!');
+ // result = StrCat("Neither will NULLs", NULL);
+ result = StrCat("To output a char by ASCII/numeric value, use +: ", '!' + 0);
+ EXPECT_EQ(result, "To output a char by ASCII/numeric value, use +: 33");
+
+ float f = 100000.5;
+ result = StrCat("A hundred K and a half is ", f);
+ EXPECT_EQ(result, "A hundred K and a half is 100000.5");
+
+ double d = f;
+ d *= d;
+ result = StrCat("A hundred K and a half squared is ", d);
+ EXPECT_EQ(result, "A hundred K and a half squared is 10000100000.25");
+
+ result = StrCat(1, 2, 333, 4444, 55555, 666666, 7777777, 88888888, 999999999);
+ EXPECT_EQ(result, "12333444455555666666777777788888888999999999");
+}
+
+TEST(StrCat, MaxArgs) {
+ string result;
+ // Test 10 up to 26 arguments, the current maximum
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a");
+ EXPECT_EQ(result, "123456789a");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b");
+ EXPECT_EQ(result, "123456789ab");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c");
+ EXPECT_EQ(result, "123456789abc");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d");
+ EXPECT_EQ(result, "123456789abcd");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e");
+ EXPECT_EQ(result, "123456789abcde");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f");
+ EXPECT_EQ(result, "123456789abcdef");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g");
+ EXPECT_EQ(result, "123456789abcdefg");
+ result =
+ StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g", "h");
+ EXPECT_EQ(result, "123456789abcdefgh");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i");
+ EXPECT_EQ(result, "123456789abcdefghi");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j");
+ EXPECT_EQ(result, "123456789abcdefghij");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j", "k");
+ EXPECT_EQ(result, "123456789abcdefghijk");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j", "k", "l");
+ EXPECT_EQ(result, "123456789abcdefghijkl");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j", "k", "l", "m");
+ EXPECT_EQ(result, "123456789abcdefghijklm");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j", "k", "l", "m", "n");
+ EXPECT_EQ(result, "123456789abcdefghijklmn");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j", "k", "l", "m", "n", "o");
+ EXPECT_EQ(result, "123456789abcdefghijklmno");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j", "k", "l", "m", "n", "o", "p");
+ EXPECT_EQ(result, "123456789abcdefghijklmnop");
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j", "k", "l", "m", "n", "o", "p", "q");
+ EXPECT_EQ(result, "123456789abcdefghijklmnopq");
+ // No limit thanks to C++11's variadic templates
+ result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
+ "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D",
+ "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
+ "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z");
+ EXPECT_EQ(result,
+ "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+}
+
+TEST(StrAppend, Basics) {
+ string result = "existing text";
+
+ string strs[] = {"Hello", "Cruel", "World"};
+
+ StringPiece pieces[] = {"Hello", "Cruel", "World"};
+
+ const char *c_strs[] = {"Hello", "Cruel", "World"};
+
+ int32 i32s[] = {'H', 'C', 'W'};
+ uint64 ui64s[] = {12345678910LL, 10987654321LL};
+
+ string::size_type old_size = result.size();
+ StrAppend(&result, strs[0]);
+ EXPECT_EQ(result.substr(old_size), "Hello");
+
+ old_size = result.size();
+ StrAppend(&result, strs[1], pieces[2]);
+ EXPECT_EQ(result.substr(old_size), "CruelWorld");
+
+ old_size = result.size();
+ StrAppend(&result, strs[0], ", ", pieces[2]);
+ EXPECT_EQ(result.substr(old_size), "Hello, World");
+
+ old_size = result.size();
+ StrAppend(&result, strs[0], ", ", strs[1], " ", strs[2], "!");
+ EXPECT_EQ(result.substr(old_size), "Hello, Cruel World!");
+
+ old_size = result.size();
+ StrAppend(&result, pieces[0], ", ", pieces[1], " ", pieces[2]);
+ EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
+
+ old_size = result.size();
+ StrAppend(&result, c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
+ EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
+
+ old_size = result.size();
+ StrAppend(&result, "ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
+ EXPECT_EQ(result.substr(old_size), "ASCII 72, 67 87!");
+
+ old_size = result.size();
+ StrAppend(&result, ui64s[0], ", ", ui64s[1], "!");
+ EXPECT_EQ(result.substr(old_size), "12345678910, 10987654321!");
+
+ string one = "1"; // Actually, it's the size of this string that we want; a
+ // 64-bit build distinguishes between size_t and uint64,
+ // even though they're both unsigned 64-bit values.
+ old_size = result.size();
+ StrAppend(&result, "And a ", one.size(), " and a ", &result[2] - &result[0],
+ " and a ", one, " 2 3 4", "!");
+ EXPECT_EQ(result.substr(old_size), "And a 1 and a 2 and a 1 2 3 4!");
+
+ // result = StrCat("Single chars won't compile", '!');
+ // result = StrCat("Neither will NULLs", NULL);
+ old_size = result.size();
+ StrAppend(&result, "To output a char by ASCII/numeric value, use +: ",
+ '!' + 0);
+ EXPECT_EQ(result.substr(old_size),
+ "To output a char by ASCII/numeric value, use +: 33");
+
+ float f = 100000.5;
+ old_size = result.size();
+ StrAppend(&result, "A hundred K and a half is ", f);
+ EXPECT_EQ(result.substr(old_size), "A hundred K and a half is 100000.5");
+
+ double d = f;
+ d *= d;
+ old_size = result.size();
+ StrAppend(&result, "A hundred K and a half squared is ", d);
+ EXPECT_EQ(result.substr(old_size),
+ "A hundred K and a half squared is 10000100000.25");
+
+ // Test 9 arguments, the old maximum
+ old_size = result.size();
+ StrAppend(&result, 1, 22, 333, 4444, 55555, 666666, 7777777, 88888888, 9);
+ EXPECT_EQ(result.substr(old_size), "1223334444555556666667777777888888889");
+
+ // No limit thanks to C++11's variadic templates
+ old_size = result.size();
+ StrAppend(&result, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e",
+ "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
+ "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E",
+ "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R",
+ "S", "T", "U", "V", "W", "X", "Y", "Z",
+ "No limit thanks to C++11's variadic templates");
+ EXPECT_EQ(result.substr(old_size),
+ "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "No limit thanks to C++11's variadic templates");
+}
+
+TEST(StrAppend, Death) {
+ string s = "self";
+ EXPECT_DEBUG_DEATH(StrAppend(&s, s.c_str() + 1), "Check failed:");
+ EXPECT_DEBUG_DEATH(StrAppend(&s, s), "Check failed:");
+}
+
+static void CheckHex64(uint64 v) {
+ using tensorflow::strings::Hex;
+ string actual = StrCat(Hex(v, tensorflow::strings::ZERO_PAD_16));
+ string expected = Printf("%016llx", static_cast<unsigned long long>(v));
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+
+ actual = StrCat(Hex(v, tensorflow::strings::ZERO_PAD_8));
+ expected = Printf("%08llx", static_cast<unsigned long long>(v));
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+
+ actual = StrCat(Hex(v));
+ expected = Printf("%llx", static_cast<unsigned long long>(v));
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+}
+
+static void CheckHex32(uint32 v) {
+ using tensorflow::strings::Hex;
+ string actual = StrCat(Hex(v, tensorflow::strings::ZERO_PAD_8));
+ string expected = Printf("%08x", v);
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+
+ actual = StrCat(Hex(v));
+ expected = Printf("%x", v);
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+}
+
+static void CheckHexSigned32(int32 v) {
+ using tensorflow::strings::Hex;
+ string actual = StrCat(Hex(v, tensorflow::strings::ZERO_PAD_8));
+ string expected = Printf("%08x", v);
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+
+ actual = StrCat(Hex(v));
+ expected = Printf("%x", v);
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+}
+
+static void TestFastPrints() {
+ using tensorflow::strings::Hex;
+
+ // Test min int to make sure that works
+ for (int i = 0; i < 10000; i++) {
+ CheckHex64(i);
+ CheckHex32(i);
+ CheckHexSigned32(i);
+ CheckHexSigned32(-i);
+ }
+ CheckHex64(0x123456789abcdef0ull);
+ CheckHex32(0x12345678);
+
+ int8 minus_one_8bit = -1;
+ EXPECT_EQ("ff", StrCat(Hex(minus_one_8bit)));
+
+ int16 minus_one_16bit = -1;
+ EXPECT_EQ("ffff", StrCat(Hex(minus_one_16bit)));
+}
+
+TEST(Numbers, TestFunctionsMovedOverFromNumbersMain) { TestFastPrints(); }
+
+} // namespace strings
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/stringprintf.cc b/tensorflow/core/lib/strings/stringprintf.cc
new file mode 100644
index 0000000000..b354706cbd
--- /dev/null
+++ b/tensorflow/core/lib/strings/stringprintf.cc
@@ -0,0 +1,85 @@
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+#include <errno.h>
+#include <stdarg.h> // For va_list and related operations
+#include <stdio.h> // MSVC requires this for _vsnprintf
+#include <vector>
+
+namespace tensorflow {
+namespace strings {
+
+#ifdef COMPILER_MSVC
+enum { IS_COMPILER_MSVC = 1 };
+#else
+enum { IS_COMPILER_MSVC = 0 };
+#endif
+
+void Appendv(string* dst, const char* format, va_list ap) {
+ // First try with a small fixed size buffer
+ static const int kSpaceLength = 1024;
+ char space[kSpaceLength];
+
+ // It's possible for methods that use a va_list to invalidate
+ // the data in it upon use. The fix is to make a copy
+ // of the structure before using it and use that copy instead.
+ va_list backup_ap;
+ va_copy(backup_ap, ap);
+ int result = vsnprintf(space, kSpaceLength, format, backup_ap);
+ va_end(backup_ap);
+
+ if (result < kSpaceLength) {
+ if (result >= 0) {
+ // Normal case -- everything fit.
+ dst->append(space, result);
+ return;
+ }
+
+ if (IS_COMPILER_MSVC) {
+ // Error or MSVC running out of space. MSVC 8.0 and higher
+ // can be asked about space needed with the special idiom below:
+ va_copy(backup_ap, ap);
+ result = vsnprintf(NULL, 0, format, backup_ap);
+ va_end(backup_ap);
+ }
+
+ if (result < 0) {
+ // Just an error.
+ return;
+ }
+ }
+
+ // Increase the buffer size to the size requested by vsnprintf,
+ // plus one for the closing \0.
+ int length = result + 1;
+ char* buf = new char[length];
+
+ // Restore the va_list before we use it again
+ va_copy(backup_ap, ap);
+ result = vsnprintf(buf, length, format, backup_ap);
+ va_end(backup_ap);
+
+ if (result >= 0 && result < length) {
+ // It fit
+ dst->append(buf, result);
+ }
+ delete[] buf;
+}
+
+string Printf(const char* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ string result;
+ Appendv(&result, format, ap);
+ va_end(ap);
+ return result;
+}
+
+void Appendf(string* dst, const char* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ Appendv(dst, format, ap);
+ va_end(ap);
+}
+
+} // namespace strings
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/stringprintf.h b/tensorflow/core/lib/strings/stringprintf.h
new file mode 100644
index 0000000000..23ca2583ca
--- /dev/null
+++ b/tensorflow/core/lib/strings/stringprintf.h
@@ -0,0 +1,37 @@
+// Printf variants that place their output in a C++ string.
+//
+// Usage:
+// string result = strings::Printf("%d %s\n", 10, "hello");
+// strings::SPrintf(&result, "%d %s\n", 10, "hello");
+// strings::Appendf(&result, "%d %s\n", 20, "there");
+
+#ifndef TENSORFLOW_LIB_STRINGS_STRINGPRINTF_H_
+#define TENSORFLOW_LIB_STRINGS_STRINGPRINTF_H_
+
+#include <stdarg.h>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+namespace strings {
+
+// Return a C++ string
+extern string Printf(const char* format, ...)
+ // Tell the compiler to do printf format string checking.
+ TF_PRINTF_ATTRIBUTE(1, 2);
+
+// Append result to a supplied string
+extern void Appendf(string* dst, const char* format, ...)
+ // Tell the compiler to do printf format string checking.
+ TF_PRINTF_ATTRIBUTE(2, 3);
+
+// Lower-level routine that takes a va_list and appends to a specified
+// string. All other routines are just convenience wrappers around it.
+extern void Appendv(string* dst, const char* format, va_list ap);
+
+} // namespace strings
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_STRINGS_STRINGPRINTF_H_
diff --git a/tensorflow/core/lib/strings/stringprintf_test.cc b/tensorflow/core/lib/strings/stringprintf_test.cc
new file mode 100644
index 0000000000..737ed5c0e0
--- /dev/null
+++ b/tensorflow/core/lib/strings/stringprintf_test.cc
@@ -0,0 +1,113 @@
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace strings {
+namespace {
+
+TEST(PrintfTest, Empty) {
+ EXPECT_EQ("", Printf("%s", string().c_str()));
+ EXPECT_EQ("", Printf("%s", ""));
+}
+
+TEST(PrintfTest, Misc) {
+// MSVC does not support $ format specifier.
+#if !defined(COMPILER_MSVC)
+ EXPECT_EQ("123hello w", Printf("%3$d%2$s %1$c", 'w', "hello", 123));
+#endif // !COMPILER_MSVC
+}
+
+TEST(AppendfTest, Empty) {
+ string value("Hello");
+ const char* empty = "";
+ Appendf(&value, "%s", empty);
+ EXPECT_EQ("Hello", value);
+}
+
+TEST(AppendfTest, EmptyString) {
+ string value("Hello");
+ Appendf(&value, "%s", "");
+ EXPECT_EQ("Hello", value);
+}
+
+TEST(AppendfTest, String) {
+ string value("Hello");
+ Appendf(&value, " %s", "World");
+ EXPECT_EQ("Hello World", value);
+}
+
+TEST(AppendfTest, Int) {
+ string value("Hello");
+ Appendf(&value, " %d", 123);
+ EXPECT_EQ("Hello 123", value);
+}
+
+TEST(PrintfTest, Multibyte) {
+ // If we are in multibyte mode and feed invalid multibyte sequence,
+ // Printf should return an empty string instead of running
+ // out of memory while trying to determine destination buffer size.
+ // see b/4194543.
+
+ char* old_locale = setlocale(LC_CTYPE, NULL);
+ // Push locale with multibyte mode
+ setlocale(LC_CTYPE, "en_US.utf8");
+
+ const char kInvalidCodePoint[] = "\375\067s";
+ string value = Printf("%.*s", 3, kInvalidCodePoint);
+
+ // In some versions of glibc (e.g. eglibc-2.11.1, aka GRTEv2), snprintf
+ // returns error given an invalid codepoint. Other versions
+ // (e.g. eglibc-2.15, aka pre-GRTEv3) emit the codepoint verbatim.
+ // We test that the output is one of the above.
+ EXPECT_TRUE(value.empty() || value == kInvalidCodePoint);
+
+ // Repeat with longer string, to make sure that the dynamically
+ // allocated path in StringAppendV is handled correctly.
+ int n = 2048;
+ char* buf = new char[n + 1];
+ memset(buf, ' ', n - 3);
+ memcpy(buf + n - 3, kInvalidCodePoint, 4);
+ value = Printf("%.*s", n, buf);
+ // See GRTEv2 vs. GRTEv3 comment above.
+ EXPECT_TRUE(value.empty() || value == buf);
+ delete[] buf;
+
+ setlocale(LC_CTYPE, old_locale);
+}
+
+TEST(PrintfTest, NoMultibyte) {
+ // No multibyte handling, but the string contains funny chars.
+ char* old_locale = setlocale(LC_CTYPE, NULL);
+ setlocale(LC_CTYPE, "POSIX");
+ string value = Printf("%.*s", 3, "\375\067s");
+ setlocale(LC_CTYPE, old_locale);
+ EXPECT_EQ("\375\067s", value);
+}
+
+TEST(PrintfTest, DontOverwriteErrno) {
+ // Check that errno isn't overwritten unless we're printing
+ // something significantly larger than what people are normally
+ // printing in their badly written PLOG() statements.
+ errno = ECHILD;
+ string value = Printf("Hello, %s!", "World");
+ EXPECT_EQ(ECHILD, errno);
+}
+
+TEST(PrintfTest, LargeBuf) {
+ // Check that the large buffer is handled correctly.
+ int n = 2048;
+ char* buf = new char[n + 1];
+ memset(buf, ' ', n);
+ buf[n] = 0;
+ string value = Printf("%s", buf);
+ EXPECT_EQ(buf, value);
+ delete[] buf;
+}
+
+} // namespace
+
+} // namespace strings
+} // namespace tensorflow