aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/lib/strings/str_util.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/lib/strings/str_util.cc')
-rw-r--r--tensorflow/core/lib/strings/str_util.cc312
1 files changed, 312 insertions, 0 deletions
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
new file mode 100644
index 0000000000..cccd50c7ff
--- /dev/null
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -0,0 +1,312 @@
+#include "tensorflow/core/lib/strings/str_util.h"
+#include <ctype.h>
+
+namespace tensorflow {
+namespace str_util {
+
+static char hex_char[] = "0123456789abcdef";
+
+string CEscape(const string& src) {
+ string dest;
+
+ for (unsigned char c : src) {
+ switch (c) {
+ case '\n':
+ dest.append("\\n");
+ break;
+ case '\r':
+ dest.append("\\r");
+ break;
+ case '\t':
+ dest.append("\\t");
+ break;
+ case '\"':
+ dest.append("\\\"");
+ break;
+ case '\'':
+ dest.append("\\'");
+ break;
+ case '\\':
+ dest.append("\\\\");
+ break;
+ default:
+ // Note that if we emit \xNN and the src character after that is a hex
+ // digit then that digit must be escaped too to prevent it being
+ // interpreted as part of the character code by C.
+ if ((c >= 0x80) || !isprint(c)) {
+ dest.append("\\");
+ dest.push_back(hex_char[c / 64]);
+ dest.push_back(hex_char[(c % 64) / 8]);
+ dest.push_back(hex_char[c % 8]);
+ } else {
+ dest.push_back(c);
+ break;
+ }
+ }
+ }
+
+ return dest;
+}
+
+namespace { // Private helpers for CUnescape().
+
+inline bool is_octal_digit(unsigned char c) { return c >= '0' && c <= '7'; }
+
+inline bool ascii_isxdigit(unsigned char c) {
+ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
+ (c >= 'A' && c <= 'F');
+}
+
+inline int hex_digit_to_int(char c) {
+ int x = static_cast<unsigned char>(c);
+ if (x > '9') {
+ x += 9;
+ }
+ return x & 0xf;
+}
+
+bool CUnescapeInternal(StringPiece source, char* dest, int* dest_len,
+ string* error) {
+ char* d = dest;
+ const char* p = source.data();
+ const char* end = source.end();
+ const char* last_byte = end - 1;
+
+ // Small optimization for case where source = dest and there's no escaping
+ while (p == d && p < end && *p != '\\') p++, d++;
+
+ while (p < end) {
+ if (*p != '\\') {
+ *d++ = *p++;
+ } else {
+ if (++p > last_byte) { // skip past the '\\'
+ if (error) *error = "String cannot end with \\";
+ return false;
+ }
+ switch (*p) {
+ case 'a':
+ *d++ = '\a';
+ break;
+ case 'b':
+ *d++ = '\b';
+ break;
+ case 'f':
+ *d++ = '\f';
+ break;
+ case 'n':
+ *d++ = '\n';
+ break;
+ case 'r':
+ *d++ = '\r';
+ break;
+ case 't':
+ *d++ = '\t';
+ break;
+ case 'v':
+ *d++ = '\v';
+ break;
+ case '\\':
+ *d++ = '\\';
+ break;
+ case '?':
+ *d++ = '\?';
+ break; // \? Who knew?
+ case '\'':
+ *d++ = '\'';
+ break;
+ case '"':
+ *d++ = '\"';
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3': // octal digit: 1 to 3 digits
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ const char* octal_start = p;
+ unsigned int ch = *p - '0';
+ if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0';
+ if (p < last_byte && is_octal_digit(p[1]))
+ ch = ch * 8 + *++p - '0'; // now points at last digit
+ if (ch > 0xff) {
+ if (error) {
+ *error = "Value of \\" +
+ string(octal_start, p + 1 - octal_start) +
+ " exceeds 0xff";
+ }
+ return false;
+ }
+ *d++ = ch;
+ break;
+ }
+ case 'x':
+ case 'X': {
+ if (p >= last_byte) {
+ if (error) *error = "String cannot end with \\x";
+ return false;
+ } else if (!ascii_isxdigit(p[1])) {
+ if (error) *error = "\\x cannot be followed by a non-hex digit";
+ return false;
+ }
+ unsigned int ch = 0;
+ const char* hex_start = p;
+ while (p < last_byte && ascii_isxdigit(p[1]))
+ // Arbitrarily many hex digits
+ ch = (ch << 4) + hex_digit_to_int(*++p);
+ if (ch > 0xFF) {
+ if (error) {
+ *error = "Value of \\" + string(hex_start, p + 1 - hex_start) +
+ " exceeds 0xff";
+ }
+ return false;
+ }
+ *d++ = ch;
+ break;
+ }
+ default: {
+ if (error) *error = string("Unknown escape sequence: \\") + *p;
+ return false;
+ }
+ }
+ p++; // read past letter we escaped
+ }
+ }
+ *dest_len = d - dest;
+ return true;
+}
+
+} // namespace
+
+bool CUnescape(StringPiece source, string* dest, string* error) {
+ dest->resize(source.size());
+ int dest_size;
+ if (!CUnescapeInternal(source, const_cast<char*>(dest->data()), &dest_size,
+ error)) {
+ return false;
+ }
+ dest->erase(dest_size);
+ return true;
+}
+
+bool NumericParse32(const string& text, int32* val) {
+ // Slow, but this code is not performance critical, and this
+ // doesn't bring in any new dependencies
+ char junk;
+ if (sscanf(text.c_str(), "%d%c", val, &junk) == 1) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void StripTrailingWhitespace(string* s) {
+ string::size_type i;
+ for (i = s->size(); i > 0 && isspace((*s)[i - 1]); --i) {
+ }
+ s->resize(i);
+}
+
+// Return lower-cased version of s.
+string Lowercase(StringPiece s) {
+ string result(s.data(), s.size());
+ for (char& c : result) {
+ c = tolower(c);
+ }
+ return result;
+}
+
+// Return upper-cased version of s.
+string Uppercase(StringPiece s) {
+ string result(s.data(), s.size());
+ for (char& c : result) {
+ c = toupper(c);
+ }
+ return result;
+}
+
+void TitlecaseString(string* s, StringPiece delimiters) {
+ bool upper = true;
+ for (string::iterator ss = s->begin(); ss != s->end(); ++ss) {
+ if (upper) {
+ *ss = toupper(*ss);
+ }
+ upper = (delimiters.find(*ss) != StringPiece::npos);
+ }
+}
+
+size_t RemoveLeadingWhitespace(StringPiece* text) {
+ size_t count = 0;
+ const char* ptr = text->data();
+ while (count < text->size() && isspace(*ptr)) {
+ count++;
+ ptr++;
+ }
+ text->remove_prefix(count);
+ return count;
+}
+
+size_t RemoveTrailingWhitespace(StringPiece* text) {
+ size_t count = 0;
+ const char* ptr = text->data() + text->size() - 1;
+ while (count < text->size() && isspace(*ptr)) {
+ ++count;
+ --ptr;
+ }
+ text->remove_suffix(count);
+ return count;
+}
+
+size_t RemoveWhitespaceContext(StringPiece* text) {
+ // use RemoveLeadingWhitespace() and RemoveTrailingWhitespace() to do the job
+ return (RemoveLeadingWhitespace(text) + RemoveTrailingWhitespace(text));
+}
+
+bool ConsumePrefix(StringPiece* s, StringPiece expected) {
+ if (s->starts_with(expected)) {
+ s->remove_prefix(expected.size());
+ return true;
+ }
+ return false;
+}
+
+bool ConsumeLeadingDigits(StringPiece* s, uint64* val) {
+ const char* p = s->data();
+ const char* limit = p + s->size();
+ uint64 v = 0;
+ while (p < limit) {
+ const char c = *p;
+ if (c < '0' || c > '9') break;
+ uint64 new_v = (v * 10) + (c - '0');
+ if (new_v < v) {
+ // Overflow occurred
+ return false;
+ }
+ v = new_v;
+ p++;
+ }
+ if (p > s->data()) {
+ // Consume some digits
+ s->remove_prefix(p - s->data());
+ *val = v;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool SplitAndParseAsInts(StringPiece text, char delim,
+ std::vector<int32>* result) {
+ result->clear();
+ std::vector<string> num_strings = Split(text, delim);
+ for (const auto& s : num_strings) {
+ int32 num;
+ if (!NumericParse32(s, &num)) return false;
+ result->push_back(num);
+ }
+ return true;
+}
+
+} // namespace str_util
+} // namespace tensorflow