diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-11-14 12:02:25 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-11-14 12:22:01 -0800 |
commit | 317c011b19c90c8aeed4ce200d33f68b56311150 (patch) | |
tree | a2b3506900f4b36721b2b0b3bfd8c61d95446d79 | |
parent | 7a346347ee5a4078e2bd1cca00247c4219af326c (diff) |
Fixed a bug in tensorflow::str_util::CUnescape. Added a str util test that
failed without this change.
The CUnescape did a const_cast to the result string's buffer, which made it write
the same buffer without copying.
PiperOrigin-RevId: 175714391
-rw-r--r-- | tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/lib/strings/str_util.cc | 28 | ||||
-rw-r--r-- | tensorflow/core/lib/strings/str_util_test.cc | 13 |
3 files changed, 36 insertions, 7 deletions
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 8eeed339b8..29ae3296ca 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -866,7 +866,7 @@ TEST_F(HloParserTest, CommaBetweenSubAttributes) { const string original = R"(HloModule test_comma_module: ENTRY %test_comma.v4 () -> f32[] { - ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="const"} + ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="::const"} } )"; diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc index 240e1454e5..d28857803d 100644 --- a/tensorflow/core/lib/strings/str_util.cc +++ b/tensorflow/core/lib/strings/str_util.cc @@ -84,15 +84,32 @@ inline int hex_digit_to_int(char c) { return x & 0xf; } -bool CUnescapeInternal(StringPiece source, char* dest, +bool CUnescapeInternal(StringPiece source, string* dest, string::size_type* dest_len, string* error) { - char* d = dest; const char* p = source.data(); const char* end = source.end(); const char* last_byte = end - 1; + // We are going to write the result to dest with its iterator. If our string + // implementation uses copy-on-write, this will trigger a copy-on-write of + // dest's buffer; that is, dest will be assigned a new buffer. + // + // Note that the following way is NOT a legal way to modify a string's + // content: + // + // char* d = const_cast<char*>(dest->data()); + // + // This won't trigger copy-on-write of the string, and so is dangerous when + // the buffer is shared. + auto d = dest->begin(); + // Small optimization for case where source = dest and there's no escaping - while (p == d && p < end && *p != '\\') p++, d++; + if (source.data() == dest->data()) { + while (p < end && *p != '\\') { + p++; + d++; + } + } while (p < end) { if (*p != '\\') { @@ -192,7 +209,7 @@ bool CUnescapeInternal(StringPiece source, char* dest, p++; // read past letter we escaped } } - *dest_len = d - dest; + *dest_len = d - dest->begin(); return true; } @@ -215,8 +232,7 @@ bool SplitAndParseAsInts(StringPiece text, char delim, bool CUnescape(StringPiece source, string* dest, string* error) { dest->resize(source.size()); string::size_type dest_size; - if (!CUnescapeInternal(source, const_cast<char*>(dest->data()), &dest_size, - error)) { + if (!CUnescapeInternal(source, dest, &dest_size, error)) { return false; } dest->erase(dest_size); diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc index 5c735a87a3..d5909d17aa 100644 --- a/tensorflow/core/lib/strings/str_util_test.cc +++ b/tensorflow/core/lib/strings/str_util_test.cc @@ -43,6 +43,19 @@ TEST(CUnescape, Basic) { EXPECT_EQ("\320hi\200", ExpectCUnescapeSuccess("\\320hi\\200")); } +TEST(CUnescape, HandlesCopyOnWriteStrings) { + string dest = "hello"; + string read = dest; + // For std::string, read and dest now share the same buffer. + + string error; + StringPiece source = "llohe"; + // CUnescape is going to write "llohe" to dest, so dest's buffer will be + // reallocated, and read's buffer remains untouched. + EXPECT_TRUE(str_util::CUnescape(source, &dest, &error)); + EXPECT_EQ("hello", read); +} + TEST(StripTrailingWhitespace, Basic) { string test; test = "hello"; |