aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-11-14 12:02:25 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-11-14 12:22:01 -0800
commit317c011b19c90c8aeed4ce200d33f68b56311150 (patch)
treea2b3506900f4b36721b2b0b3bfd8c61d95446d79
parent7a346347ee5a4078e2bd1cca00247c4219af326c (diff)
Fixed a bug in tensorflow::str_util::CUnescape. Added a str util test that
failed without this change. The CUnescape did a const_cast to the result string's buffer, which made it write the same buffer without copying. PiperOrigin-RevId: 175714391
-rw-r--r--tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc2
-rw-r--r--tensorflow/core/lib/strings/str_util.cc28
-rw-r--r--tensorflow/core/lib/strings/str_util_test.cc13
3 files changed, 36 insertions, 7 deletions
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 8eeed339b8..29ae3296ca 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -866,7 +866,7 @@ TEST_F(HloParserTest, CommaBetweenSubAttributes) {
const string original = R"(HloModule test_comma_module:
ENTRY %test_comma.v4 () -> f32[] {
- ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="const"}
+ ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="::const"}
}
)";
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index 240e1454e5..d28857803d 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -84,15 +84,32 @@ inline int hex_digit_to_int(char c) {
return x & 0xf;
}
-bool CUnescapeInternal(StringPiece source, char* dest,
+bool CUnescapeInternal(StringPiece source, string* dest,
string::size_type* dest_len, string* error) {
- char* d = dest;
const char* p = source.data();
const char* end = source.end();
const char* last_byte = end - 1;
+ // We are going to write the result to dest with its iterator. If our string
+ // implementation uses copy-on-write, this will trigger a copy-on-write of
+ // dest's buffer; that is, dest will be assigned a new buffer.
+ //
+ // Note that the following way is NOT a legal way to modify a string's
+ // content:
+ //
+ // char* d = const_cast<char*>(dest->data());
+ //
+ // This won't trigger copy-on-write of the string, and so is dangerous when
+ // the buffer is shared.
+ auto d = dest->begin();
+
// Small optimization for case where source = dest and there's no escaping
- while (p == d && p < end && *p != '\\') p++, d++;
+ if (source.data() == dest->data()) {
+ while (p < end && *p != '\\') {
+ p++;
+ d++;
+ }
+ }
while (p < end) {
if (*p != '\\') {
@@ -192,7 +209,7 @@ bool CUnescapeInternal(StringPiece source, char* dest,
p++; // read past letter we escaped
}
}
- *dest_len = d - dest;
+ *dest_len = d - dest->begin();
return true;
}
@@ -215,8 +232,7 @@ bool SplitAndParseAsInts(StringPiece text, char delim,
bool CUnescape(StringPiece source, string* dest, string* error) {
dest->resize(source.size());
string::size_type dest_size;
- if (!CUnescapeInternal(source, const_cast<char*>(dest->data()), &dest_size,
- error)) {
+ if (!CUnescapeInternal(source, dest, &dest_size, error)) {
return false;
}
dest->erase(dest_size);
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index 5c735a87a3..d5909d17aa 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -43,6 +43,19 @@ TEST(CUnescape, Basic) {
EXPECT_EQ("\320hi\200", ExpectCUnescapeSuccess("\\320hi\\200"));
}
+TEST(CUnescape, HandlesCopyOnWriteStrings) {
+ string dest = "hello";
+ string read = dest;
+ // For std::string, read and dest now share the same buffer.
+
+ string error;
+ StringPiece source = "llohe";
+ // CUnescape is going to write "llohe" to dest, so dest's buffer will be
+ // reallocated, and read's buffer remains untouched.
+ EXPECT_TRUE(str_util::CUnescape(source, &dest, &error));
+ EXPECT_EQ("hello", read);
+}
+
TEST(StripTrailingWhitespace, Basic) {
string test;
test = "hello";