diff options
author | 2017-08-29 14:42:01 -0700 | |
---|---|---|
committer | 2017-08-29 14:46:06 -0700 | |
commit | e8ce634f4a42298a8a386635f873017b6e2df8e3 (patch) | |
tree | f5f8fb1f306bd67059ff715c3da1f734b4a5a410 /tensorflow/core | |
parent | dee5007d2440d87592e7359c87bcd2d3b40f914b (diff) |
Add ArgDefCase conversion to tensorflow::str_util
PiperOrigin-RevId: 166905993
Diffstat (limited to 'tensorflow/core')
-rw-r--r-- | tensorflow/core/lib/strings/str_util.cc | 52 | ||||
-rw-r--r-- | tensorflow/core/lib/strings/str_util.h | 11 | ||||
-rw-r--r-- | tensorflow/core/lib/strings/str_util_test.cc | 32 |
3 files changed, 95 insertions, 0 deletions
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc index c68e14f09f..1159304724 100644 --- a/tensorflow/core/lib/strings/str_util.cc +++ b/tensorflow/core/lib/strings/str_util.cc @@ -248,6 +248,58 @@ string Uppercase(StringPiece s) { return result; } +string ArgDefCase(StringPiece s) { + const int n = s.size(); + + // Compute the size of resulting string. + // Number of extra underscores we will need to add. + int extra_us = 0; + // Number of non-alpha chars in the beginning to skip. + int to_skip = 0; + for (int i = 0; i < n; ++i) { + // If we are skipping and current letter is non-alpha, skip it as well + if (i == to_skip && !isalpha(s[i])) { + ++to_skip; + continue; + } + + // If we are here, we are not skipping any more. + // If this letter is upper case, not the very first char in the + // resulting string, and previous letter isn't replaced with an underscore, + // we will need to insert an underscore. + if (isupper(s[i]) && i != to_skip && i > 0 && isalnum(s[i - 1])) { + ++extra_us; + } + } + + // Initialize result with all '_'s. There is no string + // constructor that does not initialize memory. + string result(n + extra_us - to_skip, '_'); + // i - index into s + // j - index into result + for (int i = to_skip, j = 0; i < n; ++i, ++j) { + DCHECK_LT(j, result.size()); + char c = s[i]; + // If c is not alphanumeric, we don't need to do anything + // since there is already an underscore in its place. + if (isalnum(c)) { + if (isupper(c)) { + // If current char is upper case, we might need to insert an + // underscore. 
+ if (i != to_skip) { + DCHECK_GT(j, 0); + if (result[j - 1] != '_') ++j; + } + result[j] = tolower(c); + } else { + result[j] = c; + } + } + } + + return result; +} + void TitlecaseString(string* s, StringPiece delimiters) { bool upper = true; for (string::iterator ss = s->begin(); ss != s->end(); ++ss) { diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h index 669f0d3c52..8cea0f0718 100644 --- a/tensorflow/core/lib/strings/str_util.h +++ b/tensorflow/core/lib/strings/str_util.h @@ -81,6 +81,17 @@ string Lowercase(StringPiece s); // Return upper-cased version of s. string Uppercase(StringPiece s); +// Converts "^2ILoveYou!" to "i_love_you_". More specifically: +// - converts all non-alphanumeric characters to underscores +// - replaces each occurrence of a capital letter (unless it is the very +// first character or there is already an '_' before it) with '_' +// followed by this letter in lower case +// - skips leading non-alpha characters +// This method is useful for producing strings matching "[a-z][a-z0-9_]*" +// as required by OpDef.ArgDef.name. The resulting string is either empty or +// matches this regex. +string ArgDefCase(StringPiece s); + // Capitalize first character of each word in "*s". "delimiters" is a // set of characters that can be used as word boundaries. 
void TitlecaseString(string* s, StringPiece delimiters); diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc index 040f7447e4..5c735a87a3 100644 --- a/tensorflow/core/lib/strings/str_util_test.cc +++ b/tensorflow/core/lib/strings/str_util_test.cc @@ -338,6 +338,38 @@ TEST(Uppercase, Basic) { EXPECT_EQ("HELLO WORLD", str_util::Uppercase("Hello World")); } +TEST(SnakeCase, Basic) { + EXPECT_EQ("", str_util::ArgDefCase("")); + EXPECT_EQ("", str_util::ArgDefCase("!")); + EXPECT_EQ("", str_util::ArgDefCase("5")); + EXPECT_EQ("", str_util::ArgDefCase("!:")); + EXPECT_EQ("", str_util::ArgDefCase("5-5")); + EXPECT_EQ("", str_util::ArgDefCase("_!")); + EXPECT_EQ("", str_util::ArgDefCase("_5")); + EXPECT_EQ("a", str_util::ArgDefCase("_a")); + EXPECT_EQ("a", str_util::ArgDefCase("_A")); + EXPECT_EQ("i", str_util::ArgDefCase("I")); + EXPECT_EQ("i", str_util::ArgDefCase("i")); + EXPECT_EQ("i_", str_util::ArgDefCase("I%")); + EXPECT_EQ("i_", str_util::ArgDefCase("i%")); + EXPECT_EQ("i", str_util::ArgDefCase("%I")); + EXPECT_EQ("i", str_util::ArgDefCase("-i")); + EXPECT_EQ("i", str_util::ArgDefCase("3i")); + EXPECT_EQ("i", str_util::ArgDefCase("32i")); + EXPECT_EQ("i3", str_util::ArgDefCase("i3")); + EXPECT_EQ("i_a3", str_util::ArgDefCase("i_A3")); + EXPECT_EQ("i_i", str_util::ArgDefCase("II")); + EXPECT_EQ("i_i", str_util::ArgDefCase("I_I")); + EXPECT_EQ("i__i", str_util::ArgDefCase("I__I")); + EXPECT_EQ("i_i_32", str_util::ArgDefCase("II-32")); + EXPECT_EQ("ii_32", str_util::ArgDefCase("Ii-32")); + EXPECT_EQ("hi_there", str_util::ArgDefCase("HiThere")); + EXPECT_EQ("hi_hi", str_util::ArgDefCase("Hi!Hi")); + EXPECT_EQ("hi_hi", str_util::ArgDefCase("HiHi")); + EXPECT_EQ("hihi", str_util::ArgDefCase("Hihi")); + EXPECT_EQ("hi_hi", str_util::ArgDefCase("Hi_Hi")); +} + TEST(TitlecaseString, Basic) { string s = "sparse_lookup"; str_util::TitlecaseString(&s, "_"); |