aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core
diff options
context:
space:
mode:
authorGravatar Igor Ganichev <iga@google.com>2017-08-29 14:42:01 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-08-29 14:46:06 -0700
commite8ce634f4a42298a8a386635f873017b6e2df8e3 (patch)
treef5f8fb1f306bd67059ff715c3da1f734b4a5a410 /tensorflow/core
parentdee5007d2440d87592e7359c87bcd2d3b40f914b (diff)
Add ArgDefCase conversion to tensorflow::str_util
PiperOrigin-RevId: 166905993
Diffstat (limited to 'tensorflow/core')
-rw-r--r--tensorflow/core/lib/strings/str_util.cc52
-rw-r--r--tensorflow/core/lib/strings/str_util.h11
-rw-r--r--tensorflow/core/lib/strings/str_util_test.cc32
3 files changed, 95 insertions, 0 deletions
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index c68e14f09f..1159304724 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -248,6 +248,58 @@ string Uppercase(StringPiece s) {
return result;
}
+// Returns `s` converted to a form that is either empty or matches
+// "[a-z][a-z0-9_]*":
+//  - everything before the first ASCII letter is dropped;
+//  - upper-case letters are lower-cased and preceded by '_', unless they
+//    start the result or the previous output char is already '_';
+//  - any other char that is not an ASCII letter or digit becomes '_'.
+// Examples: "^2ILoveYou!" -> "i_love_you_", "I_I" -> "i_i",
+// "II-32" -> "i_i_32".
+//
+// Characters are classified with explicit ASCII range checks instead of
+// <cctype>: isalpha() and friends have undefined behavior for negative `char`
+// values and depend on the current locale, so a non-ASCII byte could
+// otherwise end up in the result and break the regex guarantee.
+string ArgDefCase(StringPiece s) {
+  // ASCII-only classifiers; non-ASCII chars fail every range check below.
+  const auto is_upper = [](char c) { return c >= 'A' && c <= 'Z'; };
+  const auto is_lower = [](char c) { return c >= 'a' && c <= 'z'; };
+  const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
+
+  const size_t n = s.size();
+
+  // Skip everything before the first letter; the result must start with one.
+  size_t first = 0;
+  while (first < n && !is_upper(s[first]) && !is_lower(s[first])) {
+    ++first;
+  }
+
+  // Build the result in one pass. reserve() covers one output char per input
+  // char; inserted underscores may grow the string past that.
+  string result;
+  result.reserve(n - first);
+  for (size_t i = first; i < n; ++i) {
+    const char c = s[i];
+    if (is_upper(c)) {
+      // Separate the new word from the previous one, but never start the
+      // result with '_' and never double up an underscore that is already
+      // there (e.g. "I_I" -> "i_i", not "i__i").
+      if (!result.empty() && result.back() != '_') {
+        result.push_back('_');
+      }
+      result.push_back(static_cast<char>(c - 'A' + 'a'));
+    } else if (is_lower(c) || is_digit(c)) {
+      result.push_back(c);
+    } else {
+      // Anything else, including non-ASCII bytes, is replaced.
+      result.push_back('_');
+    }
+  }
+
+  return result;
+}
+
void TitlecaseString(string* s, StringPiece delimiters) {
bool upper = true;
for (string::iterator ss = s->begin(); ss != s->end(); ++ss) {
diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h
index 669f0d3c52..8cea0f0718 100644
--- a/tensorflow/core/lib/strings/str_util.h
+++ b/tensorflow/core/lib/strings/str_util.h
@@ -81,6 +81,17 @@ string Lowercase(StringPiece s);
// Return upper-cased version of s.
string Uppercase(StringPiece s);
+// Converts "^2ILoveYou!" to "i_love_you_". More specifically:
+//  - skips leading non-alphabetic characters;
+//  - converts every remaining non-alphanumeric character to an underscore;
+//  - replaces each capital letter with '_' followed by that letter in lower
+//    case, except that no '_' is added before the first character of the
+//    result or when the preceding character is already an '_'.
+// This function is useful for producing strings matching "[a-z][a-z0-9_]*"
+// as required by OpDef.ArgDef.name. The resulting string is either empty or
+// matches this regex.
+string ArgDefCase(StringPiece s);
+
// Capitalize first character of each word in "*s". "delimiters" is a
// set of characters that can be used as word boundaries.
void TitlecaseString(string* s, StringPiece delimiters);
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index 040f7447e4..5c735a87a3 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -338,6 +338,38 @@ TEST(Uppercase, Basic) {
EXPECT_EQ("HELLO WORLD", str_util::Uppercase("Hello World"));
}
+// Exercises str_util::ArgDefCase. The suite is named after the function under
+// test, like the neighbouring Uppercase and TitlecaseString suites.
+TEST(ArgDefCase, Basic) {
+  // The example given in the str_util.h documentation.
+  EXPECT_EQ("i_love_you_", str_util::ArgDefCase("^2ILoveYou!"));
+
+  // Inputs that contain no letter at all collapse to the empty string.
+  EXPECT_EQ("", str_util::ArgDefCase(""));
+  EXPECT_EQ("", str_util::ArgDefCase("!"));
+  EXPECT_EQ("", str_util::ArgDefCase("5"));
+  EXPECT_EQ("", str_util::ArgDefCase("!:"));
+  EXPECT_EQ("", str_util::ArgDefCase("5-5"));
+  EXPECT_EQ("", str_util::ArgDefCase("_!"));
+  EXPECT_EQ("", str_util::ArgDefCase("_5"));
+
+  // Leading non-letters (punctuation, underscores, digits) are skipped.
+  EXPECT_EQ("a", str_util::ArgDefCase("_a"));
+  EXPECT_EQ("a", str_util::ArgDefCase("_A"));
+  EXPECT_EQ("i", str_util::ArgDefCase("%I"));
+  EXPECT_EQ("i", str_util::ArgDefCase("-i"));
+  EXPECT_EQ("i", str_util::ArgDefCase("3i"));
+  EXPECT_EQ("i", str_util::ArgDefCase("32i"));
+
+  // A single letter, optionally followed by a digit or punctuation.
+  EXPECT_EQ("i", str_util::ArgDefCase("I"));
+  EXPECT_EQ("i", str_util::ArgDefCase("i"));
+  EXPECT_EQ("i_", str_util::ArgDefCase("I%"));
+  EXPECT_EQ("i_", str_util::ArgDefCase("i%"));
+  EXPECT_EQ("i3", str_util::ArgDefCase("i3"));
+
+  // A capital letter starts a new word unless an '_' already precedes it.
+  EXPECT_EQ("i_a3", str_util::ArgDefCase("i_A3"));
+  EXPECT_EQ("i_i", str_util::ArgDefCase("II"));
+  EXPECT_EQ("i_i", str_util::ArgDefCase("I_I"));
+  EXPECT_EQ("i__i", str_util::ArgDefCase("I__I"));
+  EXPECT_EQ("i_i_32", str_util::ArgDefCase("II-32"));
+  EXPECT_EQ("ii_32", str_util::ArgDefCase("Ii-32"));
+  EXPECT_EQ("hi_there", str_util::ArgDefCase("HiThere"));
+  EXPECT_EQ("hi_hi", str_util::ArgDefCase("Hi!Hi"));
+  EXPECT_EQ("hi_hi", str_util::ArgDefCase("HiHi"));
+  EXPECT_EQ("hihi", str_util::ArgDefCase("Hihi"));
+  EXPECT_EQ("hi_hi", str_util::ArgDefCase("Hi_Hi"));
+}
+
TEST(TitlecaseString, Basic) {
string s = "sparse_lookup";
str_util::TitlecaseString(&s, "_");