aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar Jon Skeet <jonskeet@google.com>2016-04-08 12:19:10 +0100
committerGravatar Jon Skeet <jonskeet@google.com>2016-04-20 03:45:59 +0100
commit75626ed79c726ed9fd96d9d143ce6b6c88413bf8 (patch)
treea64be68c42ea3abdd589df459fbf685724bfd82c /src
parent2a197b3eb0d74ac1f1d461ac958ebccaf968031c (diff)
Add C# codegen changes to enum value names (mostly C++)
Overview of changes: - A new C#-specific command-line option, legacy_enum_values to revert to the old behavior - When legacy_enum_values isn't specified, we strip the enum name as a prefix, and PascalCase the value name - A new attribute within the C# code so that we can always tell the original in-proto name Regenerating the C# code with legacy_enum_values leads to code which still compiles and works - but there's more still to do.
Diffstat (limited to 'src')
-rw-r--r--src/google/protobuf/compiler/csharp/csharp_enum.cc20
-rw-r--r--src/google/protobuf/compiler/csharp/csharp_generator.cc3
-rw-r--r--src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc20
-rw-r--r--src/google/protobuf/compiler/csharp/csharp_helpers.cc93
-rw-r--r--src/google/protobuf/compiler/csharp/csharp_helpers.h2
-rw-r--r--src/google/protobuf/compiler/csharp/csharp_options.h9
6 files changed, 141 insertions, 6 deletions
diff --git a/src/google/protobuf/compiler/csharp/csharp_enum.cc b/src/google/protobuf/compiler/csharp/csharp_enum.cc
index 9616f172..bdfcc2be 100644
--- a/src/google/protobuf/compiler/csharp/csharp_enum.cc
+++ b/src/google/protobuf/compiler/csharp/csharp_enum.cc
@@ -41,6 +41,7 @@
#include <google/protobuf/compiler/csharp/csharp_doc_comment.h>
#include <google/protobuf/compiler/csharp/csharp_enum.h>
#include <google/protobuf/compiler/csharp/csharp_helpers.h>
+#include <google/protobuf/compiler/csharp/csharp_options.h>
using google::protobuf::internal::scoped_ptr;
@@ -64,11 +65,24 @@ void EnumGenerator::Generate(io::Printer* printer) {
"access_level", class_access_level(),
"name", descriptor_->name());
printer->Indent();
+ std::set<string> used_names;
for (int i = 0; i < descriptor_->value_count(); i++) {
WriteEnumValueDocComment(printer, descriptor_->value(i));
- printer->Print("$name$ = $number$,\n",
- "name", descriptor_->value(i)->name(),
- "number", SimpleItoa(descriptor_->value(i)->number()));
+ string original_name = descriptor_->value(i)->name();
+ string name = options()->legacy_enum_values
+ ? descriptor_->value(i)->name()
+ : GetEnumValueName(descriptor_->name(), descriptor_->value(i)->name());
+ // Make sure we don't get any duplicate names due to prefix removal.
+ while (!used_names.insert(name).second) {
+ // It's possible we'll end up giving this warning multiple times, but that's better than not at all.
+ GOOGLE_LOG(WARNING) << "Duplicate enum value " << name << " (originally " << original_name
+ << ") in " << descriptor_->name() << "; adding underscore to distinguish";
+ name += "_";
+ }
+ printer->Print("[pbr::OriginalName(\"$original_name$\")] $name$ = $number$,\n",
+ "original_name", original_name,
+ "name", name,
+ "number", SimpleItoa(descriptor_->value(i)->number()));
}
printer->Outdent();
printer->Print("}\n");
diff --git a/src/google/protobuf/compiler/csharp/csharp_generator.cc b/src/google/protobuf/compiler/csharp/csharp_generator.cc
index c13ed65b..d74e8c88 100644
--- a/src/google/protobuf/compiler/csharp/csharp_generator.cc
+++ b/src/google/protobuf/compiler/csharp/csharp_generator.cc
@@ -83,6 +83,9 @@ bool Generator::Generate(
cli_options.base_namespace_specified = true;
} else if (options[i].first == "internal_access") {
cli_options.internal_access = true;
+ } else if (options[i].first == "legacy_enum_values") {
+ // TODO: Remove this before final release
+ cli_options.legacy_enum_values = true;
} else {
*error = "Unknown generator option: " + options[i].first;
return false;
diff --git a/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc b/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc
index 7ef7df42..5755fee0 100644
--- a/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc
+++ b/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc
@@ -30,8 +30,8 @@
#include <memory>
-#include <google/protobuf/compiler/ruby/ruby_generator.h>
#include <google/protobuf/compiler/command_line_interface.h>
+#include <google/protobuf/compiler/csharp/csharp_helpers.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/printer.h>
@@ -45,7 +45,23 @@ namespace compiler {
namespace csharp {
namespace {
-// TODO(jtattermusch): add some tests.
+TEST(CSharpEnumValue, PascalCasedPrefixStripping) {
+ EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR"));
+ EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ"));
+ EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO_BAR"));
+ EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO__BAR"));
+ EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "FOO_BAR_BAZ"));
+ EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "Foo_BarBaz"));
+ EXPECT_EQ("Bar", GetEnumValueName("FO_O", "FOO_BAR"));
+ EXPECT_EQ("Bar", GetEnumValueName("FOO", "F_O_O_BAR"));
+ EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR"));
+ EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ"));
+ EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO"));
+ EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO___"));
+ // Identifiers can't start with digits
+ EXPECT_EQ("_2Bar", GetEnumValueName("Foo", "FOO_2_BAR"));
+ EXPECT_EQ("_2", GetEnumValueName("Foo", "FOO___2"));
+}
} // namespace
} // namespace csharp
diff --git a/src/google/protobuf/compiler/csharp/csharp_helpers.cc b/src/google/protobuf/compiler/csharp/csharp_helpers.cc
index 41265f9a..efd01556 100644
--- a/src/google/protobuf/compiler/csharp/csharp_helpers.cc
+++ b/src/google/protobuf/compiler/csharp/csharp_helpers.cc
@@ -178,6 +178,99 @@ std::string UnderscoresToPascalCase(const std::string& input) {
return UnderscoresToCamelCase(input, true);
}
+// Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty)
+// into a PascalCase string. Precise rules implemented:
+
+// Previous input character Current character Case
+// Any Non-alphanumeric Skipped
+// None - first char of input Alphanumeric Upper
+// Non-letter (e.g. _ or 1) Alphanumeric Upper
+// Numeric Alphanumeric Upper
+// Lower letter Alphanumeric Same as current
+// Upper letter Alphanumeric Lower
+std::string ShoutyToPascalCase(const std::string& input) {
+ string result;
+ // Simple way of implementing "always start with upper"
+ char previous = '_';
+ for (int i = 0; i < input.size(); i++) {
+ char current = input[i];
+ if (!ascii_isalnum(current)) {
+ previous = current;
+ continue;
+ }
+ if (!ascii_isalnum(previous)) {
+ result += ascii_toupper(current);
+ } else if (ascii_isdigit(previous)) {
+ result += ascii_toupper(current);
+ } else if (ascii_islower(previous)) {
+ result += current;
+ } else {
+ result += ascii_tolower(current);
+ }
+ previous = current;
+ }
+ return result;
+}
+
+// Attempt to remove a prefix from a value, ignoring casing and skipping underscores.
+// (foo, foo_bar) => bar - underscore after prefix is skipped
+// (FOO, foo_bar) => bar - casing is ignored
+// (foo_bar, foobarbaz) => baz - underscore in prefix is ignored
+// (foobar, foo_barbaz) => baz - underscore in value is ignored
+// (foo, bar) => bar - prefix isn't matched; return original value
+std::string TryRemovePrefix(const std::string& prefix, const std::string& value) {
+ // First normalize to a lower-case no-underscores prefix to match against
+ std::string prefix_to_match = "";
+ for (size_t i = 0; i < prefix.size(); i++) {
+ if (prefix[i] != '_') {
+ prefix_to_match += ascii_tolower(prefix[i]);
+ }
+ }
+
+ // This keeps track of how much of value we've consumed
+ size_t prefix_index, value_index;
+ for (prefix_index = 0, value_index = 0;
+ prefix_index < prefix_to_match.size() && value_index < value.size();
+ value_index++) {
+ // Skip over underscores in the value
+ if (value[value_index] == '_') {
+ continue;
+ }
+ if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) {
+ // Failed to match the prefix - bail out early.
+ return value;
+ }
+ }
+
+ // If we didn't finish looking through the prefix, we can't strip it.
+ if (prefix_index < prefix_to_match.size()) {
+ return value;
+ }
+
+ // Step over any underscores after the prefix
+ while (value_index < value.size() && value[value_index] == '_') {
+ value_index++;
+ }
+
+ // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip.
+ if (value_index == value.size()) {
+ return value;
+ }
+
+ return value.substr(value_index);
+}
+
+std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) {
+ std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
+ std::string result = ShoutyToPascalCase(stripped);
+ // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned
+ // string is a valid identifier.
+ if (ascii_isdigit(result[0])) {
+ result = "_" + result;
+ }
+ return result;
+}
+
std::string ToCSharpName(const std::string& name, const FileDescriptor* file) {
std::string result = GetFileNamespace(file);
if (result != "") {
diff --git a/src/google/protobuf/compiler/csharp/csharp_helpers.h b/src/google/protobuf/compiler/csharp/csharp_helpers.h
index eaf85014..8830e957 100644
--- a/src/google/protobuf/compiler/csharp/csharp_helpers.h
+++ b/src/google/protobuf/compiler/csharp/csharp_helpers.h
@@ -93,6 +93,8 @@ inline std::string UnderscoresToCamelCase(const std::string& input, bool cap_nex
std::string UnderscoresToPascalCase(const std::string& input);
+std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name);
+
// TODO(jtattermusch): perhaps we could move this to strutil
std::string StringToBase64(const std::string& input);
diff --git a/src/google/protobuf/compiler/csharp/csharp_options.h b/src/google/protobuf/compiler/csharp/csharp_options.h
index 426fb3b5..4079bf7f 100644
--- a/src/google/protobuf/compiler/csharp/csharp_options.h
+++ b/src/google/protobuf/compiler/csharp/csharp_options.h
@@ -45,7 +45,8 @@ struct Options {
file_extension(".cs"),
base_namespace(""),
base_namespace_specified(false),
- internal_access(false) {
+ internal_access(false),
+ legacy_enum_values(false) {
}
// Extension of the generated file. Defaults to ".cs"
string file_extension;
@@ -68,6 +69,12 @@ struct Options {
// Whether the generated classes should have accessibility level of "internal".
// Defaults to false that generates "public" classes.
bool internal_access;
+ // By default, C# codegen now uses PascalCased enum values names, after
+ // removing the enum type name as a prefix (if it *is* a prefix of the value).
+ // Setting this option reverts to the previous behavior of just copying the
+ // value name specified in the .proto file, allowing gradual migration.
+ // This option will be removed before final release.
+ bool legacy_enum_values;
};
} // namespace csharp