diff options
author | Jon Skeet <skeet@pobox.com> | 2016-04-20 09:01:58 +0100 |
---|---|---|
committer | Jon Skeet <skeet@pobox.com> | 2016-04-20 09:01:58 +0100 |
commit | f265fb8152ae5f7647f876c5539f3ed174673cb2 (patch) | |
tree | e30e4012a829a2597b25a32664fd02c79052741a /src | |
parent | 36978c39388e4dedc7d6ee88aa38b81bcee50ced (diff) | |
parent | d90d615f716c6d2f0ede2b2ce23705d4c502ea45 (diff) |
Merge pull request #1401 from jskeet/enum-casing
Enum casing in C#
Diffstat (limited to 'src')
7 files changed, 152 insertions, 7 deletions
diff --git a/src/google/protobuf/compiler/csharp/csharp_enum.cc b/src/google/protobuf/compiler/csharp/csharp_enum.cc index 9616f172..bdfcc2be 100644 --- a/src/google/protobuf/compiler/csharp/csharp_enum.cc +++ b/src/google/protobuf/compiler/csharp/csharp_enum.cc @@ -41,6 +41,7 @@ #include <google/protobuf/compiler/csharp/csharp_doc_comment.h> #include <google/protobuf/compiler/csharp/csharp_enum.h> #include <google/protobuf/compiler/csharp/csharp_helpers.h> +#include <google/protobuf/compiler/csharp/csharp_options.h> using google::protobuf::internal::scoped_ptr; @@ -64,11 +65,24 @@ void EnumGenerator::Generate(io::Printer* printer) { "access_level", class_access_level(), "name", descriptor_->name()); printer->Indent(); + std::set<string> used_names; for (int i = 0; i < descriptor_->value_count(); i++) { WriteEnumValueDocComment(printer, descriptor_->value(i)); - printer->Print("$name$ = $number$,\n", - "name", descriptor_->value(i)->name(), - "number", SimpleItoa(descriptor_->value(i)->number())); + string original_name = descriptor_->value(i)->name(); + string name = options()->legacy_enum_values + ? descriptor_->value(i)->name() + : GetEnumValueName(descriptor_->name(), descriptor_->value(i)->name()); + // Make sure we don't get any duplicate names due to prefix removal. + while (!used_names.insert(name).second) { + // It's possible we'll end up giving this warning multiple times, but that's better than not at all. + GOOGLE_LOG(WARNING) << "Duplicate enum value " << name << " (originally " << original_name + << ") in " << descriptor_->name() << "; adding underscore to distinguish"; + name += "_"; + } + printer->Print("[pbr::OriginalName(\"$original_name$\")] $name$ = $number$,\n", + "original_name", original_name, + "name", name, + "number", SimpleItoa(descriptor_->value(i)->number())); } printer->Outdent(); printer->Print("}\n"); diff --git a/src/google/protobuf/compiler/csharp/csharp_field_base.cc b/src/google/protobuf/compiler/csharp/csharp_field_base.cc index 3b88954c..e3c34040 100644 --- a/src/google/protobuf/compiler/csharp/csharp_field_base.cc +++ b/src/google/protobuf/compiler/csharp/csharp_field_base.cc @@ -306,7 +306,9 @@ std::string FieldGeneratorBase::default_value() { std::string FieldGeneratorBase::default_value(const FieldDescriptor* descriptor) { switch (descriptor->type()) { case FieldDescriptor::TYPE_ENUM: - return type_name() + "." + descriptor->default_value_enum()->name(); + // All proto3 enums have a default value of 0, and there's an implicit conversion from the constant 0 to + // any C# enum. This means we don't need to work out what we actually mapped the enum value name to. + return "0"; case FieldDescriptor::TYPE_MESSAGE: case FieldDescriptor::TYPE_GROUP: if (IsWrapperType(descriptor)) { diff --git a/src/google/protobuf/compiler/csharp/csharp_generator.cc b/src/google/protobuf/compiler/csharp/csharp_generator.cc index c13ed65b..d74e8c88 100644 --- a/src/google/protobuf/compiler/csharp/csharp_generator.cc +++ b/src/google/protobuf/compiler/csharp/csharp_generator.cc @@ -83,6 +83,9 @@ bool Generator::Generate( cli_options.base_namespace_specified = true; } else if (options[i].first == "internal_access") { cli_options.internal_access = true; + } else if (options[i].first == "legacy_enum_values") { + // TODO: Remove this before final release + cli_options.legacy_enum_values = true; } else { *error = "Unknown generator option: " + options[i].first; return false; diff --git a/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc b/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc index 7ef7df42..5755fee0 100644 --- a/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc +++ b/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc @@ -30,8 +30,8 @@ #include <memory> -#include <google/protobuf/compiler/ruby/ruby_generator.h> #include <google/protobuf/compiler/command_line_interface.h> +#include <google/protobuf/compiler/csharp/csharp_helpers.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/io/printer.h> @@ -45,7 +45,23 @@ namespace compiler { namespace csharp { namespace { -// TODO(jtattermusch): add some tests. +TEST(CSharpEnumValue, PascalCasedPrefixStripping) { + EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR")); + EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ")); + EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO_BAR")); + EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO__BAR")); + EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "FOO_BAR_BAZ")); + EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "Foo_BarBaz")); + EXPECT_EQ("Bar", GetEnumValueName("FO_O", "FOO_BAR")); + EXPECT_EQ("Bar", GetEnumValueName("FOO", "F_O_O_BAR")); + EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR")); + EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ")); + EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO")); + EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO___")); + // Identifiers can't start with digits + EXPECT_EQ("_2Bar", GetEnumValueName("Foo", "FOO_2_BAR")); + EXPECT_EQ("_2", GetEnumValueName("Foo", "FOO___2")); +} } // namespace } // namespace csharp diff --git a/src/google/protobuf/compiler/csharp/csharp_helpers.cc b/src/google/protobuf/compiler/csharp/csharp_helpers.cc index 41265f9a..6c154c5a 100644 --- a/src/google/protobuf/compiler/csharp/csharp_helpers.cc +++ b/src/google/protobuf/compiler/csharp/csharp_helpers.cc @@ -178,6 +178,104 @@ std::string UnderscoresToPascalCase(const std::string& input) { return UnderscoresToCamelCase(input, true); } +// Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty) +// into a PascalCase string. Precise rules implemented: + +// Previous input character Current character Case +// Any Non-alphanumeric Skipped +// None - first char of input Alphanumeric Upper +// Non-letter (e.g. _ or 1) Alphanumeric Upper +// Numeric Alphanumeric Upper +// Lower letter Alphanumeric Same as current +// Upper letter Alphanumeric Lower +std::string ShoutyToPascalCase(const std::string& input) { + string result; + // Simple way of implementing "always start with upper" + char previous = '_'; + for (int i = 0; i < input.size(); i++) { + char current = input[i]; + if (!ascii_isalnum(current)) { + previous = current; + continue; + } + if (!ascii_isalnum(previous)) { + result += ascii_toupper(current); + } else if (ascii_isdigit(previous)) { + result += ascii_toupper(current); + } else if (ascii_islower(previous)) { + result += current; + } else { + result += ascii_tolower(current); + } + previous = current; + } + return result; +} + +// Attempt to remove a prefix from a value, ignoring casing and skipping underscores. +// (foo, foo_bar) => bar - underscore after prefix is skipped +// (FOO, foo_bar) => bar - casing is ignored +// (foo_bar, foobarbaz) => baz - underscore in prefix is ignored +// (foobar, foo_barbaz) => baz - underscore in value is ignored +// (foo, bar) => bar - prefix isn't matched; return original value +std::string TryRemovePrefix(const std::string& prefix, const std::string& value) { + // First normalize to a lower-case no-underscores prefix to match against + std::string prefix_to_match = ""; + for (size_t i = 0; i < prefix.size(); i++) { + if (prefix[i] != '_') { + prefix_to_match += ascii_tolower(prefix[i]); + } + } + + // This keeps track of how much of value we've consumed + size_t prefix_index, value_index; + for (prefix_index = 0, value_index = 0; + prefix_index < prefix_to_match.size() && value_index < value.size(); + value_index++) { + // Skip over underscores in the value + if (value[value_index] == '_') { + continue; + } + if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) { + // Failed to match the prefix - bail out early. + return value; + } + } + + // If we didn't finish looking through the prefix, we can't strip it. + if (prefix_index < prefix_to_match.size()) { + return value; + } + + // Step over any underscores after the prefix + while (value_index < value.size() && value[value_index] == '_') { + value_index++; + } + + // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip. + if (value_index == value.size()) { + return value; + } + + return value.substr(value_index); +} + +// Format the enum value name in a pleasant way for C#: +// - Strip the enum name as a prefix if possible +// - Convert to PascalCase. +// For example, an enum called Color with a value of COLOR_BLUE should +// result in an enum value in C# called just Blue +std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) { + std::string stripped = TryRemovePrefix(enum_name, enum_value_name); + std::string result = ShoutyToPascalCase(stripped); + // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned + // string is a valid identifier. + if (ascii_isdigit(result[0])) { + result = "_" + result; + } + return result; +} + std::string ToCSharpName(const std::string& name, const FileDescriptor* file) { std::string result = GetFileNamespace(file); if (result != "") { diff --git a/src/google/protobuf/compiler/csharp/csharp_helpers.h b/src/google/protobuf/compiler/csharp/csharp_helpers.h index eaf85014..1563ca7e 100644 --- a/src/google/protobuf/compiler/csharp/csharp_helpers.h +++ b/src/google/protobuf/compiler/csharp/csharp_helpers.h @@ -36,6 +36,7 @@ #define GOOGLE_PROTOBUF_COMPILER_CSHARP_HELPERS_H__ #include <string> +#include <google/protobuf/stubs/port.h> #include <google/protobuf/descriptor.pb.h> #include <google/protobuf/descriptor.h> #include <google/protobuf/compiler/code_generator.h> @@ -93,6 +94,10 @@ inline std::string UnderscoresToCamelCase(const std::string& input, bool cap_nex std::string UnderscoresToPascalCase(const std::string& input); +// Note that we wouldn't normally want to export this (we're not expecting +// it to be used outside libprotoc itself) but this exposes it for testing. +std::string LIBPROTOBUF_EXPORT GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name); + // TODO(jtattermusch): perhaps we could move this to strutil std::string StringToBase64(const std::string& input); diff --git a/src/google/protobuf/compiler/csharp/csharp_options.h b/src/google/protobuf/compiler/csharp/csharp_options.h index 426fb3b5..4079bf7f 100644 --- a/src/google/protobuf/compiler/csharp/csharp_options.h +++ b/src/google/protobuf/compiler/csharp/csharp_options.h @@ -45,7 +45,8 @@ struct Options { file_extension(".cs"), base_namespace(""), base_namespace_specified(false), - internal_access(false) { + internal_access(false), + legacy_enum_values(false) { } // Extension of the generated file. Defaults to ".cs" string file_extension; @@ -68,6 +69,12 @@ struct Options { // Whether the generated classes should have accessibility level of "internal". // Defaults to false that generates "public" classes. bool internal_access; + // By default, C# codegen now uses PascalCased enum values names, after + // removing the enum type name as a prefix (if it *is* a prefix of the value). + // Setting this option reverts to the previous behavior of just copying the + // value name specified in the .proto file, allowing gradual migration. + // This option will be removed before final release. + bool legacy_enum_values; }; } // namespace csharp |