aboutsummaryrefslogtreecommitdiffhomepage
path: root/python/google/protobuf/json_format.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/google/protobuf/json_format.py')
-rw-r--r--python/google/protobuf/json_format.py601
1 files changed, 601 insertions, 0 deletions
diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py
new file mode 100644
index 00000000..09110e04
--- /dev/null
+++ b/python/google/protobuf/json_format.py
@@ -0,0 +1,601 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Contains routines for printing protocol messages in JSON format."""
+
+__author__ = 'jieluo@google.com (Jie Luo)'
+
+import base64
+from datetime import datetime
+import json
+import math
+import re
+
+from google.protobuf import descriptor
+
+_TIMESTAMPFOMAT = '%Y-%m-%dT%H:%M:%S'
+_NUMBER = re.compile(u'[0-9+-][0-9e.+-]*')
+_INTEGER = re.compile(u'[0-9+-]')
+_INT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_INT32,
+ descriptor.FieldDescriptor.CPPTYPE_UINT32,
+ descriptor.FieldDescriptor.CPPTYPE_INT64,
+ descriptor.FieldDescriptor.CPPTYPE_UINT64])
+_INT64_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_INT64,
+ descriptor.FieldDescriptor.CPPTYPE_UINT64])
+_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
+ descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
+if str is bytes:
+ _UNICODETYPE = unicode
+else:
+ _UNICODETYPE = str
+
+
+class SerializeToJsonError(Exception):
+ """Thrown if serialization to JSON fails."""
+
+
+class ParseError(Exception):
+ """Thrown in case of parsing error."""
+
+
+def MessageToJson(message, including_default_value_fields=False):
+ """Converts protobuf message to JSON format.
+
+ Args:
+ message: The protocol buffers message instance to serialize.
+ including_default_value_fields: If True, singular primitive fields,
+ repeated fields, and map fields will always be serialized. If
+ False, only serialize non-empty fields. Singular message fields
+ and oneof fields are not affected by this option.
+
+ Returns:
+ A string containing the JSON formatted protocol buffer message.
+ """
+ js = _MessageToJsonObject(message, including_default_value_fields)
+ return json.dumps(js, indent=2)
+
+
+def _MessageToJsonObject(message, including_default_value_fields):
+ """Converts message to an object according to Proto3 JSON Specification."""
+ message_descriptor = message.DESCRIPTOR
+ if _IsTimestampMessage(message_descriptor):
+ return _TimestampMessageToJsonObject(message)
+ if _IsDurationMessage(message_descriptor):
+ return _DurationMessageToJsonObject(message)
+ if _IsFieldMaskMessage(message_descriptor):
+ return _FieldMaskMessageToJsonObject(message)
+ if _IsWrapperMessage(message_descriptor):
+ return _WrapperMessageToJsonObject(message)
+ return _RegularMessageToJsonObject(message, including_default_value_fields)
+
+
+def _IsMapEntry(field):
+ return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
+ field.message_type.has_options and
+ field.message_type.GetOptions().map_entry)
+
+
+def _RegularMessageToJsonObject(message, including_default_value_fields):
+ """Converts normal message according to Proto3 JSON Specification."""
+ js = {}
+ fields = message.ListFields()
+
+ try:
+ for field, value in fields:
+ name = field.camelcase_name
+ if _IsMapEntry(field):
+ # Convert a map field.
+ js_map = {}
+ for key in value:
+ js_map[key] = _ConvertFieldToJsonObject(
+ field.message_type.fields_by_name['value'],
+ value[key], including_default_value_fields)
+ js[name] = js_map
+ elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
+ # Convert a repeated field.
+ repeated = []
+ for element in value:
+ repeated.append(_ConvertFieldToJsonObject(
+ field, element, including_default_value_fields))
+ js[name] = repeated
+ else:
+ js[name] = _ConvertFieldToJsonObject(
+ field, value, including_default_value_fields)
+
+ # Serialize default value if including_default_value_fields is True.
+ if including_default_value_fields:
+ message_descriptor = message.DESCRIPTOR
+ for field in message_descriptor.fields:
+ # Singular message fields and oneof fields will not be affected.
+ if ((field.label != descriptor.FieldDescriptor.LABEL_REPEATED and
+ field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE) or
+ field.containing_oneof):
+ continue
+ name = field.camelcase_name
+ if name in js:
+ # Skip the field which has been serailized already.
+ continue
+ if _IsMapEntry(field):
+ js[name] = {}
+ elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
+ js[name] = []
+ else:
+ js[name] = _ConvertFieldToJsonObject(field, field.default_value)
+
+ except ValueError as e:
+ raise SerializeToJsonError(
+ 'Failed to serialize {0} field: {1}'.format(field.name, e))
+
+ return js
+
+
+def _ConvertFieldToJsonObject(
+ field, value, including_default_value_fields=False):
+ """Converts field value according to Proto3 JSON Specification."""
+ if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
+ return _MessageToJsonObject(value, including_default_value_fields)
+ elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
+ enum_value = field.enum_type.values_by_number.get(value, None)
+ if enum_value is not None:
+ return enum_value.name
+ else:
+ raise SerializeToJsonError('Enum field contains an integer value '
+ 'which can not mapped to an enum value.')
+ elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
+ if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
+ # Use base64 Data encoding for bytes
+ return base64.b64encode(value).decode('utf-8')
+ else:
+ return value
+ elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
+ if value:
+ return True
+ else:
+ return False
+ elif field.cpp_type in _INT64_TYPES:
+ return str(value)
+ elif field.cpp_type in _FLOAT_TYPES:
+ if math.isinf(value):
+ if value < 0.0:
+ return '-Infinity'
+ else:
+ return 'Infinity'
+ if math.isnan(value):
+ return 'NaN'
+ return value
+
+
+def _IsTimestampMessage(message_descriptor):
+ return (message_descriptor.name == 'Timestamp' and
+ message_descriptor.file.name == 'google/protobuf/timestamp.proto')
+
+
+def _TimestampMessageToJsonObject(message):
+ """Converts Timestamp message according to Proto3 JSON Specification."""
+ nanos = message.nanos % 1e9
+ dt = datetime.utcfromtimestamp(
+ message.seconds + (message.nanos - nanos) / 1e9)
+ result = dt.isoformat()
+ if (nanos % 1e9) == 0:
+ # If there are 0 fractional digits, the fractional
+ # point '.' should be omitted when serializing.
+ return result + 'Z'
+ if (nanos % 1e6) == 0:
+ # Serialize 3 fractional digits.
+ return result + '.%03dZ' % (nanos / 1e6)
+ if (nanos % 1e3) == 0:
+ # Serialize 6 fractional digits.
+ return result + '.%06dZ' % (nanos / 1e3)
+ # Serialize 9 fractional digits.
+ return result + '.%09dZ' % nanos
+
+
+def _IsDurationMessage(message_descriptor):
+ return (message_descriptor.name == 'Duration' and
+ message_descriptor.file.name == 'google/protobuf/duration.proto')
+
+
+def _DurationMessageToJsonObject(message):
+ """Converts Duration message according to Proto3 JSON Specification."""
+ if message.seconds < 0 or message.nanos < 0:
+ result = '-'
+ seconds = - message.seconds + int((0 - message.nanos) / 1e9)
+ nanos = (0 - message.nanos) % 1e9
+ else:
+ result = ''
+ seconds = message.seconds + int(message.nanos / 1e9)
+ nanos = message.nanos % 1e9
+ result += '%d' % seconds
+ if (nanos % 1e9) == 0:
+ # If there are 0 fractional digits, the fractional
+ # point '.' should be omitted when serializing.
+ return result + 's'
+ if (nanos % 1e6) == 0:
+ # Serialize 3 fractional digits.
+ return result + '.%03ds' % (nanos / 1e6)
+ if (nanos % 1e3) == 0:
+ # Serialize 6 fractional digits.
+ return result + '.%06ds' % (nanos / 1e3)
+ # Serialize 9 fractional digits.
+ return result + '.%09ds' % nanos
+
+
+def _IsFieldMaskMessage(message_descriptor):
+ return (message_descriptor.name == 'FieldMask' and
+ message_descriptor.file.name == 'google/protobuf/field_mask.proto')
+
+
+def _FieldMaskMessageToJsonObject(message):
+ """Converts FieldMask message according to Proto3 JSON Specification."""
+ result = ''
+ first = True
+ for path in message.paths:
+ if not first:
+ result += ','
+ result += path
+ first = False
+ return result
+
+
+def _IsWrapperMessage(message_descriptor):
+ return message_descriptor.file.name == 'google/protobuf/wrappers.proto'
+
+
+def _WrapperMessageToJsonObject(message):
+ return _ConvertFieldToJsonObject(
+ message.DESCRIPTOR.fields_by_name['value'], message.value)
+
+
+def _DuplicateChecker(js):
+ result = {}
+ for name, value in js:
+ if name in result:
+ raise ParseError('Failed to load JSON: duplicate key ' + name)
+ result[name] = value
+ return result
+
+
+def Parse(text, message):
+ """Parses a JSON representation of a protocol message into a message.
+
+ Args:
+ text: Message JSON representation.
+ message: A protocol beffer message to merge into.
+
+ Returns:
+ The same message passed as argument.
+
+ Raises::
+ ParseError: On JSON parsing problems.
+ """
+ if not isinstance(text, _UNICODETYPE): text = text.decode('utf-8')
+ try:
+ js = json.loads(text, object_pairs_hook=_DuplicateChecker)
+ except ValueError as e:
+ raise ParseError('Failed to load JSON: ' + str(e))
+ _ConvertFieldValuePair(js, message)
+ return message
+
+
+def _ConvertFieldValuePair(js, message):
+ """Convert field value pairs into regular message.
+
+ Args:
+ js: A JSON object to convert the field value pairs.
+ message: A regular protocol message to record the data.
+
+ Raises:
+ ParseError: In case of problems converting.
+ """
+ names = []
+ message_descriptor = message.DESCRIPTOR
+ for name in js:
+ try:
+ field = message_descriptor.fields_by_camelcase_name.get(name, None)
+ if not field:
+ raise ParseError(
+ 'Message type "{0}" has no field named "{1}".'.format(
+ message_descriptor.full_name, name))
+ if name in names:
+ raise ParseError(
+ 'Message type "{0}" should not have multiple "{1}" fields.'.format(
+ message.DESCRIPTOR.full_name, name))
+ names.append(name)
+ # Check no other oneof field is parsed.
+ if field.containing_oneof is not None:
+ oneof_name = field.containing_oneof.name
+ if oneof_name in names:
+ raise ParseError('Message type "{0}" should not have multiple "{1}" '
+ 'oneof fields.'.format(
+ message.DESCRIPTOR.full_name, oneof_name))
+ names.append(oneof_name)
+
+ value = js[name]
+ if value is None:
+ message.ClearField(field.name)
+ continue
+
+ # Parse field value.
+ if _IsMapEntry(field):
+ message.ClearField(field.name)
+ _ConvertMapFieldValue(value, message, field)
+ elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
+ message.ClearField(field.name)
+ if not isinstance(value, list):
+ raise ParseError('repeated field {0} must be in [] which is '
+ '{1}'.format(name, value))
+ for item in value:
+ if item is None:
+ continue
+ if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
+ sub_message = getattr(message, field.name).add()
+ _ConvertMessage(item, sub_message)
+ else:
+ getattr(message, field.name).append(
+ _ConvertScalarFieldValue(item, field))
+ elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
+ sub_message = getattr(message, field.name)
+ _ConvertMessage(value, sub_message)
+ else:
+ setattr(message, field.name, _ConvertScalarFieldValue(value, field))
+ except ParseError as e:
+ if field and field.containing_oneof is None:
+ raise ParseError('Failed to parse {0} field: {1}'.format(name, e))
+ else:
+ raise ParseError(str(e))
+ except ValueError as e:
+ raise ParseError('Failed to parse {0} field: {1}'.format(name, e))
+ except TypeError as e:
+ raise ParseError('Failed to parse {0} field: {1}'.format(name, e))
+
+
+def _ConvertMessage(value, message):
+ """Convert a JSON object into a message.
+
+ Args:
+ value: A JSON object.
+ message: A WKT or regular protocol message to record the data.
+
+ Raises:
+ ParseError: In case of convert problems.
+ """
+ message_descriptor = message.DESCRIPTOR
+ if _IsTimestampMessage(message_descriptor):
+ _ConvertTimestampMessage(value, message)
+ elif _IsDurationMessage(message_descriptor):
+ _ConvertDurationMessage(value, message)
+ elif _IsFieldMaskMessage(message_descriptor):
+ _ConvertFieldMaskMessage(value, message)
+ elif _IsWrapperMessage(message_descriptor):
+ _ConvertWrapperMessage(value, message)
+ else:
+ _ConvertFieldValuePair(value, message)
+
+
+def _ConvertTimestampMessage(value, message):
+ """Convert a JSON representation into Timestamp message."""
+ timezone_offset = value.find('Z')
+ if timezone_offset == -1:
+ timezone_offset = value.find('+')
+ if timezone_offset == -1:
+ timezone_offset = value.rfind('-')
+ if timezone_offset == -1:
+ raise ParseError(
+ 'Failed to parse timestamp: missing valid timezone offset.')
+ time_value = value[0:timezone_offset]
+ # Parse datetime and nanos
+ point_position = time_value.find('.')
+ if point_position == -1:
+ second_value = time_value
+ nano_value = ''
+ else:
+ second_value = time_value[:point_position]
+ nano_value = time_value[point_position + 1:]
+ date_object = datetime.strptime(second_value, _TIMESTAMPFOMAT)
+ seconds = (date_object - datetime(1970, 1, 1)).total_seconds()
+ if len(nano_value) > 9:
+ raise ParseError(
+ 'Failed to parse Timestamp: nanos {0} more than '
+ '9 fractional digits.'.format(nano_value))
+ if nano_value:
+ nanos = round(float('0.' + nano_value) * 1e9)
+ else:
+ nanos = 0
+ # Parse timezone offsets
+ if value[timezone_offset] == 'Z':
+ if len(value) != timezone_offset + 1:
+ raise ParseError(
+ 'Failed to parse timestamp: invalid trailing data {0}.'.format(value))
+ else:
+ timezone = value[timezone_offset:]
+ pos = timezone.find(':')
+ if pos == -1:
+ raise ParseError(
+ 'Invalid timezone offset value: ' + timezone)
+ if timezone[0] == '+':
+ seconds += (int(timezone[1:pos])*60+int(timezone[pos+1:]))*60
+ else:
+ seconds -= (int(timezone[1:pos])*60+int(timezone[pos+1:]))*60
+ # Set seconds and nanos
+ message.seconds = int(seconds)
+ message.nanos = int(nanos)
+
+
+def _ConvertDurationMessage(value, message):
+ """Convert a JSON representation into Duration message."""
+ if value[-1] != 's':
+ raise ParseError(
+ 'Duration must end with letter "s": ' + value)
+ try:
+ duration = float(value[:-1])
+ except ValueError:
+ raise ParseError(
+ 'Couldn\'t parse duration: ' + value)
+ message.seconds = int(duration)
+ message.nanos = int(round((duration - message.seconds) * 1e9))
+
+
+def _ConvertFieldMaskMessage(value, message):
+ """Convert a JSON representation into FieldMask message."""
+ for path in value.split(','):
+ message.paths.append(path)
+
+
+def _ConvertWrapperMessage(value, message):
+ """Convert a JSON representation into Wrapper message."""
+ field = message.DESCRIPTOR.fields_by_name['value']
+ setattr(message, 'value', _ConvertScalarFieldValue(value, field))
+
+
+def _ConvertMapFieldValue(value, message, field):
+ """Convert map field value for a message map field.
+
+ Args:
+ value: A JSON object to convert the map field value.
+ message: A protocol message to record the converted data.
+ field: The descriptor of the map field to be converted.
+
+ Raises:
+ ParseError: In case of convert problems.
+ """
+ if not isinstance(value, dict):
+ raise ParseError(
+ 'Map fieled {0} must be in {} which is {1}.'.format(field.name, value))
+ key_field = field.message_type.fields_by_name['key']
+ value_field = field.message_type.fields_by_name['value']
+ for key in value:
+ key_value = _ConvertScalarFieldValue(key, key_field, True)
+ if value_field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
+ _ConvertMessage(value[key], getattr(message, field.name)[key_value])
+ else:
+ getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
+ value[key], value_field)
+
+
+def _ConvertScalarFieldValue(value, field, require_quote=False):
+ """Convert a single scalar field value.
+
+ Args:
+ value: A scalar value to convert the scalar field value.
+ field: The descriptor of the field to convert.
+ require_quote: If True, '"' is required for the field value.
+
+ Returns:
+ The converted scalar field value
+
+ Raises:
+ ParseError: In case of convert problems.
+ """
+ if field.cpp_type in _INT_TYPES:
+ return _ConvertInteger(value)
+ elif field.cpp_type in _FLOAT_TYPES:
+ return _ConvertFloat(value)
+ elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
+ return _ConvertBool(value, require_quote)
+ elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
+ if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
+ return base64.b64decode(value)
+ else:
+ return value
+ elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
+ # Convert an enum value.
+ enum_value = field.enum_type.values_by_name.get(value, None)
+ if enum_value is None:
+ raise ParseError(
+ 'Enum value must be a string literal with double quotes. '
+ 'Type "{0}" has no value named {1}.'.format(
+ field.enum_type.full_name, value))
+ return enum_value.number
+
+
+def _ConvertInteger(value):
+ """Convert an integer.
+
+ Args:
+ value: A scalar value to convert.
+
+ Returns:
+ The integer value.
+
+ Raises:
+ ParseError: If an integer couldn't be consumed.
+ """
+ if isinstance(value, float):
+ raise ParseError('Couldn\'t parse integer: {0}'.format(value))
+
+ if isinstance(value, _UNICODETYPE) and not _INTEGER.match(value):
+ raise ParseError('Couldn\'t parse integer: "{0}"'.format(value))
+
+ return int(value)
+
+
+def _ConvertFloat(value):
+ """Convert an floating point number."""
+ if value == 'nan':
+ raise ParseError('Couldn\'t parse float "nan", use "NaN" instead')
+ try:
+ # Assume Python compatible syntax.
+ return float(value)
+ except ValueError:
+ # Check alternative spellings.
+ if value == '-Infinity':
+ return float('-inf')
+ elif value == 'Infinity':
+ return float('inf')
+ elif value == 'NaN':
+ return float('nan')
+ else:
+ raise ParseError('Couldn\'t parse float: {0}'.format(value))
+
+
+def _ConvertBool(value, require_quote):
+ """Convert a boolean value.
+
+ Args:
+ value: A scalar value to convert.
+ require_quote: If True, '"' is required for the boolean value.
+
+ Returns:
+ The bool parsed.
+
+ Raises:
+ ParseError: If a boolean value couldn't be consumed.
+ """
+ if require_quote:
+ if value == 'true':
+ return True
+ elif value == 'false':
+ return False
+ else:
+ raise ParseError('Expect "true" or "false", not {0}.'.format(value))
+
+ if not isinstance(value, bool):
+ raise ParseError('Expected true or false without quotes.')
+ return value