From 56cfe9306ab60af2a17c26b291438a9449d30f5d Mon Sep 17 00:00:00 2001
From: rsgowman <rgowman@google.com>
Date: Fri, 11 May 2018 10:27:48 -0400
Subject: Refactor serializer (#1250)

Moved Tag, Reader, Writer from serializer.cc's anon namespace to firebase::firestore::nanopb

This should be bug-for-bug compatible. No changes were made to the moved methods.
---
 .../src/firebase/firestore/remote/CMakeLists.txt   |   2 +
 .../src/firebase/firestore/remote/serializer.cc    | 503 +--------------------
 2 files changed, 9 insertions(+), 496 deletions(-)

(limited to 'Firestore/core/src/firebase/firestore/remote')
diff --git a/Firestore/core/src/firebase/firestore/remote/CMakeLists.txt b/Firestore/core/src/firebase/firestore/remote/CMakeLists.txt
index 7f528fb..fc51b37 100644
--- a/Firestore/core/src/firebase/firestore/remote/CMakeLists.txt
+++ b/Firestore/core/src/firebase/firestore/remote/CMakeLists.txt
@@ -21,7 +21,9 @@ cc_library(
     serializer.cc
   DEPENDS
     firebase_firestore_model
+    firebase_firestore_nanopb
     firebase_firestore_protos_nanopb
+    firebase_firestore_util
     grpc::grpc
     nanopb
 )
diff --git a/Firestore/core/src/firebase/firestore/remote/serializer.cc b/Firestore/core/src/firebase/firestore/remote/serializer.cc
index 209f2b1..7aec38b 100644
--- a/Firestore/core/src/firebase/firestore/remote/serializer.cc
+++ b/Firestore/core/src/firebase/firestore/remote/serializer.cc
@@ -25,8 +25,12 @@
 #include <utility>
 
 #include "Firestore/Protos/nanopb/google/firestore/v1beta1/document.pb.h"
+#include "Firestore/core/include/firebase/firestore/firestore_errors.h"
 #include "Firestore/core/include/firebase/firestore/timestamp.h"
 #include "Firestore/core/src/firebase/firestore/model/resource_path.h"
+#include "Firestore/core/src/firebase/firestore/nanopb/reader.h"
+#include "Firestore/core/src/firebase/firestore/nanopb/tag.h"
+#include "Firestore/core/src/firebase/firestore/nanopb/writer.h"
 #include "Firestore/core/src/firebase/firestore/timestamp_internal.h"
 #include "Firestore/core/src/firebase/firestore/util/firebase_assert.h"
 
@@ -41,419 +45,18 @@ using firebase::firestore::model::DocumentKey;
 using firebase::firestore::model::FieldValue;
 using firebase::firestore::model::ObjectValue;
 using firebase::firestore::model::ResourcePath;
+using firebase::firestore::nanopb::Reader;
+using firebase::firestore::nanopb::Tag;
+using firebase::firestore::nanopb::Writer;
 using firebase::firestore::util::Status;
 using firebase::firestore::util::StatusOr;
 
 namespace {
 
-class Writer;
-
-class Reader;
-
 void EncodeObject(Writer* writer, const ObjectValue& object_value);
 
 ObjectValue::Map DecodeObject(Reader* reader);
 
-/**
- * Represents a nanopb tag.
- *
- * field_number is one of the field tags that nanopb generates based off of
- * the proto messages. They're typically named in the format:
- * <parentNameSpace>_<childNameSpace>_<message>_<field>_tag, e.g.
- * google_firestore_v1beta1_Document_name_tag.
- */
-struct Tag {
-  pb_wire_type_t wire_type;
-  uint32_t field_number;
-};
-
-/**
- * Docs TODO(rsgowman). But currently, this just wraps the underlying nanopb
- * pb_ostream_t. Also doc how to check status.
- */
-class Writer {
- public:
-  /**
-   * Creates an output stream that writes to the specified vector. Note that
-   * this vector pointer must remain valid for the lifetime of this Writer.
-   *
-   * (This is roughly equivalent to the nanopb function
-   * pb_ostream_from_buffer())
-   *
-   * @param out_bytes where the output should be serialized to.
-   */
-  static Writer Wrap(std::vector<uint8_t>* out_bytes);
-
-  /**
-   * Creates a non-writing output stream used to calculate the size of
-   * the serialized output.
-   */
-  static Writer Sizing() {
-    return Writer(PB_OSTREAM_SIZING);
-  }
-
-  /**
-   * Writes a message type to the output stream.
-   *
-   * This essentially wraps calls to nanopb's pb_encode_tag() method.
-   */
-  void WriteTag(Tag tag);
-
-  /**
-   * Writes a nanopb message to the output stream.
-   *
-   * This essentially wraps calls to nanopb's `pb_encode()` method. If we didn't
-   * use `oneof`s in our protos, this would be the primary way of encoding
-   * messages.
-   */
-  void WriteNanopbMessage(const pb_field_t fields[], const void* src_struct);
-
-  void WriteSize(size_t size);
-  void WriteNull();
-  void WriteBool(bool bool_value);
-  void WriteInteger(int64_t integer_value);
-
-  void WriteString(const std::string& string_value);
-
-  /**
-   * Writes a message and its length.
-   *
-   * When writing a top level message, protobuf doesn't include the length
-   * (since you can get that already from the length of the binary output.) But
-   * when writing a sub/nested message, you must include the length in the
-   * serialization.
-   *
-   * Call this method when writing a nested message. Provide a function to
-   * write the message itself. This method will calculate the size of the
-   * written message (using the provided function with a non-writing sizing
-   * stream), write out the size (and perform sanity checks), and then serialize
-   * the message by calling the provided function a second time.
-   */
-  void WriteNestedMessage(const std::function<void(Writer*)>& write_message_fn);
-
-  size_t bytes_written() const {
-    return stream_.bytes_written;
-  }
-
-  Status status() const {
-    return status_;
-  }
-
- private:
-  Status status_ = Status::OK();
-
-  /**
-   * Creates a new Writer, based on the given nanopb pb_ostream_t. Note that
-   * a shallow copy will be taken. (Non-null pointers within this struct must
-   * remain valid for the lifetime of this Writer.)
-   */
-  explicit Writer(const pb_ostream_t& stream) : stream_(stream) {
-  }
-
-  /**
-   * Writes a "varint" to the output stream.
-   *
-   * This essentially wraps calls to nanopb's pb_encode_varint() method.
-   *
-   * Note that (despite the value parameter type) this works for bool, enum,
-   * int32, int64, uint32 and uint64 proto field types.
-   *
-   * Note: This is not expected to be called directly, but rather only
-   * via the other Write* methods (i.e. WriteBool, WriteLong, etc)
-   *
-   * @param value The value to write, represented as a uint64_t.
-   */
-  void WriteVarint(uint64_t value);
-
-  pb_ostream_t stream_;
-};
-
-/**
- * Docs TODO(rsgowman). But currently, this just wraps the underlying nanopb
- * pb_istream_t.
- */
-class Reader {
- public:
-  /**
-   * Creates an input stream that reads from the specified bytes. Note that
-   * this reference must remain valid for the lifetime of this Reader.
-   *
-   * (This is roughly equivalent to the nanopb function
-   * pb_istream_from_buffer())
-   *
-   * @param bytes where the input should be deserialized from.
-   */
-  static Reader Wrap(const uint8_t* bytes, size_t length);
-
-  /**
-   * Reads a message type from the input stream.
-   *
-   * This essentially wraps calls to nanopb's pb_decode_tag() method.
-   */
-  Tag ReadTag();
-
-  /**
-   * Reads a nanopb message from the input stream.
-   *
-   * This essentially wraps calls to nanopb's pb_decode() method. If we didn't
-   * use `oneof`s in our protos, this would be the primary way of decoding
-   * messages.
-   */
-  void ReadNanopbMessage(const pb_field_t fields[], void* dest_struct);
-
-  void ReadNull();
-  bool ReadBool();
-  int64_t ReadInteger();
-
-  std::string ReadString();
-
-  /**
-   * Reads a message and its length.
-   *
-   * Analog to Writer::WriteNestedMessage(). See that methods docs for further
-   * details.
-   *
-   * Call this method when reading a nested message. Provide a function to read
-   * the message itself.
-   */
-  template <typename T>
-  T ReadNestedMessage(const std::function<T(Reader*)>& read_message_fn);
-
-  size_t bytes_left() const {
-    return stream_.bytes_left;
-  }
-
-  Status status() const {
-    return status_;
-  }
-
-  void set_status(Status status) {
-    status_ = status;
-  }
-
- private:
-  /**
-   * Creates a new Reader, based on the given nanopb pb_istream_t. Note that
-   * a shallow copy will be taken. (Non-null pointers within this struct must
-   * remain valid for the lifetime of this Reader.)
-   */
-  explicit Reader(pb_istream_t stream) : stream_(stream) {
-  }
-
-  /**
-   * Reads a "varint" from the input stream.
-   *
-   * This essentially wraps calls to nanopb's pb_decode_varint() method.
-   *
-   * Note that (despite the return type) this works for bool, enum, int32,
-   * int64, uint32 and uint64 proto field types.
-   *
-   * Note: This is not expected to be called direclty, but rather only via the
-   * other Decode* methods (i.e. DecodeBool, DecodeLong, etc)
-   *
-   * @return The decoded varint as a uint64_t.
-   */
-  uint64_t ReadVarint();
-
-  Status status_ = Status::OK();
-
-  pb_istream_t stream_;
-};
-
-Writer Writer::Wrap(std::vector<uint8_t>* out_bytes) {
-  // TODO(rsgowman): find a better home for this constant.
-  // A document is defined to have a max size of 1MiB - 4 bytes.
-  static const size_t kMaxDocumentSize = 1 * 1024 * 1024 - 4;
-
-  // Construct a nanopb output stream.
-  //
-  // Set the max_size to be the max document size (as an upper bound; one would
-  // expect individual FieldValue's to be smaller than this).
-  //
-  // bytes_written is (always) initialized to 0. (NB: nanopb does not know or
-  // care about the underlying output vector, so where we are in the vector
-  // itself is irrelevant. i.e. don't use out_bytes->size())
-  pb_ostream_t raw_stream = {
-      /*callback=*/[](pb_ostream_t* stream, const pb_byte_t* buf,
-                      size_t count) -> bool {
-        auto* out_bytes = static_cast<std::vector<uint8_t>*>(stream->state);
-        out_bytes->insert(out_bytes->end(), buf, buf + count);
-        return true;
-      },
-      /*state=*/out_bytes,
-      /*max_size=*/kMaxDocumentSize,
-      /*bytes_written=*/0,
-      /*errmsg=*/nullptr};
-  return Writer(raw_stream);
-}
-
-Reader Reader::Wrap(const uint8_t* bytes, size_t length) {
-  return Reader{pb_istream_from_buffer(bytes, length)};
-}
-
-// TODO(rsgowman): I've left the methods as near as possible to where they were
-// before, which implies that the Writer methods are interspersed with the
-// Reader methods. This should make it a bit easier to review. Refactor these to
-// group the related methods together (probably within their own file rather
-// than here).
-
-void Writer::WriteTag(Tag tag) {
-  if (!status_.ok()) return;
-
-  if (!pb_encode_tag(&stream_, tag.wire_type, tag.field_number)) {
-    FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_));
-  }
-}
-
-Tag Reader::ReadTag() {
-  Tag tag;
-  if (!status_.ok()) return tag;
-
-  bool eof;
-  if (!pb_decode_tag(&stream_, &tag.wire_type, &tag.field_number, &eof)) {
-    status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_));
-    return tag;
-  }
-
-  // nanopb code always returns a false status when setting eof.
-  FIREBASE_ASSERT_MESSAGE(!eof, "nanopb set both ok status and eof to true");
-
-  return tag;
-}
-
-void Writer::WriteNanopbMessage(const pb_field_t fields[],
-                                const void* src_struct) {
-  if (!status_.ok()) return;
-
-  if (!pb_encode(&stream_, fields, src_struct)) {
-    FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_));
-  }
-}
-
-void Reader::ReadNanopbMessage(const pb_field_t fields[], void* dest_struct) {
-  if (!status_.ok()) return;
-
-  if (!pb_decode(&stream_, fields, dest_struct)) {
-    status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_));
-  }
-}
-
-void Writer::WriteSize(size_t size) {
-  return WriteVarint(size);
-}
-
-void Writer::WriteVarint(uint64_t value) {
-  if (!status_.ok()) return;
-
-  if (!pb_encode_varint(&stream_, value)) {
-    FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_));
-  }
-}
-
-/**
- * Note that (despite the return type) this works for bool, enum, int32, int64,
- * uint32 and uint64 proto field types.
- *
- * Note: This is not expected to be called directly, but rather only via the
- * other Decode* methods (i.e. DecodeBool, DecodeLong, etc)
- *
- * @return The decoded varint as a uint64_t.
- */
-uint64_t Reader::ReadVarint() {
-  if (!status_.ok()) return 0;
-
-  uint64_t varint_value = 0;
-  if (!pb_decode_varint(&stream_, &varint_value)) {
-    status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_));
-  }
-  return varint_value;
-}
-
-void Writer::WriteNull() {
-  return WriteVarint(google_protobuf_NullValue_NULL_VALUE);
-}
-
-void Reader::ReadNull() {
-  uint64_t varint = ReadVarint();
-  if (!status_.ok()) return;
-
-  if (varint != google_protobuf_NullValue_NULL_VALUE) {
-    status_ = Status(FirestoreErrorCode::DataLoss,
-                     "Input proto bytes cannot be parsed (invalid null value)");
-  }
-}
-
-void Writer::WriteBool(bool bool_value) {
-  return WriteVarint(bool_value);
-}
-
-bool Reader::ReadBool() {
-  uint64_t varint = ReadVarint();
-  if (!status_.ok()) return false;
-
-  switch (varint) {
-    case 0:
-      return false;
-    case 1:
-      return true;
-    default:
-      status_ =
-          Status(FirestoreErrorCode::DataLoss,
-                 "Input proto bytes cannot be parsed (invalid bool value)");
-      return false;
-  }
-}
-
-void Writer::WriteInteger(int64_t integer_value) {
-  return WriteVarint(integer_value);
-}
-
-int64_t Reader::ReadInteger() {
-  return ReadVarint();
-}
-
-void Writer::WriteString(const std::string& string_value) {
-  if (!status_.ok()) return;
-
-  if (!pb_encode_string(
-          &stream_, reinterpret_cast<const pb_byte_t*>(string_value.c_str()),
-          string_value.length())) {
-    FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_));
-  }
-}
-
-std::string Reader::ReadString() {
-  if (!status_.ok()) return "";
-
-  pb_istream_t substream;
-  if (!pb_make_string_substream(&stream_, &substream)) {
-    status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_));
-    pb_close_string_substream(&stream_, &substream);
-    return "";
-  }
-
-  std::string result(substream.bytes_left, '\0');
-  if (!pb_read(&substream, reinterpret_cast<pb_byte_t*>(&result[0]),
-               substream.bytes_left)) {
-    status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_));
-    pb_close_string_substream(&stream_, &substream);
-    return "";
-  }
-
-  // NB: future versions of nanopb read the remaining characters out of the
-  // substream (and return false if that fails) as an additional safety
-  // check within pb_close_string_substream. Unfortunately, that's not present
-  // in the current version (0.38).  We'll make a stronger assertion and check
-  // to make sure there *are* no remaining characters in the substream.
-  FIREBASE_ASSERT_MESSAGE(
-      substream.bytes_left == 0,
-      "Bytes remaining in substream after supposedly reading all of them.");
-
-  pb_close_string_substream(&stream_, &substream);
-
-  return result;
-}
-
 void EncodeTimestamp(Writer* writer, const Timestamp& timestamp_value) {
   google_protobuf_Timestamp timestamp_proto =
       google_protobuf_Timestamp_init_zero;
@@ -618,98 +221,6 @@ FieldValue DecodeFieldValueImpl(Reader* reader) {
   }
 }
 
-void Writer::WriteNestedMessage(
-    const std::function<void(Writer*)>& write_message_fn) {
-  if (!status_.ok()) return;
-
-  // First calculate the message size using a non-writing substream.
-  Writer sizer = Writer::Sizing();
-  write_message_fn(&sizer);
-  status_ = sizer.status();
-  if (!status_.ok()) return;
-  size_t size = sizer.bytes_written();
-
-  // Write out the size to the output writer.
-  WriteSize(size);
-  if (!status_.ok()) return;
-
-  // If this stream is itself a sizing stream, then we don't need to actually
-  // parse field_value a second time; just update the bytes_written via a call
-  // to pb_write. (If we try to write the contents into a sizing stream, it'll
-  // fail since sizing streams don't actually have any buffer space.)
-  if (stream_.callback == nullptr) {
-    if (!pb_write(&stream_, nullptr, size)) {
-      FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_));
-    }
-    return;
-  }
-
-  // Ensure the output stream has enough space
-  if (stream_.bytes_written + size > stream_.max_size) {
-    FIREBASE_ASSERT_MESSAGE(
-        false,
-        "Insufficient space in the output stream to write the given message");
-  }
-
-  // Use a substream to verify that a callback doesn't write more than what it
-  // did the first time. (Use an initializer rather than setting fields
-  // individually like nanopb does. This gives us a *chance* of noticing if
-  // nanopb adds new fields.)
-  Writer writer({stream_.callback, stream_.state,
-                 /*max_size=*/size, /*bytes_written=*/0,
-                 /*errmsg=*/nullptr});
-  write_message_fn(&writer);
-  status_ = writer.status();
-  if (!status_.ok()) return;
-
-  stream_.bytes_written += writer.stream_.bytes_written;
-  stream_.state = writer.stream_.state;
-  stream_.errmsg = writer.stream_.errmsg;
-
-  if (writer.bytes_written() != size) {
-    // submsg size changed
-    FIREBASE_ASSERT_MESSAGE(
-        false, "Parsing the nested message twice yielded different sizes");
-  }
-}
-
-template <typename T>
-T Reader::ReadNestedMessage(const std::function<T(Reader*)>& read_message_fn) {
-  // Implementation note: This is roughly modeled on pb_decode_delimited,
-  // adjusted to account for the oneof in FieldValue.
-
-  if (!status_.ok()) return T();
-
-  pb_istream_t raw_substream;
-  if (!pb_make_string_substream(&stream_, &raw_substream)) {
-    status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_));
-    pb_close_string_substream(&stream_, &raw_substream);
-    return T();
-  }
-  Reader substream(raw_substream);
-
-  // If this fails, we *won't* return right away so that we can cleanup the
-  // substream (although technically, that turns out not to matter; no resource
-  // leaks occur if we don't do this.)
-  // TODO(rsgowman): Consider RAII here. (Watch out for Reader class which also
-  // wraps streams.)
-  T message = read_message_fn(&substream);
-  status_ = substream.status();
-
-  // NB: future versions of nanopb read the remaining characters out of the
-  // substream (and return false if that fails) as an additional safety
-  // check within pb_close_string_substream. Unfortunately, that's not present
-  // in the current version (0.38).  We'll make a stronger assertion and check
-  // to make sure there *are* no remaining characters in the substream.
-  FIREBASE_ASSERT_MESSAGE(
-      substream.bytes_left() == 0,
-      "Bytes remaining in substream after supposedly reading all of them.");
-
-  pb_close_string_substream(&stream_, &substream.stream_);
-
-  return message;
-}
-
 /**
  * Encodes a 'FieldsEntry' object, within a FieldValue's map_value type.
  *
-- 
cgit v1.2.3