From 56cfe9306ab60af2a17c26b291438a9449d30f5d Mon Sep 17 00:00:00 2001 From: rsgowman Date: Fri, 11 May 2018 10:27:48 -0400 Subject: Refactor serializer (#1250) Moved Tag, Reader, Writer from serializer.cc's anon namespace to firebase::firestore::nanopb This should be bug-for-bug compatible. No changes were made to the moved methods. --- .../src/firebase/firestore/nanopb/CMakeLists.txt | 27 ++ .../core/src/firebase/firestore/nanopb/reader.cc | 141 ++++++ .../core/src/firebase/firestore/nanopb/reader.h | 170 +++++++ Firestore/core/src/firebase/firestore/nanopb/tag.h | 43 ++ .../core/src/firebase/firestore/nanopb/writer.cc | 165 +++++++ .../core/src/firebase/firestore/nanopb/writer.h | 140 ++++++ .../src/firebase/firestore/remote/CMakeLists.txt | 2 + .../src/firebase/firestore/remote/serializer.cc | 503 +-------------------- 8 files changed, 695 insertions(+), 496 deletions(-) create mode 100644 Firestore/core/src/firebase/firestore/nanopb/CMakeLists.txt create mode 100644 Firestore/core/src/firebase/firestore/nanopb/reader.cc create mode 100644 Firestore/core/src/firebase/firestore/nanopb/reader.h create mode 100644 Firestore/core/src/firebase/firestore/nanopb/tag.h create mode 100644 Firestore/core/src/firebase/firestore/nanopb/writer.cc create mode 100644 Firestore/core/src/firebase/firestore/nanopb/writer.h (limited to 'Firestore/core/src') diff --git a/Firestore/core/src/firebase/firestore/nanopb/CMakeLists.txt b/Firestore/core/src/firebase/firestore/nanopb/CMakeLists.txt new file mode 100644 index 0000000..82ffb65 --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright 2018 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cc_library( + firebase_firestore_nanopb + SOURCES + tag.h + reader.h + reader.cc + writer.h + writer.cc + DEPENDS + firebase_firestore_util + firebase_firestore_protos_nanopb + nanopb +) diff --git a/Firestore/core/src/firebase/firestore/nanopb/reader.cc b/Firestore/core/src/firebase/firestore/nanopb/reader.cc new file mode 100644 index 0000000..86e38ac --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/reader.cc @@ -0,0 +1,141 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Firestore/core/src/firebase/firestore/nanopb/reader.h" + +#include "Firestore/Protos/nanopb/google/firestore/v1beta1/document.pb.h" + +namespace firebase { +namespace firestore { +namespace nanopb { + +using firebase::firestore::util::Status; +using std::int64_t; +using std::uint64_t; + +Reader Reader::Wrap(const uint8_t* bytes, size_t length) { + return Reader{pb_istream_from_buffer(bytes, length)}; +} + +Tag Reader::ReadTag() { + Tag tag; + if (!status_.ok()) return tag; + + bool eof; + if (!pb_decode_tag(&stream_, &tag.wire_type, &tag.field_number, &eof)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + return tag; + } + + // nanopb code always returns a false status when setting eof. + FIREBASE_ASSERT_MESSAGE(!eof, "nanopb set both ok status and eof to true"); + + return tag; +} + +void Reader::ReadNanopbMessage(const pb_field_t fields[], void* dest_struct) { + if (!status_.ok()) return; + + if (!pb_decode(&stream_, fields, dest_struct)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + } +} + +/** + * Note that (despite the return type) this works for bool, enum, int32, int64, + * uint32 and uint64 proto field types. + * + * Note: This is not expected to be called directly, but rather only via the + * other Decode* methods (i.e. DecodeBool, DecodeLong, etc) + * + * @return The decoded varint as a uint64_t. + */ +uint64_t Reader::ReadVarint() { + if (!status_.ok()) return 0; + + uint64_t varint_value = 0; + if (!pb_decode_varint(&stream_, &varint_value)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + } + return varint_value; +} + +void Reader::ReadNull() { + uint64_t varint = ReadVarint(); + if (!status_.ok()) return; + + if (varint != google_protobuf_NullValue_NULL_VALUE) { + status_ = Status(FirestoreErrorCode::DataLoss, + "Input proto bytes cannot be parsed (invalid null value)"); + } +} + +bool Reader::ReadBool() { + uint64_t varint = ReadVarint(); + if (!status_.ok()) return false; + + switch (varint) { + case 0: + return false; + case 1: + return true; + default: + status_ = + Status(FirestoreErrorCode::DataLoss, + "Input proto bytes cannot be parsed (invalid bool value)"); + return false; + } +} + +int64_t Reader::ReadInteger() { + return ReadVarint(); +} + +std::string Reader::ReadString() { + if (!status_.ok()) return ""; + + pb_istream_t substream; + if (!pb_make_string_substream(&stream_, &substream)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + pb_close_string_substream(&stream_, &substream); + return ""; + } + + std::string result(substream.bytes_left, '\0'); + if (!pb_read(&substream, reinterpret_cast(&result[0]), + substream.bytes_left)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + pb_close_string_substream(&stream_, &substream); + return ""; + } + + // NB: future versions of nanopb read the remaining characters out of the + // substream (and return false if that fails) as an additional safety + // check within pb_close_string_substream. Unfortunately, that's not present + // in the current version (0.38). We'll make a stronger assertion and check + // to make sure there *are* no remaining characters in the substream. + FIREBASE_ASSERT_MESSAGE( + substream.bytes_left == 0, + "Bytes remaining in substream after supposedly reading all of them."); + + pb_close_string_substream(&stream_, &substream); + + return result; +} + +} // namespace nanopb +} // namespace firestore +} // namespace firebase diff --git a/Firestore/core/src/firebase/firestore/nanopb/reader.h b/Firestore/core/src/firebase/firestore/nanopb/reader.h new file mode 100644 index 0000000..7d77a4d --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/reader.h @@ -0,0 +1,170 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_READER_H_ +#define FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_READER_H_ + +#include +#include + +#include +#include +#include + +#include "Firestore/core/include/firebase/firestore/firestore_errors.h" +#include "Firestore/core/src/firebase/firestore/nanopb/tag.h" +#include "Firestore/core/src/firebase/firestore/util/firebase_assert.h" +#include "Firestore/core/src/firebase/firestore/util/status.h" + +namespace firebase { +namespace firestore { +namespace nanopb { + +/** + * Docs TODO(rsgowman). But currently, this just wraps the underlying nanopb + * pb_istream_t. + */ +class Reader { + public: + /** + * Creates an input stream that reads from the specified bytes. Note that + * this reference must remain valid for the lifetime of this Reader. + * + * (This is roughly equivalent to the nanopb function + * pb_istream_from_buffer()) + * + * @param bytes where the input should be deserialized from. + */ + static Reader Wrap(const uint8_t* bytes, size_t length); + + /** + * Reads a message type from the input stream. + * + * This essentially wraps calls to nanopb's pb_decode_tag() method. + */ + Tag ReadTag(); + + /** + * Reads a nanopb message from the input stream. + * + * This essentially wraps calls to nanopb's pb_decode() method. If we didn't + * use `oneof`s in our protos, this would be the primary way of decoding + * messages. + */ + void ReadNanopbMessage(const pb_field_t fields[], void* dest_struct); + + void ReadNull(); + bool ReadBool(); + std::int64_t ReadInteger(); + + std::string ReadString(); + + /** + * Reads a message and its length. + * + * Analog to Writer::WriteNestedMessage(). See that methods docs for further + * details. + * + * Call this method when reading a nested message. Provide a function to read + * the message itself. + */ + template + T ReadNestedMessage(const std::function& read_message_fn); + + size_t bytes_left() const { + return stream_.bytes_left; + } + + util::Status status() const { + return status_; + } + + void set_status(util::Status status) { + status_ = status; + } + + private: + /** + * Creates a new Reader, based on the given nanopb pb_istream_t. Note that + * a shallow copy will be taken. (Non-null pointers within this struct must + * remain valid for the lifetime of this Reader.) + */ + explicit Reader(pb_istream_t stream) : stream_(stream) { + } + + /** + * Reads a "varint" from the input stream. + * + * This essentially wraps calls to nanopb's pb_decode_varint() method. + * + * Note that (despite the return type) this works for bool, enum, int32, + * int64, uint32 and uint64 proto field types. + * + * Note: This is not expected to be called direclty, but rather only via the + * other Decode* methods (i.e. DecodeBool, DecodeLong, etc) + * + * @return The decoded varint as a uint64_t. + */ + std::uint64_t ReadVarint(); + + util::Status status_ = util::Status::OK(); + + pb_istream_t stream_; +}; + +template +T Reader::ReadNestedMessage(const std::function& read_message_fn) { + // Implementation note: This is roughly modeled on pb_decode_delimited, + // adjusted to account for the oneof in FieldValue. + + if (!status_.ok()) return T(); + + pb_istream_t raw_substream; + if (!pb_make_string_substream(&stream_, &raw_substream)) { + status_ = + util::Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + pb_close_string_substream(&stream_, &raw_substream); + return T(); + } + Reader substream(raw_substream); + + // If this fails, we *won't* return right away so that we can cleanup the + // substream (although technically, that turns out not to matter; no resource + // leaks occur if we don't do this.) + // TODO(rsgowman): Consider RAII here. (Watch out for Reader class which also + // wraps streams.) + T message = read_message_fn(&substream); + status_ = substream.status(); + + // NB: future versions of nanopb read the remaining characters out of the + // substream (and return false if that fails) as an additional safety + // check within pb_close_string_substream. Unfortunately, that's not present + // in the current version (0.38). We'll make a stronger assertion and check + // to make sure there *are* no remaining characters in the substream. + FIREBASE_ASSERT_MESSAGE( + substream.bytes_left() == 0, + "Bytes remaining in substream after supposedly reading all of them."); + + pb_close_string_substream(&stream_, &substream.stream_); + + return message; +} + +} // namespace nanopb +} // namespace firestore +} // namespace firebase + +#endif // FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_READER_H_ diff --git a/Firestore/core/src/firebase/firestore/nanopb/tag.h b/Firestore/core/src/firebase/firestore/nanopb/tag.h new file mode 100644 index 0000000..455ef0c --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/tag.h @@ -0,0 +1,43 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_TAG_H__ +#define FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_TAG_H__ + +#include + +namespace firebase { +namespace firestore { +namespace nanopb { + +/** + * Represents a nanopb tag. + * + * field_number is one of the field tags that nanopb generates based off of + * the proto messages. They're typically named in the format: + * ____tag, e.g. + * google_firestore_v1beta1_Document_name_tag. + */ +struct Tag { + pb_wire_type_t wire_type; + uint32_t field_number; +}; + +} // namespace nanopb +} // namespace firestore +} // namespace firebase + +#endif // FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_TAG_H_ diff --git a/Firestore/core/src/firebase/firestore/nanopb/writer.cc b/Firestore/core/src/firebase/firestore/nanopb/writer.cc new file mode 100644 index 0000000..cbee989 --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/writer.cc @@ -0,0 +1,165 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Firestore/core/src/firebase/firestore/nanopb/writer.h" + +#include "Firestore/Protos/nanopb/google/firestore/v1beta1/document.pb.h" + +namespace firebase { +namespace firestore { +namespace nanopb { + +using firebase::firestore::util::Status; +using std::int64_t; +using std::int8_t; +using std::uint64_t; + +Writer Writer::Wrap(std::vector* out_bytes) { + // TODO(rsgowman): find a better home for this constant. + // A document is defined to have a max size of 1MiB - 4 bytes. + static const size_t kMaxDocumentSize = 1 * 1024 * 1024 - 4; + + // Construct a nanopb output stream. + // + // Set the max_size to be the max document size (as an upper bound; one would + // expect individual FieldValue's to be smaller than this). + // + // bytes_written is (always) initialized to 0. (NB: nanopb does not know or + // care about the underlying output vector, so where we are in the vector + // itself is irrelevant. i.e. don't use out_bytes->size()) + pb_ostream_t raw_stream = { + /*callback=*/[](pb_ostream_t* stream, const pb_byte_t* buf, + size_t count) -> bool { + auto* out_bytes = static_cast*>(stream->state); + out_bytes->insert(out_bytes->end(), buf, buf + count); + return true; + }, + /*state=*/out_bytes, + /*max_size=*/kMaxDocumentSize, + /*bytes_written=*/0, + /*errmsg=*/nullptr}; + return Writer(raw_stream); +} + +void Writer::WriteTag(Tag tag) { + if (!status_.ok()) return; + + if (!pb_encode_tag(&stream_, tag.wire_type, tag.field_number)) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } +} + +void Writer::WriteNanopbMessage(const pb_field_t fields[], + const void* src_struct) { + if (!status_.ok()) return; + + if (!pb_encode(&stream_, fields, src_struct)) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } +} + +void Writer::WriteSize(size_t size) { + return WriteVarint(size); +} + +void Writer::WriteVarint(uint64_t value) { + if (!status_.ok()) return; + + if (!pb_encode_varint(&stream_, value)) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } +} + +void Writer::WriteNull() { + return WriteVarint(google_protobuf_NullValue_NULL_VALUE); +} + +void Writer::WriteBool(bool bool_value) { + return WriteVarint(bool_value); +} + +void Writer::WriteInteger(int64_t integer_value) { + return WriteVarint(integer_value); +} + +void Writer::WriteString(const std::string& string_value) { + if (!status_.ok()) return; + + if (!pb_encode_string( + &stream_, reinterpret_cast(string_value.c_str()), + string_value.length())) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } +} + +void Writer::WriteNestedMessage( + const std::function& write_message_fn) { + if (!status_.ok()) return; + + // First calculate the message size using a non-writing substream. + Writer sizer = Writer::Sizing(); + write_message_fn(&sizer); + status_ = sizer.status(); + if (!status_.ok()) return; + size_t size = sizer.bytes_written(); + + // Write out the size to the output writer. + WriteSize(size); + if (!status_.ok()) return; + + // If this stream is itself a sizing stream, then we don't need to actually + // parse field_value a second time; just update the bytes_written via a call + // to pb_write. (If we try to write the contents into a sizing stream, it'll + // fail since sizing streams don't actually have any buffer space.) + if (stream_.callback == nullptr) { + if (!pb_write(&stream_, nullptr, size)) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } + return; + } + + // Ensure the output stream has enough space + if (stream_.bytes_written + size > stream_.max_size) { + FIREBASE_ASSERT_MESSAGE( + false, + "Insufficient space in the output stream to write the given message"); + } + + // Use a substream to verify that a callback doesn't write more than what it + // did the first time. (Use an initializer rather than setting fields + // individually like nanopb does. This gives us a *chance* of noticing if + // nanopb adds new fields.) + Writer writer({stream_.callback, stream_.state, + /*max_size=*/size, /*bytes_written=*/0, + /*errmsg=*/nullptr}); + write_message_fn(&writer); + status_ = writer.status(); + if (!status_.ok()) return; + + stream_.bytes_written += writer.stream_.bytes_written; + stream_.state = writer.stream_.state; + stream_.errmsg = writer.stream_.errmsg; + + if (writer.bytes_written() != size) { + // submsg size changed + FIREBASE_ASSERT_MESSAGE( + false, "Parsing the nested message twice yielded different sizes"); + } +} + +} // namespace nanopb +} // namespace firestore +} // namespace firebase diff --git a/Firestore/core/src/firebase/firestore/nanopb/writer.h b/Firestore/core/src/firebase/firestore/nanopb/writer.h new file mode 100644 index 0000000..e428826 --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/writer.h @@ -0,0 +1,140 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_WRITER_H_ +#define FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_WRITER_H_ + +#include +#include + +#include +#include +#include +#include + +#include "Firestore/core/src/firebase/firestore/nanopb/tag.h" +#include "Firestore/core/src/firebase/firestore/util/status.h" + +namespace firebase { +namespace firestore { +namespace nanopb { + +/** + * Docs TODO(rsgowman). But currently, this just wraps the underlying nanopb + * pb_ostream_t. Also doc how to check status. + */ +class Writer { + public: + /** + * Creates an output stream that writes to the specified vector. Note that + * this vector pointer must remain valid for the lifetime of this Writer. + * + * (This is roughly equivalent to the nanopb function + * pb_ostream_from_buffer()) + * + * @param out_bytes where the output should be serialized to. + */ + static Writer Wrap(std::vector* out_bytes); + + /** + * Creates a non-writing output stream used to calculate the size of + * the serialized output. + */ + static Writer Sizing() { + return Writer(PB_OSTREAM_SIZING); + } + + /** + * Writes a message type to the output stream. + * + * This essentially wraps calls to nanopb's pb_encode_tag() method. + */ + void WriteTag(Tag tag); + + /** + * Writes a nanopb message to the output stream. + * + * This essentially wraps calls to nanopb's `pb_encode()` method. If we didn't + * use `oneof`s in our protos, this would be the primary way of encoding + * messages. + */ + void WriteNanopbMessage(const pb_field_t fields[], const void* src_struct); + + void WriteSize(size_t size); + void WriteNull(); + void WriteBool(bool bool_value); + void WriteInteger(std::int64_t integer_value); + + void WriteString(const std::string& string_value); + + /** + * Writes a message and its length. + * + * When writing a top level message, protobuf doesn't include the length + * (since you can get that already from the length of the binary output.) But + * when writing a sub/nested message, you must include the length in the + * serialization. + * + * Call this method when writing a nested message. Provide a function to + * write the message itself. This method will calculate the size of the + * written message (using the provided function with a non-writing sizing + * stream), write out the size (and perform sanity checks), and then serialize + * the message by calling the provided function a second time. + */ + void WriteNestedMessage(const std::function& write_message_fn); + + size_t bytes_written() const { + return stream_.bytes_written; + } + + util::Status status() const { + return status_; + } + + private: + util::Status status_ = util::Status::OK(); + + /** + * Creates a new Writer, based on the given nanopb pb_ostream_t. Note that + * a shallow copy will be taken. (Non-null pointers within this struct must + * remain valid for the lifetime of this Writer.) + */ + explicit Writer(const pb_ostream_t& stream) : stream_(stream) { + } + + /** + * Writes a "varint" to the output stream. + * + * This essentially wraps calls to nanopb's pb_encode_varint() method. + * + * Note that (despite the value parameter type) this works for bool, enum, + * int32, int64, uint32 and uint64 proto field types. + * + * Note: This is not expected to be called directly, but rather only + * via the other Write* methods (i.e. WriteBool, WriteLong, etc) + * + * @param value The value to write, represented as a uint64_t. + */ + void WriteVarint(std::uint64_t value); + + pb_ostream_t stream_; +}; + +} // namespace nanopb +} // namespace firestore +} // namespace firebase + +#endif // FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_WRITER_H_ diff --git a/Firestore/core/src/firebase/firestore/remote/CMakeLists.txt b/Firestore/core/src/firebase/firestore/remote/CMakeLists.txt index 7f528fb..fc51b37 100644 --- a/Firestore/core/src/firebase/firestore/remote/CMakeLists.txt +++ b/Firestore/core/src/firebase/firestore/remote/CMakeLists.txt @@ -21,7 +21,9 @@ cc_library( serializer.cc DEPENDS firebase_firestore_model + firebase_firestore_nanopb firebase_firestore_protos_nanopb + firebase_firestore_util grpc::grpc nanopb ) diff --git a/Firestore/core/src/firebase/firestore/remote/serializer.cc b/Firestore/core/src/firebase/firestore/remote/serializer.cc index 209f2b1..7aec38b 100644 --- a/Firestore/core/src/firebase/firestore/remote/serializer.cc +++ b/Firestore/core/src/firebase/firestore/remote/serializer.cc @@ -25,8 +25,12 @@ #include #include "Firestore/Protos/nanopb/google/firestore/v1beta1/document.pb.h" +#include "Firestore/core/include/firebase/firestore/firestore_errors.h" #include "Firestore/core/include/firebase/firestore/timestamp.h" #include "Firestore/core/src/firebase/firestore/model/resource_path.h" +#include "Firestore/core/src/firebase/firestore/nanopb/reader.h" +#include "Firestore/core/src/firebase/firestore/nanopb/tag.h" +#include "Firestore/core/src/firebase/firestore/nanopb/writer.h" #include "Firestore/core/src/firebase/firestore/timestamp_internal.h" #include "Firestore/core/src/firebase/firestore/util/firebase_assert.h" @@ -41,419 +45,18 @@ using firebase::firestore::model::DocumentKey; using firebase::firestore::model::FieldValue; using firebase::firestore::model::ObjectValue; using firebase::firestore::model::ResourcePath; +using firebase::firestore::nanopb::Reader; +using firebase::firestore::nanopb::Tag; +using firebase::firestore::nanopb::Writer; using firebase::firestore::util::Status; using firebase::firestore::util::StatusOr; namespace { -class Writer; - -class Reader; - void EncodeObject(Writer* writer, const ObjectValue& object_value); ObjectValue::Map DecodeObject(Reader* reader); -/** - * Represents a nanopb tag. - * - * field_number is one of the field tags that nanopb generates based off of - * the proto messages. They're typically named in the format: - * ____tag, e.g. - * google_firestore_v1beta1_Document_name_tag. - */ -struct Tag { - pb_wire_type_t wire_type; - uint32_t field_number; -}; - -/** - * Docs TODO(rsgowman). But currently, this just wraps the underlying nanopb - * pb_ostream_t. Also doc how to check status. - */ -class Writer { - public: - /** - * Creates an output stream that writes to the specified vector. Note that - * this vector pointer must remain valid for the lifetime of this Writer. - * - * (This is roughly equivalent to the nanopb function - * pb_ostream_from_buffer()) - * - * @param out_bytes where the output should be serialized to. - */ - static Writer Wrap(std::vector* out_bytes); - - /** - * Creates a non-writing output stream used to calculate the size of - * the serialized output. - */ - static Writer Sizing() { - return Writer(PB_OSTREAM_SIZING); - } - - /** - * Writes a message type to the output stream. - * - * This essentially wraps calls to nanopb's pb_encode_tag() method. - */ - void WriteTag(Tag tag); - - /** - * Writes a nanopb message to the output stream. - * - * This essentially wraps calls to nanopb's `pb_encode()` method. If we didn't - * use `oneof`s in our protos, this would be the primary way of encoding - * messages. - */ - void WriteNanopbMessage(const pb_field_t fields[], const void* src_struct); - - void WriteSize(size_t size); - void WriteNull(); - void WriteBool(bool bool_value); - void WriteInteger(int64_t integer_value); - - void WriteString(const std::string& string_value); - - /** - * Writes a message and its length. - * - * When writing a top level message, protobuf doesn't include the length - * (since you can get that already from the length of the binary output.) But - * when writing a sub/nested message, you must include the length in the - * serialization. - * - * Call this method when writing a nested message. Provide a function to - * write the message itself. This method will calculate the size of the - * written message (using the provided function with a non-writing sizing - * stream), write out the size (and perform sanity checks), and then serialize - * the message by calling the provided function a second time. - */ - void WriteNestedMessage(const std::function& write_message_fn); - - size_t bytes_written() const { - return stream_.bytes_written; - } - - Status status() const { - return status_; - } - - private: - Status status_ = Status::OK(); - - /** - * Creates a new Writer, based on the given nanopb pb_ostream_t. Note that - * a shallow copy will be taken. (Non-null pointers within this struct must - * remain valid for the lifetime of this Writer.) - */ - explicit Writer(const pb_ostream_t& stream) : stream_(stream) { - } - - /** - * Writes a "varint" to the output stream. - * - * This essentially wraps calls to nanopb's pb_encode_varint() method. - * - * Note that (despite the value parameter type) this works for bool, enum, - * int32, int64, uint32 and uint64 proto field types. - * - * Note: This is not expected to be called directly, but rather only - * via the other Write* methods (i.e. WriteBool, WriteLong, etc) - * - * @param value The value to write, represented as a uint64_t. - */ - void WriteVarint(uint64_t value); - - pb_ostream_t stream_; -}; - -/** - * Docs TODO(rsgowman). But currently, this just wraps the underlying nanopb - * pb_istream_t. - */ -class Reader { - public: - /** - * Creates an input stream that reads from the specified bytes. Note that - * this reference must remain valid for the lifetime of this Reader. - * - * (This is roughly equivalent to the nanopb function - * pb_istream_from_buffer()) - * - * @param bytes where the input should be deserialized from. - */ - static Reader Wrap(const uint8_t* bytes, size_t length); - - /** - * Reads a message type from the input stream. - * - * This essentially wraps calls to nanopb's pb_decode_tag() method. - */ - Tag ReadTag(); - - /** - * Reads a nanopb message from the input stream. - * - * This essentially wraps calls to nanopb's pb_decode() method. If we didn't - * use `oneof`s in our protos, this would be the primary way of decoding - * messages. - */ - void ReadNanopbMessage(const pb_field_t fields[], void* dest_struct); - - void ReadNull(); - bool ReadBool(); - int64_t ReadInteger(); - - std::string ReadString(); - - /** - * Reads a message and its length. - * - * Analog to Writer::WriteNestedMessage(). See that methods docs for further - * details. - * - * Call this method when reading a nested message. Provide a function to read - * the message itself. - */ - template - T ReadNestedMessage(const std::function& read_message_fn); - - size_t bytes_left() const { - return stream_.bytes_left; - } - - Status status() const { - return status_; - } - - void set_status(Status status) { - status_ = status; - } - - private: - /** - * Creates a new Reader, based on the given nanopb pb_istream_t. Note that - * a shallow copy will be taken. (Non-null pointers within this struct must - * remain valid for the lifetime of this Reader.) - */ - explicit Reader(pb_istream_t stream) : stream_(stream) { - } - - /** - * Reads a "varint" from the input stream. - * - * This essentially wraps calls to nanopb's pb_decode_varint() method. - * - * Note that (despite the return type) this works for bool, enum, int32, - * int64, uint32 and uint64 proto field types. - * - * Note: This is not expected to be called direclty, but rather only via the - * other Decode* methods (i.e. DecodeBool, DecodeLong, etc) - * - * @return The decoded varint as a uint64_t. - */ - uint64_t ReadVarint(); - - Status status_ = Status::OK(); - - pb_istream_t stream_; -}; - -Writer Writer::Wrap(std::vector* out_bytes) { - // TODO(rsgowman): find a better home for this constant. - // A document is defined to have a max size of 1MiB - 4 bytes. - static const size_t kMaxDocumentSize = 1 * 1024 * 1024 - 4; - - // Construct a nanopb output stream. - // - // Set the max_size to be the max document size (as an upper bound; one would - // expect individual FieldValue's to be smaller than this). - // - // bytes_written is (always) initialized to 0. (NB: nanopb does not know or - // care about the underlying output vector, so where we are in the vector - // itself is irrelevant. i.e. don't use out_bytes->size()) - pb_ostream_t raw_stream = { - /*callback=*/[](pb_ostream_t* stream, const pb_byte_t* buf, - size_t count) -> bool { - auto* out_bytes = static_cast*>(stream->state); - out_bytes->insert(out_bytes->end(), buf, buf + count); - return true; - }, - /*state=*/out_bytes, - /*max_size=*/kMaxDocumentSize, - /*bytes_written=*/0, - /*errmsg=*/nullptr}; - return Writer(raw_stream); -} - -Reader Reader::Wrap(const uint8_t* bytes, size_t length) { - return Reader{pb_istream_from_buffer(bytes, length)}; -} - -// TODO(rsgowman): I've left the methods as near as possible to where they were -// before, which implies that the Writer methods are interspersed with the -// Reader methods. This should make it a bit easier to review. Refactor these to -// group the related methods together (probably within their own file rather -// than here). - -void Writer::WriteTag(Tag tag) { - if (!status_.ok()) return; - - if (!pb_encode_tag(&stream_, tag.wire_type, tag.field_number)) { - FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); - } -} - -Tag Reader::ReadTag() { - Tag tag; - if (!status_.ok()) return tag; - - bool eof; - if (!pb_decode_tag(&stream_, &tag.wire_type, &tag.field_number, &eof)) { - status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); - return tag; - } - - // nanopb code always returns a false status when setting eof. - FIREBASE_ASSERT_MESSAGE(!eof, "nanopb set both ok status and eof to true"); - - return tag; -} - -void Writer::WriteNanopbMessage(const pb_field_t fields[], - const void* src_struct) { - if (!status_.ok()) return; - - if (!pb_encode(&stream_, fields, src_struct)) { - FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); - } -} - -void Reader::ReadNanopbMessage(const pb_field_t fields[], void* dest_struct) { - if (!status_.ok()) return; - - if (!pb_decode(&stream_, fields, dest_struct)) { - status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); - } -} - -void Writer::WriteSize(size_t size) { - return WriteVarint(size); -} - -void Writer::WriteVarint(uint64_t value) { - if (!status_.ok()) return; - - if (!pb_encode_varint(&stream_, value)) { - FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); - } -} - -/** - * Note that (despite the return type) this works for bool, enum, int32, int64, - * uint32 and uint64 proto field types. - * - * Note: This is not expected to be called directly, but rather only via the - * other Decode* methods (i.e. DecodeBool, DecodeLong, etc) - * - * @return The decoded varint as a uint64_t. - */ -uint64_t Reader::ReadVarint() { - if (!status_.ok()) return 0; - - uint64_t varint_value = 0; - if (!pb_decode_varint(&stream_, &varint_value)) { - status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); - } - return varint_value; -} - -void Writer::WriteNull() { - return WriteVarint(google_protobuf_NullValue_NULL_VALUE); -} - -void Reader::ReadNull() { - uint64_t varint = ReadVarint(); - if (!status_.ok()) return; - - if (varint != google_protobuf_NullValue_NULL_VALUE) { - status_ = Status(FirestoreErrorCode::DataLoss, - "Input proto bytes cannot be parsed (invalid null value)"); - } -} - -void Writer::WriteBool(bool bool_value) { - return WriteVarint(bool_value); -} - -bool Reader::ReadBool() { - uint64_t varint = ReadVarint(); - if (!status_.ok()) return false; - - switch (varint) { - case 0: - return false; - case 1: - return true; - default: - status_ = - Status(FirestoreErrorCode::DataLoss, - "Input proto bytes cannot be parsed (invalid bool value)"); - return false; - } -} - -void Writer::WriteInteger(int64_t integer_value) { - return WriteVarint(integer_value); -} - -int64_t Reader::ReadInteger() { - return ReadVarint(); -} - -void Writer::WriteString(const std::string& string_value) { - if (!status_.ok()) return; - - if (!pb_encode_string( - &stream_, reinterpret_cast(string_value.c_str()), - string_value.length())) { - FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); - } -} - -std::string Reader::ReadString() { - if (!status_.ok()) return ""; - - pb_istream_t substream; - if (!pb_make_string_substream(&stream_, &substream)) { - status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); - pb_close_string_substream(&stream_, &substream); - return ""; - } - - std::string result(substream.bytes_left, '\0'); - if (!pb_read(&substream, reinterpret_cast(&result[0]), - substream.bytes_left)) { - status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); - pb_close_string_substream(&stream_, &substream); - return ""; - } - - // NB: future versions of nanopb read the remaining characters out of the - // substream (and return false if that fails) as an additional safety - // check within pb_close_string_substream. Unfortunately, that's not present - // in the current version (0.38). We'll make a stronger assertion and check - // to make sure there *are* no remaining characters in the substream. - FIREBASE_ASSERT_MESSAGE( - substream.bytes_left == 0, - "Bytes remaining in substream after supposedly reading all of them."); - - pb_close_string_substream(&stream_, &substream); - - return result; -} - void EncodeTimestamp(Writer* writer, const Timestamp& timestamp_value) { google_protobuf_Timestamp timestamp_proto = google_protobuf_Timestamp_init_zero; @@ -618,98 +221,6 @@ FieldValue DecodeFieldValueImpl(Reader* reader) { } } -void Writer::WriteNestedMessage( - const std::function& write_message_fn) { - if (!status_.ok()) return; - - // First calculate the message size using a non-writing substream. - Writer sizer = Writer::Sizing(); - write_message_fn(&sizer); - status_ = sizer.status(); - if (!status_.ok()) return; - size_t size = sizer.bytes_written(); - - // Write out the size to the output writer. - WriteSize(size); - if (!status_.ok()) return; - - // If this stream is itself a sizing stream, then we don't need to actually - // parse field_value a second time; just update the bytes_written via a call - // to pb_write. (If we try to write the contents into a sizing stream, it'll - // fail since sizing streams don't actually have any buffer space.) - if (stream_.callback == nullptr) { - if (!pb_write(&stream_, nullptr, size)) { - FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); - } - return; - } - - // Ensure the output stream has enough space - if (stream_.bytes_written + size > stream_.max_size) { - FIREBASE_ASSERT_MESSAGE( - false, - "Insufficient space in the output stream to write the given message"); - } - - // Use a substream to verify that a callback doesn't write more than what it - // did the first time. (Use an initializer rather than setting fields - // individually like nanopb does. This gives us a *chance* of noticing if - // nanopb adds new fields.) - Writer writer({stream_.callback, stream_.state, - /*max_size=*/size, /*bytes_written=*/0, - /*errmsg=*/nullptr}); - write_message_fn(&writer); - status_ = writer.status(); - if (!status_.ok()) return; - - stream_.bytes_written += writer.stream_.bytes_written; - stream_.state = writer.stream_.state; - stream_.errmsg = writer.stream_.errmsg; - - if (writer.bytes_written() != size) { - // submsg size changed - FIREBASE_ASSERT_MESSAGE( - false, "Parsing the nested message twice yielded different sizes"); - } -} - -template -T Reader::ReadNestedMessage(const std::function& read_message_fn) { - // Implementation note: This is roughly modeled on pb_decode_delimited, - // adjusted to account for the oneof in FieldValue. - - if (!status_.ok()) return T(); - - pb_istream_t raw_substream; - if (!pb_make_string_substream(&stream_, &raw_substream)) { - status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); - pb_close_string_substream(&stream_, &raw_substream); - return T(); - } - Reader substream(raw_substream); - - // If this fails, we *won't* return right away so that we can cleanup the - // substream (although technically, that turns out not to matter; no resource - // leaks occur if we don't do this.) - // TODO(rsgowman): Consider RAII here. (Watch out for Reader class which also - // wraps streams.) - T message = read_message_fn(&substream); - status_ = substream.status(); - - // NB: future versions of nanopb read the remaining characters out of the - // substream (and return false if that fails) as an additional safety - // check within pb_close_string_substream. Unfortunately, that's not present - // in the current version (0.38). We'll make a stronger assertion and check - // to make sure there *are* no remaining characters in the substream. - FIREBASE_ASSERT_MESSAGE( - substream.bytes_left() == 0, - "Bytes remaining in substream after supposedly reading all of them."); - - pb_close_string_substream(&stream_, &substream.stream_); - - return message; -} - /** * Encodes a 'FieldsEntry' object, within a FieldValue's map_value type. * -- cgit v1.2.3