From 56cfe9306ab60af2a17c26b291438a9449d30f5d Mon Sep 17 00:00:00 2001 From: rsgowman Date: Fri, 11 May 2018 10:27:48 -0400 Subject: Refactor serializer (#1250) Moved Tag, Reader, Writer from serializer.cc's anon namespace to firebase::firestore::nanopb This should be bug-for-bug compatible. No changes were made to the moved methods. --- .../src/firebase/firestore/nanopb/CMakeLists.txt | 27 ++++ .../core/src/firebase/firestore/nanopb/reader.cc | 141 +++++++++++++++++ .../core/src/firebase/firestore/nanopb/reader.h | 170 +++++++++++++++++++++ Firestore/core/src/firebase/firestore/nanopb/tag.h | 43 ++++++ .../core/src/firebase/firestore/nanopb/writer.cc | 165 ++++++++++++++++++++ .../core/src/firebase/firestore/nanopb/writer.h | 140 +++++++++++++++++ 6 files changed, 686 insertions(+) create mode 100644 Firestore/core/src/firebase/firestore/nanopb/CMakeLists.txt create mode 100644 Firestore/core/src/firebase/firestore/nanopb/reader.cc create mode 100644 Firestore/core/src/firebase/firestore/nanopb/reader.h create mode 100644 Firestore/core/src/firebase/firestore/nanopb/tag.h create mode 100644 Firestore/core/src/firebase/firestore/nanopb/writer.cc create mode 100644 Firestore/core/src/firebase/firestore/nanopb/writer.h (limited to 'Firestore/core/src/firebase/firestore/nanopb') diff --git a/Firestore/core/src/firebase/firestore/nanopb/CMakeLists.txt b/Firestore/core/src/firebase/firestore/nanopb/CMakeLists.txt new file mode 100644 index 0000000..82ffb65 --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright 2018 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cc_library( + firebase_firestore_nanopb + SOURCES + tag.h + reader.h + reader.cc + writer.h + writer.cc + DEPENDS + firebase_firestore_util + firebase_firestore_protos_nanopb + nanopb +) diff --git a/Firestore/core/src/firebase/firestore/nanopb/reader.cc b/Firestore/core/src/firebase/firestore/nanopb/reader.cc new file mode 100644 index 0000000..86e38ac --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/reader.cc @@ -0,0 +1,141 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Firestore/core/src/firebase/firestore/nanopb/reader.h" + +#include "Firestore/Protos/nanopb/google/firestore/v1beta1/document.pb.h" + +namespace firebase { +namespace firestore { +namespace nanopb { + +using firebase::firestore::util::Status; +using std::int64_t; +using std::uint64_t; + +Reader Reader::Wrap(const uint8_t* bytes, size_t length) { + return Reader{pb_istream_from_buffer(bytes, length)}; +} + +Tag Reader::ReadTag() { + Tag tag; + if (!status_.ok()) return tag; + + bool eof; + if (!pb_decode_tag(&stream_, &tag.wire_type, &tag.field_number, &eof)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + return tag; + } + + // nanopb code always returns a false status when setting eof. + FIREBASE_ASSERT_MESSAGE(!eof, "nanopb set both ok status and eof to true"); + + return tag; +} + +void Reader::ReadNanopbMessage(const pb_field_t fields[], void* dest_struct) { + if (!status_.ok()) return; + + if (!pb_decode(&stream_, fields, dest_struct)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + } +} + +/** + * Note that (despite the return type) this works for bool, enum, int32, int64, + * uint32 and uint64 proto field types. + * + * Note: This is not expected to be called directly, but rather only via the + * other Read* methods (i.e. ReadBool, ReadInteger, etc) + * + * @return The decoded varint as a uint64_t. 
+ */ +uint64_t Reader::ReadVarint() { + if (!status_.ok()) return 0; + + uint64_t varint_value = 0; + if (!pb_decode_varint(&stream_, &varint_value)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + } + return varint_value; +} + +void Reader::ReadNull() { + uint64_t varint = ReadVarint(); + if (!status_.ok()) return; + + if (varint != google_protobuf_NullValue_NULL_VALUE) { + status_ = Status(FirestoreErrorCode::DataLoss, + "Input proto bytes cannot be parsed (invalid null value)"); + } +} + +bool Reader::ReadBool() { + uint64_t varint = ReadVarint(); + if (!status_.ok()) return false; + + switch (varint) { + case 0: + return false; + case 1: + return true; + default: + status_ = + Status(FirestoreErrorCode::DataLoss, + "Input proto bytes cannot be parsed (invalid bool value)"); + return false; + } +} + +int64_t Reader::ReadInteger() { + return ReadVarint(); +} + +std::string Reader::ReadString() { + if (!status_.ok()) return ""; + + pb_istream_t substream; + if (!pb_make_string_substream(&stream_, &substream)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + pb_close_string_substream(&stream_, &substream); + return ""; + } + + std::string result(substream.bytes_left, '\0'); + if (!pb_read(&substream, reinterpret_cast(&result[0]), + substream.bytes_left)) { + status_ = Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + pb_close_string_substream(&stream_, &substream); + return ""; + } + + // NB: future versions of nanopb read the remaining characters out of the + // substream (and return false if that fails) as an additional safety + // check within pb_close_string_substream. Unfortunately, that's not present + // in the current version (0.38). We'll make a stronger assertion and check + // to make sure there *are* no remaining characters in the substream. 
+ FIREBASE_ASSERT_MESSAGE( + substream.bytes_left == 0, + "Bytes remaining in substream after supposedly reading all of them."); + + pb_close_string_substream(&stream_, &substream); + + return result; +} + +} // namespace nanopb +} // namespace firestore +} // namespace firebase diff --git a/Firestore/core/src/firebase/firestore/nanopb/reader.h b/Firestore/core/src/firebase/firestore/nanopb/reader.h new file mode 100644 index 0000000..7d77a4d --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/reader.h @@ -0,0 +1,170 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_READER_H_ +#define FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_READER_H_ + +#include +#include + +#include +#include +#include + +#include "Firestore/core/include/firebase/firestore/firestore_errors.h" +#include "Firestore/core/src/firebase/firestore/nanopb/tag.h" +#include "Firestore/core/src/firebase/firestore/util/firebase_assert.h" +#include "Firestore/core/src/firebase/firestore/util/status.h" + +namespace firebase { +namespace firestore { +namespace nanopb { + +/** + * Docs TODO(rsgowman). But currently, this just wraps the underlying nanopb + * pb_istream_t. + */ +class Reader { + public: + /** + * Creates an input stream that reads from the specified bytes. Note that + * this reference must remain valid for the lifetime of this Reader. 
+ * + * (This is roughly equivalent to the nanopb function + * pb_istream_from_buffer()) + * + * @param bytes where the input should be deserialized from. + */ + static Reader Wrap(const uint8_t* bytes, size_t length); + + /** + * Reads a message type from the input stream. + * + * This essentially wraps calls to nanopb's pb_decode_tag() method. + */ + Tag ReadTag(); + + /** + * Reads a nanopb message from the input stream. + * + * This essentially wraps calls to nanopb's pb_decode() method. If we didn't + * use `oneof`s in our protos, this would be the primary way of decoding + * messages. + */ + void ReadNanopbMessage(const pb_field_t fields[], void* dest_struct); + + void ReadNull(); + bool ReadBool(); + std::int64_t ReadInteger(); + + std::string ReadString(); + + /** + * Reads a message and its length. + * + * Analog to Writer::WriteNestedMessage(). See that method's docs for further + * details. + * + * Call this method when reading a nested message. Provide a function to read + * the message itself. + */ + template + T ReadNestedMessage(const std::function& read_message_fn); + + size_t bytes_left() const { + return stream_.bytes_left; + } + + util::Status status() const { + return status_; + } + + void set_status(util::Status status) { + status_ = status; + } + + private: + /** + * Creates a new Reader, based on the given nanopb pb_istream_t. Note that + * a shallow copy will be taken. (Non-null pointers within this struct must + * remain valid for the lifetime of this Reader.) + */ + explicit Reader(pb_istream_t stream) : stream_(stream) { + } + + /** + * Reads a "varint" from the input stream. + * + * This essentially wraps calls to nanopb's pb_decode_varint() method. + * + * Note that (despite the return type) this works for bool, enum, int32, + * int64, uint32 and uint64 proto field types. + * + * Note: This is not expected to be called directly, but rather only via the + * other Read* methods (i.e. 
ReadBool, ReadInteger, etc) + * + * @return The decoded varint as a uint64_t. + */ + std::uint64_t ReadVarint(); + + util::Status status_ = util::Status::OK(); + + pb_istream_t stream_; +}; + +template +T Reader::ReadNestedMessage(const std::function& read_message_fn) { + // Implementation note: This is roughly modeled on pb_decode_delimited, + // adjusted to account for the oneof in FieldValue. + + if (!status_.ok()) return T(); + + pb_istream_t raw_substream; + if (!pb_make_string_substream(&stream_, &raw_substream)) { + status_ = + util::Status(FirestoreErrorCode::DataLoss, PB_GET_ERROR(&stream_)); + pb_close_string_substream(&stream_, &raw_substream); + return T(); + } + Reader substream(raw_substream); + + // If this fails, we *won't* return right away so that we can cleanup the + // substream (although technically, that turns out not to matter; no resource + // leaks occur if we don't do this.) + // TODO(rsgowman): Consider RAII here. (Watch out for Reader class which also + // wraps streams.) + T message = read_message_fn(&substream); + status_ = substream.status(); + + // NB: future versions of nanopb read the remaining characters out of the + // substream (and return false if that fails) as an additional safety + // check within pb_close_string_substream. Unfortunately, that's not present + // in the current version (0.38). We'll make a stronger assertion and check + // to make sure there *are* no remaining characters in the substream. 
+ FIREBASE_ASSERT_MESSAGE( + substream.bytes_left() == 0, + "Bytes remaining in substream after supposedly reading all of them."); + + pb_close_string_substream(&stream_, &substream.stream_); + + return message; +} + +} // namespace nanopb +} // namespace firestore +} // namespace firebase + +#endif // FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_READER_H_ diff --git a/Firestore/core/src/firebase/firestore/nanopb/tag.h b/Firestore/core/src/firebase/firestore/nanopb/tag.h new file mode 100644 index 0000000..455ef0c --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/tag.h @@ -0,0 +1,43 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_TAG_H_ +#define FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_TAG_H_ + +#include + +namespace firebase { +namespace firestore { +namespace nanopb { + +/** + * Represents a nanopb tag. + * + * field_number is one of the field tags that nanopb generates based off of + * the proto messages. They're typically named in the format: + * ____tag, e.g. + * google_firestore_v1beta1_Document_name_tag. 
+ */ +struct Tag { + pb_wire_type_t wire_type; + uint32_t field_number; +}; + +} // namespace nanopb +} // namespace firestore +} // namespace firebase + +#endif // FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_TAG_H_ diff --git a/Firestore/core/src/firebase/firestore/nanopb/writer.cc b/Firestore/core/src/firebase/firestore/nanopb/writer.cc new file mode 100644 index 0000000..cbee989 --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/writer.cc @@ -0,0 +1,165 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Firestore/core/src/firebase/firestore/nanopb/writer.h" + +#include "Firestore/Protos/nanopb/google/firestore/v1beta1/document.pb.h" + +namespace firebase { +namespace firestore { +namespace nanopb { + +using firebase::firestore::util::Status; +using std::int64_t; +using std::int8_t; +using std::uint64_t; + +Writer Writer::Wrap(std::vector* out_bytes) { + // TODO(rsgowman): find a better home for this constant. + // A document is defined to have a max size of 1MiB - 4 bytes. + static const size_t kMaxDocumentSize = 1 * 1024 * 1024 - 4; + + // Construct a nanopb output stream. + // + // Set the max_size to be the max document size (as an upper bound; one would + // expect individual FieldValue's to be smaller than this). + // + // bytes_written is (always) initialized to 0. 
(NB: nanopb does not know or + // care about the underlying output vector, so where we are in the vector + // itself is irrelevant. i.e. don't use out_bytes->size()) + pb_ostream_t raw_stream = { + /*callback=*/[](pb_ostream_t* stream, const pb_byte_t* buf, + size_t count) -> bool { + auto* out_bytes = static_cast*>(stream->state); + out_bytes->insert(out_bytes->end(), buf, buf + count); + return true; + }, + /*state=*/out_bytes, + /*max_size=*/kMaxDocumentSize, + /*bytes_written=*/0, + /*errmsg=*/nullptr}; + return Writer(raw_stream); +} + +void Writer::WriteTag(Tag tag) { + if (!status_.ok()) return; + + if (!pb_encode_tag(&stream_, tag.wire_type, tag.field_number)) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } +} + +void Writer::WriteNanopbMessage(const pb_field_t fields[], + const void* src_struct) { + if (!status_.ok()) return; + + if (!pb_encode(&stream_, fields, src_struct)) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } +} + +void Writer::WriteSize(size_t size) { + return WriteVarint(size); +} + +void Writer::WriteVarint(uint64_t value) { + if (!status_.ok()) return; + + if (!pb_encode_varint(&stream_, value)) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } +} + +void Writer::WriteNull() { + return WriteVarint(google_protobuf_NullValue_NULL_VALUE); +} + +void Writer::WriteBool(bool bool_value) { + return WriteVarint(bool_value); +} + +void Writer::WriteInteger(int64_t integer_value) { + return WriteVarint(integer_value); +} + +void Writer::WriteString(const std::string& string_value) { + if (!status_.ok()) return; + + if (!pb_encode_string( + &stream_, reinterpret_cast(string_value.c_str()), + string_value.length())) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } +} + +void Writer::WriteNestedMessage( + const std::function& write_message_fn) { + if (!status_.ok()) return; + + // First calculate the message size using a non-writing substream. 
+ Writer sizer = Writer::Sizing(); + write_message_fn(&sizer); + status_ = sizer.status(); + if (!status_.ok()) return; + size_t size = sizer.bytes_written(); + + // Write out the size to the output writer. + WriteSize(size); + if (!status_.ok()) return; + + // If this stream is itself a sizing stream, then we don't need to actually + // parse field_value a second time; just update the bytes_written via a call + // to pb_write. (If we try to write the contents into a sizing stream, it'll + // fail since sizing streams don't actually have any buffer space.) + if (stream_.callback == nullptr) { + if (!pb_write(&stream_, nullptr, size)) { + FIREBASE_ASSERT_MESSAGE(false, PB_GET_ERROR(&stream_)); + } + return; + } + + // Ensure the output stream has enough space + if (stream_.bytes_written + size > stream_.max_size) { + FIREBASE_ASSERT_MESSAGE( + false, + "Insufficient space in the output stream to write the given message"); + } + + // Use a substream to verify that a callback doesn't write more than what it + // did the first time. (Use an initializer rather than setting fields + // individually like nanopb does. This gives us a *chance* of noticing if + // nanopb adds new fields.) 
+ Writer writer({stream_.callback, stream_.state, + /*max_size=*/size, /*bytes_written=*/0, + /*errmsg=*/nullptr}); + write_message_fn(&writer); + status_ = writer.status(); + if (!status_.ok()) return; + + stream_.bytes_written += writer.stream_.bytes_written; + stream_.state = writer.stream_.state; + stream_.errmsg = writer.stream_.errmsg; + + if (writer.bytes_written() != size) { + // submsg size changed + FIREBASE_ASSERT_MESSAGE( + false, "Parsing the nested message twice yielded different sizes"); + } +} + +} // namespace nanopb +} // namespace firestore +} // namespace firebase diff --git a/Firestore/core/src/firebase/firestore/nanopb/writer.h b/Firestore/core/src/firebase/firestore/nanopb/writer.h new file mode 100644 index 0000000..e428826 --- /dev/null +++ b/Firestore/core/src/firebase/firestore/nanopb/writer.h @@ -0,0 +1,140 @@ +/* + * Copyright 2018 Google + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_WRITER_H_ +#define FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_WRITER_H_ + +#include +#include + +#include +#include +#include +#include + +#include "Firestore/core/src/firebase/firestore/nanopb/tag.h" +#include "Firestore/core/src/firebase/firestore/util/status.h" + +namespace firebase { +namespace firestore { +namespace nanopb { + +/** + * Docs TODO(rsgowman). But currently, this just wraps the underlying nanopb + * pb_ostream_t. Also doc how to check status. 
+ */ +class Writer { + public: + /** + * Creates an output stream that writes to the specified vector. Note that + * this vector pointer must remain valid for the lifetime of this Writer. + * + * (This is roughly equivalent to the nanopb function + * pb_ostream_from_buffer()) + * + * @param out_bytes where the output should be serialized to. + */ + static Writer Wrap(std::vector* out_bytes); + + /** + * Creates a non-writing output stream used to calculate the size of + * the serialized output. + */ + static Writer Sizing() { + return Writer(PB_OSTREAM_SIZING); + } + + /** + * Writes a message type to the output stream. + * + * This essentially wraps calls to nanopb's pb_encode_tag() method. + */ + void WriteTag(Tag tag); + + /** + * Writes a nanopb message to the output stream. + * + * This essentially wraps calls to nanopb's `pb_encode()` method. If we didn't + * use `oneof`s in our protos, this would be the primary way of encoding + * messages. + */ + void WriteNanopbMessage(const pb_field_t fields[], const void* src_struct); + + void WriteSize(size_t size); + void WriteNull(); + void WriteBool(bool bool_value); + void WriteInteger(std::int64_t integer_value); + + void WriteString(const std::string& string_value); + + /** + * Writes a message and its length. + * + * When writing a top level message, protobuf doesn't include the length + * (since you can get that already from the length of the binary output.) But + * when writing a sub/nested message, you must include the length in the + * serialization. + * + * Call this method when writing a nested message. Provide a function to + * write the message itself. This method will calculate the size of the + * written message (using the provided function with a non-writing sizing + * stream), write out the size (and perform sanity checks), and then serialize + * the message by calling the provided function a second time. 
+ */ + void WriteNestedMessage(const std::function& write_message_fn); + + size_t bytes_written() const { + return stream_.bytes_written; + } + + util::Status status() const { + return status_; + } + + private: + util::Status status_ = util::Status::OK(); + + /** + * Creates a new Writer, based on the given nanopb pb_ostream_t. Note that + * a shallow copy will be taken. (Non-null pointers within this struct must + * remain valid for the lifetime of this Writer.) + */ + explicit Writer(const pb_ostream_t& stream) : stream_(stream) { + } + + /** + * Writes a "varint" to the output stream. + * + * This essentially wraps calls to nanopb's pb_encode_varint() method. + * + * Note that (despite the value parameter type) this works for bool, enum, + * int32, int64, uint32 and uint64 proto field types. + * + * Note: This is not expected to be called directly, but rather only + * via the other Write* methods (i.e. WriteBool, WriteInteger, etc) + * + * @param value The value to write, represented as a uint64_t. + */ + void WriteVarint(std::uint64_t value); + + pb_ostream_t stream_; +}; + +} // namespace nanopb +} // namespace firestore +} // namespace firebase + +#endif // FIRESTORE_CORE_SRC_FIREBASE_FIRESTORE_NANOPB_WRITER_H_ -- cgit v1.2.3