summaryrefslogtreecommitdiff
path: root/absl/log/internal/proto.h
blob: c8d14acc4715124cc3629ed15b3360e9e1e5dcfa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// -----------------------------------------------------------------------------
// File: internal/proto.h
// -----------------------------------------------------------------------------
//
// Declares functions for serializing and deserializing data to and from memory
// buffers in protocol buffer wire format.  This library takes no steps to
// ensure that the encoded data matches with any message specification.

#ifndef ABSL_LOG_INTERNAL_PROTO_H_
#define ABSL_LOG_INTERNAL_PROTO_H_

#include <cstddef>
#include <cstdint>
#include <limits>

#include "absl/base/attributes.h"
#include "absl/base/casts.h"
#include "absl/base/config.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace log_internal {

// absl::Span<char> represents a view into the available space in a mutable
// buffer during encoding.  Encoding functions shrink the span as they go so
// that the same view can be passed to a series of Encode functions.  If the
// data do not fit, nothing is encoded, the view is set to size zero (so that
// all subsequent encode calls fail), and false is returned.  Otherwise true is
// returned.

// In particular, attempting to encode a series of data into an insufficient
// buffer has consistent and efficient behavior without any caller-side error
// checking.  Individual values will be encoded in their entirety or not at all
// (unless one of the `Truncate` functions is used).  Once a value is omitted
// because it does not fit, no subsequent values will be encoded to preserve
// ordering; the decoded sequence will be a prefix of the original sequence.

// There are two ways to encode a message-typed field:
//
// * Construct its contents in a separate buffer and use `EncodeBytes` to copy
//   it into the primary buffer with type, tag, and length.
// * Use `EncodeMessageStart` to write type and tag fields and reserve space for
//   the length field, then encode the contents directly into the buffer, then
//   use `EncodeMessageLength` to write the actual length into the reserved
//   bytes.  This works fine if the actual length takes fewer bytes to encode
//   than were reserved, although you don't get your extra bytes back.
//   This approach will always produce a valid encoding, but your protocol may
//   require that the whole message field by omitted if the buffer is too small
//   to contain all desired subfields.  In this case, operate on a copy of the
//   buffer view and assign back only if everything fit, i.e. if the last
//   `Encode` call returned true.

// Encodes the specified integer as a varint field and returns true if it fits.
// Used for int32_t, int64_t, uint32_t, uint64_t, bool, and enum field types.
// Consumes up to kMaxVarintSize * 2 bytes (20).
bool EncodeVarint(uint64_t tag, uint64_t value, absl::Span<char> *buf);
inline bool EncodeVarint(uint64_t tag, int64_t value, absl::Span<char> *buf) {
  return EncodeVarint(tag, static_cast<uint64_t>(value), buf);
}
inline bool EncodeVarint(uint64_t tag, uint32_t value, absl::Span<char> *buf) {
  return EncodeVarint(tag, static_cast<uint64_t>(value), buf);
}
inline bool EncodeVarint(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  return EncodeVarint(tag, static_cast<uint64_t>(value), buf);
}

// Encodes the specified integer as a varint field using ZigZag encoding and
// returns true if it fits.
// Used for sint32 and sint64 field types.
// Consumes up to kMaxVarintSize * 2 bytes (20).
inline bool EncodeVarintZigZag(uint64_t tag, int64_t value,
                               absl::Span<char> *buf) {
  if (value < 0)
    return EncodeVarint(tag, 2 * static_cast<uint64_t>(-(value + 1)) + 1, buf);
  return EncodeVarint(tag, 2 * static_cast<uint64_t>(value), buf);
}

// Encodes the specified integer as a 64-bit field and returns true if it fits.
// Used for fixed64 and sfixed64 field types.
// Consumes up to kMaxVarintSize + 8 bytes (18).
bool Encode64Bit(uint64_t tag, uint64_t value, absl::Span<char> *buf);
inline bool Encode64Bit(uint64_t tag, int64_t value, absl::Span<char> *buf) {
  return Encode64Bit(tag, static_cast<uint64_t>(value), buf);
}
inline bool Encode64Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf) {
  return Encode64Bit(tag, static_cast<uint64_t>(value), buf);
}
inline bool Encode64Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  return Encode64Bit(tag, static_cast<uint64_t>(value), buf);
}

// Encodes the specified double as a 64-bit field and returns true if it fits.
// Used for double field type.
// Consumes up to kMaxVarintSize + 8 bytes (18).
inline bool EncodeDouble(uint64_t tag, double value, absl::Span<char> *buf) {
  return Encode64Bit(tag, absl::bit_cast<uint64_t>(value), buf);
}

// Encodes the specified integer as a 32-bit field and returns true if it fits.
// Used for fixed32 and sfixed32 field types.
// Consumes up to kMaxVarintSize + 4 bytes (14).
bool Encode32Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf);
inline bool Encode32Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  return Encode32Bit(tag, static_cast<uint32_t>(value), buf);
}

// Encodes the specified float as a 32-bit field and returns true if it fits.
// Used for float field type.
// Consumes up to kMaxVarintSize + 4 bytes (14).
inline bool EncodeFloat(uint64_t tag, float value, absl::Span<char> *buf) {
  return Encode32Bit(tag, absl::bit_cast<uint32_t>(value), buf);
}

// Encodes the specified bytes as a length-delimited field and returns true if
// they fit.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
bool EncodeBytes(uint64_t tag, absl::Span<const char> value,
                 absl::Span<char> *buf);

// Encodes as many of the specified bytes as will fit as a length-delimited
// field and returns true as long as the field header (`tag_type` and `length`)
// fits.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
bool EncodeBytesTruncate(uint64_t tag, absl::Span<const char> value,
                         absl::Span<char> *buf);

// Encodes the specified string as a length-delimited field and returns true if
// it fits.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
inline bool EncodeString(uint64_t tag, absl::string_view value,
                         absl::Span<char> *buf) {
  return EncodeBytes(tag, value, buf);
}

// Encodes as much of the specified string as will fit as a length-delimited
// field and returns true as long as the field header (`tag_type` and `length`)
// fits.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
inline bool EncodeStringTruncate(uint64_t tag, absl::string_view value,
                                 absl::Span<char> *buf) {
  return EncodeBytesTruncate(tag, value, buf);
}

// Encodes the header for a length-delimited field containing up to `max_size`
// bytes or the number remaining in the buffer, whichever is less.  If the
// header fits, a non-nullptr `Span` is returned; this must be passed to
// `EncodeMessageLength` after all contents are encoded to finalize the length
// field.  If the header does not fit, a nullptr `Span` is returned which is
// safe to pass to `EncodeMessageLength` but need not be.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to kMaxVarintSize * 2 bytes (20).
ABSL_MUST_USE_RESULT absl::Span<char> EncodeMessageStart(uint64_t tag,
                                                         uint64_t max_size,
                                                         absl::Span<char> *buf);

// Finalizes the length field in `msg` so that it encompasses all data encoded
// since the call to `EncodeMessageStart` which returned `msg`.  Does nothing if
// `msg` is a `nullptr` `Span`.
void EncodeMessageLength(absl::Span<char> msg, const absl::Span<char> *buf);

enum class WireType : uint64_t {
  kVarint = 0,
  k64Bit = 1,
  kLengthDelimited = 2,
  k32Bit = 5,
};

constexpr size_t VarintSize(uint64_t value) {
  return value < 128 ? 1 : 1 + VarintSize(value >> 7);
}
constexpr size_t MinVarintSize() {
  return VarintSize((std::numeric_limits<uint64_t>::min)());
}
constexpr size_t MaxVarintSize() {
  return VarintSize((std::numeric_limits<uint64_t>::max)());
}

constexpr uint64_t MaxVarintForSize(size_t size) {
  return size >= 10 ? (std::numeric_limits<uint64_t>::max)()
                    : (static_cast<uint64_t>(1) << size * 7) - 1;
}

// `BufferSizeFor` returns a number of bytes guaranteed to be sufficient to
// store encoded fields of the specified WireTypes regardless of tag numbers and
// data values.  This only makes sense for `WireType::kLengthDelimited` if you
// add in the length of the contents yourself, e.g. for string and bytes fields
// by adding the lengths of any encoded strings to the return value or for
// submessage fields by enumerating the fields you may encode into their
// contents.
constexpr size_t BufferSizeFor() { return 0; }
template <typename... T>
constexpr size_t BufferSizeFor(WireType type, T... tail) {
  // tag_type + data + ...
  return MaxVarintSize() +
         (type == WireType::kVarint ? MaxVarintSize() :              //
              type == WireType::k64Bit ? 8 :                         //
                  type == WireType::k32Bit ? 4 : MaxVarintSize()) +  //
         BufferSizeFor(tail...);
}

// absl::Span<const char> represents a view into the un-processed space in a
// buffer during decoding.  Decoding functions shrink the span as they go so
// that the same view can be decoded iteratively until all data are processed.
// In general, if the buffer is exhausted but additional bytes are expected by
// the decoder, it will return values as if the additional bytes were zeros.
// Length-delimited fields are an exception - if the encoded length field
// indicates more data bytes than are available in the buffer, the `bytes_value`
// and `string_value` accessors will return truncated views.

class ProtoField final {
 public:
  // Consumes bytes from `data` and returns true if there were any bytes to
  // decode.
  bool DecodeFrom(absl::Span<const char> *data);
  uint64_t tag() const { return tag_; }
  WireType type() const { return type_; }

  // These value accessors will return nonsense if the data were not encoded in
  // the corresponding wiretype from the corresponding C++ (or other language)
  // type.

  double double_value() const { return absl::bit_cast<double>(value_); }
  float float_value() const {
    return absl::bit_cast<float>(static_cast<uint32_t>(value_));
  }
  int32_t int32_value() const { return static_cast<int32_t>(value_); }
  int64_t int64_value() const { return static_cast<int64_t>(value_); }
  int32_t sint32_value() const {
    if (value_ % 2) return static_cast<int32_t>(0 - ((value_ - 1) / 2) - 1);
    return static_cast<int32_t>(value_ / 2);
  }
  int64_t sint64_value() const {
    if (value_ % 2) return 0 - ((value_ - 1) / 2) - 1;
    return value_ / 2;
  }
  uint32_t uint32_value() const { return static_cast<uint32_t>(value_); }
  uint64_t uint64_value() const { return value_; }
  bool bool_value() const { return value_ != 0; }
  // To decode an enum, call int32_value() and cast to the appropriate type.
  // Note that the official C++ proto compiler treats enum fields with values
  // that do not correspond to a defined enumerator as unknown fields.

  // To decode fields within a submessage field, call
  // `DecodeNextField(field.BytesValue())`.
  absl::Span<const char> bytes_value() const { return data_; }
  absl::string_view string_value() const {
    const auto data = bytes_value();
    return absl::string_view(data.data(), data.size());
  }
  // Returns the encoded length of a length-delimited field.  This equals
  // `bytes_value().size()` except when the latter has been truncated due to
  // buffer underrun.
  uint64_t encoded_length() const { return value_; }

 private:
  uint64_t tag_;
  WireType type_;
  // For `kTypeVarint`, `kType64Bit`, and `kType32Bit`, holds the decoded value.
  // For `kTypeLengthDelimited`, holds the decoded length.
  uint64_t value_;
  absl::Span<const char> data_;
};

}  // namespace log_internal
ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_LOG_INTERNAL_PROTO_H_