aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/descriptor_database.h
blob: 28f8af7a2da0089159a9fa056cc55fed4469aff7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// Interface for manipulating databases of descriptors.

#ifndef GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__
#define GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__

#include <map>
#include <string>
#include <utility>
#include <vector>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/descriptor.h>

namespace google {
namespace protobuf {

// Defined in this file.
class DescriptorDatabase;
class SimpleDescriptorDatabase;
class EncodedDescriptorDatabase;
class DescriptorPoolDatabase;
class MergedDescriptorDatabase;

// Abstract interface for a database of descriptors.
//
// This is useful if you want to create a DescriptorPool which loads
// descriptors on-demand from some sort of large database.  If the database
// is large, it may be inefficient to enumerate every .proto file inside it
// calling DescriptorPool::BuildFile() for each one.  Instead, a DescriptorPool
// can be created which wraps a DescriptorDatabase and only builds particular
// descriptors when they are needed.
class LIBPROTOBUF_EXPORT DescriptorDatabase {
 public:
  inline DescriptorDatabase() {}
  virtual ~DescriptorDatabase();

  // Find a file by file name.  Fills in in *output and returns true if found.
  // Otherwise, returns false, leaving the contents of *output undefined.
  virtual bool FindFileByName(const string& filename,
                              FileDescriptorProto* output) = 0;

  // Find the file that declares the given fully-qualified symbol name.
  // If found, fills in *output and returns true, otherwise returns false
  // and leaves *output undefined.
  virtual bool FindFileContainingSymbol(const string& symbol_name,
                                        FileDescriptorProto* output) = 0;

  // Find the file which defines an extension extending the given message type
  // with the given field number.  If found, fills in *output and returns true,
  // otherwise returns false and leaves *output undefined.  containing_type
  // must be a fully-qualified type name.
  virtual bool FindFileContainingExtension(const string& containing_type,
                                           int field_number,
                                           FileDescriptorProto* output) = 0;

  // Finds the tag numbers used by all known extensions of
  // extendee_type, and appends them to output in an undefined
  // order. This method is best-effort: it's not guaranteed that the
  // database will find all extensions, and it's not guaranteed that
  // FindFileContainingExtension will return true on all of the found
  // numbers. Returns true if the search was successful, otherwise
  // returns false and leaves output unchanged.
  //
  // This method has a default implementation that always returns
  // false.
  virtual bool FindAllExtensionNumbers(const string& /* extendee_type */,
                                       std::vector<int>* /* output */) {
    return false;
  }


  // Finds the file names and appends them to the output in an
  // undefined order. This method is best-effort: it's not guaranteed that the
  // database will find all files. Returns true if the database supports
  // searching all file names, otherwise returns false and leaves output
  // unchanged.
  //
  // This method has a default implementation that always returns
  // false.
  virtual bool FindAllFileNames(std::vector<string>* output) {
    return false;
  }

 private:
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorDatabase);
};

// A DescriptorDatabase into which you can insert files manually.
//
// FindFileContainingSymbol() is fully-implemented.  When you add a file, its
// symbols will be indexed for this purpose.  Note that the implementation
// may return false positives, but only if it isn't possible for the symbol
// to be defined in any other file.  In particular, if a file defines a symbol
// "Foo", then searching for "Foo.[anything]" will match that file.  This way,
// the database does not need to aggressively index all children of a symbol.
//
// FindFileContainingExtension() is mostly-implemented.  It works if and only
// if the original FieldDescriptorProto defining the extension has a
// fully-qualified type name in its "extendee" field (i.e. starts with a '.').
// If the extendee is a relative name, SimpleDescriptorDatabase will not
// attempt to resolve the type, so it will not know what type the extension is
// extending.  Therefore, calling FindFileContainingExtension() with the
// extension's containing type will never actually find that extension.  Note
// that this is an unlikely problem, as all FileDescriptorProtos created by the
// protocol compiler (as well as ones created by calling
// FileDescriptor::CopyTo()) will always use fully-qualified names for all
// types.  You only need to worry if you are constructing FileDescriptorProtos
// yourself, or are calling compiler::Parser directly.
class LIBPROTOBUF_EXPORT SimpleDescriptorDatabase : public DescriptorDatabase {
 public:
  SimpleDescriptorDatabase();
  ~SimpleDescriptorDatabase();

  // Adds the FileDescriptorProto to the database, making a copy.  The object
  // can be deleted after Add() returns.  Returns false if the file conflicted
  // with a file already in the database, in which case an error will have
  // been written to GOOGLE_LOG(ERROR).
  bool Add(const FileDescriptorProto& file);

  // Adds the FileDescriptorProto to the database and takes ownership of it.
  bool AddAndOwn(const FileDescriptorProto* file);

  // implements DescriptorDatabase -----------------------------------
  bool FindFileByName(const string& filename,
                      FileDescriptorProto* output);
  bool FindFileContainingSymbol(const string& symbol_name,
                                FileDescriptorProto* output);
  bool FindFileContainingExtension(const string& containing_type,
                                   int field_number,
                                   FileDescriptorProto* output);
  bool FindAllExtensionNumbers(const string& extendee_type,
                               std::vector<int>* output);

 private:
  // So that it can use DescriptorIndex.
  friend class EncodedDescriptorDatabase;

  // An index mapping file names, symbol names, and extension numbers to
  // some sort of values.
  template <typename Value>
  class DescriptorIndex {
   public:
    // Helpers to recursively add particular descriptors and all their contents
    // to the index.
    bool AddFile(const FileDescriptorProto& file,
                 Value value);
    bool AddSymbol(const string& name, Value value);
    bool AddNestedExtensions(const DescriptorProto& message_type,
                             Value value);
    bool AddExtension(const FieldDescriptorProto& field,
                      Value value);

    Value FindFile(const string& filename);
    Value FindSymbol(const string& name);
    Value FindExtension(const string& containing_type, int field_number);
    bool FindAllExtensionNumbers(const string& containing_type,
                                 std::vector<int>* output);

   private:
    std::map<string, Value> by_name_;
    std::map<string, Value> by_symbol_;
    std::map<std::pair<string, int>, Value> by_extension_;

    // Invariant:  The by_symbol_ map does not contain any symbols which are
    // prefixes of other symbols in the map.  For example, "foo.bar" is a
    // prefix of "foo.bar.baz" (but is not a prefix of "foo.barbaz").
    //
    // This invariant is important because it means that given a symbol name,
    // we can find a key in the map which is a prefix of the symbol in O(lg n)
    // time, and we know that there is at most one such key.
    //
    // The prefix lookup algorithm works like so:
    // 1) Find the last key in the map which is less than or equal to the
    //    search key.
    // 2) If the found key is a prefix of the search key, then return it.
    //    Otherwise, there is no match.
    //
    // I am sure this algorithm has been described elsewhere, but since I
    // wasn't able to find it quickly I will instead prove that it works
    // myself.  The key to the algorithm is that if a match exists, step (1)
    // will find it.  Proof:
    // 1) Define the "search key" to be the key we are looking for, the "found
    //    key" to be the key found in step (1), and the "match key" to be the
    //    key which actually matches the serach key (i.e. the key we're trying
    //    to find).
    // 2) The found key must be less than or equal to the search key by
    //    definition.
    // 3) The match key must also be less than or equal to the search key
    //    (because it is a prefix).
    // 4) The match key cannot be greater than the found key, because if it
    //    were, then step (1) of the algorithm would have returned the match
    //    key instead (since it finds the *greatest* key which is less than or
    //    equal to the search key).
    // 5) Therefore, the found key must be between the match key and the search
    //    key, inclusive.
    // 6) Since the search key must be a sub-symbol of the match key, if it is
    //    not equal to the match key, then search_key[match_key.size()] must
    //    be '.'.
    // 7) Since '.' sorts before any other character that is valid in a symbol
    //    name, then if the found key is not equal to the match key, then
    //    found_key[match_key.size()] must also be '.', because any other value
    //    would make it sort after the search key.
    // 8) Therefore, if the found key is not equal to the match key, then the
    //    found key must be a sub-symbol of the match key.  However, this would
    //    contradict our map invariant which says that no symbol in the map is
    //    a sub-symbol of any other.
    // 9) Therefore, the found key must match the match key.
    //
    // The above proof assumes the match key exists.  In the case that the
    // match key does not exist, then step (1) will return some other symbol.
    // That symbol cannot be a super-symbol of the search key since if it were,
    // then it would be a match, and we're assuming the match key doesn't exist.
    // Therefore, step 2 will correctly return no match.

    // Find the last entry in the by_symbol_ map whose key is less than or
    // equal to the given name.
    typename std::map<string, Value>::iterator FindLastLessOrEqual(
        const string& name);

    // True if either the arguments are equal or super_symbol identifies a
    // parent symbol of sub_symbol (e.g. "foo.bar" is a parent of
    // "foo.bar.baz", but not a parent of "foo.barbaz").
    bool IsSubSymbol(const string& sub_symbol, const string& super_symbol);

    // Returns true if and only if all characters in the name are alphanumerics,
    // underscores, or periods.
    bool ValidateSymbolName(const string& name);
  };


  DescriptorIndex<const FileDescriptorProto*> index_;
  std::vector<const FileDescriptorProto*> files_to_delete_;

  // If file is non-NULL, copy it into *output and return true, otherwise
  // return false.
  bool MaybeCopy(const FileDescriptorProto* file,
                 FileDescriptorProto* output);

  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(SimpleDescriptorDatabase);
};

// Very similar to SimpleDescriptorDatabase, but stores all the descriptors
// as raw bytes and generally tries to use as little memory as possible.
//
// The same caveats regarding FindFileContainingExtension() apply as with
// SimpleDescriptorDatabase.
class LIBPROTOBUF_EXPORT EncodedDescriptorDatabase : public DescriptorDatabase {
 public:
  EncodedDescriptorDatabase();
  ~EncodedDescriptorDatabase();

  // Adds the FileDescriptorProto to the database.  The descriptor is provided
  // in encoded form.  The database does not make a copy of the bytes, nor
  // does it take ownership; it's up to the caller to make sure the bytes
  // remain valid for the life of the database.  Returns false and logs an error
  // if the bytes are not a valid FileDescriptorProto or if the file conflicted
  // with a file already in the database.
  bool Add(const void* encoded_file_descriptor, int size);

  // Like Add(), but makes a copy of the data, so that the caller does not
  // need to keep it around.
  bool AddCopy(const void* encoded_file_descriptor, int size);

  // Like FindFileContainingSymbol but returns only the name of the file.
  bool FindNameOfFileContainingSymbol(const string& symbol_name,
                                      string* output);

  // implements DescriptorDatabase -----------------------------------
  bool FindFileByName(const string& filename,
                      FileDescriptorProto* output);
  bool FindFileContainingSymbol(const string& symbol_name,
                                FileDescriptorProto* output);
  bool FindFileContainingExtension(const string& containing_type,
                                   int field_number,
                                   FileDescriptorProto* output);
  bool FindAllExtensionNumbers(const string& extendee_type,
                               std::vector<int>* output);

 private:
  SimpleDescriptorDatabase::DescriptorIndex<std::pair<const void*, int> >
      index_;
  std::vector<void*> files_to_delete_;

  // If encoded_file.first is non-NULL, parse the data into *output and return
  // true, otherwise return false.
  bool MaybeParse(std::pair<const void*, int> encoded_file,
                  FileDescriptorProto* output);

  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EncodedDescriptorDatabase);
};

// A DescriptorDatabase that fetches files from a given pool.
class LIBPROTOBUF_EXPORT DescriptorPoolDatabase : public DescriptorDatabase {
 public:
  explicit DescriptorPoolDatabase(const DescriptorPool& pool);
  ~DescriptorPoolDatabase();

  // implements DescriptorDatabase -----------------------------------
  bool FindFileByName(const string& filename,
                      FileDescriptorProto* output);
  bool FindFileContainingSymbol(const string& symbol_name,
                                FileDescriptorProto* output);
  bool FindFileContainingExtension(const string& containing_type,
                                   int field_number,
                                   FileDescriptorProto* output);
  bool FindAllExtensionNumbers(const string& extendee_type,
                               std::vector<int>* output);

 private:
  const DescriptorPool& pool_;
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorPoolDatabase);
};

// A DescriptorDatabase that wraps two or more others.  It first searches the
// first database and, if that fails, tries the second, and so on.
class LIBPROTOBUF_EXPORT MergedDescriptorDatabase : public DescriptorDatabase {
 public:
  // Merge just two databases.  The sources remain property of the caller.
  MergedDescriptorDatabase(DescriptorDatabase* source1,
                           DescriptorDatabase* source2);
  // Merge more than two databases.  The sources remain property of the caller.
  // The vector may be deleted after the constructor returns but the
  // DescriptorDatabases need to stick around.
  explicit MergedDescriptorDatabase(
      const std::vector<DescriptorDatabase*>& sources);
  ~MergedDescriptorDatabase();

  // implements DescriptorDatabase -----------------------------------
  bool FindFileByName(const string& filename,
                      FileDescriptorProto* output);
  bool FindFileContainingSymbol(const string& symbol_name,
                                FileDescriptorProto* output);
  bool FindFileContainingExtension(const string& containing_type,
                                   int field_number,
                                   FileDescriptorProto* output);
  // Merges the results of calling all databases. Returns true iff any
  // of the databases returned true.
  bool FindAllExtensionNumbers(const string& extendee_type,
                               std::vector<int>* output);


 private:
  std::vector<DescriptorDatabase*> sources_;
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MergedDescriptorDatabase);
};

}  // namespace protobuf

}  // namespace google
#endif  // GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__