From 5a3405c51a09999857e3450aa2e1be42e3fd325a Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Mon, 6 Feb 2017 12:40:51 -0800 Subject: Update upb for php. (#2662) --- php/ext/google/protobuf/def.c | 4 +- php/ext/google/protobuf/encode_decode.c | 2 +- php/ext/google/protobuf/upb.c | 4561 +++++++++++++++++++------------ php/ext/google/protobuf/upb.h | 896 +++--- 4 files changed, 3386 insertions(+), 2077 deletions(-) (limited to 'php') diff --git a/php/ext/google/protobuf/def.c b/php/ext/google/protobuf/def.c index 6ea2cc93..0ce39ec3 100644 --- a/php/ext/google/protobuf/def.c +++ b/php/ext/google/protobuf/def.c @@ -209,14 +209,14 @@ static void init_generated_pool_once(TSRMLS_D) { static void descriptor_pool_init_c_instance(DescriptorPool *pool TSRMLS_DC) { zend_object_std_init(&pool->std, descriptor_pool_type TSRMLS_CC); - pool->symtab = upb_symtab_new(&pool->symtab); + pool->symtab = upb_symtab_new(); ALLOC_HASHTABLE(pool->pending_list); zend_hash_init(pool->pending_list, 1, NULL, ZVAL_PTR_DTOR, 0); } static void descriptor_pool_free_c(DescriptorPool *pool TSRMLS_DC) { - upb_symtab_unref(pool->symtab, &pool->symtab); + upb_symtab_free(pool->symtab); zend_hash_destroy(pool->pending_list); FREE_HASHTABLE(pool->pending_list); diff --git a/php/ext/google/protobuf/encode_decode.c b/php/ext/google/protobuf/encode_decode.c index eafe1ae8..32a0fbea 100644 --- a/php/ext/google/protobuf/encode_decode.c +++ b/php/ext/google/protobuf/encode_decode.c @@ -675,7 +675,7 @@ static void add_handlers_for_singular_field(upb_handlers *h, case UPB_TYPE_INT64: case UPB_TYPE_UINT64: case UPB_TYPE_DOUBLE: - upb_shim_set(h, f, offset, -1); + upb_msg_setscalarhandler(h, f, offset, -1); break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: { diff --git a/php/ext/google/protobuf/upb.c b/php/ext/google/protobuf/upb.c index 98daafc0..e0c56f8e 100644 --- a/php/ext/google/protobuf/upb.c +++ b/php/ext/google/protobuf/upb.c @@ -1,33 +1,4 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +// Amalgamated source file #include "upb.h" @@ -150,21 +121,6 @@ bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) { const upb_filedef *upb_def_file(const upb_def *d) { return d->file; } -upb_def *upb_def_dup(const upb_def *def, const void *o) { - switch (def->type) { - case UPB_DEF_MSG: - return upb_msgdef_upcast_mutable( - upb_msgdef_dup(upb_downcast_msgdef(def), o)); - case UPB_DEF_FIELD: - return upb_fielddef_upcast_mutable( - upb_fielddef_dup(upb_downcast_fielddef(def), o)); - case UPB_DEF_ENUM: - return upb_enumdef_upcast_mutable( - upb_enumdef_dup(upb_downcast_enumdef(def), o)); - default: UPB_ASSERT(false); return NULL; - } -} - static bool upb_def_init(upb_def *def, upb_deftype_t type, const struct upb_refcounted_vtbl *vtbl, const void *owner) { @@ -311,6 +267,7 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the * lowest indexes, but we do not publicly guarantee this. */ upb_msg_field_iter j; + upb_msg_oneof_iter k; int i; uint32_t selector; int n = upb_msgdef_numfields(m); @@ -395,6 +352,13 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { #undef TRY #endif + for(upb_msg_oneof_begin(&k, m), i = 0; + !upb_msg_oneof_done(&k); + upb_msg_oneof_next(&k), i++) { + upb_oneofdef *o = upb_msg_iter_oneof(&k); + o->index = i; + } + upb_gfree(fields); return true; } @@ -512,21 +476,6 @@ err2: return NULL; } -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) { - upb_enum_iter i; - upb_enumdef *new_e = upb_enumdef_new(owner); - if (!new_e) return NULL; - for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { - bool success = upb_enumdef_addval( - new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL); - if (!success) { - upb_enumdef_unref(new_e, owner); - return NULL; - } - } - return new_e; -} - bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) { upb_def *d = upb_enumdef_upcast_mutable(e); return upb_def_freeze(&d, 1, status); @@ -761,7 +710,8 @@ upb_fielddef *upb_fielddef_new(const void *o) { return f; } -upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { +static upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, + const void *owner) { const char *srcname; upb_fielddef *newf = upb_fielddef_new(owner); if (!newf) return NULL; @@ -1476,7 +1426,9 @@ err2: return NULL; } -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { +static upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner); + +static upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { bool ok; upb_msg_field_iter i; upb_msg_oneof_iter o; @@ -1812,7 +1764,8 @@ err2: return NULL; } -upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) { +static upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, + const void *owner) { bool ok; upb_oneof_iter i; upb_oneofdef *newo = upb_oneofdef_new(owner); @@ -1861,6 +1814,10 @@ int upb_oneofdef_numfields(const upb_oneofdef *o) { return upb_strtable_count(&o->ntof); } +uint32_t upb_oneofdef_index(const upb_oneofdef *o) { + return o->index; +} + bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f, const void *ref_donor, upb_status *s) { @@ -2156,495 +2113,843 @@ bool upb_filedef_adddep(upb_filedef *f, const upb_filedef *dep) { return false; } } -/* -** TODO(haberman): it's unclear whether a lot of the consistency checks should -** UPB_ASSERT() or return false. -*/ - - -#include - -static void *upb_calloc(size_t size) { - void *mem = upb_gmalloc(size); - if (mem) { - memset(mem, 0, size); +void upb_symtab_free(upb_symtab *s) { + upb_strtable_iter i; + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_def_unref(def, s); } - return mem; + upb_strtable_uninit(&s->symtab); + upb_gfree(s); } -/* Defined for the sole purpose of having a unique pointer value for - * UPB_NO_CLOSURE. */ -char _upb_noclosure; - -static void freehandlers(upb_refcounted *r) { - upb_handlers *h = (upb_handlers*)r; - - upb_inttable_iter i; - upb_inttable_begin(&i, &h->cleanup_); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - void *val = (void*)upb_inttable_iter_key(&i); - upb_value func_val = upb_inttable_iter_value(&i); - upb_handlerfree *func = upb_value_getfptr(func_val); - func(val); +upb_symtab *upb_symtab_new() { + upb_symtab *s = upb_gmalloc(sizeof(*s)); + if (!s) { + return NULL; } - upb_inttable_uninit(&h->cleanup_); - upb_msgdef_unref(h->msg, h); - upb_gfree(h->sub); - upb_gfree(h); + upb_strtable_init(&s->symtab, UPB_CTYPE_PTR); + return s; } -static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit, - void *closure) { - const upb_handlers *h = (const upb_handlers*)r; - upb_msg_field_iter i; - for(upb_msg_field_begin(&i, h->msg); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - const upb_handlers *sub; - if (!upb_fielddef_issubmsg(f)) continue; - sub = upb_handlers_getsubhandlers(h, f); - if (sub) visit(r, upb_handlers_upcast(sub), closure); - } +const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) { + upb_value v; + upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ? + upb_value_getptr(v) : NULL; + return ret; } -static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers}; - -typedef struct { - upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */ - upb_handlers_callback *callback; - const void *closure; -} dfs_state; - -/* TODO(haberman): discard upb_handlers* objects that do not actually have any - * handlers set and cannot reach any upb_handlers* object that does. This is - * slightly tricky to do correctly. */ -static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, - dfs_state *s) { - upb_msg_field_iter i; - upb_handlers *h = upb_handlers_new(m, owner); - if (!h) return NULL; - if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom; - - s->callback(s->closure, h); - - /* For each submessage field, get or create a handlers object and set it as - * the subhandlers. */ - for(upb_msg_field_begin(&i, m); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - const upb_msgdef *subdef; - upb_value subm_ent; - - if (!upb_fielddef_issubmsg(f)) continue; - - subdef = upb_downcast_msgdef(upb_fielddef_subdef(f)); - if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) { - upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent)); - } else { - upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s); - if (!sub_mh) goto oom; - upb_handlers_setsubhandlers(h, f, sub_mh); - upb_handlers_unref(sub_mh, &sub_mh); - } - } - return h; - -oom: - upb_handlers_unref(h, owner); - return NULL; +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { + upb_value v; + upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ? + upb_value_getptr(v) : NULL; + return def ? upb_dyncast_msgdef(def) : NULL; } -/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the - * subhandlers for this submessage field. */ -#define SUBH(h, selector) (h->sub[selector]) - -/* The selector for a submessage field is the field index. */ -#define SUBH_F(h, f) SUBH(h, f->index_) - -static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - upb_selector_t sel; - UPB_ASSERT(!upb_handlers_isfrozen(h)); - if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) { - upb_status_seterrf( - &h->status_, "type mismatch: field %s does not belong to message %s", - upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h))); - return -1; - } - if (!upb_handlers_getselector(f, type, &sel)) { - upb_status_seterrf( - &h->status_, - "type mismatch: cannot register handler type %d for field %s", - type, upb_fielddef_name(f)); - return -1; - } - return sel; +const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { + upb_value v; + upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ? + upb_value_getptr(v) : NULL; + return def ? upb_dyncast_enumdef(def) : NULL; } -static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - int32_t sel = trygetsel(h, f, type); - UPB_ASSERT(sel >= 0); - return sel; +/* Given a symbol and the base symbol inside which it is defined, find the + * symbol's definition in t. */ +static upb_def *upb_resolvename(const upb_strtable *t, + const char *base, const char *sym) { + if(strlen(sym) == 0) return NULL; + if(sym[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + upb_value v; + return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL; + } else { + /* Remove components from base until we find an entry or run out. + * TODO: This branch is totally broken, but currently not used. */ + (void)base; + UPB_ASSERT(false); + return NULL; + } } -static const void **returntype(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_; +const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, + const char *sym) { + upb_def *ret = upb_resolvename(&s->symtab, base, sym); + return ret; } -static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f, - upb_handlertype_t type, upb_func *func, - upb_handlerattr *attr) { - upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER; - const void *closure_type; - const void **context_closure_type; - - UPB_ASSERT(!upb_handlers_isfrozen(h)); +/* TODO(haberman): we need a lot more testing of error conditions. */ +static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n, + void *ref_donor, upb_refcounted *freeze_also, + upb_status *status) { + size_t i; + size_t add_n; + size_t freeze_n; + upb_strtable_iter iter; + upb_refcounted **add_objs = NULL; + upb_def **add_defs = NULL; + size_t add_objs_size; + upb_strtable addtab; - if (sel < 0) { - upb_status_seterrmsg(&h->status_, - "incorrect handler type for this field."); - return false; + if (n == 0 && !freeze_also) { + return true; } - if (h->table[sel].func) { - upb_status_seterrmsg(&h->status_, - "cannot change handler once it has been set."); + if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) { + upb_status_seterrmsg(status, "out of memory"); return false; } - if (attr) { - set_attr = *attr; - } + /* Add new defs to our "add" set. */ + for (i = 0; i < n; i++) { + upb_def *def = defs[i]; + const char *fullname; + upb_fielddef *f; - /* Check that the given closure type matches the closure type that has been - * established for this context (if any). */ - closure_type = upb_handlerattr_closuretype(&set_attr); + if (upb_def_isfrozen(def)) { + upb_status_seterrmsg(status, "added defs must be mutable"); + goto err; + } + UPB_ASSERT(!upb_def_isfrozen(def)); + fullname = upb_def_fullname(def); + if (!fullname) { + upb_status_seterrmsg( + status, "Anonymous defs cannot be added to a symtab"); + goto err; + } - if (type == UPB_HANDLER_STRING) { - context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR); - } else if (f && upb_fielddef_isseq(f) && - type != UPB_HANDLER_STARTSEQ && - type != UPB_HANDLER_ENDSEQ) { - context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ); - } else { - context_closure_type = &h->top_closure_type; - } + f = upb_dyncast_fielddef_mutable(def); - if (closure_type && *context_closure_type && - closure_type != *context_closure_type) { - /* TODO(haberman): better message for debugging. */ if (f) { - upb_status_seterrf(&h->status_, - "closure type does not match for field %s", - upb_fielddef_name(f)); - } else { - upb_status_seterrmsg( - &h->status_, "closure type does not match for message-level handler"); + if (!upb_fielddef_containingtypename(f)) { + upb_status_seterrmsg(status, + "Standalone fielddefs must have a containing type " + "(extendee) name set"); + goto err; + } + } else { + if (upb_strtable_lookup(&addtab, fullname, NULL)) { + upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); + goto err; + } + if (upb_strtable_lookup(&s->symtab, fullname, NULL)) { + upb_status_seterrf(status, "Symtab already has a def named '%s'", + fullname); + goto err; + } + upb_def_donateref(def, ref_donor, s); + if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) + goto oom_err; + def->came_from_user = true; } - return false; } - if (closure_type) - *context_closure_type = closure_type; + /* Add standalone fielddefs (ie. extensions) to the appropriate messages. + * If the appropriate message only exists in the existing symtab, duplicate + * it so we have a mutable copy we can add the fields to. */ + for (i = 0; i < n; i++) { + upb_def *def = defs[i]; + upb_fielddef *f = upb_dyncast_fielddef_mutable(def); + const char *msgname; + upb_value v; + upb_msgdef *m; - /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer - * matches any pre-existing expectations about what type is expected. */ - if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) { - const void *return_type = upb_handlerattr_returnclosuretype(&set_attr); - const void *table_return_type = - upb_handlerattr_returnclosuretype(&h->table[sel].attr); - if (return_type && table_return_type && return_type != table_return_type) { - upb_status_seterrmsg(&h->status_, "closure return type does not match"); - return false; + if (!f) continue; + msgname = upb_fielddef_containingtypename(f); + /* We validated this earlier in this function. */ + UPB_ASSERT(msgname); + + /* If the extendee name is absolutely qualified, move past the initial ".". + * TODO(haberman): it is not obvious what it would mean if this was not + * absolutely qualified. */ + if (msgname[0] == '.') { + msgname++; } - if (table_return_type && !return_type) - upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type); + if (upb_strtable_lookup(&addtab, msgname, &v)) { + /* Extendee is in the set of defs the user asked us to add. */ + m = upb_value_getptr(v); + } else { + /* Need to find and dup the extendee from the existing symtab. */ + const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname); + if (!frozen_m) { + upb_status_seterrf(status, + "Tried to extend message %s that does not exist " + "in this SymbolTable.", + msgname); + goto err; + } + m = upb_msgdef_dup(frozen_m, s); + if (!m) goto oom_err; + if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) { + upb_msgdef_unref(m, s); + goto oom_err; + } + } + + if (!upb_msgdef_addfield(m, f, ref_donor, status)) { + goto err; + } } - h->table[sel].func = (upb_func*)func; - h->table[sel].attr = set_attr; - return true; -} + /* Now using the table, resolve symbolic references for subdefs. */ + upb_strtable_begin(&iter, &addtab); + for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { + const char *base; + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); + upb_msgdef *m = upb_dyncast_msgdef_mutable(def); + upb_msg_field_iter j; -/* Returns the effective closure type for this handler (which will propagate - * from outer frames if this frame has no START* handler). Not implemented for - * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is - * the effective closure type is unspecified (either no handler was registered - * to specify it or the handler that was registered did not specify the closure - * type). */ -const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - const void *ret; - upb_selector_t sel; + if (!m) continue; + /* Type names are resolved relative to the message in which they appear. */ + base = upb_msgdef_fullname(m); - UPB_ASSERT(type != UPB_HANDLER_STRING); - ret = h->top_closure_type; + for(upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + const char *name = upb_fielddef_subdefname(f); + if (name && !upb_fielddef_subdef(f)) { + /* Try the lookup in the current set of to-be-added defs first. If not + * there, try existing defs. */ + upb_def *subdef = upb_resolvename(&addtab, base, name); + if (subdef == NULL) { + subdef = upb_resolvename(&s->symtab, base, name); + } + if (subdef == NULL) { + upb_status_seterrf( + status, "couldn't resolve name '%s' in message '%s'", name, base); + goto err; + } else if (!upb_fielddef_setsubdef(f, subdef, status)) { + goto err; + } + } + } + } - if (upb_fielddef_isseq(f) && - type != UPB_HANDLER_STARTSEQ && - type != UPB_HANDLER_ENDSEQ && - h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) { - ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr); + /* We need an array of the defs in addtab, for passing to + * upb_refcounted_freeze(). */ + add_objs_size = upb_strtable_count(&addtab); + if (freeze_also) { + add_objs_size++; } - if (type == UPB_HANDLER_STRING && - h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) { - ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr); + add_defs = upb_gmalloc(sizeof(void*) * add_objs_size); + if (add_defs == NULL) goto oom_err; + upb_strtable_begin(&iter, &addtab); + for (add_n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { + add_defs[add_n++] = upb_value_getptr(upb_strtable_iter_value(&iter)); } - /* The effective type of the submessage; not used yet. - * if (type == SUBMESSAGE && - * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) { - * ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr); - * } */ + /* Validate defs. */ + if (!_upb_def_validate(add_defs, add_n, status)) { + goto err; + } - return ret; -} + /* Cheat a little and give the array a new type. + * This is probably undefined behavior, but this code will be deleted soon. */ + add_objs = (upb_refcounted**)add_defs; -/* Checks whether the START* handler specified by f & type is missing even - * though it is required to convert the established type of an outer frame - * ("closure_type") into the established type of an inner frame (represented in - * the return closure type of this handler's attr. */ -bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type, - upb_status *status) { - const void *closure_type; - const upb_handlerattr *attr; - const void *return_closure_type; + freeze_n = add_n; + if (freeze_also) { + add_objs[freeze_n++] = freeze_also; + } - upb_selector_t sel = handlers_getsel(h, f, type); - if (h->table[sel].func) return true; - closure_type = effective_closure_type(h, f, type); - attr = &h->table[sel].attr; - return_closure_type = upb_handlerattr_returnclosuretype(attr); - if (closure_type && return_closure_type && - closure_type != return_closure_type) { - upb_status_seterrf(status, - "expected start handler to return sub type for field %f", - upb_fielddef_name(f)); - return false; + if (!upb_refcounted_freeze(add_objs, freeze_n, status, + UPB_MAX_MESSAGE_DEPTH * 2)) { + goto err; } - return true; -} -/* Public interface ***********************************************************/ + /* This must be delayed until all errors have been detected, since error + * recovery code uses this table to cleanup defs. */ + upb_strtable_uninit(&addtab); -upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) { - int extra; - upb_handlers *h; + /* TODO(haberman) we don't properly handle errors after this point (like + * OOM in upb_strtable_insert() below). */ + for (i = 0; i < add_n; i++) { + upb_def *def = (upb_def*)add_objs[i]; + const char *name = upb_def_fullname(def); + upb_value v; + bool success; - UPB_ASSERT(upb_msgdef_isfrozen(md)); + if (upb_strtable_remove(&s->symtab, name, &v)) { + const upb_def *def = upb_value_getptr(v); + upb_def_unref(def, s); + } + success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); + UPB_ASSERT(success == true); + } + upb_gfree(add_defs); + return true; - extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1); - h = upb_calloc(sizeof(*h) + extra); - if (!h) return NULL; +oom_err: + upb_status_seterrmsg(status, "out of memory"); +err: { + /* We need to donate the refs back. */ + upb_strtable_begin(&iter, &addtab); + for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); + upb_def_donateref(def, s, ref_donor); + } + } + upb_strtable_uninit(&addtab); + upb_gfree(add_defs); + UPB_ASSERT(!upb_ok(status)); + return false; +} - h->msg = md; - upb_msgdef_ref(h->msg, h); - upb_status_clear(&h->status_); +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n, + void *ref_donor, upb_status *status) { + return symtab_add(s, defs, n, ref_donor, NULL, status); +} - if (md->submsg_field_count > 0) { - h->sub = upb_calloc(md->submsg_field_count * sizeof(*h->sub)); - if (!h->sub) goto oom; - } else { - h->sub = 0; +bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) { + size_t n; + size_t i; + upb_def **defs; + bool ret; + + n = upb_filedef_defcount(file); + defs = upb_gmalloc(sizeof(*defs) * n); + + if (defs == NULL) { + upb_status_seterrmsg(status, "Out of memory"); + return false; } - if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner)) - goto oom; - if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom; + for (i = 0; i < n; i++) { + defs[i] = upb_filedef_mutabledef(file, i); + } - /* calloc() above initialized all handlers to NULL. */ - return h; + ret = symtab_add(s, defs, n, NULL, upb_filedef_upcast_mutable(file), status); -oom: - freehandlers(upb_handlers_upcast_mutable(h)); - return NULL; + upb_gfree(defs); + return ret; } -const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, - const void *owner, - upb_handlers_callback *callback, - const void *closure) { - dfs_state state; - upb_handlers *ret; - bool ok; - upb_refcounted *r; - - state.callback = callback; - state.closure = closure; - if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL; +/* Iteration. */ - ret = newformsg(m, owner, &state); +static void advance_to_matching(upb_symtab_iter *iter) { + if (iter->type == UPB_DEF_ANY) + return; - upb_inttable_uninit(&state.tab); - if (!ret) return NULL; + while (!upb_strtable_done(&iter->iter) && + iter->type != upb_symtab_iter_def(iter)->type) { + upb_strtable_next(&iter->iter); + } +} - r = upb_handlers_upcast_mutable(ret); - ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH); - UPB_ASSERT(ok); +void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s, + upb_deftype_t type) { + upb_strtable_begin(&iter->iter, &s->symtab); + iter->type = type; + advance_to_matching(iter); +} - return ret; +void upb_symtab_next(upb_symtab_iter *iter) { + upb_strtable_next(&iter->iter); + advance_to_matching(iter); } -const upb_status *upb_handlers_status(upb_handlers *h) { - UPB_ASSERT(!upb_handlers_isfrozen(h)); - return &h->status_; +bool upb_symtab_done(const upb_symtab_iter *iter) { + return upb_strtable_done(&iter->iter); } -void upb_handlers_clearerr(upb_handlers *h) { - UPB_ASSERT(!upb_handlers_isfrozen(h)); - upb_status_clear(&h->status_); +const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) { + return upb_value_getptr(upb_strtable_iter_value(&iter->iter)); } +/* +** TODO(haberman): it's unclear whether a lot of the consistency checks should +** UPB_ASSERT() or return false. +*/ -#define SETTER(name, handlerctype, handlertype) \ - bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \ - handlerctype func, upb_handlerattr *attr) { \ - int32_t sel = trygetsel(h, f, handlertype); \ - return doset(h, sel, f, handlertype, (upb_func*)func, attr); \ - } -SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32) -SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64) -SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32) -SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64) -SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT) -SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE) -SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL) -SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR) -SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING) -SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR) -SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ) -SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG) -SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG) -SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ) +#include -#undef SETTER -bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func, - upb_handlerattr *attr) { - return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, - (upb_func *)func, attr); +static void *upb_calloc(size_t size) { + void *mem = upb_gmalloc(size); + if (mem) { + memset(mem, 0, size); + } + return mem; } -bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func, - upb_handlerattr *attr) { - UPB_ASSERT(!upb_handlers_isfrozen(h)); - return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, - (upb_func *)func, attr); -} +/* Defined for the sole purpose of having a unique pointer value for + * UPB_NO_CLOSURE. */ +char _upb_noclosure; -bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, - const upb_handlers *sub) { - UPB_ASSERT(sub); - UPB_ASSERT(!upb_handlers_isfrozen(h)); - UPB_ASSERT(upb_fielddef_issubmsg(f)); - if (SUBH_F(h, f)) return false; /* Can't reset. */ - if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) { - return false; +static void freehandlers(upb_refcounted *r) { + upb_handlers *h = (upb_handlers*)r; + + upb_inttable_iter i; + upb_inttable_begin(&i, &h->cleanup_); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + void *val = (void*)upb_inttable_iter_key(&i); + upb_value func_val = upb_inttable_iter_value(&i); + upb_handlerfree *func = upb_value_getfptr(func_val); + func(val); } - SUBH_F(h, f) = sub; - upb_ref2(sub, h); - return true; -} -const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, - const upb_fielddef *f) { - UPB_ASSERT(upb_fielddef_issubmsg(f)); - return SUBH_F(h, f); + upb_inttable_uninit(&h->cleanup_); + upb_msgdef_unref(h->msg, h); + upb_gfree(h->sub); + upb_gfree(h); } -bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel, - upb_handlerattr *attr) { - if (!upb_handlers_gethandler(h, sel)) - return false; - *attr = h->table[sel].attr; - return true; +static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_handlers *h = (const upb_handlers*)r; + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, h->msg); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + const upb_handlers *sub; + if (!upb_fielddef_issubmsg(f)) continue; + sub = upb_handlers_getsubhandlers(h, f); + if (sub) visit(r, upb_handlers_upcast(sub), closure); + } } -const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, - upb_selector_t sel) { - /* STARTSUBMSG selector in sel is the field's selector base. */ - return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT); -} +static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers}; -const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } +typedef struct { + upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */ + upb_handlers_callback *callback; + const void *closure; +} dfs_state; -bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) { - bool ok; - if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) { - return false; - } - ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func)); - UPB_ASSERT(ok); - return true; -} +/* TODO(haberman): discard upb_handlers* objects that do not actually have any + * handlers set and cannot reach any upb_handlers* object that does. This is + * slightly tricky to do correctly. */ +static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, + dfs_state *s) { + upb_msg_field_iter i; + upb_handlers *h = upb_handlers_new(m, owner); + if (!h) return NULL; + if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom; + s->callback(s->closure, h); -/* "Static" methods ***********************************************************/ + /* For each submessage field, get or create a handlers object and set it as + * the subhandlers. */ + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + const upb_msgdef *subdef; + upb_value subm_ent; -bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { - /* TODO: verify we have a transitive closure. */ - int i; - for (i = 0; i < n; i++) { - upb_msg_field_iter j; - upb_handlers *h = handlers[i]; + if (!upb_fielddef_issubmsg(f)) continue; - if (!upb_ok(&h->status_)) { - upb_status_seterrf(s, "handlers for message %s had error status: %s", - upb_msgdef_fullname(upb_handlers_msgdef(h)), - upb_status_errmsg(&h->status_)); - return false; + subdef = upb_downcast_msgdef(upb_fielddef_subdef(f)); + if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) { + upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent)); + } else { + upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s); + if (!sub_mh) goto oom; + upb_handlers_setsubhandlers(h, f, sub_mh); + upb_handlers_unref(sub_mh, &sub_mh); } + } + return h; - /* Check that there are no closure mismatches due to missing Start* handlers - * or subhandlers with different type-level types. */ - for(upb_msg_field_begin(&j, h->msg); - !upb_msg_field_done(&j); - upb_msg_field_next(&j)) { - - const upb_fielddef *f = upb_msg_iter_field(&j); - if (upb_fielddef_isseq(f)) { - if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s)) - return false; - } - - if (upb_fielddef_isstring(f)) { - if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s)) - return false; - } +oom: + upb_handlers_unref(h, owner); + return NULL; +} - if (upb_fielddef_issubmsg(f)) { - bool hashandler = false; - if (upb_handlers_gethandler( - h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) || - upb_handlers_gethandler( - h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) { - hashandler = true; - } +/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the + * subhandlers for this submessage field. */ +#define SUBH(h, selector) (h->sub[selector]) - if (upb_fielddef_isseq(f) && - (upb_handlers_gethandler( - h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) || - upb_handlers_gethandler( - h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) { - hashandler = true; - } +/* The selector for a submessage field is the field index. */ +#define SUBH_F(h, f) SUBH(h, f->index_) - if (hashandler && !upb_handlers_getsubhandlers(h, f)) { - /* For now we add an empty subhandlers in this case. It makes the - * decoder code generator simpler, because it only has to handle two - * cases (submessage has handlers or not) as opposed to three - * (submessage has handlers in enclosing message but no subhandlers). - * - * This makes parsing less efficient in the case that we want to - * notice a submessage but skip its contents (like if we're testing +static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t sel; + UPB_ASSERT(!upb_handlers_isfrozen(h)); + if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) { + upb_status_seterrf( + &h->status_, "type mismatch: field %s does not belong to message %s", + upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h))); + return -1; + } + if (!upb_handlers_getselector(f, type, &sel)) { + upb_status_seterrf( + &h->status_, + "type mismatch: cannot register handler type %d for field %s", + type, upb_fielddef_name(f)); + return -1; + } + return sel; +} + +static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f, + upb_handlertype_t type) { + int32_t sel = trygetsel(h, f, type); + UPB_ASSERT(sel >= 0); + return sel; +} + +static const void **returntype(upb_handlers *h, const upb_fielddef *f, + upb_handlertype_t type) { + return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_; +} + +static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f, + upb_handlertype_t type, upb_func *func, + upb_handlerattr *attr) { + upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER; + const void *closure_type; + const void **context_closure_type; + + UPB_ASSERT(!upb_handlers_isfrozen(h)); + + if (sel < 0) { + upb_status_seterrmsg(&h->status_, + "incorrect handler type for this field."); + return false; + } + + if (h->table[sel].func) { + upb_status_seterrmsg(&h->status_, + "cannot change handler once it has been set."); + return false; + } + + if (attr) { + set_attr = *attr; + } + + /* Check that the given closure type matches the closure type that has been + * established for this context (if any). */ + closure_type = upb_handlerattr_closuretype(&set_attr); + + if (type == UPB_HANDLER_STRING) { + context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR); + } else if (f && upb_fielddef_isseq(f) && + type != UPB_HANDLER_STARTSEQ && + type != UPB_HANDLER_ENDSEQ) { + context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ); + } else { + context_closure_type = &h->top_closure_type; + } + + if (closure_type && *context_closure_type && + closure_type != *context_closure_type) { + /* TODO(haberman): better message for debugging. */ + if (f) { + upb_status_seterrf(&h->status_, + "closure type does not match for field %s", + upb_fielddef_name(f)); + } else { + upb_status_seterrmsg( + &h->status_, "closure type does not match for message-level handler"); + } + return false; + } + + if (closure_type) + *context_closure_type = closure_type; + + /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer + * matches any pre-existing expectations about what type is expected. */ + if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) { + const void *return_type = upb_handlerattr_returnclosuretype(&set_attr); + const void *table_return_type = + upb_handlerattr_returnclosuretype(&h->table[sel].attr); + if (return_type && table_return_type && return_type != table_return_type) { + upb_status_seterrmsg(&h->status_, "closure return type does not match"); + return false; + } + + if (table_return_type && !return_type) + upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type); + } + + h->table[sel].func = (upb_func*)func; + h->table[sel].attr = set_attr; + return true; +} + +/* Returns the effective closure type for this handler (which will propagate + * from outer frames if this frame has no START* handler). Not implemented for + * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is + * the effective closure type is unspecified (either no handler was registered + * to specify it or the handler that was registered did not specify the closure + * type). */ +const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f, + upb_handlertype_t type) { + const void *ret; + upb_selector_t sel; + + UPB_ASSERT(type != UPB_HANDLER_STRING); + ret = h->top_closure_type; + + if (upb_fielddef_isseq(f) && + type != UPB_HANDLER_STARTSEQ && + type != UPB_HANDLER_ENDSEQ && + h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) { + ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr); + } + + if (type == UPB_HANDLER_STRING && + h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) { + ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr); + } + + /* The effective type of the submessage; not used yet. + * if (type == SUBMESSAGE && + * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) { + * ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr); + * } */ + + return ret; +} + +/* Checks whether the START* handler specified by f & type is missing even + * though it is required to convert the established type of an outer frame + * ("closure_type") into the established type of an inner frame (represented in + * the return closure type of this handler's attr. */ +bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type, + upb_status *status) { + const void *closure_type; + const upb_handlerattr *attr; + const void *return_closure_type; + + upb_selector_t sel = handlers_getsel(h, f, type); + if (h->table[sel].func) return true; + closure_type = effective_closure_type(h, f, type); + attr = &h->table[sel].attr; + return_closure_type = upb_handlerattr_returnclosuretype(attr); + if (closure_type && return_closure_type && + closure_type != return_closure_type) { + upb_status_seterrf(status, + "expected start handler to return sub type for field %f", + upb_fielddef_name(f)); + return false; + } + return true; +} + +/* Public interface ***********************************************************/ + +upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) { + int extra; + upb_handlers *h; + + UPB_ASSERT(upb_msgdef_isfrozen(md)); + + extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1); + h = upb_calloc(sizeof(*h) + extra); + if (!h) return NULL; + + h->msg = md; + upb_msgdef_ref(h->msg, h); + upb_status_clear(&h->status_); + + if (md->submsg_field_count > 0) { + h->sub = upb_calloc(md->submsg_field_count * sizeof(*h->sub)); + if (!h->sub) goto oom; + } else { + h->sub = 0; + } + + if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner)) + goto oom; + if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom; + + /* calloc() above initialized all handlers to NULL. */ + return h; + +oom: + freehandlers(upb_handlers_upcast_mutable(h)); + return NULL; +} + +const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, + const void *owner, + upb_handlers_callback *callback, + const void *closure) { + dfs_state state; + upb_handlers *ret; + bool ok; + upb_refcounted *r; + + state.callback = callback; + state.closure = closure; + if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL; + + ret = newformsg(m, owner, &state); + + upb_inttable_uninit(&state.tab); + if (!ret) return NULL; + + r = upb_handlers_upcast_mutable(ret); + ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH); + UPB_ASSERT(ok); + + return ret; +} + +const upb_status *upb_handlers_status(upb_handlers *h) { + UPB_ASSERT(!upb_handlers_isfrozen(h)); + return &h->status_; +} + +void upb_handlers_clearerr(upb_handlers *h) { + UPB_ASSERT(!upb_handlers_isfrozen(h)); + upb_status_clear(&h->status_); +} + +#define SETTER(name, handlerctype, handlertype) \ + bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \ + handlerctype func, upb_handlerattr *attr) { \ + int32_t sel = trygetsel(h, f, handlertype); \ + return doset(h, sel, f, handlertype, (upb_func*)func, attr); \ + } + +SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32) +SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64) +SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32) +SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64) +SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT) +SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE) +SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL) +SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR) +SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING) +SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR) +SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ) +SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG) +SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG) +SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ) + +#undef SETTER + +bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func, + upb_handlerattr *attr) { + return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, + (upb_func *)func, attr); +} + +bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func, + upb_handlerattr *attr) { + UPB_ASSERT(!upb_handlers_isfrozen(h)); + return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, + (upb_func *)func, attr); +} + +bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, + const upb_handlers *sub) { + UPB_ASSERT(sub); + UPB_ASSERT(!upb_handlers_isfrozen(h)); + UPB_ASSERT(upb_fielddef_issubmsg(f)); + if (SUBH_F(h, f)) return false; /* Can't reset. */ + if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) { + return false; + } + SUBH_F(h, f) = sub; + upb_ref2(sub, h); + return true; +} + +const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, + const upb_fielddef *f) { + UPB_ASSERT(upb_fielddef_issubmsg(f)); + return SUBH_F(h, f); +} + +bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel, + upb_handlerattr *attr) { + if (!upb_handlers_gethandler(h, sel)) + return false; + *attr = h->table[sel].attr; + return true; +} + +const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, + upb_selector_t sel) { + /* STARTSUBMSG selector in sel is the field's selector base. */ + return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT); +} + +const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } + +bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) { + bool ok; + if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) { + return false; + } + ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func)); + UPB_ASSERT(ok); + return true; +} + + +/* "Static" methods ***********************************************************/ + +bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { + /* TODO: verify we have a transitive closure. */ + int i; + for (i = 0; i < n; i++) { + upb_msg_field_iter j; + upb_handlers *h = handlers[i]; + + if (!upb_ok(&h->status_)) { + upb_status_seterrf(s, "handlers for message %s had error status: %s", + upb_msgdef_fullname(upb_handlers_msgdef(h)), + upb_status_errmsg(&h->status_)); + return false; + } + + /* Check that there are no closure mismatches due to missing Start* handlers + * or subhandlers with different type-level types. */ + for(upb_msg_field_begin(&j, h->msg); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { + + const upb_fielddef *f = upb_msg_iter_field(&j); + if (upb_fielddef_isseq(f)) { + if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s)) + return false; + } + + if (upb_fielddef_isstring(f)) { + if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s)) + return false; + } + + if (upb_fielddef_issubmsg(f)) { + bool hashandler = false; + if (upb_handlers_gethandler( + h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) || + upb_handlers_gethandler( + h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) { + hashandler = true; + } + + if (upb_fielddef_isseq(f) && + (upb_handlers_gethandler( + h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) || + upb_handlers_gethandler( + h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) { + hashandler = true; + } + + if (hashandler && !upb_handlers_getsubhandlers(h, f)) { + /* For now we add an empty subhandlers in this case. It makes the + * decoder code generator simpler, because it only has to handle two + * cases (submessage has handlers or not) as opposed to three + * (submessage has handlers in enclosing message but no subhandlers). + * + * This makes parsing less efficient in the case that we want to + * notice a submessage but skip its contents (like if we're testing * for submessage presence or counting the number of repeated * submessages). In this case we will end up parsing the submessage * field by field and throwing away the results for each, instead of @@ -2658,1624 +2963,2346 @@ bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { upb_handlers_unref(sub, &sub); } - /* TODO(haberman): check type of submessage. - * This is slightly tricky; also consider whether we should check that - * they match at setsubhandlers time. */ - } + /* TODO(haberman): check type of submessage. + * This is slightly tricky; also consider whether we should check that + * they match at setsubhandlers time. */ + } + } + } + + if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s, + UPB_MAX_HANDLER_DEPTH)) { + return false; + } + + return true; +} + +upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_ENUM: return UPB_HANDLER_INT32; + case UPB_TYPE_INT64: return UPB_HANDLER_INT64; + case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32; + case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64; + case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT; + case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE; + case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL; + default: UPB_ASSERT(false); return -1; /* Invalid input. */ + } +} + +bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, + upb_selector_t *s) { + switch (type) { + case UPB_HANDLER_INT32: + case UPB_HANDLER_INT64: + case UPB_HANDLER_UINT32: + case UPB_HANDLER_UINT64: + case UPB_HANDLER_FLOAT: + case UPB_HANDLER_DOUBLE: + case UPB_HANDLER_BOOL: + if (!upb_fielddef_isprimitive(f) || + upb_handlers_getprimitivehandlertype(f) != type) + return false; + *s = f->selector_base; + break; + case UPB_HANDLER_STRING: + if (upb_fielddef_isstring(f)) { + *s = f->selector_base; + } else if (upb_fielddef_lazy(f)) { + *s = f->selector_base + 3; + } else { + return false; + } + break; + case UPB_HANDLER_STARTSTR: + if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { + *s = f->selector_base + 1; + } else { + return false; + } + break; + case UPB_HANDLER_ENDSTR: + if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { + *s = f->selector_base + 2; + } else { + return false; + } + break; + case UPB_HANDLER_STARTSEQ: + if (!upb_fielddef_isseq(f)) return false; + *s = f->selector_base - 2; + break; + case UPB_HANDLER_ENDSEQ: + if (!upb_fielddef_isseq(f)) return false; + *s = f->selector_base - 1; + break; + case UPB_HANDLER_STARTSUBMSG: + if (!upb_fielddef_issubmsg(f)) return false; + /* Selectors for STARTSUBMSG are at the beginning of the table so that the + * selector can also be used as an index into the "sub" array of + * subhandlers. The indexes for the two into these two tables are the + * same, except that in the handler table the static selectors come first. */ + *s = f->index_ + UPB_STATIC_SELECTOR_COUNT; + break; + case UPB_HANDLER_ENDSUBMSG: + if (!upb_fielddef_issubmsg(f)) return false; + *s = f->selector_base; + break; + } + UPB_ASSERT((size_t)*s < upb_fielddef_containingtype(f)->selector_count); + return true; +} + +uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { + return upb_fielddef_isseq(f) ? 2 : 0; +} + +uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { + uint32_t ret = 1; + if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ + if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ + if (upb_fielddef_issubmsg(f)) { + /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ + ret += 0; + if (upb_fielddef_lazy(f)) { + /* STARTSTR/ENDSTR/STRING (for lazy) */ + ret += 3; + } + } + return ret; +} + + +/* upb_handlerattr ************************************************************/ + +void upb_handlerattr_init(upb_handlerattr *attr) { + upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER; + memcpy(attr, &from, sizeof(*attr)); +} + +void upb_handlerattr_uninit(upb_handlerattr *attr) { + UPB_UNUSED(attr); +} + +bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) { + attr->handler_data_ = hd; + return true; +} + +bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) { + attr->closure_type_ = type; + return true; +} + +const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) { + return attr->closure_type_; +} + +bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr, + const void *type) { + attr->return_closure_type_ = type; + return true; +} + +const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) { + return attr->return_closure_type_; +} + +bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) { + attr->alwaysok_ = alwaysok; + return true; +} + +bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) { + return attr->alwaysok_; +} + +/* upb_bufhandle **************************************************************/ + +size_t upb_bufhandle_objofs(const upb_bufhandle *h) { + return h->objofs_; +} + +/* upb_byteshandler ***********************************************************/ + +void upb_byteshandler_init(upb_byteshandler* h) { + memset(h, 0, sizeof(*h)); +} + +/* For when we support handlerfree callbacks. */ +void upb_byteshandler_uninit(upb_byteshandler* h) { + UPB_UNUSED(h); +} + +bool upb_byteshandler_setstartstr(upb_byteshandler *h, + upb_startstr_handlerfunc *func, void *d) { + h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func; + h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d; + return true; +} + +bool upb_byteshandler_setstring(upb_byteshandler *h, + upb_string_handlerfunc *func, void *d) { + h->table[UPB_STRING_SELECTOR].func = (upb_func*)func; + h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d; + return true; +} + +bool upb_byteshandler_setendstr(upb_byteshandler *h, + upb_endfield_handlerfunc *func, void *d) { + h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func; + h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d; + return true; +} + + +static bool is_power_of_two(size_t val) { + return (val & (val - 1)) == 0; +} + +/* Align up to the given power of 2. */ +static size_t align_up(size_t val, size_t align) { + UPB_ASSERT(is_power_of_two(align)); + return (val + align - 1) & ~(align - 1); +} + +static size_t div_round_up(size_t n, size_t d) { + return (n + d - 1) / d; +} + +bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) { + return type == UPB_TYPE_BOOL || type == UPB_TYPE_INT32 || + type == UPB_TYPE_UINT32 || type == UPB_TYPE_INT64 || + type == UPB_TYPE_UINT64 || type == UPB_TYPE_STRING; +} + +void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size); +void *upb_map_pack(const upb_map *map, void *p, size_t *ofs, size_t size); + +#define CHARPTR_AT(msg, ofs) ((char*)msg + ofs) +#define ENCODE_MAX_NESTING 64 +#define CHECK_TRUE(x) if (!(x)) { return false; } + +/** upb_msgval ****************************************************************/ + +#define upb_alignof(t) offsetof(struct { char c; t x; }, x) + +/* These functions will generate real memcpy() calls on ARM sadly, because + * the compiler assumes they might not be aligned. */ + +static upb_msgval upb_msgval_read(const void *p, size_t ofs, + uint8_t size) { + upb_msgval val; + p = (char*)p + ofs; + memcpy(&val, p, size); + return val; +} + +static void upb_msgval_write(void *p, size_t ofs, upb_msgval val, + uint8_t size) { + p = (char*)p + ofs; + memcpy(p, &val, size); +} + +static size_t upb_msgval_sizeof(upb_fieldtype_t type) { + switch (type) { + case UPB_TYPE_DOUBLE: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + return 8; + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_FLOAT: + return 4; + case UPB_TYPE_BOOL: + return 1; + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: + return sizeof(void*); + case UPB_TYPE_STRING: + return sizeof(char*) + sizeof(size_t); + } + UPB_UNREACHABLE(); +} + +static uint8_t upb_msg_fieldsize(const upb_fielddef *f) { + if (upb_fielddef_isseq(f)) { + return sizeof(void*); + } else { + return upb_msgval_sizeof(upb_fielddef_type(f)); + } +} + +/* TODO(haberman): this is broken right now because upb_msgval can contain + * a char* / size_t pair, which is too big for a upb_value. To fix this + * we'll probably need to dynamically allocate a upb_msgval and store a + * pointer to that in the tables for extensions/maps. */ +static upb_value upb_toval(upb_msgval val) { + upb_value ret; + UPB_UNUSED(val); + memset(&ret, 0, sizeof(upb_value)); /* XXX */ + return ret; +} + +static upb_msgval upb_msgval_fromval(upb_value val) { + upb_msgval ret; + UPB_UNUSED(val); + memset(&ret, 0, sizeof(upb_msgval)); /* XXX */ + return ret; +} + +static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) { + switch (type) { + case UPB_TYPE_FLOAT: return UPB_CTYPE_FLOAT; + case UPB_TYPE_DOUBLE: return UPB_CTYPE_DOUBLE; + case UPB_TYPE_BOOL: return UPB_CTYPE_BOOL; + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: + case UPB_TYPE_STRING: return UPB_CTYPE_CONSTPTR; + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: return UPB_CTYPE_INT32; + case UPB_TYPE_UINT32: return UPB_CTYPE_UINT32; + case UPB_TYPE_INT64: return UPB_CTYPE_INT64; + case UPB_TYPE_UINT64: return UPB_CTYPE_UINT64; + default: UPB_ASSERT(false); return 0; + } +} + +static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) { + /* TODO(haberman): improve/optimize this (maybe use upb_msgval in fielddef) */ + switch (upb_fielddef_type(f)) { + case UPB_TYPE_FLOAT: + return upb_msgval_float(upb_fielddef_defaultfloat(f)); + case UPB_TYPE_DOUBLE: + return upb_msgval_double(upb_fielddef_defaultdouble(f)); + case UPB_TYPE_BOOL: + return upb_msgval_bool(upb_fielddef_defaultbool(f)); + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + size_t len; + const char *ptr = upb_fielddef_defaultstr(f, &len); + return upb_msgval_str(ptr, len); + } + case UPB_TYPE_MESSAGE: + return upb_msgval_msg(NULL); + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: + return upb_msgval_int32(upb_fielddef_defaultint32(f)); + case UPB_TYPE_UINT32: + return upb_msgval_uint32(upb_fielddef_defaultuint32(f)); + case UPB_TYPE_INT64: + return upb_msgval_int64(upb_fielddef_defaultint64(f)); + case UPB_TYPE_UINT64: + return upb_msgval_uint64(upb_fielddef_defaultuint64(f)); + default: + UPB_ASSERT(false); + return upb_msgval_msg(NULL); + } +} + + +/** upb_msglayout *************************************************************/ + +struct upb_msglayout { + upb_msgfactory *factory; + const upb_msgdef *msgdef; + size_t size; + size_t extdict_offset; + void *default_msg; + uint32_t *field_offsets; + uint32_t *case_offsets; + uint32_t *hasbits; + bool has_extdict; + uint8_t align; +}; + +static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) { + UPB_ASSERT(l->msgdef == upb_fielddef_containingtype(f)); +} + +static void upb_msglayout_free(upb_msglayout *l) { + upb_gfree(l->default_msg); + upb_gfree(l); +} + +const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l) { + return l->msgdef; +} + +static size_t upb_msglayout_place(upb_msglayout *l, size_t size) { + size_t ret; + + l->size = align_up(l->size, size); + l->align = align_up(l->align, size); + ret = l->size; + l->size += size; + return ret; +} + +static uint32_t upb_msglayout_offset(const upb_msglayout *l, + const upb_fielddef *f) { + return l->field_offsets[upb_fielddef_index(f)]; +} + +static uint32_t upb_msglayout_hasbit(const upb_msglayout *l, + const upb_fielddef *f) { + return l->hasbits[upb_fielddef_index(f)]; +} + +static bool upb_msglayout_initdefault(upb_msglayout *l) { + const upb_msgdef *m = l->msgdef; + upb_msg_field_iter it; + + if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->size) { + /* Allocate default message and set default values in it. */ + l->default_msg = upb_gmalloc(l->size); + if (!l->default_msg) { + return false; + } + + memset(l->default_msg, 0, l->size); + + for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + const upb_fielddef* f = upb_msg_iter_field(&it); + + if (upb_fielddef_containingoneof(f)) { + continue; + } + + if (!upb_fielddef_isstring(f) && + !upb_fielddef_issubmsg(f) && + !upb_fielddef_isseq(f)) { + upb_msg_set(l->default_msg, f, upb_msgval_fromdefault(f), l); + } + } + } + + return true; +} + +static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { + upb_msg_field_iter it; + upb_msg_oneof_iter oit; + upb_msglayout *l; + size_t hasbit; + size_t array_size = upb_msgdef_numfields(m) + upb_msgdef_numoneofs(m); + + if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2) { + array_size += upb_msgdef_numfields(m); /* hasbits. */ + } + + l = upb_gmalloc(sizeof(*l) + (sizeof(uint32_t) * array_size)); + if (!l) return NULL; + + memset(l, 0, sizeof(*l)); + + l->msgdef = m; + l->align = 1; + l->field_offsets = (uint32_t*)CHARPTR_AT(l, sizeof(*l)); + l->case_offsets = l->field_offsets + upb_msgdef_numfields(m); + l->hasbits = l->case_offsets + upb_msgdef_numoneofs(m); + + /* Allocate data offsets in three stages: + * + * 1. hasbits. + * 2. regular fields. + * 3. oneof fields. + * + * OPT: There is a lot of room for optimization here to minimize the size. + */ + + /* Allocate hasbits. Start at sizeof(void*) for upb_alloc*. */ + for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8; + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + const upb_fielddef* f = upb_msg_iter_field(&it); + + if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) { + l->hasbits[upb_fielddef_index(f)] = hasbit++; + } + } + + /* Account for space used by hasbits. */ + l->size = div_round_up(hasbit, 8); + + /* Allocate non-oneof fields. */ + for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + const upb_fielddef* f = upb_msg_iter_field(&it); + size_t field_size = upb_msg_fieldsize(f); + size_t index = upb_fielddef_index(f); + + + if (upb_fielddef_containingoneof(f)) { + /* Oneofs are handled separately below. */ + continue; + } + + l->field_offsets[index] = upb_msglayout_place(l, field_size); + } + + /* Allocate oneof fields. Each oneof field consists of a uint32 for the case + * and space for the actual data. */ + for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit); + upb_msg_oneof_next(&oit)) { + const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); + upb_oneof_iter fit; + size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */ + size_t field_size = 0; + size_t case_offset; + size_t val_offset; + + /* Calculate field size: the max of all field sizes. */ + for (upb_oneof_begin(&fit, oneof); + !upb_oneof_done(&fit); + upb_oneof_next(&fit)) { + const upb_fielddef* f = upb_oneof_iter_field(&fit); + field_size = UPB_MAX(field_size, upb_msg_fieldsize(f)); + } + + /* Align and allocate case offset. */ + case_offset = upb_msglayout_place(l, case_size); + val_offset = upb_msglayout_place(l, field_size); + + l->case_offsets[upb_oneofdef_index(oneof)] = case_offset; + + /* Assign all fields in the oneof this same offset. */ + for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit); + upb_oneof_next(&fit)) { + const upb_fielddef* f = upb_oneof_iter_field(&fit); + l->field_offsets[upb_fielddef_index(f)] = val_offset; + } + } + + /* Size of the entire structure should be a multiple of its greatest + * alignment. */ + l->size = align_up(l->size, l->align); + + if (upb_msglayout_initdefault(l)) { + return l; + } else { + upb_msglayout_free(l); + return NULL; + } +} + +upb_msgfactory *upb_msglayout_factory(const upb_msglayout *layout) { + return layout->factory; +} + + +/** upb_msgfactory ************************************************************/ + +struct upb_msgfactory { + const upb_symtab *symtab; /* We own a ref. */ + upb_inttable layouts; + upb_inttable mergehandlers; +}; + +upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab) { + upb_msgfactory *ret = upb_gmalloc(sizeof(*ret)); + + ret->symtab = symtab; + upb_inttable_init(&ret->layouts, UPB_CTYPE_PTR); + upb_inttable_init(&ret->mergehandlers, UPB_CTYPE_CONSTPTR); + + return ret; +} + +void upb_msgfactory_free(upb_msgfactory *f) { + upb_inttable_iter i; + upb_inttable_begin(&i, &f->layouts); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + upb_msglayout *l = upb_value_getptr(upb_inttable_iter_value(&i)); + upb_msglayout_free(l); + } + + upb_inttable_begin(&i, &f->mergehandlers); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + const upb_handlers *h = upb_value_getconstptr(upb_inttable_iter_value(&i)); + upb_handlers_unref(h, f); + } + + upb_inttable_uninit(&f->layouts); + upb_inttable_uninit(&f->mergehandlers); + upb_gfree(f); +} + +const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f) { + return f->symtab; +} + +const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f, + const upb_msgdef *m) { + upb_value v; + UPB_ASSERT(upb_symtab_lookupmsg(f->symtab, upb_msgdef_fullname(m)) == m); + UPB_ASSERT(!upb_msgdef_mapentry(m)); + + if (upb_inttable_lookupptr(&f->layouts, m, &v)) { + UPB_ASSERT(upb_value_getptr(v)); + return upb_value_getptr(v); + } else { + upb_msgfactory *mutable_f = (void*)f; + upb_msglayout *l = upb_msglayout_new(m); + upb_inttable_insertptr(&mutable_f->layouts, m, upb_value_ptr(l)); + UPB_ASSERT(l); + l->factory = f; + return l; + } +} + +/* Our handlers that we don't expose externally. */ + +void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) { + uint32_t ofs = (uintptr_t)hd; + /* We pass NULL here because we know we can get away with it. */ + upb_alloc *alloc = upb_msg_alloc(msg, NULL); + upb_msgval val; + UPB_UNUSED(size_hint); + + val = upb_msgval_read(msg, ofs, upb_msgval_sizeof(UPB_TYPE_STRING)); + + upb_free(alloc, (void*)val.str.ptr); + val.str.ptr = NULL; + val.str.len = 0; + + upb_msgval_write(msg, ofs, val, upb_msgval_sizeof(UPB_TYPE_STRING)); + return msg; +} + +size_t upb_msg_str(void *msg, const void *hd, const char *ptr, size_t size, + const upb_bufhandle *handle) { + uint32_t ofs = (uintptr_t)hd; + /* We pass NULL here because we know we can get away with it. */ + upb_alloc *alloc = upb_msg_alloc(msg, NULL); + upb_msgval val; + size_t newsize; + UPB_UNUSED(handle); + + val = upb_msgval_read(msg, ofs, upb_msgval_sizeof(UPB_TYPE_STRING)); + + newsize = val.str.len + size; + val.str.ptr = upb_realloc(alloc, (void*)val.str.ptr, val.str.len, newsize); + + if (!val.str.ptr) { + return false; + } + + memcpy((char*)val.str.ptr + val.str.len, ptr, size); + val.str.len = newsize; + upb_msgval_write(msg, ofs, val, upb_msgval_sizeof(UPB_TYPE_STRING)); + return size; +} + +static void callback(const void *closure, upb_handlers *h) { + upb_msgfactory *factory = (upb_msgfactory*)closure; + const upb_msgdef *md = upb_handlers_msgdef(h); + const upb_msglayout* layout = upb_msgfactory_getlayout(factory, md); + upb_msg_field_iter i; + UPB_UNUSED(factory); + + for(upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + size_t offset = upb_msglayout_offset(layout, f); + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, (void*)offset); + + if (upb_fielddef_isseq(f)) { + } else if (upb_fielddef_isstring(f)) { + upb_handlers_setstartstr(h, f, upb_msg_startstr, &attr); + upb_handlers_setstring(h, f, upb_msg_str, &attr); + } else { + upb_msg_setscalarhandler( + h, f, offset, upb_msglayout_hasbit(layout, f)); + } + } +} + +const upb_handlers *upb_msgfactory_getmergehandlers(upb_msgfactory *f, + const upb_msgdef *m) { + upb_msgfactory *mutable_f = (void*)f; + + /* TODO(haberman): properly cache these. */ + const upb_handlers *ret = upb_handlers_newfrozen(m, f, callback, f); + upb_inttable_push(&mutable_f->mergehandlers, upb_value_constptr(ret)); + + return ret; +} + +const upb_visitorplan *upb_msgfactory_getvisitorplan(upb_msgfactory *f, + const upb_handlers *h) { + const upb_msgdef *md = upb_handlers_msgdef(h); + return (const upb_visitorplan*)upb_msgfactory_getlayout(f, md); +} + + +/** upb_visitor ***************************************************************/ + +struct upb_visitor { + const upb_msglayout *layout; + upb_sink *sink; +}; + +static upb_selector_t getsel2(const upb_fielddef *f, upb_handlertype_t type) { + upb_selector_t ret; + bool ok = upb_handlers_getselector(f, type, &ret); + UPB_ASSERT(ok); + return ret; +} + +static bool upb_visitor_hasfield(const upb_msg *msg, const upb_fielddef *f, + const upb_msglayout *layout) { + if (upb_fielddef_isseq(f)) { + return upb_msgval_getarr(upb_msg_get(msg, f, layout)) != NULL; + } else if (upb_msgdef_syntax(upb_fielddef_containingtype(f)) == + UPB_SYNTAX_PROTO2) { + return upb_msg_has(msg, f, layout); + } else { + upb_msgval val = upb_msg_get(msg, f, layout); + switch (upb_fielddef_type(f)) { + case UPB_TYPE_FLOAT: + return upb_msgval_getfloat(val) != 0; + case UPB_TYPE_DOUBLE: + return upb_msgval_getdouble(val) != 0; + case UPB_TYPE_BOOL: + return upb_msgval_getbool(val); + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: + return upb_msgval_getint32(val) != 0; + case UPB_TYPE_UINT32: + return upb_msgval_getuint32(val) != 0; + case UPB_TYPE_INT64: + return upb_msgval_getint64(val) != 0; + case UPB_TYPE_UINT64: + return upb_msgval_getuint64(val) != 0; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + return upb_msgval_getstr(val) && upb_msgval_getstrlen(val) > 0; + case UPB_TYPE_MESSAGE: + return upb_msgval_getmsg(val) != NULL; } + UPB_UNREACHABLE(); } +} - if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s, - UPB_MAX_HANDLER_DEPTH)) { +static bool upb_visitor_visitmsg2(const upb_msg *msg, + const upb_msglayout *layout, upb_sink *sink, + int depth) { + const upb_msgdef *md = upb_msglayout_msgdef(layout); + upb_msg_field_iter i; + upb_status status; + + upb_sink_startmsg(sink); + + /* Protect against cycles (possible because users may freely reassign message + * and repeated fields) by imposing a maximum recursion depth. */ + if (depth > ENCODE_MAX_NESTING) { return false; } - return true; -} + for (upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + upb_msgval val; -upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_ENUM: return UPB_HANDLER_INT32; - case UPB_TYPE_INT64: return UPB_HANDLER_INT64; - case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32; - case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64; - case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT; - case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE; - case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL; - default: UPB_ASSERT(false); return -1; /* Invalid input. */ - } -} + if (!upb_visitor_hasfield(msg, f, layout)) { + continue; + } -bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, - upb_selector_t *s) { - switch (type) { - case UPB_HANDLER_INT32: - case UPB_HANDLER_INT64: - case UPB_HANDLER_UINT32: - case UPB_HANDLER_UINT64: - case UPB_HANDLER_FLOAT: - case UPB_HANDLER_DOUBLE: - case UPB_HANDLER_BOOL: - if (!upb_fielddef_isprimitive(f) || - upb_handlers_getprimitivehandlertype(f) != type) - return false; - *s = f->selector_base; - break; - case UPB_HANDLER_STRING: - if (upb_fielddef_isstring(f)) { - *s = f->selector_base; - } else if (upb_fielddef_lazy(f)) { - *s = f->selector_base + 3; - } else { - return false; - } - break; - case UPB_HANDLER_STARTSTR: - if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { - *s = f->selector_base + 1; - } else { - return false; - } - break; - case UPB_HANDLER_ENDSTR: - if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { - *s = f->selector_base + 2; - } else { - return false; + val = upb_msg_get(msg, f, layout); + + if (upb_fielddef_isseq(f)) { + const upb_array *arr = upb_msgval_getarr(val); + UPB_ASSERT(arr); + /* TODO: putary(ary, f, sink, depth);*/ + } else if (upb_fielddef_issubmsg(f)) { + const upb_map *map = upb_msgval_getmap(val); + UPB_ASSERT(map); + /* TODO: putmap(map, f, sink, depth);*/ + } else if (upb_fielddef_isstring(f)) { + /* TODO putstr(); */ + } else { + upb_selector_t sel = getsel2(f, upb_handlers_getprimitivehandlertype(f)); + UPB_ASSERT(upb_fielddef_isprimitive(f)); + + switch (upb_fielddef_type(f)) { + case UPB_TYPE_FLOAT: + CHECK_TRUE(upb_sink_putfloat(sink, sel, upb_msgval_getfloat(val))); + break; + case UPB_TYPE_DOUBLE: + CHECK_TRUE( + upb_sink_putdouble(sink, sel, upb_msgval_getdouble(val))); + break; + case UPB_TYPE_BOOL: + CHECK_TRUE(upb_sink_putbool(sink, sel, upb_msgval_getbool(val))); + break; + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: + CHECK_TRUE(upb_sink_putint32(sink, sel, upb_msgval_getint32(val))); + break; + case UPB_TYPE_UINT32: + CHECK_TRUE( + upb_sink_putuint32(sink, sel, upb_msgval_getuint32(val))); + break; + case UPB_TYPE_INT64: + CHECK_TRUE(upb_sink_putint64(sink, sel, upb_msgval_getint64(val))); + break; + case UPB_TYPE_UINT64: + CHECK_TRUE( + upb_sink_putuint64(sink, sel, upb_msgval_getuint64(val))); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: + UPB_UNREACHABLE(); } - break; - case UPB_HANDLER_STARTSEQ: - if (!upb_fielddef_isseq(f)) return false; - *s = f->selector_base - 2; - break; - case UPB_HANDLER_ENDSEQ: - if (!upb_fielddef_isseq(f)) return false; - *s = f->selector_base - 1; - break; - case UPB_HANDLER_STARTSUBMSG: - if (!upb_fielddef_issubmsg(f)) return false; - /* Selectors for STARTSUBMSG are at the beginning of the table so that the - * selector can also be used as an index into the "sub" array of - * subhandlers. The indexes for the two into these two tables are the - * same, except that in the handler table the static selectors come first. */ - *s = f->index_ + UPB_STATIC_SELECTOR_COUNT; - break; - case UPB_HANDLER_ENDSUBMSG: - if (!upb_fielddef_issubmsg(f)) return false; - *s = f->selector_base; - break; + } } - UPB_ASSERT((size_t)*s < upb_fielddef_containingtype(f)->selector_count); + + upb_sink_endmsg(sink, &status); return true; } -uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { - return upb_fielddef_isseq(f) ? 2 : 0; +upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp, + upb_sink *output) { + upb_visitor *visitor = upb_env_malloc(e, sizeof(*visitor)); + visitor->layout = (const upb_msglayout*)vp; + visitor->sink = output; + return visitor; } -uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { - uint32_t ret = 1; - if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ - if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ - if (upb_fielddef_issubmsg(f)) { - /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ - ret += 0; - if (upb_fielddef_lazy(f)) { - /* STARTSTR/ENDSTR/STRING (for lazy) */ - ret += 3; - } - } - return ret; +bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) { + return upb_visitor_visitmsg2(msg, visitor->layout, visitor->sink, 0); } -/* upb_handlerattr ************************************************************/ +/** upb_msg *******************************************************************/ -void upb_handlerattr_init(upb_handlerattr *attr) { - upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER; - memcpy(attr, &from, sizeof(*attr)); -} +/* If we always read/write as a consistent type to each address, this shouldn't + * violate aliasing. + */ +#define DEREF(msg, ofs, type) *(type*)CHARPTR_AT(msg, ofs) -void upb_handlerattr_uninit(upb_handlerattr *attr) { - UPB_UNUSED(attr); +static upb_inttable *upb_msg_trygetextdict(const upb_msg *msg, + const upb_msglayout *l) { + return l->has_extdict ? DEREF(msg, l->extdict_offset, upb_inttable*) : NULL; } -bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) { - attr->handler_data_ = hd; - return true; -} +static upb_inttable *upb_msg_getextdict(upb_msg *msg, + const upb_msglayout *l, + upb_alloc *a) { + upb_inttable *ext_dict; + UPB_ASSERT(l->has_extdict); -bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) { - attr->closure_type_ = type; - return true; -} + ext_dict = upb_msg_trygetextdict(msg, l); -const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) { - return attr->closure_type_; -} + if (!ext_dict) { + ext_dict = upb_malloc(a, sizeof(upb_inttable)); -bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr, - const void *type) { - attr->return_closure_type_ = type; - return true; -} + if (!ext_dict) { + return NULL; + } -const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) { - return attr->return_closure_type_; + /* Use an 8-byte type to ensure all bytes are copied. */ + if (!upb_inttable_init2(ext_dict, UPB_CTYPE_INT64, a)) { + upb_free(a, ext_dict); + return NULL; + } + + DEREF(msg, l->extdict_offset, upb_inttable*) = ext_dict; + } + + return ext_dict; } -bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) { - attr->alwaysok_ = alwaysok; - return true; +static uint32_t upb_msg_getoneofint(const upb_msg *msg, + const upb_oneofdef *o, + const upb_msglayout *l) { + size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)]; + return DEREF(msg, oneof_ofs, uint8_t); } -bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) { - return attr->alwaysok_; +static void upb_msg_setoneofcase(const upb_msg *msg, + const upb_oneofdef *o, + const upb_msglayout *l, + uint32_t val) { + size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)]; + DEREF(msg, oneof_ofs, uint8_t) = val; } -/* upb_bufhandle **************************************************************/ -size_t upb_bufhandle_objofs(const upb_bufhandle *h) { - return h->objofs_; +static bool upb_msg_oneofis(const upb_msg *msg, const upb_msglayout *l, + const upb_oneofdef *o, const upb_fielddef *f) { + return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); } -/* upb_byteshandler ***********************************************************/ +size_t upb_msg_sizeof(const upb_msglayout *l) { return l->size; } -void upb_byteshandler_init(upb_byteshandler* h) { - memset(h, 0, sizeof(*h)); +void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a) { + if (l->default_msg) { + memcpy(msg, l->default_msg, l->size); + } else { + memset(msg, 0, l->size); + } + + /* Set arena pointer. */ + memcpy(msg, &a, sizeof(a)); } -/* For when we support handlerfree callbacks. */ -void upb_byteshandler_uninit(upb_byteshandler* h) { - UPB_UNUSED(h); +void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) { + upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); + if (ext_dict) { + upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg, l)); + } } -bool upb_byteshandler_setstartstr(upb_byteshandler *h, - upb_startstr_handlerfunc *func, void *d) { - h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func; - h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d; - return true; +upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a) { + upb_msg *msg = upb_malloc(a, upb_msg_sizeof(l)); + + if (msg) { + upb_msg_init(msg, l, a); + } + + return msg; } -bool upb_byteshandler_setstring(upb_byteshandler *h, - upb_string_handlerfunc *func, void *d) { - h->table[UPB_STRING_SELECTOR].func = (upb_func*)func; - h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d; - return true; +void upb_msg_free(upb_msg *msg, const upb_msglayout *l) { + upb_msg_uninit(msg, l); + upb_free(upb_msg_alloc(msg, l), msg); } -bool upb_byteshandler_setendstr(upb_byteshandler *h, - upb_endfield_handlerfunc *func, void *d) { - h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func; - h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d; - return true; +upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l) { + upb_alloc *alloc; + UPB_UNUSED(l); + memcpy(&alloc, msg, sizeof(alloc)); + return alloc; } -/* -** upb::RefCounted Implementation -** -** Our key invariants are: -** 1. reference cycles never span groups -** 2. for ref2(to, from), we increment to's count iff group(from) != group(to) -** -** The previous two are how we avoid leaking cycles. Other important -** invariants are: -** 3. for mutable objects "from" and "to", if there exists a ref2(to, from) -** this implies group(from) == group(to). (In practice, what we implement -** is even stronger; "from" and "to" will share a group if there has *ever* -** been a ref2(to, from), but all that is necessary for correctness is the -** weaker one). -** 4. mutable and immutable objects are never in the same group. -*/ +bool upb_msg_has(const upb_msg *msg, + const upb_fielddef *f, + const upb_msglayout *l) { + const upb_oneofdef *o; + upb_msg_checkfield(l, f); + UPB_ASSERT(upb_fielddef_haspresence(f)); -#include + if (upb_fielddef_isextension(f)) { + /* Extensions are set when they are present in the extension dict. */ + upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); + upb_value v; + return ext_dict != NULL && + upb_inttable_lookup32(ext_dict, upb_fielddef_number(f), &v); + } else if ((o = upb_fielddef_containingoneof(f)) != NULL) { + /* Oneofs are set when the oneof number is set to this field. */ + return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); + } else { + /* Other fields are set when their hasbit is set. */ + uint32_t hasbit = l->hasbits[upb_fielddef_index(f)]; + return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8)); + } +} -static void freeobj(upb_refcounted *o); +upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f, + const upb_msglayout *l) { + upb_msg_checkfield(l, f); -const char untracked_val; -const void *UPB_UNTRACKED_REF = &untracked_val; + if (upb_fielddef_isextension(f)) { + upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); + upb_value val; + if (upb_inttable_lookup32(ext_dict, upb_fielddef_number(f), &val)) { + return upb_msgval_fromval(val); + } else { + return upb_msgval_fromdefault(f); + } + } else { + size_t ofs = l->field_offsets[upb_fielddef_index(f)]; + const upb_oneofdef *o = upb_fielddef_containingoneof(f); + upb_msgval ret; -/* arch-specific atomic primitives *******************************************/ + if (o && !upb_msg_oneofis(msg, l, o, f)) { + /* Oneof defaults can't come from the message because the memory is reused + * by all types in the oneof. */ + return upb_msgval_fromdefault(f); + } -#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/ + ret = upb_msgval_read(msg, ofs, upb_msg_fieldsize(f)); + return ret; + } +} -static void atomic_inc(uint32_t *a) { (*a)++; } -static bool atomic_dec(uint32_t *a) { return --(*a) == 0; } +bool upb_msg_set(upb_msg *msg, + const upb_fielddef *f, + upb_msgval val, + const upb_msglayout *l) { + upb_alloc *a = upb_msg_alloc(msg, l); + upb_msg_checkfield(l, f); -#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/ + if (upb_fielddef_isextension(f)) { + /* TODO(haberman): introduce table API that can do this in one call. */ + upb_inttable *ext = upb_msg_getextdict(msg, l, a); + upb_value val2 = upb_toval(val); + if (!upb_inttable_replace(ext, upb_fielddef_number(f), val2) && + !upb_inttable_insert2(ext, upb_fielddef_number(f), val2, a)) { + return false; + } + } else { + size_t ofs = l->field_offsets[upb_fielddef_index(f)]; + const upb_oneofdef *o = upb_fielddef_containingoneof(f); -static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } -static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; } + if (o) { + upb_msg_setoneofcase(msg, o, l, upb_fielddef_number(f)); + } -#elif defined(WIN32) /*-------------------------------------------------------*/ + upb_msgval_write(msg, ofs, val, upb_msg_fieldsize(f)); + } + return true; +} -#include -static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); } -static bool atomic_dec(upb_atomic_t *a) { - return InterlockedDecrement(&a->val) == 0; -} +/** upb_array *****************************************************************/ -#else -#error Atomic primitives not defined for your platform/CPU. \ - Implement them or compile with UPB_THREAD_UNSAFE. -#endif +struct upb_array { + upb_fieldtype_t type; + uint8_t element_size; + void *data; /* Each element is element_size. */ + size_t len; /* Measured in elements. */ + size_t size; /* Measured in elements. */ + upb_alloc *alloc; +}; -/* All static objects point to this refcount. - * It is special-cased in ref/unref below. */ -uint32_t static_refcount = -1; +#define DEREF_ARR(arr, i, type) ((type*)arr->data)[i] -/* We can avoid atomic ops for statically-declared objects. - * This is a minor optimization but nice since we can avoid degrading under - * contention in this case. */ +size_t upb_array_sizeof(upb_fieldtype_t type) { + UPB_UNUSED(type); + return sizeof(upb_array); +} -static void refgroup(uint32_t *group) { - if (group != &static_refcount) - atomic_inc(group); +void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *alloc) { + arr->type = type; + arr->data = NULL; + arr->len = 0; + arr->size = 0; + arr->element_size = upb_msgval_sizeof(type); + arr->alloc = alloc; } -static bool unrefgroup(uint32_t *group) { - if (group == &static_refcount) { - return false; - } else { - return atomic_dec(group); - } +void upb_array_uninit(upb_array *arr) { + upb_free(arr->alloc, arr->data); } +upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a) { + upb_array *ret = upb_malloc(a, upb_array_sizeof(type)); -/* Reference tracking (debug only) ********************************************/ + if (ret) { + upb_array_init(ret, type, a); + } -#ifdef UPB_DEBUG_REFS + return ret; +} -#ifdef UPB_THREAD_UNSAFE +void upb_array_free(upb_array *arr) { + upb_array_uninit(arr); + upb_free(arr->alloc, arr); +} -static void upb_lock() {} -static void upb_unlock() {} +size_t upb_array_size(const upb_array *arr) { + return arr->len; +} -#else +upb_fieldtype_t upb_array_type(const upb_array *arr) { + return arr->type; +} -/* User must define functions that lock/unlock a global mutex and link this - * file against them. */ -void upb_lock(); -void upb_unlock(); +upb_msgval upb_array_get(const upb_array *arr, size_t i) { + UPB_ASSERT(i < arr->len); + return upb_msgval_read(arr->data, i * arr->element_size, arr->element_size); +} -#endif +bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) { + UPB_ASSERT(i <= arr->len); -/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some - * code-paths that can normally never fail, like upb_refcounted_ref(). Since - * we have no way to propagage out-of-memory errors back to the user, and since - * these errors can only occur in UPB_DEBUG_REFS mode, we use an allocator that - * immediately aborts on failure (avoiding the global allocator, which might - * inject failures). */ + if (i == arr->len) { + /* Extending the array. */ -#include + if (i == arr->size) { + /* Need to reallocate. */ + size_t new_size = UPB_MAX(arr->size * 2, 8); + size_t new_bytes = new_size * arr->element_size; + size_t old_bytes = arr->size * arr->element_size; + upb_msgval *new_data = + upb_realloc(arr->alloc, arr->data, old_bytes, new_bytes); -static void *upb_debugrefs_allocfunc(upb_alloc *alloc, void *ptr, - size_t oldsize, size_t size) { - UPB_UNUSED(alloc); - UPB_UNUSED(oldsize); - if (size == 0) { - free(ptr); - return NULL; - } else { - void *ret = realloc(ptr, size); + if (!new_data) { + return false; + } - if (!ret) { - abort(); + arr->data = new_data; + arr->size = new_size; } - return ret; + arr->len = i + 1; } + + upb_msgval_write(arr->data, i * arr->element_size, val, arr->element_size); + return true; } -upb_alloc upb_alloc_debugrefs = {&upb_debugrefs_allocfunc}; -typedef struct { - int count; /* How many refs there are (duplicates only allowed for ref2). */ - bool is_ref2; -} trackedref; +/** upb_map *******************************************************************/ -static trackedref *trackedref_new(bool is_ref2) { - trackedref *ret = upb_malloc(&upb_alloc_debugrefs, sizeof(*ret)); - ret->count = 1; - ret->is_ref2 = is_ref2; - return ret; +struct upb_map { + upb_fieldtype_t key_type; + upb_fieldtype_t val_type; + /* We may want to optimize this to use inttable where possible, for greater + * efficiency and lower memory footprint. */ + upb_strtable strtab; + upb_alloc *alloc; +}; + +static void upb_map_tokey(upb_fieldtype_t type, upb_msgval *key, + const char **out_key, size_t *out_len) { + switch (type) { + case UPB_TYPE_STRING: + /* Point to string data of the input key. */ + *out_key = key->str.ptr; + *out_len = key->str.len; + return; + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + /* Point to the key itself. XXX: big-endian. */ + *out_key = (const char*)key; + *out_len = upb_msgval_sizeof(type); + return; + case UPB_TYPE_BYTES: + case UPB_TYPE_DOUBLE: + case UPB_TYPE_ENUM: + case UPB_TYPE_FLOAT: + case UPB_TYPE_MESSAGE: + break; /* Cannot be a map key. */ + } + UPB_UNREACHABLE(); } -static void track(const upb_refcounted *r, const void *owner, bool ref2) { - upb_value v; +static upb_msgval upb_map_fromkey(upb_fieldtype_t type, const char *key, + size_t len) { + switch (type) { + case UPB_TYPE_STRING: + return upb_msgval_str(key, len); + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + return upb_msgval_read(key, 0, upb_msgval_sizeof(type)); + case UPB_TYPE_BYTES: + case UPB_TYPE_DOUBLE: + case UPB_TYPE_ENUM: + case UPB_TYPE_FLOAT: + case UPB_TYPE_MESSAGE: + break; /* Cannot be a map key. */ + } + UPB_UNREACHABLE(); +} - UPB_ASSERT(owner); - if (owner == UPB_UNTRACKED_REF) return; +size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype) { + /* Size does not currently depend on key/value type. */ + UPB_UNUSED(ktype); + UPB_UNUSED(vtype); + return sizeof(upb_map); +} - upb_lock(); - if (upb_inttable_lookupptr(r->refs, owner, &v)) { - trackedref *ref = upb_value_getptr(v); - /* Since we allow multiple ref2's for the same to/from pair without - * allocating separate memory for each one, we lose the fine-grained - * tracking behavior we get with regular refs. Since ref2s only happen - * inside upb, we'll accept this limitation until/unless there is a really - * difficult upb-internal bug that can't be figured out without it. */ - UPB_ASSERT(ref2); - UPB_ASSERT(ref->is_ref2); - ref->count++; - } else { - trackedref *ref = trackedref_new(ref2); - upb_inttable_insertptr2(r->refs, owner, upb_value_ptr(ref), - &upb_alloc_debugrefs); - if (ref2) { - /* We know this cast is safe when it is a ref2, because it's coming from - * another refcounted object. */ - const upb_refcounted *from = owner; - UPB_ASSERT(!upb_inttable_lookupptr(from->ref2s, r, NULL)); - upb_inttable_insertptr2(from->ref2s, r, upb_value_ptr(NULL), - &upb_alloc_debugrefs); - } +bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype, + upb_alloc *a) { + upb_ctype_t vtabtype = upb_fieldtotabtype(vtype); + UPB_ASSERT(upb_fieldtype_mapkeyok(ktype)); + map->key_type = ktype; + map->val_type = vtype; + map->alloc = a; + + if (!upb_strtable_init2(&map->strtab, vtabtype, a)) { + return false; } - upb_unlock(); + + return true; } -static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { - upb_value v; - bool found; - trackedref *ref; +void upb_map_uninit(upb_map *map) { + upb_strtable_uninit2(&map->strtab, map->alloc); +} - UPB_ASSERT(owner); - if (owner == UPB_UNTRACKED_REF) return; +upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype, + upb_alloc *a) { + upb_map *map = upb_malloc(a, upb_map_sizeof(ktype, vtype)); - upb_lock(); - found = upb_inttable_lookupptr(r->refs, owner, &v); - /* This assert will fail if an owner attempts to release a ref it didn't have. */ - UPB_ASSERT(found); - ref = upb_value_getptr(v); - UPB_ASSERT(ref->is_ref2 == ref2); - if (--ref->count == 0) { - free(ref); - upb_inttable_removeptr(r->refs, owner, NULL); - if (ref2) { - /* We know this cast is safe when it is a ref2, because it's coming from - * another refcounted object. */ - const upb_refcounted *from = owner; - bool removed = upb_inttable_removeptr(from->ref2s, r, NULL); - UPB_ASSERT(removed); - } + if (!map) { + return NULL; } - upb_unlock(); -} -static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { - upb_value v; - bool found; - trackedref *ref; + if (!upb_map_init(map, ktype, vtype, a)) { + return NULL; + } - upb_lock(); - found = upb_inttable_lookupptr(r->refs, owner, &v); - UPB_ASSERT(found); - ref = upb_value_getptr(v); - UPB_ASSERT(ref->is_ref2 == ref2); - upb_unlock(); + return map; } -/* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that - * originate from the given owner. */ -static void getref2s(const upb_refcounted *owner, upb_inttable *tab) { - upb_inttable_iter i; +void upb_map_free(upb_map *map) { + upb_map_uninit(map); + upb_free(map->alloc, map); +} - upb_lock(); - upb_inttable_begin(&i, owner->ref2s); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_value v; - upb_value count; - trackedref *ref; - bool found; +size_t upb_map_size(const upb_map *map) { + return upb_strtable_count(&map->strtab); +} - upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i); +upb_fieldtype_t upb_map_keytype(const upb_map *map) { + return map->key_type; +} - /* To get the count we need to look in the target's table. */ - found = upb_inttable_lookupptr(to->refs, owner, &v); - UPB_ASSERT(found); - ref = upb_value_getptr(v); - count = upb_value_int32(ref->count); +upb_fieldtype_t upb_map_valuetype(const upb_map *map) { + return map->val_type; +} - upb_inttable_insertptr2(tab, to, count, &upb_alloc_debugrefs); +bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) { + upb_value tabval; + const char *key_str; + size_t key_len; + bool ret; + + upb_map_tokey(map->key_type, &key, &key_str, &key_len); + ret = upb_strtable_lookup2(&map->strtab, key_str, key_len, &tabval); + if (ret) { + memcpy(val, &tabval, sizeof(tabval)); } - upb_unlock(); + + return ret; } -typedef struct { - upb_inttable ref2; - const upb_refcounted *obj; -} check_state; +bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val, + upb_msgval *removed) { + const char *key_str; + size_t key_len; + upb_value tabval = upb_toval(val); + upb_value removedtabval; + upb_alloc *a = map->alloc; -static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj, - void *closure) { - check_state *s = closure; - upb_inttable *ref2 = &s->ref2; - upb_value v; - bool removed; - int32_t newcount; + upb_map_tokey(map->key_type, &key, &key_str, &key_len); - UPB_ASSERT(obj == s->obj); - UPB_ASSERT(subobj); - removed = upb_inttable_removeptr(ref2, subobj, &v); - /* The following assertion will fail if the visit() function visits a subobj - * that it did not have a ref2 on, or visits the same subobj too many times. */ - UPB_ASSERT(removed); - newcount = upb_value_getint32(v) - 1; - if (newcount > 0) { - upb_inttable_insert2(ref2, (uintptr_t)subobj, upb_value_int32(newcount), - &upb_alloc_debugrefs); + /* TODO(haberman): add overwrite operation to minimize number of lookups. */ + if (upb_strtable_lookup2(&map->strtab, key_str, key_len, NULL)) { + upb_strtable_remove3(&map->strtab, key_str, key_len, &removedtabval, a); + memcpy(&removed, &removedtabval, sizeof(removed)); } -} -static void visit(const upb_refcounted *r, upb_refcounted_visit *v, - void *closure) { - /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know - * exactly the set of nodes that visit() should visit. So we verify visit()'s - * correctness here. */ - check_state state; - state.obj = r; - upb_inttable_init2(&state.ref2, UPB_CTYPE_INT32, &upb_alloc_debugrefs); - getref2s(r, &state.ref2); + return upb_strtable_insert3(&map->strtab, key_str, key_len, tabval, a); +} - /* This should visit any children in the ref2 table. */ - if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state); +bool upb_map_del(upb_map *map, upb_msgval key) { + const char *key_str; + size_t key_len; + upb_alloc *a = map->alloc; - /* This assertion will fail if the visit() function missed any children. */ - UPB_ASSERT(upb_inttable_count(&state.ref2) == 0); - upb_inttable_uninit2(&state.ref2, &upb_alloc_debugrefs); - if (r->vtbl->visit) r->vtbl->visit(r, v, closure); + upb_map_tokey(map->key_type, &key, &key_str, &key_len); + return upb_strtable_remove3(&map->strtab, key_str, key_len, NULL, a); } -static void trackinit(upb_refcounted *r) { - r->refs = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->refs)); - r->ref2s = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->ref2s)); - upb_inttable_init2(r->refs, UPB_CTYPE_PTR, &upb_alloc_debugrefs); - upb_inttable_init2(r->ref2s, UPB_CTYPE_PTR, &upb_alloc_debugrefs); + +/** upb_mapiter ***************************************************************/ + +struct upb_mapiter { + upb_strtable_iter iter; + upb_fieldtype_t key_type; +}; + +size_t upb_mapiter_sizeof() { + return sizeof(upb_mapiter); } -static void trackfree(const upb_refcounted *r) { - upb_inttable_uninit2(r->refs, &upb_alloc_debugrefs); - upb_inttable_uninit2(r->ref2s, &upb_alloc_debugrefs); - upb_free(&upb_alloc_debugrefs, r->refs); - upb_free(&upb_alloc_debugrefs, r->ref2s); +void upb_mapiter_begin(upb_mapiter *i, const upb_map *map) { + upb_strtable_begin(&i->iter, &map->strtab); + i->key_type = map->key_type; } -#else +upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a) { + upb_mapiter *ret = upb_malloc(a, upb_mapiter_sizeof()); -static void track(const upb_refcounted *r, const void *owner, bool ref2) { - UPB_UNUSED(r); - UPB_UNUSED(owner); - UPB_UNUSED(ref2); + if (!ret) { + return NULL; + } + + upb_mapiter_begin(ret, t); + return ret; } -static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { - UPB_UNUSED(r); - UPB_UNUSED(owner); - UPB_UNUSED(ref2); +void upb_mapiter_free(upb_mapiter *i, upb_alloc *a) { + upb_free(a, i); } -static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { - UPB_UNUSED(r); - UPB_UNUSED(owner); - UPB_UNUSED(ref2); +void upb_mapiter_next(upb_mapiter *i) { + upb_strtable_next(&i->iter); } -static void trackinit(upb_refcounted *r) { - UPB_UNUSED(r); +bool upb_mapiter_done(const upb_mapiter *i) { + return upb_strtable_done(&i->iter); } -static void trackfree(const upb_refcounted *r) { - UPB_UNUSED(r); +upb_msgval upb_mapiter_key(const upb_mapiter *i) { + return upb_map_fromkey(i->key_type, upb_strtable_iter_key(&i->iter), + upb_strtable_iter_keylength(&i->iter)); } -static void visit(const upb_refcounted *r, upb_refcounted_visit *v, - void *closure) { - if (r->vtbl->visit) r->vtbl->visit(r, v, closure); +upb_msgval upb_mapiter_value(const upb_mapiter *i) { + return upb_msgval_fromval(upb_strtable_iter_value(&i->iter)); } -#endif /* UPB_DEBUG_REFS */ +void upb_mapiter_setdone(upb_mapiter *i) { + upb_strtable_iter_setdone(&i->iter); +} +bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) { + return upb_strtable_iter_isequal(&i1->iter, &i2->iter); +} -/* freeze() *******************************************************************/ -/* The freeze() operation is by far the most complicated part of this scheme. - * We compute strongly-connected components and then mutate the graph such that - * we preserve the invariants documented at the top of this file. And we must - * handle out-of-memory errors gracefully (without leaving the graph - * inconsistent), which adds to the fun. */ +/** Handlers for upb_msg ******************************************************/ -/* The state used by the freeze operation (shared across many functions). */ typedef struct { - int depth; - int maxdepth; - uint64_t index; - /* Maps upb_refcounted* -> attributes (color, etc). attr layout varies by - * color. */ - upb_inttable objattr; - upb_inttable stack; /* stack of upb_refcounted* for Tarjan's algorithm. */ - upb_inttable groups; /* array of uint32_t*, malloc'd refcounts for new groups */ - upb_status *status; - jmp_buf err; -} tarjan; + size_t offset; + int32_t hasbit; +} upb_msg_handlerdata; -static void release_ref2(const upb_refcounted *obj, - const upb_refcounted *subobj, - void *closure); +/* Fallback implementation if the handler is not specialized by the producer. */ +#define MSG_WRITER(type, ctype) \ + bool upb_msg_set ## type (void *c, const void *hd, ctype val) { \ + uint8_t *m = c; \ + const upb_msg_handlerdata *d = hd; \ + if (d->hasbit > 0) \ + *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \ + *(ctype*)&m[d->offset] = val; \ + return true; \ + } \ -/* Node attributes -----------------------------------------------------------*/ +MSG_WRITER(double, double) +MSG_WRITER(float, float) +MSG_WRITER(int32, int32_t) +MSG_WRITER(int64, int64_t) +MSG_WRITER(uint32, uint32_t) +MSG_WRITER(uint64, uint64_t) +MSG_WRITER(bool, bool) -/* After our analysis phase all nodes will be either GRAY or WHITE. */ +bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f, + size_t offset, int32_t hasbit) { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + bool ok; -typedef enum { - BLACK = 0, /* Object has not been seen. */ - GRAY, /* Object has been found via a refgroup but may not be reachable. */ - GREEN, /* Object is reachable and is currently on the Tarjan stack. */ - WHITE /* Object is reachable and has been assigned a group (SCC). */ -} color_t; + upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d)); + if (!d) return false; + d->offset = offset; + d->hasbit = hasbit; -UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); } -UPB_NORETURN static void oom(tarjan *t) { - upb_status_seterrmsg(t->status, "out of memory"); - err(t); -} + upb_handlerattr_sethandlerdata(&attr, d); + upb_handlerattr_setalwaysok(&attr, true); + upb_handlers_addcleanup(h, d, upb_gfree); -static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) { - upb_value v; - return upb_inttable_lookupptr(&t->objattr, r, &v) ? - upb_value_getuint64(v) : 0; -} +#define TYPE(u, l) \ + case UPB_TYPE_##u: \ + ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break; -static uint64_t getattr(const tarjan *t, const upb_refcounted *r) { - upb_value v; - bool found = upb_inttable_lookupptr(&t->objattr, r, &v); - UPB_ASSERT(found); - return upb_value_getuint64(v); -} + ok = false; -static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) { - upb_inttable_removeptr(&t->objattr, r, NULL); - upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr)); -} + switch (upb_fielddef_type(f)) { + TYPE(INT64, int64); + TYPE(INT32, int32); + TYPE(ENUM, int32); + TYPE(UINT64, uint64); + TYPE(UINT32, uint32); + TYPE(DOUBLE, double); + TYPE(FLOAT, float); + TYPE(BOOL, bool); + default: UPB_ASSERT(false); break; + } +#undef TYPE -static color_t color(tarjan *t, const upb_refcounted *r) { - return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */ + upb_handlerattr_uninit(&attr); + return ok; } -static void set_gray(tarjan *t, const upb_refcounted *r) { - UPB_ASSERT(color(t, r) == BLACK); - setattr(t, r, GRAY); -} +bool upb_msg_getscalarhandlerdata(const upb_handlers *h, + upb_selector_t s, + upb_fieldtype_t *type, + size_t *offset, + int32_t *hasbit) { + const upb_msg_handlerdata *d; + upb_func *f = upb_handlers_gethandler(h, s); -/* Pushes an obj onto the Tarjan stack and sets it to GREEN. */ -static void push(tarjan *t, const upb_refcounted *r) { - UPB_ASSERT(color(t, r) == BLACK || color(t, r) == GRAY); - /* This defines the attr layout for the GREEN state. "index" and "lowlink" - * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */ - setattr(t, r, GREEN | (t->index << 2) | (t->index << 33)); - if (++t->index == 0x80000000) { - upb_status_seterrmsg(t->status, "too many objects to freeze"); - err(t); + if ((upb_int64_handlerfunc*)f == upb_msg_setint64) { + *type = UPB_TYPE_INT64; + } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) { + *type = UPB_TYPE_INT32; + } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) { + *type = UPB_TYPE_UINT64; + } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) { + *type = UPB_TYPE_UINT32; + } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) { + *type = UPB_TYPE_DOUBLE; + } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) { + *type = UPB_TYPE_FLOAT; + } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) { + *type = UPB_TYPE_BOOL; + } else { + return false; } - upb_inttable_push(&t->stack, upb_value_ptr((void*)r)); -} -/* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its - * SCC group. */ -static upb_refcounted *pop(tarjan *t) { - upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack)); - UPB_ASSERT(color(t, r) == GREEN); - /* This defines the attr layout for nodes in the WHITE state. - * Top of group stack is [group, NULL]; we point at group. */ - setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8); - return r; + d = upb_handlers_gethandlerdata(h, s); + *offset = d->offset; + *hasbit = d->hasbit; + return true; } +/* +** upb::RefCounted Implementation +** +** Our key invariants are: +** 1. reference cycles never span groups +** 2. for ref2(to, from), we increment to's count iff group(from) != group(to) +** +** The previous two are how we avoid leaking cycles. Other important +** invariants are: +** 3. for mutable objects "from" and "to", if there exists a ref2(to, from) +** this implies group(from) == group(to). (In practice, what we implement +** is even stronger; "from" and "to" will share a group if there has *ever* +** been a ref2(to, from), but all that is necessary for correctness is the +** weaker one). +** 4. mutable and immutable objects are never in the same group. +*/ -static void tarjan_newgroup(tarjan *t) { - uint32_t *group = upb_gmalloc(sizeof(*group)); - if (!group) oom(t); - /* Push group and empty group leader (we'll fill in leader later). */ - if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) || - !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) { - upb_gfree(group); - oom(t); - } - *group = 0; -} -static uint32_t idx(tarjan *t, const upb_refcounted *r) { - UPB_ASSERT(color(t, r) == GREEN); - return (getattr(t, r) >> 2) & 0x7FFFFFFF; -} +#include -static uint32_t lowlink(tarjan *t, const upb_refcounted *r) { - if (color(t, r) == GREEN) { - return getattr(t, r) >> 33; - } else { - return UINT32_MAX; - } -} +static void freeobj(upb_refcounted *o); -static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) { - UPB_ASSERT(color(t, r) == GREEN); - setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF)); -} +const char untracked_val; +const void *UPB_UNTRACKED_REF = &untracked_val; -static uint32_t *group(tarjan *t, upb_refcounted *r) { - uint64_t groupnum; - upb_value v; - bool found; +/* arch-specific atomic primitives *******************************************/ - UPB_ASSERT(color(t, r) == WHITE); - groupnum = getattr(t, r) >> 8; - found = upb_inttable_lookup(&t->groups, groupnum, &v); - UPB_ASSERT(found); - return upb_value_getptr(v); -} +#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/ -/* If the group leader for this object's group has not previously been set, - * the given object is assigned to be its leader. */ -static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) { - uint64_t leader_slot; - upb_value v; - bool found; +static void atomic_inc(uint32_t *a) { (*a)++; } +static bool atomic_dec(uint32_t *a) { return --(*a) == 0; } - UPB_ASSERT(color(t, r) == WHITE); - leader_slot = (getattr(t, r) >> 8) + 1; - found = upb_inttable_lookup(&t->groups, leader_slot, &v); - UPB_ASSERT(found); - if (upb_value_getptr(v)) { - return upb_value_getptr(v); - } else { - upb_inttable_remove(&t->groups, leader_slot, NULL); - upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r)); - return r; - } +#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/ + +static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } +static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; } + +#elif defined(WIN32) /*-------------------------------------------------------*/ + +#include + +static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); } +static bool atomic_dec(upb_atomic_t *a) { + return InterlockedDecrement(&a->val) == 0; } +#else +#error Atomic primitives not defined for your platform/CPU. \ + Implement them or compile with UPB_THREAD_UNSAFE. +#endif -/* Tarjan's algorithm --------------------------------------------------------*/ +/* All static objects point to this refcount. + * It is special-cased in ref/unref below. */ +uint32_t static_refcount = -1; -/* See: - * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */ -static void do_tarjan(const upb_refcounted *obj, tarjan *t); +/* We can avoid atomic ops for statically-declared objects. + * This is a minor optimization but nice since we can avoid degrading under + * contention in this case. */ -static void tarjan_visit(const upb_refcounted *obj, - const upb_refcounted *subobj, - void *closure) { - tarjan *t = closure; - if (++t->depth > t->maxdepth) { - upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth); - err(t); - } else if (subobj->is_frozen || color(t, subobj) == WHITE) { - /* Do nothing: we don't want to visit or color already-frozen nodes, - * and WHITE nodes have already been assigned a SCC. */ - } else if (color(t, subobj) < GREEN) { - /* Subdef has not yet been visited; recurse on it. */ - do_tarjan(subobj, t); - set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj))); - } else if (color(t, subobj) == GREEN) { - /* Subdef is in the stack and hence in the current SCC. */ - set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj))); - } - --t->depth; +static void refgroup(uint32_t *group) { + if (group != &static_refcount) + atomic_inc(group); } -static void do_tarjan(const upb_refcounted *obj, tarjan *t) { - if (color(t, obj) == BLACK) { - /* We haven't seen this object's group; mark the whole group GRAY. */ - const upb_refcounted *o = obj; - do { set_gray(t, o); } while ((o = o->next) != obj); - } - - push(t, obj); - visit(obj, tarjan_visit, t); - if (lowlink(t, obj) == idx(t, obj)) { - tarjan_newgroup(t); - while (pop(t) != obj) - ; +static bool unrefgroup(uint32_t *group) { + if (group == &static_refcount) { + return false; + } else { + return atomic_dec(group); } } -/* freeze() ------------------------------------------------------------------*/ +/* Reference tracking (debug only) ********************************************/ -static void crossref(const upb_refcounted *r, const upb_refcounted *subobj, - void *_t) { - tarjan *t = _t; - UPB_ASSERT(color(t, r) > BLACK); - if (color(t, subobj) > BLACK && r->group != subobj->group) { - /* Previously this ref was not reflected in subobj->group because they - * were in the same group; now that they are split a ref must be taken. */ - refgroup(subobj->group); - } -} +#ifdef UPB_DEBUG_REFS -static bool freeze(upb_refcounted *const*roots, int n, upb_status *s, - int maxdepth) { - volatile bool ret = false; - int i; - upb_inttable_iter iter; +#ifdef UPB_THREAD_UNSAFE - /* We run in two passes so that we can allocate all memory before performing - * any mutation of the input -- this allows us to leave the input unchanged - * in the case of memory allocation failure. */ - tarjan t; - t.index = 0; - t.depth = 0; - t.maxdepth = maxdepth; - t.status = s; - if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1; - if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2; - if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3; - if (setjmp(t.err) != 0) goto err4; +static void upb_lock() {} +static void upb_unlock() {} +#else - for (i = 0; i < n; i++) { - if (color(&t, roots[i]) < GREEN) { - do_tarjan(roots[i], &t); +/* User must define functions that lock/unlock a global mutex and link this + * file against them. */ +void upb_lock(); +void upb_unlock(); + +#endif + +/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some + * code-paths that can normally never fail, like upb_refcounted_ref(). Since + * we have no way to propagage out-of-memory errors back to the user, and since + * these errors can only occur in UPB_DEBUG_REFS mode, we use an allocator that + * immediately aborts on failure (avoiding the global allocator, which might + * inject failures). */ + +#include + +static void *upb_debugrefs_allocfunc(upb_alloc *alloc, void *ptr, + size_t oldsize, size_t size) { + UPB_UNUSED(alloc); + UPB_UNUSED(oldsize); + if (size == 0) { + free(ptr); + return NULL; + } else { + void *ret = realloc(ptr, size); + + if (!ret) { + abort(); } + + return ret; } +} - /* If we've made it this far, no further errors are possible so it's safe to - * mutate the objects without risk of leaving them in an inconsistent state. */ - ret = true; +upb_alloc upb_alloc_debugrefs = {&upb_debugrefs_allocfunc}; - /* The transformation that follows requires care. The preconditions are: - * - all objects in attr map are WHITE or GRAY, and are in mutable groups - * (groups of all mutable objs) - * - no ref2(to, from) refs have incremented count(to) if both "to" and - * "from" are in our attr map (this follows from invariants (2) and (3)) */ +typedef struct { + int count; /* How many refs there are (duplicates only allowed for ref2). */ + bool is_ref2; +} trackedref; - /* Pass 1: we remove WHITE objects from their mutable groups, and add them to - * new groups according to the SCC's we computed. These new groups will - * consist of only frozen objects. None will be immediately collectible, - * because WHITE objects are by definition reachable from one of "roots", - * which the caller must own refs on. */ - upb_inttable_begin(&iter, &t.objattr); - for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { - upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); - /* Since removal from a singly-linked list requires access to the object's - * predecessor, we consider obj->next instead of obj for moving. With the - * while() loop we guarantee that we will visit every node's predecessor. - * Proof: - * 1. every node's predecessor is in our attr map. - * 2. though the loop body may change a node's predecessor, it will only - * change it to be the node we are currently operating on, so with a - * while() loop we guarantee ourselves the chance to remove each node. */ - while (color(&t, obj->next) == WHITE && - group(&t, obj->next) != obj->next->group) { - upb_refcounted *leader; +static trackedref *trackedref_new(bool is_ref2) { + trackedref *ret = upb_malloc(&upb_alloc_debugrefs, sizeof(*ret)); + ret->count = 1; + ret->is_ref2 = is_ref2; + return ret; +} - /* Remove from old group. */ - upb_refcounted *move = obj->next; - if (obj == move) { - /* Removing the last object from a group. */ - UPB_ASSERT(*obj->group == obj->individual_count); - upb_gfree(obj->group); - } else { - obj->next = move->next; - /* This may decrease to zero; we'll collect GRAY objects (if any) that - * remain in the group in the third pass. */ - UPB_ASSERT(*move->group >= move->individual_count); - *move->group -= move->individual_count; - } +static void track(const upb_refcounted *r, const void *owner, bool ref2) { + upb_value v; - /* Add to new group. */ - leader = groupleader(&t, move); - if (move == leader) { - /* First object added to new group is its leader. */ - move->group = group(&t, move); - move->next = move; - *move->group = move->individual_count; - } else { - /* Group already has at least one object in it. */ - UPB_ASSERT(leader->group == group(&t, move)); - move->group = group(&t, move); - move->next = leader->next; - leader->next = move; - *move->group += move->individual_count; - } + UPB_ASSERT(owner); + if (owner == UPB_UNTRACKED_REF) return; - move->is_frozen = true; + upb_lock(); + if (upb_inttable_lookupptr(r->refs, owner, &v)) { + trackedref *ref = upb_value_getptr(v); + /* Since we allow multiple ref2's for the same to/from pair without + * allocating separate memory for each one, we lose the fine-grained + * tracking behavior we get with regular refs. Since ref2s only happen + * inside upb, we'll accept this limitation until/unless there is a really + * difficult upb-internal bug that can't be figured out without it. */ + UPB_ASSERT(ref2); + UPB_ASSERT(ref->is_ref2); + ref->count++; + } else { + trackedref *ref = trackedref_new(ref2); + upb_inttable_insertptr2(r->refs, owner, upb_value_ptr(ref), + &upb_alloc_debugrefs); + if (ref2) { + /* We know this cast is safe when it is a ref2, because it's coming from + * another refcounted object. */ + const upb_refcounted *from = owner; + UPB_ASSERT(!upb_inttable_lookupptr(from->ref2s, r, NULL)); + upb_inttable_insertptr2(from->ref2s, r, upb_value_ptr(NULL), + &upb_alloc_debugrefs); } } + upb_unlock(); +} - /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must - * increment count(to) if group(obj) != group(to) (which could now be the - * case if "to" was just frozen). */ - upb_inttable_begin(&iter, &t.objattr); - for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { - upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); - visit(obj, crossref, &t); +static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { + upb_value v; + bool found; + trackedref *ref; + + UPB_ASSERT(owner); + if (owner == UPB_UNTRACKED_REF) return; + + upb_lock(); + found = upb_inttable_lookupptr(r->refs, owner, &v); + /* This assert will fail if an owner attempts to release a ref it didn't have. */ + UPB_ASSERT(found); + ref = upb_value_getptr(v); + UPB_ASSERT(ref->is_ref2 == ref2); + if (--ref->count == 0) { + free(ref); + upb_inttable_removeptr(r->refs, owner, NULL); + if (ref2) { + /* We know this cast is safe when it is a ref2, because it's coming from + * another refcounted object. */ + const upb_refcounted *from = owner; + bool removed = upb_inttable_removeptr(from->ref2s, r, NULL); + UPB_ASSERT(removed); + } } + upb_unlock(); +} - /* Pass 3: GRAY objects are collected if their group's refcount dropped to - * zero when we removed its white nodes. This can happen if they had only - * been kept alive by virtue of sharing a group with an object that was just - * frozen. - * - * It is important that we do this last, since the GRAY object's free() - * function could call unref2() on just-frozen objects, which will decrement - * refs that were added in pass 2. */ - upb_inttable_begin(&iter, &t.objattr); - for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { - upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); - if (obj->group == NULL || *obj->group == 0) { - if (obj->group) { - upb_refcounted *o; +static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { + upb_value v; + bool found; + trackedref *ref; - /* We eagerly free() the group's count (since we can't easily determine - * the group's remaining size it's the easiest way to ensure it gets - * done). */ - upb_gfree(obj->group); + upb_lock(); + found = upb_inttable_lookupptr(r->refs, owner, &v); + UPB_ASSERT(found); + ref = upb_value_getptr(v); + UPB_ASSERT(ref->is_ref2 == ref2); + upb_unlock(); +} + +/* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that + * originate from the given owner. */ +static void getref2s(const upb_refcounted *owner, upb_inttable *tab) { + upb_inttable_iter i; + + upb_lock(); + upb_inttable_begin(&i, owner->ref2s); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + upb_value v; + upb_value count; + trackedref *ref; + bool found; - /* Visit to release ref2's (done in a separate pass since release_ref2 - * depends on o->group being unmodified so it can test merged()). */ - o = obj; - do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj); + upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i); - /* Mark "group" fields as NULL so we know to free the objects later in - * this loop, but also don't try to delete the group twice. */ - o = obj; - do { o->group = NULL; } while ((o = o->next) != obj); - } - freeobj(obj); - } - } + /* To get the count we need to look in the target's table. */ + found = upb_inttable_lookupptr(to->refs, owner, &v); + UPB_ASSERT(found); + ref = upb_value_getptr(v); + count = upb_value_int32(ref->count); -err4: - if (!ret) { - upb_inttable_begin(&iter, &t.groups); - for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) - upb_gfree(upb_value_getptr(upb_inttable_iter_value(&iter))); + upb_inttable_insertptr2(tab, to, count, &upb_alloc_debugrefs); } - upb_inttable_uninit(&t.groups); -err3: - upb_inttable_uninit(&t.stack); -err2: - upb_inttable_uninit(&t.objattr); -err1: - return ret; + upb_unlock(); } +typedef struct { + upb_inttable ref2; + const upb_refcounted *obj; +} check_state; -/* Misc internal functions ***************************************************/ +static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj, + void *closure) { + check_state *s = closure; + upb_inttable *ref2 = &s->ref2; + upb_value v; + bool removed; + int32_t newcount; -static bool merged(const upb_refcounted *r, const upb_refcounted *r2) { - return r->group == r2->group; + UPB_ASSERT(obj == s->obj); + UPB_ASSERT(subobj); + removed = upb_inttable_removeptr(ref2, subobj, &v); + /* The following assertion will fail if the visit() function visits a subobj + * that it did not have a ref2 on, or visits the same subobj too many times. */ + UPB_ASSERT(removed); + newcount = upb_value_getint32(v) - 1; + if (newcount > 0) { + upb_inttable_insert2(ref2, (uintptr_t)subobj, upb_value_int32(newcount), + &upb_alloc_debugrefs); + } } -static void merge(upb_refcounted *r, upb_refcounted *from) { - upb_refcounted *base; - upb_refcounted *tmp; +static void visit(const upb_refcounted *r, upb_refcounted_visit *v, + void *closure) { + /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know + * exactly the set of nodes that visit() should visit. So we verify visit()'s + * correctness here. */ + check_state state; + state.obj = r; + upb_inttable_init2(&state.ref2, UPB_CTYPE_INT32, &upb_alloc_debugrefs); + getref2s(r, &state.ref2); - if (merged(r, from)) return; - *r->group += *from->group; - upb_gfree(from->group); - base = from; + /* This should visit any children in the ref2 table. */ + if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state); - /* Set all refcount pointers in the "from" chain to the merged refcount. - * - * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound - * if the user continuously extends a group by one object. Prevent this by - * using one of the techniques in this paper: - * http://bioinfo.ict.ac.cn/~dbu/AlgorithmCourses/Lectures/Union-Find-Tarjan.pdf */ - do { from->group = r->group; } while ((from = from->next) != base); + /* This assertion will fail if the visit() function missed any children. */ + UPB_ASSERT(upb_inttable_count(&state.ref2) == 0); + upb_inttable_uninit2(&state.ref2, &upb_alloc_debugrefs); + if (r->vtbl->visit) r->vtbl->visit(r, v, closure); +} - /* Merge the two circularly linked lists by swapping their next pointers. */ - tmp = r->next; - r->next = base->next; - base->next = tmp; +static void trackinit(upb_refcounted *r) { + r->refs = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->refs)); + r->ref2s = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->ref2s)); + upb_inttable_init2(r->refs, UPB_CTYPE_PTR, &upb_alloc_debugrefs); + upb_inttable_init2(r->ref2s, UPB_CTYPE_PTR, &upb_alloc_debugrefs); } -static void unref(const upb_refcounted *r); +static void trackfree(const upb_refcounted *r) { + upb_inttable_uninit2(r->refs, &upb_alloc_debugrefs); + upb_inttable_uninit2(r->ref2s, &upb_alloc_debugrefs); + upb_free(&upb_alloc_debugrefs, r->refs); + upb_free(&upb_alloc_debugrefs, r->ref2s); +} -static void release_ref2(const upb_refcounted *obj, - const upb_refcounted *subobj, - void *closure) { - UPB_UNUSED(closure); - untrack(subobj, obj, true); - if (!merged(obj, subobj)) { - UPB_ASSERT(subobj->is_frozen); - unref(subobj); - } +#else + +static void track(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); } -static void unref(const upb_refcounted *r) { - if (unrefgroup(r->group)) { - const upb_refcounted *o; +static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); +} - upb_gfree(r->group); +static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { + UPB_UNUSED(r); + UPB_UNUSED(owner); + UPB_UNUSED(ref2); +} - /* In two passes, since release_ref2 needs a guarantee that any subobjs - * are alive. */ - o = r; - do { visit(o, release_ref2, NULL); } while((o = o->next) != r); +static void trackinit(upb_refcounted *r) { + UPB_UNUSED(r); +} - o = r; - do { - const upb_refcounted *next = o->next; - UPB_ASSERT(o->is_frozen || o->individual_count == 0); - freeobj((upb_refcounted*)o); - o = next; - } while(o != r); - } +static void trackfree(const upb_refcounted *r) { + UPB_UNUSED(r); } -static void freeobj(upb_refcounted *o) { - trackfree(o); - o->vtbl->free((upb_refcounted*)o); +static void visit(const upb_refcounted *r, upb_refcounted_visit *v, + void *closure) { + if (r->vtbl->visit) r->vtbl->visit(r, v, closure); } +#endif /* UPB_DEBUG_REFS */ -/* Public interface ***********************************************************/ -bool upb_refcounted_init(upb_refcounted *r, - const struct upb_refcounted_vtbl *vtbl, - const void *owner) { -#ifndef NDEBUG - /* Endianness check. This is unrelated to upb_refcounted, it's just a - * convenient place to put the check that we can be assured will run for - * basically every program using upb. */ - const int x = 1; -#ifdef UPB_BIG_ENDIAN - UPB_ASSERT(*(char*)&x != 1); -#else - UPB_ASSERT(*(char*)&x == 1); -#endif -#endif +/* freeze() *******************************************************************/ - r->next = r; - r->vtbl = vtbl; - r->individual_count = 0; - r->is_frozen = false; - r->group = upb_gmalloc(sizeof(*r->group)); - if (!r->group) return false; - *r->group = 0; - trackinit(r); - upb_refcounted_ref(r, owner); - return true; -} +/* The freeze() operation is by far the most complicated part of this scheme. + * We compute strongly-connected components and then mutate the graph such that + * we preserve the invariants documented at the top of this file. And we must + * handle out-of-memory errors gracefully (without leaving the graph + * inconsistent), which adds to the fun. */ -bool upb_refcounted_isfrozen(const upb_refcounted *r) { - return r->is_frozen; -} +/* The state used by the freeze operation (shared across many functions). */ +typedef struct { + int depth; + int maxdepth; + uint64_t index; + /* Maps upb_refcounted* -> attributes (color, etc). attr layout varies by + * color. */ + upb_inttable objattr; + upb_inttable stack; /* stack of upb_refcounted* for Tarjan's algorithm. */ + upb_inttable groups; /* array of uint32_t*, malloc'd refcounts for new groups */ + upb_status *status; + jmp_buf err; +} tarjan; -void upb_refcounted_ref(const upb_refcounted *r, const void *owner) { - track(r, owner, false); - if (!r->is_frozen) - ((upb_refcounted*)r)->individual_count++; - refgroup(r->group); -} +static void release_ref2(const upb_refcounted *obj, + const upb_refcounted *subobj, + void *closure); -void upb_refcounted_unref(const upb_refcounted *r, const void *owner) { - untrack(r, owner, false); - if (!r->is_frozen) - ((upb_refcounted*)r)->individual_count--; - unref(r); +/* Node attributes -----------------------------------------------------------*/ + +/* After our analysis phase all nodes will be either GRAY or WHITE. */ + +typedef enum { + BLACK = 0, /* Object has not been seen. */ + GRAY, /* Object has been found via a refgroup but may not be reachable. */ + GREEN, /* Object is reachable and is currently on the Tarjan stack. */ + WHITE /* Object is reachable and has been assigned a group (SCC). */ +} color_t; + +UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); } +UPB_NORETURN static void oom(tarjan *t) { + upb_status_seterrmsg(t->status, "out of memory"); + err(t); } -void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) { - UPB_ASSERT(!from->is_frozen); /* Non-const pointer implies this. */ - track(r, from, true); - if (r->is_frozen) { - refgroup(r->group); - } else { - merge((upb_refcounted*)r, from); - } +static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) { + upb_value v; + return upb_inttable_lookupptr(&t->objattr, r, &v) ? + upb_value_getuint64(v) : 0; } -void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) { - UPB_ASSERT(!from->is_frozen); /* Non-const pointer implies this. */ - untrack(r, from, true); - if (r->is_frozen) { - unref(r); - } else { - UPB_ASSERT(merged(r, from)); - } +static uint64_t getattr(const tarjan *t, const upb_refcounted *r) { + upb_value v; + bool found = upb_inttable_lookupptr(&t->objattr, r, &v); + UPB_ASSERT(found); + return upb_value_getuint64(v); } -void upb_refcounted_donateref( - const upb_refcounted *r, const void *from, const void *to) { - UPB_ASSERT(from != to); - if (to != NULL) - upb_refcounted_ref(r, to); - if (from != NULL) - upb_refcounted_unref(r, from); +static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) { + upb_inttable_removeptr(&t->objattr, r, NULL); + upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr)); } -void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) { - checkref(r, owner, false); +static color_t color(tarjan *t, const upb_refcounted *r) { + return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */ } -bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s, - int maxdepth) { - int i; - bool ret; - for (i = 0; i < n; i++) { - UPB_ASSERT(!roots[i]->is_frozen); - } - ret = freeze(roots, n, s, maxdepth); - UPB_ASSERT(!s || ret == upb_ok(s)); - return ret; +static void set_gray(tarjan *t, const upb_refcounted *r) { + UPB_ASSERT(color(t, r) == BLACK); + setattr(t, r, GRAY); } +/* Pushes an obj onto the Tarjan stack and sets it to GREEN. */ +static void push(tarjan *t, const upb_refcounted *r) { + UPB_ASSERT(color(t, r) == BLACK || color(t, r) == GRAY); + /* This defines the attr layout for the GREEN state. "index" and "lowlink" + * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */ + setattr(t, r, GREEN | (t->index << 2) | (t->index << 33)); + if (++t->index == 0x80000000) { + upb_status_seterrmsg(t->status, "too many objects to freeze"); + err(t); + } + upb_inttable_push(&t->stack, upb_value_ptr((void*)r)); +} -/* Fallback implementation if the shim is not specialized by the JIT. */ -#define SHIM_WRITER(type, ctype) \ - bool upb_shim_set ## type (void *c, const void *hd, ctype val) { \ - uint8_t *m = c; \ - const upb_shim_data *d = hd; \ - if (d->hasbit > 0) \ - *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \ - *(ctype*)&m[d->offset] = val; \ - return true; \ - } \ - -SHIM_WRITER(double, double) -SHIM_WRITER(float, float) -SHIM_WRITER(int32, int32_t) -SHIM_WRITER(int64, int64_t) -SHIM_WRITER(uint32, uint32_t) -SHIM_WRITER(uint64, uint64_t) -SHIM_WRITER(bool, bool) -#undef SHIM_WRITER - -bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset, - int32_t hasbit) { - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - bool ok; +/* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its + * SCC group. */ +static upb_refcounted *pop(tarjan *t) { + upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack)); + UPB_ASSERT(color(t, r) == GREEN); + /* This defines the attr layout for nodes in the WHITE state. + * Top of group stack is [group, NULL]; we point at group. */ + setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8); + return r; +} - upb_shim_data *d = upb_gmalloc(sizeof(*d)); - if (!d) return false; - d->offset = offset; - d->hasbit = hasbit; +static void tarjan_newgroup(tarjan *t) { + uint32_t *group = upb_gmalloc(sizeof(*group)); + if (!group) oom(t); + /* Push group and empty group leader (we'll fill in leader later). */ + if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) || + !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) { + upb_gfree(group); + oom(t); + } + *group = 0; +} - upb_handlerattr_sethandlerdata(&attr, d); - upb_handlerattr_setalwaysok(&attr, true); - upb_handlers_addcleanup(h, d, upb_gfree); +static uint32_t idx(tarjan *t, const upb_refcounted *r) { + UPB_ASSERT(color(t, r) == GREEN); + return (getattr(t, r) >> 2) & 0x7FFFFFFF; +} -#define TYPE(u, l) \ - case UPB_TYPE_##u: \ - ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break; +static uint32_t lowlink(tarjan *t, const upb_refcounted *r) { + if (color(t, r) == GREEN) { + return getattr(t, r) >> 33; + } else { + return UINT32_MAX; + } +} - ok = false; +static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) { + UPB_ASSERT(color(t, r) == GREEN); + setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF)); +} - switch (upb_fielddef_type(f)) { - TYPE(INT64, int64); - TYPE(INT32, int32); - TYPE(ENUM, int32); - TYPE(UINT64, uint64); - TYPE(UINT32, uint32); - TYPE(DOUBLE, double); - TYPE(FLOAT, float); - TYPE(BOOL, bool); - default: UPB_ASSERT(false); break; - } -#undef TYPE +static uint32_t *group(tarjan *t, upb_refcounted *r) { + uint64_t groupnum; + upb_value v; + bool found; - upb_handlerattr_uninit(&attr); - return ok; + UPB_ASSERT(color(t, r) == WHITE); + groupnum = getattr(t, r) >> 8; + found = upb_inttable_lookup(&t->groups, groupnum, &v); + UPB_ASSERT(found); + return upb_value_getptr(v); } -const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s, - upb_fieldtype_t *type) { - upb_func *f = upb_handlers_gethandler(h, s); +/* If the group leader for this object's group has not previously been set, + * the given object is assigned to be its leader. */ +static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) { + uint64_t leader_slot; + upb_value v; + bool found; - if ((upb_int64_handlerfunc*)f == upb_shim_setint64) { - *type = UPB_TYPE_INT64; - } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) { - *type = UPB_TYPE_INT32; - } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) { - *type = UPB_TYPE_UINT64; - } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) { - *type = UPB_TYPE_UINT32; - } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) { - *type = UPB_TYPE_DOUBLE; - } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) { - *type = UPB_TYPE_FLOAT; - } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) { - *type = UPB_TYPE_BOOL; + UPB_ASSERT(color(t, r) == WHITE); + leader_slot = (getattr(t, r) >> 8) + 1; + found = upb_inttable_lookup(&t->groups, leader_slot, &v); + UPB_ASSERT(found); + if (upb_value_getptr(v)) { + return upb_value_getptr(v); } else { - return NULL; + upb_inttable_remove(&t->groups, leader_slot, NULL); + upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r)); + return r; } - - return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s); } -#include +/* Tarjan's algorithm --------------------------------------------------------*/ -static void upb_symtab_free(upb_refcounted *r) { - upb_symtab *s = (upb_symtab*)r; - upb_strtable_iter i; - upb_strtable_begin(&i, &s->symtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); - upb_def_unref(def, s); +/* See: + * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */ +static void do_tarjan(const upb_refcounted *obj, tarjan *t); + +static void tarjan_visit(const upb_refcounted *obj, + const upb_refcounted *subobj, + void *closure) { + tarjan *t = closure; + if (++t->depth > t->maxdepth) { + upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth); + err(t); + } else if (subobj->is_frozen || color(t, subobj) == WHITE) { + /* Do nothing: we don't want to visit or color already-frozen nodes, + * and WHITE nodes have already been assigned a SCC. */ + } else if (color(t, subobj) < GREEN) { + /* Subdef has not yet been visited; recurse on it. */ + do_tarjan(subobj, t); + set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj))); + } else if (color(t, subobj) == GREEN) { + /* Subdef is in the stack and hence in the current SCC. */ + set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj))); } - upb_strtable_uninit(&s->symtab); - upb_gfree(s); + --t->depth; } -upb_symtab *upb_symtab_new(const void *owner) { - static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free}; - - upb_symtab *s = upb_gmalloc(sizeof(*s)); - if (!s) { - return NULL; +static void do_tarjan(const upb_refcounted *obj, tarjan *t) { + if (color(t, obj) == BLACK) { + /* We haven't seen this object's group; mark the whole group GRAY. */ + const upb_refcounted *o = obj; + do { set_gray(t, o); } while ((o = o->next) != obj); } - upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner); - upb_strtable_init(&s->symtab, UPB_CTYPE_PTR); - return s; + push(t, obj); + visit(obj, tarjan_visit, t); + if (lowlink(t, obj) == idx(t, obj)) { + tarjan_newgroup(t); + while (pop(t) != obj) + ; + } } -void upb_symtab_freeze(upb_symtab *s) { - upb_refcounted *r; - bool ok; - - UPB_ASSERT(!upb_symtab_isfrozen(s)); - r = upb_symtab_upcast_mutable(s); - /* The symtab does not take ref2's (see refcounted.h) on the defs, because - * defs cannot refer back to the table and therefore cannot create cycles. So - * 0 will suffice for maxdepth here. */ - ok = upb_refcounted_freeze(&r, 1, NULL, 0); - UPB_ASSERT(ok); -} -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) { - upb_value v; - upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ? - upb_value_getptr(v) : NULL; - return ret; -} +/* freeze() ------------------------------------------------------------------*/ -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { - upb_value v; - upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ? - upb_value_getptr(v) : NULL; - return def ? upb_dyncast_msgdef(def) : NULL; +static void crossref(const upb_refcounted *r, const upb_refcounted *subobj, + void *_t) { + tarjan *t = _t; + UPB_ASSERT(color(t, r) > BLACK); + if (color(t, subobj) > BLACK && r->group != subobj->group) { + /* Previously this ref was not reflected in subobj->group because they + * were in the same group; now that they are split a ref must be taken. */ + refgroup(subobj->group); + } } -const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { - upb_value v; - upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ? - upb_value_getptr(v) : NULL; - return def ? upb_dyncast_enumdef(def) : NULL; -} +static bool freeze(upb_refcounted *const*roots, int n, upb_status *s, + int maxdepth) { + volatile bool ret = false; + int i; + upb_inttable_iter iter; -/* Given a symbol and the base symbol inside which it is defined, find the - * symbol's definition in t. */ -static upb_def *upb_resolvename(const upb_strtable *t, - const char *base, const char *sym) { - if(strlen(sym) == 0) return NULL; - if(sym[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' */ - upb_value v; - return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL; - } else { - /* Remove components from base until we find an entry or run out. - * TODO: This branch is totally broken, but currently not used. */ - (void)base; - UPB_ASSERT(false); - return NULL; - } -} + /* We run in two passes so that we can allocate all memory before performing + * any mutation of the input -- this allows us to leave the input unchanged + * in the case of memory allocation failure. */ + tarjan t; + t.index = 0; + t.depth = 0; + t.maxdepth = maxdepth; + t.status = s; + if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1; + if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2; + if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3; + if (setjmp(t.err) != 0) goto err4; -const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym) { - upb_def *ret = upb_resolvename(&s->symtab, base, sym); - return ret; -} -/* Starts a depth-first traversal at "def", recursing into any subdefs - * (ie. submessage types). Adds duplicates of existing defs to addtab - * wherever necessary, so that the resulting symtab will be consistent once - * addtab is added. - * - * More specifically, if any def D is found in the DFS that: - * - * 1. can reach a def that is being replaced by something in addtab, AND - * - * 2. is not itself being replaced already (ie. this name doesn't already - * exist in addtab) - * - * ...then a duplicate (new copy) of D will be added to addtab. - * - * Returns true if this happened for any def reachable from "def." - * - * It is slightly tricky to do this correctly in the presence of cycles. If we - * detect that our DFS has hit a cycle, we might not yet know if any SCCs on - * our stack can reach a def in addtab or not. Once we figure this out, that - * answer needs to apply to *all* defs in these SCCs, even if we visited them - * already. So a straight up one-pass cycle-detecting DFS won't work. - * - * To work around this problem, we traverse each SCC (which we already - * computed, since these defs are frozen) as a single node. We first compute - * whether the SCC as a whole can reach any def in addtab, then we dup (or not) - * the entire SCC. This requires breaking the encapsulation of upb_refcounted, - * since that is where we get the data about what SCC we are in. */ -static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, - const void *new_owner, upb_inttable *seen, - upb_status *s) { - upb_value v; - bool need_dup; - const upb_def *base; - const void* memoize_key; + for (i = 0; i < n; i++) { + if (color(&t, roots[i]) < GREEN) { + do_tarjan(roots[i], &t); + } + } - /* Memoize results of this function for efficiency (since we're traversing a - * DAG this is not needed to limit the depth of the search). - * - * We memoize by SCC instead of by individual def. */ - memoize_key = def->base.group; + /* If we've made it this far, no further errors are possible so it's safe to + * mutate the objects without risk of leaving them in an inconsistent state. */ + ret = true; - if (upb_inttable_lookupptr(seen, memoize_key, &v)) - return upb_value_getbool(v); + /* The transformation that follows requires care. The preconditions are: + * - all objects in attr map are WHITE or GRAY, and are in mutable groups + * (groups of all mutable objs) + * - no ref2(to, from) refs have incremented count(to) if both "to" and + * "from" are in our attr map (this follows from invariants (2) and (3)) */ - /* Visit submessages for all messages in the SCC. */ - need_dup = false; - base = def; - do { - upb_value v; - const upb_msgdef *m; + /* Pass 1: we remove WHITE objects from their mutable groups, and add them to + * new groups according to the SCC's we computed. These new groups will + * consist of only frozen objects. None will be immediately collectible, + * because WHITE objects are by definition reachable from one of "roots", + * which the caller must own refs on. */ + upb_inttable_begin(&iter, &t.objattr); + for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { + upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); + /* Since removal from a singly-linked list requires access to the object's + * predecessor, we consider obj->next instead of obj for moving. With the + * while() loop we guarantee that we will visit every node's predecessor. + * Proof: + * 1. every node's predecessor is in our attr map. + * 2. though the loop body may change a node's predecessor, it will only + * change it to be the node we are currently operating on, so with a + * while() loop we guarantee ourselves the chance to remove each node. */ + while (color(&t, obj->next) == WHITE && + group(&t, obj->next) != obj->next->group) { + upb_refcounted *leader; - UPB_ASSERT(upb_def_isfrozen(def)); - if (def->type == UPB_DEF_FIELD) continue; - if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) { - need_dup = true; - } + /* Remove from old group. */ + upb_refcounted *move = obj->next; + if (obj == move) { + /* Removing the last object from a group. */ + UPB_ASSERT(*obj->group == obj->individual_count); + upb_gfree(obj->group); + } else { + obj->next = move->next; + /* This may decrease to zero; we'll collect GRAY objects (if any) that + * remain in the group in the third pass. */ + UPB_ASSERT(*move->group >= move->individual_count); + *move->group -= move->individual_count; + } - /* For messages, continue the recursion by visiting all subdefs, but only - * ones in different SCCs. */ - m = upb_dyncast_msgdef(def); - if (m) { - upb_msg_field_iter i; - for(upb_msg_field_begin(&i, m); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - const upb_def *subdef; - - if (!upb_fielddef_hassubdef(f)) continue; - subdef = upb_fielddef_subdef(f); - - /* Skip subdefs in this SCC. */ - if (def->base.group == subdef->base.group) continue; - - /* |= to avoid short-circuit; we need its side-effects. */ - need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s); - if (!upb_ok(s)) return false; + /* Add to new group. */ + leader = groupleader(&t, move); + if (move == leader) { + /* First object added to new group is its leader. */ + move->group = group(&t, move); + move->next = move; + *move->group = move->individual_count; + } else { + /* Group already has at least one object in it. */ + UPB_ASSERT(leader->group == group(&t, move)); + move->group = group(&t, move); + move->next = leader->next; + leader->next = move; + *move->group += move->individual_count; } + + move->is_frozen = true; } - } while ((def = (upb_def*)def->base.next) != base); + } - if (need_dup) { - /* Dup all defs in this SCC that don't already have entries in addtab. */ - def = base; - do { - const char *name; - - if (def->type == UPB_DEF_FIELD) continue; - name = upb_def_fullname(def); - if (!upb_strtable_lookup(addtab, name, NULL)) { - upb_def *newdef = upb_def_dup(def, new_owner); - if (!newdef) goto oom; - newdef->came_from_user = false; - if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef))) - goto oom; - } - } while ((def = (upb_def*)def->base.next) != base); + /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must + * increment count(to) if group(obj) != group(to) (which could now be the + * case if "to" was just frozen). */ + upb_inttable_begin(&iter, &t.objattr); + for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { + upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); + visit(obj, crossref, &t); } - upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup)); - return need_dup; + /* Pass 3: GRAY objects are collected if their group's refcount dropped to + * zero when we removed its white nodes. This can happen if they had only + * been kept alive by virtue of sharing a group with an object that was just + * frozen. + * + * It is important that we do this last, since the GRAY object's free() + * function could call unref2() on just-frozen objects, which will decrement + * refs that were added in pass 2. */ + upb_inttable_begin(&iter, &t.objattr); + for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { + upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); + if (obj->group == NULL || *obj->group == 0) { + if (obj->group) { + upb_refcounted *o; -oom: - upb_status_seterrmsg(s, "out of memory"); - return false; -} + /* We eagerly free() the group's count (since we can't easily determine + * the group's remaining size it's the easiest way to ensure it gets + * done). */ + upb_gfree(obj->group); -/* TODO(haberman): we need a lot more testing of error conditions. - * The came_from_user stuff in particular is not tested. */ -static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n, - void *ref_donor, upb_refcounted *freeze_also, - upb_status *status) { - size_t i; - size_t add_n; - size_t freeze_n; - upb_strtable_iter iter; - upb_refcounted **add_objs = NULL; - upb_def **add_defs = NULL; - size_t add_objs_size; - upb_strtable addtab; - upb_inttable seen; + /* Visit to release ref2's (done in a separate pass since release_ref2 + * depends on o->group being unmodified so it can test merged()). */ + o = obj; + do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj); - if (n == 0 && !freeze_also) { - return true; + /* Mark "group" fields as NULL so we know to free the objects later in + * this loop, but also don't try to delete the group twice. */ + o = obj; + do { o->group = NULL; } while ((o = o->next) != obj); + } + freeobj(obj); + } } - UPB_ASSERT(!upb_symtab_isfrozen(s)); - if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) { - upb_status_seterrmsg(status, "out of memory"); - return false; +err4: + if (!ret) { + upb_inttable_begin(&iter, &t.groups); + for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) + upb_gfree(upb_value_getptr(upb_inttable_iter_value(&iter))); } + upb_inttable_uninit(&t.groups); +err3: + upb_inttable_uninit(&t.stack); +err2: + upb_inttable_uninit(&t.objattr); +err1: + return ret; +} - /* Add new defs to our "add" set. */ - for (i = 0; i < n; i++) { - upb_def *def = defs[i]; - const char *fullname; - upb_fielddef *f; - if (upb_def_isfrozen(def)) { - upb_status_seterrmsg(status, "added defs must be mutable"); - goto err; - } - UPB_ASSERT(!upb_def_isfrozen(def)); - fullname = upb_def_fullname(def); - if (!fullname) { - upb_status_seterrmsg( - status, "Anonymous defs cannot be added to a symtab"); - goto err; - } +/* Misc internal functions ***************************************************/ - f = upb_dyncast_fielddef_mutable(def); +static bool merged(const upb_refcounted *r, const upb_refcounted *r2) { + return r->group == r2->group; +} - if (f) { - if (!upb_fielddef_containingtypename(f)) { - upb_status_seterrmsg(status, - "Standalone fielddefs must have a containing type " - "(extendee) name set"); - goto err; - } - } else { - if (upb_strtable_lookup(&addtab, fullname, NULL)) { - upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); - goto err; - } - /* We need this to back out properly, because if there is a failure we - * need to donate the ref back to the caller. */ - def->came_from_user = true; - upb_def_donateref(def, ref_donor, s); - if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) - goto oom_err; - } - } +static void merge(upb_refcounted *r, upb_refcounted *from) { + upb_refcounted *base; + upb_refcounted *tmp; - /* Add standalone fielddefs (ie. extensions) to the appropriate messages. - * If the appropriate message only exists in the existing symtab, duplicate - * it so we have a mutable copy we can add the fields to. */ - for (i = 0; i < n; i++) { - upb_def *def = defs[i]; - upb_fielddef *f = upb_dyncast_fielddef_mutable(def); - const char *msgname; - upb_value v; - upb_msgdef *m; + if (merged(r, from)) return; + *r->group += *from->group; + upb_gfree(from->group); + base = from; - if (!f) continue; - msgname = upb_fielddef_containingtypename(f); - /* We validated this earlier in this function. */ - UPB_ASSERT(msgname); + /* Set all refcount pointers in the "from" chain to the merged refcount. + * + * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound + * if the user continuously extends a group by one object. Prevent this by + * using one of the techniques in this paper: + * http://bioinfo.ict.ac.cn/~dbu/AlgorithmCourses/Lectures/Union-Find-Tarjan.pdf */ + do { from->group = r->group; } while ((from = from->next) != base); - /* If the extendee name is absolutely qualified, move past the initial ".". - * TODO(haberman): it is not obvious what it would mean if this was not - * absolutely qualified. */ - if (msgname[0] == '.') { - msgname++; - } + /* Merge the two circularly linked lists by swapping their next pointers. */ + tmp = r->next; + r->next = base->next; + base->next = tmp; +} - if (upb_strtable_lookup(&addtab, msgname, &v)) { - /* Extendee is in the set of defs the user asked us to add. */ - m = upb_value_getptr(v); - } else { - /* Need to find and dup the extendee from the existing symtab. */ - const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname); - if (!frozen_m) { - upb_status_seterrf(status, - "Tried to extend message %s that does not exist " - "in this SymbolTable.", - msgname); - goto err; - } - m = upb_msgdef_dup(frozen_m, s); - if (!m) goto oom_err; - if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) { - upb_msgdef_unref(m, s); - goto oom_err; - } - } +static void unref(const upb_refcounted *r); - if (!upb_msgdef_addfield(m, f, ref_donor, status)) { - goto err; - } +static void release_ref2(const upb_refcounted *obj, + const upb_refcounted *subobj, + void *closure) { + UPB_UNUSED(closure); + untrack(subobj, obj, true); + if (!merged(obj, subobj)) { + UPB_ASSERT(subobj->is_frozen); + unref(subobj); } +} - /* Add dups of any existing def that can reach a def with the same name as - * anything in our "add" set. */ - if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err; - upb_strtable_begin(&iter, &s->symtab); - for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); - upb_resolve_dfs(def, &addtab, s, &seen, status); - if (!upb_ok(status)) goto err; - } - upb_inttable_uninit(&seen); +static void unref(const upb_refcounted *r) { + if (unrefgroup(r->group)) { + const upb_refcounted *o; - /* Now using the table, resolve symbolic references for subdefs. */ - upb_strtable_begin(&iter, &addtab); - for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - const char *base; - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); - upb_msgdef *m = upb_dyncast_msgdef_mutable(def); - upb_msg_field_iter j; + upb_gfree(r->group); - if (!m) continue; - /* Type names are resolved relative to the message in which they appear. */ - base = upb_msgdef_fullname(m); + /* In two passes, since release_ref2 needs a guarantee that any subobjs + * are alive. */ + o = r; + do { visit(o, release_ref2, NULL); } while((o = o->next) != r); - for(upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j)) { - upb_fielddef *f = upb_msg_iter_field(&j); - const char *name = upb_fielddef_subdefname(f); - if (name && !upb_fielddef_subdef(f)) { - /* Try the lookup in the current set of to-be-added defs first. If not - * there, try existing defs. */ - upb_def *subdef = upb_resolvename(&addtab, base, name); - if (subdef == NULL) { - subdef = upb_resolvename(&s->symtab, base, name); - } - if (subdef == NULL) { - upb_status_seterrf( - status, "couldn't resolve name '%s' in message '%s'", name, base); - goto err; - } else if (!upb_fielddef_setsubdef(f, subdef, status)) { - goto err; - } - } - } + o = r; + do { + const upb_refcounted *next = o->next; + UPB_ASSERT(o->is_frozen || o->individual_count == 0); + freeobj((upb_refcounted*)o); + o = next; + } while(o != r); } +} - /* We need an array of the defs in addtab, for passing to - * upb_refcounted_freeze(). */ - add_objs_size = upb_strtable_count(&addtab); - if (freeze_also) { - add_objs_size++; - } +static void freeobj(upb_refcounted *o) { + trackfree(o); + o->vtbl->free((upb_refcounted*)o); +} - add_defs = upb_gmalloc(sizeof(void*) * add_objs_size); - if (add_defs == NULL) goto oom_err; - upb_strtable_begin(&iter, &addtab); - for (add_n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - add_defs[add_n++] = upb_value_getptr(upb_strtable_iter_value(&iter)); - } - /* Validate defs. */ - if (!_upb_def_validate(add_defs, add_n, status)) { - goto err; - } +/* Public interface ***********************************************************/ - /* Cheat a little and give the array a new type. - * This is probably undefined behavior, but this code will be deleted soon. */ - add_objs = (upb_refcounted**)add_defs; +bool upb_refcounted_init(upb_refcounted *r, + const struct upb_refcounted_vtbl *vtbl, + const void *owner) { +#ifndef NDEBUG + /* Endianness check. This is unrelated to upb_refcounted, it's just a + * convenient place to put the check that we can be assured will run for + * basically every program using upb. */ + const int x = 1; +#ifdef UPB_BIG_ENDIAN + UPB_ASSERT(*(char*)&x != 1); +#else + UPB_ASSERT(*(char*)&x == 1); +#endif +#endif - freeze_n = add_n; - if (freeze_also) { - add_objs[freeze_n++] = freeze_also; - } + r->next = r; + r->vtbl = vtbl; + r->individual_count = 0; + r->is_frozen = false; + r->group = upb_gmalloc(sizeof(*r->group)); + if (!r->group) return false; + *r->group = 0; + trackinit(r); + upb_refcounted_ref(r, owner); + return true; +} - if (!upb_refcounted_freeze(add_objs, freeze_n, status, - UPB_MAX_MESSAGE_DEPTH * 2)) { - goto err; - } +bool upb_refcounted_isfrozen(const upb_refcounted *r) { + return r->is_frozen; +} - /* This must be delayed until all errors have been detected, since error - * recovery code uses this table to cleanup defs. */ - upb_strtable_uninit(&addtab); +void upb_refcounted_ref(const upb_refcounted *r, const void *owner) { + track(r, owner, false); + if (!r->is_frozen) + ((upb_refcounted*)r)->individual_count++; + refgroup(r->group); +} - /* TODO(haberman) we don't properly handle errors after this point (like - * OOM in upb_strtable_insert() below). */ - for (i = 0; i < add_n; i++) { - upb_def *def = (upb_def*)add_objs[i]; - const char *name = upb_def_fullname(def); - upb_value v; - bool success; +void upb_refcounted_unref(const upb_refcounted *r, const void *owner) { + untrack(r, owner, false); + if (!r->is_frozen) + ((upb_refcounted*)r)->individual_count--; + unref(r); +} - if (upb_strtable_remove(&s->symtab, name, &v)) { - const upb_def *def = upb_value_getptr(v); - upb_def_unref(def, s); - } - success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); - UPB_ASSERT(success == true); +void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) { + UPB_ASSERT(!from->is_frozen); /* Non-const pointer implies this. */ + track(r, from, true); + if (r->is_frozen) { + refgroup(r->group); + } else { + merge((upb_refcounted*)r, from); } - upb_gfree(add_defs); - return true; +} -oom_err: - upb_status_seterrmsg(status, "out of memory"); -err: { - /* For defs the user passed in, we need to donate the refs back. For defs - * we dup'd, we need to just unref them. */ - upb_strtable_begin(&iter, &addtab); - for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); - bool came_from_user = def->came_from_user; - def->came_from_user = false; - if (came_from_user) { - upb_def_donateref(def, s, ref_donor); - } else { - upb_def_unref(def, s); - } - } +void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) { + UPB_ASSERT(!from->is_frozen); /* Non-const pointer implies this. */ + untrack(r, from, true); + if (r->is_frozen) { + unref(r); + } else { + UPB_ASSERT(merged(r, from)); } - upb_strtable_uninit(&addtab); - upb_gfree(add_defs); - UPB_ASSERT(!upb_ok(status)); - return false; } -bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n, - void *ref_donor, upb_status *status) { - return symtab_add(s, defs, n, ref_donor, NULL, status); +void upb_refcounted_donateref( + const upb_refcounted *r, const void *from, const void *to) { + UPB_ASSERT(from != to); + if (to != NULL) + upb_refcounted_ref(r, to); + if (from != NULL) + upb_refcounted_unref(r, from); } -bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) { - size_t n; - size_t i; - upb_def **defs; +void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) { + checkref(r, owner, false); +} + +bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s, + int maxdepth) { + int i; bool ret; + for (i = 0; i < n; i++) { + UPB_ASSERT(!roots[i]->is_frozen); + } + ret = freeze(roots, n, s, maxdepth); + UPB_ASSERT(!s || ret == upb_ok(s)); + return ret; +} - n = upb_filedef_defcount(file); - defs = upb_gmalloc(sizeof(*defs) * n); - if (defs == NULL) { - upb_status_seterrmsg(status, "Out of memory"); - return false; +bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink *sink) { + void *subc; + bool ret; + upb_bufhandle handle; + upb_bufhandle_init(&handle); + upb_bufhandle_setbuf(&handle, buf, 0); + ret = upb_bytessink_start(sink, len, &subc); + if (ret && len != 0) { + ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len); } - - for (i = 0; i < n; i++) { - defs[i] = upb_filedef_mutabledef(file, i); + if (ret) { + ret = upb_bytessink_end(sink); } + upb_bufhandle_uninit(&handle); + return ret; +} - ret = symtab_add(s, defs, n, NULL, upb_filedef_upcast_mutable(file), status); +struct upb_bufsink { + upb_byteshandler handler; + upb_bytessink sink; + upb_env *env; + char *ptr; + size_t len, size; +}; - upb_gfree(defs); - return ret; +static void *upb_bufsink_start(void *_sink, const void *hd, size_t size_hint) { + upb_bufsink *sink = _sink; + UPB_UNUSED(hd); + UPB_UNUSED(size_hint); + sink->len = 0; + return sink; } -/* Iteration. */ +static size_t upb_bufsink_string(void *_sink, const void *hd, const char *ptr, + size_t len, const upb_bufhandle *handle) { + upb_bufsink *sink = _sink; + size_t new_size = sink->size; -static void advance_to_matching(upb_symtab_iter *iter) { - if (iter->type == UPB_DEF_ANY) - return; + UPB_ASSERT(new_size > 0); + UPB_UNUSED(hd); + UPB_UNUSED(handle); - while (!upb_strtable_done(&iter->iter) && - iter->type != upb_symtab_iter_def(iter)->type) { - upb_strtable_next(&iter->iter); + while (sink->len + len > new_size) { + new_size *= 2; + } + + if (new_size != sink->size) { + sink->ptr = upb_env_realloc(sink->env, sink->ptr, sink->size, new_size); + sink->size = new_size; } + + memcpy(sink->ptr + sink->len, ptr, len); + sink->len += len; + + return len; } -void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s, - upb_deftype_t type) { - upb_strtable_begin(&iter->iter, &s->symtab); - iter->type = type; - advance_to_matching(iter); +upb_bufsink *upb_bufsink_new(upb_env *env) { + upb_bufsink *sink = upb_env_malloc(env, sizeof(upb_bufsink)); + upb_byteshandler_init(&sink->handler); + upb_byteshandler_setstartstr(&sink->handler, upb_bufsink_start, NULL); + upb_byteshandler_setstring(&sink->handler, upb_bufsink_string, NULL); + + upb_bytessink_reset(&sink->sink, &sink->handler, sink); + + sink->env = env; + sink->size = 32; + sink->ptr = upb_env_malloc(env, sink->size); + sink->len = 0; + + return sink; } -void upb_symtab_next(upb_symtab_iter *iter) { - upb_strtable_next(&iter->iter); - advance_to_matching(iter); +void upb_bufsink_free(upb_bufsink *sink) { + upb_env_free(sink->env, sink->ptr); + upb_env_free(sink->env, sink); } -bool upb_symtab_done(const upb_symtab_iter *iter) { - return upb_strtable_done(&iter->iter); +upb_bytessink *upb_bufsink_sink(upb_bufsink *sink) { + return &sink->sink; } -const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) { - return upb_value_getptr(upb_strtable_iter_value(&iter->iter)); +const char *upb_bufsink_getdata(const upb_bufsink *sink, size_t *len) { + *len = sink->len; + return sink->ptr; } /* ** upb_table Implementation @@ -5285,7 +6312,7 @@ upb_alloc upb_alloc_global = {&upb_global_allocfunc}; /* Be conservative and choose 16 in case anyone is using SSE. */ static const size_t maxalign = 16; -static size_t align_up(size_t size) { +static size_t align_up_max(size_t size) { return ((size + maxalign - 1) / maxalign) * maxalign; } @@ -5309,7 +6336,7 @@ static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size, block->next = a->block_head; block->size = size; - block->used = align_up(sizeof(mem_block)); + block->used = align_up_max(sizeof(mem_block)); block->owned = owned; a->block_head = block; @@ -5342,7 +6369,7 @@ static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize, return NULL; /* We are an arena, don't need individual frees. */ } - size = align_up(size); + size = align_up_max(size); /* TODO(haberman): special-case if this is a realloc of the last alloc? */ @@ -5412,6 +6439,10 @@ void upb_arena_uninit(upb_arena *a) { block = next; } + + /* Protect against multiple-uninit. */ + a->cleanup_head = NULL; + a->block_head = NULL; } bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud) { @@ -6481,7 +7512,7 @@ struct upb_descreader { upb_fielddef *f; }; -static char *upb_strndup(const char *buf, size_t n) { +static char *upb_gstrndup(const char *buf, size_t n) { char *ret = upb_gmalloc(n + 1); if (!ret) return NULL; memcpy(ret, buf, n); @@ -6631,7 +7662,7 @@ static size_t file_onname(void *closure, const void *hd, const char *buf, UPB_UNUSED(hd); UPB_UNUSED(handle); - name = upb_strndup(buf, n); + name = upb_gstrndup(buf, n); /* XXX: see comment at the top of the file. */ ok = upb_filedef_setname(r->file, name, NULL); upb_gfree(name); @@ -6647,7 +7678,7 @@ static size_t file_onpackage(void *closure, const void *hd, const char *buf, UPB_UNUSED(hd); UPB_UNUSED(handle); - package = upb_strndup(buf, n); + package = upb_gstrndup(buf, n); /* XXX: see comment at the top of the file. */ upb_descreader_setscopename(r, package); ok = upb_filedef_setpackage(r->file, package, NULL); @@ -6719,7 +7750,7 @@ static size_t enumval_onname(void *closure, const void *hd, const char *buf, UPB_UNUSED(handle); /* XXX: see comment at the top of the file. */ upb_gfree(r->name); - r->name = upb_strndup(buf, n); + r->name = upb_gstrndup(buf, n); r->saw_name = true; return n; } @@ -6770,7 +7801,7 @@ static bool enum_endmsg(void *closure, const void *hd, upb_status *status) { static size_t enum_onname(void *closure, const void *hd, const char *buf, size_t n, const upb_bufhandle *handle) { upb_descreader *r = closure; - char *fullname = upb_strndup(buf, n); + char *fullname = upb_gstrndup(buf, n); UPB_UNUSED(hd); UPB_UNUSED(handle); /* XXX: see comment at the top of the file. */ @@ -6938,7 +7969,7 @@ static bool field_onnumber(void *closure, const void *hd, int32_t val) { static size_t field_onname(void *closure, const void *hd, const char *buf, size_t n, const upb_bufhandle *handle) { upb_descreader *r = closure; - char *name = upb_strndup(buf, n); + char *name = upb_gstrndup(buf, n); UPB_UNUSED(hd); UPB_UNUSED(handle); @@ -6951,7 +7982,7 @@ static size_t field_onname(void *closure, const void *hd, const char *buf, static size_t field_ontypename(void *closure, const void *hd, const char *buf, size_t n, const upb_bufhandle *handle) { upb_descreader *r = closure; - char *name = upb_strndup(buf, n); + char *name = upb_gstrndup(buf, n); UPB_UNUSED(hd); UPB_UNUSED(handle); @@ -6964,7 +7995,7 @@ static size_t field_ontypename(void *closure, const void *hd, const char *buf, static size_t field_onextendee(void *closure, const void *hd, const char *buf, size_t n, const upb_bufhandle *handle) { upb_descreader *r = closure; - char *name = upb_strndup(buf, n); + char *name = upb_gstrndup(buf, n); UPB_UNUSED(hd); UPB_UNUSED(handle); @@ -6984,7 +8015,7 @@ static size_t field_ondefaultval(void *closure, const void *hd, const char *buf, * type yet, so we save it as a string until the end of the field. * XXX: see comment at the top of the file. */ upb_gfree(r->default_string); - r->default_string = upb_strndup(buf, n); + r->default_string = upb_gstrndup(buf, n); return n; } @@ -7005,7 +8036,7 @@ static size_t oneof_name(void *closure, const void *hd, const char *buf, upb_descreader *r = closure; upb_descreader_frame *f = &r->stack[r->stack_len-1]; upb_oneofdef *o = upb_descreader_getoneof(r, f->oneof_index++); - char *name_null_terminated = upb_strndup(buf, n); + char *name_null_terminated = upb_gstrndup(buf, n); bool ok = upb_oneofdef_setname(o, name_null_terminated, NULL); UPB_UNUSED(hd); UPB_UNUSED(handle); @@ -7042,7 +8073,7 @@ static size_t msg_name(void *closure, const void *hd, const char *buf, upb_descreader *r = closure; upb_msgdef *m = upb_descreader_top(r); /* XXX: see comment at the top of the file. */ - char *name = upb_strndup(buf, n); + char *name = upb_gstrndup(buf, n); UPB_UNUSED(hd); UPB_UNUSED(handle); diff --git a/php/ext/google/protobuf/upb.h b/php/ext/google/protobuf/upb.h index c83b0e03..5f780458 100644 --- a/php/ext/google/protobuf/upb.h +++ b/php/ext/google/protobuf/upb.h @@ -123,20 +123,21 @@ template class InlinedEnvironment; #define UPB_NORETURN #endif +#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L +/* C99/C++11 versions. */ +#include +#define _upb_snprintf snprintf +#define _upb_vsnprintf vsnprintf +#define _upb_va_copy(a, b) va_copy(a, b) +#elif defined __GNUC__ /* A few hacky workarounds for functions not in C89. * For internal use only! * TODO(haberman): fix these by including our own implementations, or finding * another workaround. */ -#ifdef __GNUC__ #define _upb_snprintf __builtin_snprintf #define _upb_vsnprintf __builtin_vsnprintf #define _upb_va_copy(a, b) __va_copy(a, b) -#elif __STDC_VERSION__ >= 199901L -/* C99 versions. */ -#define _upb_snprintf snprintf -#define _upb_vsnprintf vsnprintf -#define _upb_va_copy(a, b) va_copy(a, b) #else #error Need implementations of [v]snprintf and va_copy #endif @@ -280,6 +281,12 @@ template class InlinedEnvironment; * exist in debug mode. This turns into regular assert. */ #define UPB_ASSERT_DEBUGVAR(expr) assert(expr) +#ifdef __GNUC__ +#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0) +#else +#define UPB_UNREACHABLE() do { assert(0); } while(0) +#endif + /* Generic function type. */ typedef void upb_func(); @@ -513,17 +520,18 @@ struct upb_alloc { }; UPB_INLINE void *upb_malloc(upb_alloc *alloc, size_t size) { - UPB_ASSERT(size > 0); + UPB_ASSERT(alloc); return alloc->func(alloc, NULL, 0, size); } UPB_INLINE void *upb_realloc(upb_alloc *alloc, void *ptr, size_t oldsize, size_t size) { - UPB_ASSERT(size > 0); + UPB_ASSERT(alloc); return alloc->func(alloc, ptr, oldsize, size); } UPB_INLINE void upb_free(upb_alloc *alloc, void *ptr) { + assert(alloc); alloc->func(alloc, ptr, 0, 0); } @@ -572,11 +580,11 @@ UPB_BEGIN_EXTERN_C void upb_arena_init(upb_arena *a); void upb_arena_init2(upb_arena *a, void *mem, size_t n, upb_alloc *alloc); void upb_arena_uninit(upb_arena *a); -upb_alloc *upb_arena_alloc(upb_arena *a); bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud); size_t upb_arena_bytesallocated(const upb_arena *a); void upb_arena_setnextblocksize(upb_arena *a, size_t size); void upb_arena_setmaxblocksize(upb_arena *a, size_t size); +UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; } UPB_END_EXTERN_C @@ -807,7 +815,9 @@ typedef enum { UPB_CTYPE_CSTR = 6, UPB_CTYPE_PTR = 7, UPB_CTYPE_CONSTPTR = 8, - UPB_CTYPE_FPTR = 9 + UPB_CTYPE_FPTR = 9, + UPB_CTYPE_FLOAT = 10, + UPB_CTYPE_DOUBLE = 11 } upb_ctype_t; typedef struct { @@ -881,6 +891,29 @@ FUNCS(constptr, constptr, const void*, uintptr_t, UPB_CTYPE_CONSTPTR) FUNCS(fptr, fptr, upb_func*, uintptr_t, UPB_CTYPE_FPTR) #undef FUNCS + +UPB_INLINE void upb_value_setfloat(upb_value *val, float cval) { + memcpy(&val->val, &cval, sizeof(cval)); + SET_TYPE(val->ctype, UPB_CTYPE_FLOAT); +} + +UPB_INLINE void upb_value_setdouble(upb_value *val, double cval) { + memcpy(&val->val, &cval, sizeof(cval)); + SET_TYPE(val->ctype, UPB_CTYPE_DOUBLE); +} + +UPB_INLINE upb_value upb_value_float(float cval) { + upb_value ret; + upb_value_setfloat(&ret, cval); + return ret; +} + +UPB_INLINE upb_value upb_value_double(double cval) { + upb_value ret; + upb_value_setdouble(&ret, cval); + return ret; +} + #undef SET_TYPE @@ -1123,6 +1156,13 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } +void upb_inttable_packedsize(const upb_inttable *t, size_t *size); +void upb_strtable_packedsize(const upb_strtable *t, size_t *size); +upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs, + size_t size); +upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs, + size_t size); + /* Inserts the given key into the hashtable with the given value. The key must * not already exist in the hash table. For string tables, the key must be * NULL-terminated, and the table will make an internal copy of the key. @@ -1669,6 +1709,7 @@ class FieldDef; class FileDef; class MessageDef; class OneofDef; +class SymbolTable; } #endif @@ -1677,6 +1718,8 @@ UPB_DECLARE_DERIVED_TYPE(upb::OneofDef, upb::RefCounted, upb_oneofdef, upb_refcounted) UPB_DECLARE_DERIVED_TYPE(upb::FileDef, upb::RefCounted, upb_filedef, upb_refcounted) +UPB_DECLARE_TYPE(upb::SymbolTable, upb_symtab) + /* The maximum message depth that the type graph can have. This is a resource * limit for the C stack since we sometimes need to recursively traverse the @@ -1710,8 +1753,6 @@ class upb::Def { public: typedef upb_deftype_t Type; - Def* Dup(const void *owner) const; - /* upb::RefCounted methods like Ref()/Unref(). */ UPB_REFCOUNTED_CPPMETHODS @@ -1757,9 +1798,6 @@ class upb::Def { UPB_BEGIN_EXTERN_C -/* Native C API. */ -upb_def *upb_def_dup(const upb_def *def, const void *owner); - /* Include upb_refcounted methods like upb_def_ref()/upb_def_unref(). */ UPB_REFCOUNTED_CMETHODS(upb_def, upb_def_upcast) @@ -1856,15 +1894,19 @@ UPB_DECLARE_DEF_TYPE(upb::EnumDef, enumdef, ENUM) * types defined in descriptor.proto, which gives INT32 and SINT32 separate * types (we distinguish the two with the "integer encoding" enum below). */ typedef enum { - UPB_TYPE_FLOAT = 1, - UPB_TYPE_DOUBLE = 2, - UPB_TYPE_BOOL = 3, - UPB_TYPE_STRING = 4, - UPB_TYPE_BYTES = 5, - UPB_TYPE_MESSAGE = 6, - UPB_TYPE_ENUM = 7, /* Enum values are int32. */ - UPB_TYPE_INT32 = 8, - UPB_TYPE_UINT32 = 9, + /* Types stored in 1 byte. */ + UPB_TYPE_BOOL = 1, + /* Types stored in 4 bytes. */ + UPB_TYPE_FLOAT = 2, + UPB_TYPE_INT32 = 3, + UPB_TYPE_UINT32 = 4, + UPB_TYPE_ENUM = 5, /* Enum values are int32. */ + /* Types stored as pointers (probably 4 or 8 bytes). */ + UPB_TYPE_STRING = 6, + UPB_TYPE_BYTES = 7, + UPB_TYPE_MESSAGE = 8, + /* Types stored as 8 bytes. */ + UPB_TYPE_DOUBLE = 9, UPB_TYPE_INT64 = 10, UPB_TYPE_UINT64 = 11 } upb_fieldtype_t; @@ -1945,13 +1987,6 @@ class upb::FieldDef { /* Returns NULL if memory allocation failed. */ static reffed_ptr New(); - /* Duplicates the given field, returning NULL if memory allocation failed. - * When a fielddef is duplicated, the subdef (if any) is made symbolic if it - * wasn't already. If the subdef is set but has no name (which is possible - * since msgdefs are not required to have a name) the new fielddef's subdef - * will be unset. */ - FieldDef* Dup(const void* owner) const; - /* upb::RefCounted methods like Ref()/Unref(). */ UPB_REFCOUNTED_CPPMETHODS @@ -2038,16 +2073,10 @@ class upb::FieldDef { bool IsPrimitive() const; bool IsMap() const; - /* Whether this field must be able to explicitly represent presence: - * - * * This is always false for repeated fields (an empty repeated field is - * equivalent to a repeated field with zero entries). + /* Returns whether this field explicitly represents presence. * - * * This is always true for submessages. - * - * * For other fields, it depends on the message (see - * MessageDef::SetPrimitivesHavePresence()) - */ + * For proto2 messages: Returns true for any scalar (non-repeated) field. + * For proto3 messages: Returns true for scalar submessage or oneof fields. */ bool HasPresence() const; /* How integers are encoded. Only meaningful for integer types. @@ -2206,7 +2235,6 @@ UPB_BEGIN_EXTERN_C /* Native C API. */ upb_fielddef *upb_fielddef_new(const void *owner); -upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner); /* Include upb_refcounted methods like upb_fielddef_ref(). */ UPB_REFCOUNTED_CMETHODS(upb_fielddef, upb_fielddef_upcast2) @@ -2416,16 +2444,6 @@ class upb::MessageDef { return FindOneofByName(str.c_str(), str.size()); } - /* Returns a new msgdef that is a copy of the given msgdef (and a copy of all - * the fields) but with any references to submessages broken and replaced - * with just the name of the submessage. Returns NULL if memory allocation - * failed. - * - * TODO(haberman): which is more useful, keeping fields resolved or - * unresolving them? If there's no obvious answer, Should this functionality - * just be moved into symtab.c? */ - MessageDef* Dup(const void* owner) const; - /* Is this message a map entry? */ void setmapentry(bool map_entry); bool mapentry() const; @@ -2559,7 +2577,6 @@ UPB_REFCOUNTED_CMETHODS(upb_msgdef, upb_msgdef_upcast2) bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status); -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); const char *upb_msgdef_fullname(const upb_msgdef *m); const char *upb_msgdef_name(const upb_msgdef *m); int upb_msgdef_numoneofs(const upb_msgdef *m); @@ -2709,10 +2726,6 @@ class upb::EnumDef { * first one that was added. */ const char* FindValueByNumber(int32_t num) const; - /* Returns a new EnumDef with all the same values. The new EnumDef will be - * owned by the given owner. */ - EnumDef* Dup(const void* owner) const; - /* Iteration over name/value pairs. The order is undefined. * Adding an enum val invalidates any iterators. * @@ -2740,7 +2753,6 @@ UPB_BEGIN_EXTERN_C /* Native C API. */ upb_enumdef *upb_enumdef_new(const void *owner); -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner); /* Include upb_refcounted methods like upb_enumdef_ref(). */ UPB_REFCOUNTED_CMETHODS(upb_enumdef, upb_enumdef_upcast2) @@ -2785,6 +2797,7 @@ int32_t upb_enum_iter_number(upb_enum_iter *iter); UPB_END_EXTERN_C + /* upb::OneofDef **************************************************************/ typedef upb_inttable_iter upb_oneof_iter; @@ -2849,10 +2862,6 @@ class upb::OneofDef { /* Looks up by tag number. */ const FieldDef* FindFieldByNumber(uint32_t num) const; - /* Returns a new OneofDef with all the same fields. The OneofDef will be owned - * by the given owner. */ - OneofDef* Dup(const void* owner) const; - /* Iteration over fields. The order is undefined. */ class iterator : public std::iterator { public: @@ -2898,16 +2907,16 @@ UPB_BEGIN_EXTERN_C /* Native C API. */ upb_oneofdef *upb_oneofdef_new(const void *owner); -upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner); /* Include upb_refcounted methods like upb_oneofdef_ref(). */ UPB_REFCOUNTED_CMETHODS(upb_oneofdef, upb_oneofdef_upcast) const char *upb_oneofdef_name(const upb_oneofdef *o); -bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s); - const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o); int upb_oneofdef_numfields(const upb_oneofdef *o); +uint32_t upb_oneofdef_index(const upb_oneofdef *o); + +bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s); bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f, const void *ref_donor, upb_status *s); @@ -3051,6 +3060,153 @@ UPB_INLINE upb_def *upb_filedef_mutabledef(upb_filedef *f, int i) { UPB_END_EXTERN_C +typedef struct { + UPB_PRIVATE_FOR_CPP + upb_strtable_iter iter; + upb_deftype_t type; +} upb_symtab_iter; + +#ifdef __cplusplus + +/* Non-const methods in upb::SymbolTable are NOT thread-safe. */ +class upb::SymbolTable { + public: + /* Returns a new symbol table with a single ref owned by "owner." + * Returns NULL if memory allocation failed. */ + static SymbolTable* New(); + static void Free(upb::SymbolTable* table); + + /* For all lookup functions, the returned pointer is not owned by the + * caller; it may be invalidated by any non-const call or unref of the + * SymbolTable! To protect against this, take a ref if desired. */ + + /* Freezes the symbol table: prevents further modification of it. + * After the Freeze() operation is successful, the SymbolTable must only be + * accessed via a const pointer. + * + * Unlike with upb::MessageDef/upb::EnumDef/etc, freezing a SymbolTable is not + * a necessary step in using a SymbolTable. If you have no need for it to be + * immutable, there is no need to freeze it ever. However sometimes it is + * useful, and SymbolTables that are statically compiled into the binary are + * always frozen by nature. */ + void Freeze(); + + /* Resolves the given symbol using the rules described in descriptor.proto, + * namely: + * + * If the name starts with a '.', it is fully-qualified. Otherwise, + * C++-like scoping rules are used to find the type (i.e. first the nested + * types within this message are searched, then within the parent, on up + * to the root namespace). + * + * If not found, returns NULL. */ + const Def* Resolve(const char* base, const char* sym) const; + + /* Finds an entry in the symbol table with this exact name. If not found, + * returns NULL. */ + const Def* Lookup(const char *sym) const; + const MessageDef* LookupMessage(const char *sym) const; + const EnumDef* LookupEnum(const char *sym) const; + + /* TODO: introduce a C++ iterator, but make it nice and templated so that if + * you ask for an iterator of MessageDef the iterated elements are strongly + * typed as MessageDef*. */ + + /* Adds the given mutable defs to the symtab, resolving all symbols (including + * enum default values) and finalizing the defs. Only one def per name may be + * in the list, and the defs may not duplicate any name already in the symtab. + * All defs must have a name -- anonymous defs are not allowed. Anonymous + * defs can still be frozen by calling upb_def_freeze() directly. + * + * The entire operation either succeeds or fails. If the operation fails, + * the symtab is unchanged, false is returned, and status indicates the + * error. The caller passes a ref on all defs to the symtab (even if the + * operation fails). + * + * TODO(haberman): currently failure will leave the symtab unchanged, but may + * leave the defs themselves partially resolved. Does this matter? If so we + * could do a prepass that ensures that all symbols are resolvable and bail + * if not, so we don't mutate anything until we know the operation will + * succeed. */ + bool Add(Def*const* defs, size_t n, void* ref_donor, Status* status); + + bool Add(const std::vector& defs, void *owner, Status* status) { + return Add((Def*const*)&defs[0], defs.size(), owner, status); + } + + /* Resolves all subdefs for messages in this file and attempts to freeze the + * file. If this succeeds, adds all the symbols to this SymbolTable + * (replacing any existing ones with the same names). */ + bool AddFile(FileDef* file, Status* s); + + private: + UPB_DISALLOW_POD_OPS(SymbolTable, upb::SymbolTable) +}; + +#endif /* __cplusplus */ + +UPB_BEGIN_EXTERN_C + +/* Native C API. */ + +upb_symtab *upb_symtab_new(); +void upb_symtab_free(upb_symtab* s); +const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, + const char *sym); +const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym); +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym); +const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym); +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n, + void *ref_donor, upb_status *status); +bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status* status); + +/* upb_symtab_iter i; + * for(upb_symtab_begin(&i, s, type); !upb_symtab_done(&i); + * upb_symtab_next(&i)) { + * const upb_def *def = upb_symtab_iter_def(&i); + * // ... + * } + * + * For C we don't have separate iterators for const and non-const. + * It is the caller's responsibility to cast the upb_fielddef* to + * const if the upb_msgdef* is const. */ +void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s, + upb_deftype_t type); +void upb_symtab_next(upb_symtab_iter *iter); +bool upb_symtab_done(const upb_symtab_iter *iter); +const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter); + +UPB_END_EXTERN_C + +#ifdef __cplusplus +/* C++ inline wrappers. */ +namespace upb { +inline SymbolTable* SymbolTable::New() { + return upb_symtab_new(); +} +inline void SymbolTable::Free(SymbolTable* s) { + upb_symtab_free(s); +} +inline const Def *SymbolTable::Resolve(const char *base, + const char *sym) const { + return upb_symtab_resolve(this, base, sym); +} +inline const Def* SymbolTable::Lookup(const char *sym) const { + return upb_symtab_lookup(this, sym); +} +inline const MessageDef *SymbolTable::LookupMessage(const char *sym) const { + return upb_symtab_lookupmsg(this, sym); +} +inline bool SymbolTable::Add( + Def*const* defs, size_t n, void* ref_donor, Status* status) { + return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status); +} +inline bool SymbolTable::AddFile(FileDef* file, Status* s) { + return upb_symtab_addfile(this, file, s); +} +} /* namespace upb */ +#endif + #ifdef __cplusplus UPB_INLINE const char* upb_safecstr(const std::string& str) { @@ -3061,9 +3217,6 @@ UPB_INLINE const char* upb_safecstr(const std::string& str) { /* Inline C++ wrappers. */ namespace upb { -inline Def* Def::Dup(const void* owner) const { - return upb_def_dup(this, owner); -} inline Def::Type Def::def_type() const { return upb_def_type(this); } inline const char* Def::full_name() const { return upb_def_fullname(this); } inline const char* Def::name() const { return upb_def_name(this); } @@ -3113,9 +3266,6 @@ inline reffed_ptr FieldDef::New() { upb_fielddef *f = upb_fielddef_new(&f); return reffed_ptr(f, &f); } -inline FieldDef* FieldDef::Dup(const void* owner) const { - return upb_fielddef_dup(this, owner); -} inline const char* FieldDef::full_name() const { return upb_fielddef_fullname(this); } @@ -3355,9 +3505,6 @@ inline const OneofDef* MessageDef::FindOneofByName(const char* name, size_t len) const { return upb_msgdef_ntoo(this, name, len); } -inline MessageDef* MessageDef::Dup(const void *owner) const { - return upb_msgdef_dup(this, owner); -} inline void MessageDef::setmapentry(bool map_entry) { upb_msgdef_setmapentry(this, map_entry); } @@ -3527,9 +3674,6 @@ inline bool EnumDef::FindValueByName(const char* name, int32_t *num) const { inline const char* EnumDef::FindValueByNumber(int32_t num) const { return upb_enumdef_iton(this, num); } -inline EnumDef* EnumDef::Dup(const void* owner) const { - return upb_enumdef_dup(this, owner); -} inline EnumDef::Iterator::Iterator(const EnumDef* e) { upb_enum_begin(&iter_, e); @@ -3836,6 +3980,7 @@ extern const struct upb_refcounted_vtbl upb_enumdef_vtbl; struct upb_oneofdef { upb_refcounted base; + uint32_t index; /* Index within oneofs. */ const char *name; upb_strtable ntof; upb_inttable itof; @@ -3845,7 +3990,7 @@ struct upb_oneofdef { extern const struct upb_refcounted_vtbl upb_oneofdef_vtbl; #define UPB_ONEOFDEF_INIT(name, ntof, itof, refs, ref2s) \ - { UPB_REFCOUNT_INIT(&upb_oneofdef_vtbl, refs, ref2s), name, ntof, itof } + { UPB_REFCOUNT_INIT(&upb_oneofdef_vtbl, refs, ref2s), 0, name, ntof, itof } /* upb_symtab *****************************************************************/ @@ -5832,12 +5977,14 @@ inline BytesHandler::~BytesHandler() {} #ifdef __cplusplus namespace upb { +class BufferSink; class BufferSource; class BytesSink; class Sink; } #endif +UPB_DECLARE_TYPE(upb::BufferSink, upb_bufsink) UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc) UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink) UPB_DECLARE_TYPE(upb::Sink, upb_sink) @@ -6024,6 +6171,13 @@ struct upb_bufsrc { UPB_BEGIN_EXTERN_C +/* A class for accumulating output string data in a flat buffer. */ + +upb_bufsink *upb_bufsink_new(upb_env *env); +void upb_bufsink_free(upb_bufsink *sink); +upb_bytessink *upb_bufsink_sink(upb_bufsink *sink); +const char *upb_bufsink_getdata(const upb_bufsink *sink, size_t *len); + /* Inline definitions. */ UPB_INLINE void upb_bytessink_reset(upb_bytessink *s, const upb_byteshandler *h, @@ -6073,23 +6227,7 @@ UPB_INLINE bool upb_bytessink_end(upb_bytessink *s) { &s->handler->table[UPB_ENDSTR_SELECTOR].attr)); } -UPB_INLINE bool upb_bufsrc_putbuf(const char *buf, size_t len, - upb_bytessink *sink) { - void *subc; - bool ret; - upb_bufhandle handle; - upb_bufhandle_init(&handle); - upb_bufhandle_setbuf(&handle, buf, 0); - ret = upb_bytessink_start(sink, len, &subc); - if (ret && len != 0) { - ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len); - } - if (ret) { - ret = upb_bytessink_end(sink); - } - upb_bufhandle_uninit(&handle); - return ret; -} +bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink *sink); #define PUTVAL(type, ctype) \ UPB_INLINE bool upb_sink_put##type(upb_sink *s, upb_selector_t sel, \ @@ -6337,267 +6475,407 @@ inline bool BufferSource::PutBuffer(const char *buf, size_t len, #endif /* -** For handlers that do very tiny, very simple operations, the function call -** overhead of calling a handler can be significant. This file allows the -** user to define handlers that do something very simple like store the value -** to memory and/or set a hasbit. JIT compilers can then special-case these -** handlers and emit specialized code for them instead of actually calling the -** handler. +** upb::Message is a representation for protobuf messages. ** -** The functionality is very simple/limited right now but may expand to be able -** to call another function. -*/ - -#ifndef UPB_SHIM_H -#define UPB_SHIM_H +** However it differs from other common representations like +** google::protobuf::Message in one key way: it does not prescribe any +** ownership between messages and submessages, and it relies on the +** client to delete each message/submessage/array/map at the appropriate +** time. +** +** A client can access a upb::Message without knowing anything about +** ownership semantics, but to create or mutate a message a user needs +** to implement the memory management themselves. +** +** Currently all messages, arrays, and maps store a upb_alloc* internally. +** Mutating operations use this when they require dynamically-allocated +** memory. We could potentially eliminate this size overhead later by +** letting the user flip a bit on the factory that prevents this from +** being stored. The user would then need to use separate functions where +** the upb_alloc* is passed explicitly. However for handlers to populate +** such structures, they would need a place to store this upb_alloc* during +** parsing; upb_handlers don't currently have a good way to accommodate this. +** +** TODO: UTF-8 checking? +**/ +#ifndef UPB_MSG_H_ +#define UPB_MSG_H_ -typedef struct { - size_t offset; - int32_t hasbit; -} upb_shim_data; #ifdef __cplusplus namespace upb { +class Array; +class Map; +class MapIterator; +class MessageFactory; +class MessageLayout; +class Visitor; +class VisitorPlan; +} -struct Shim { - typedef upb_shim_data Data; +#endif - /* Sets a handler for the given field that writes the value to the given - * offset and, if hasbit >= 0, sets a bit at the given bit offset. Returns - * true if the handler was set successfully. */ - static bool Set(Handlers *h, const FieldDef *f, size_t ofs, int32_t hasbit); +UPB_DECLARE_TYPE(upb::MessageFactory, upb_msgfactory) +UPB_DECLARE_TYPE(upb::MessageLayout, upb_msglayout) +UPB_DECLARE_TYPE(upb::Array, upb_array) +UPB_DECLARE_TYPE(upb::Map, upb_map) +UPB_DECLARE_TYPE(upb::MapIterator, upb_mapiter) +UPB_DECLARE_TYPE(upb::Visitor, upb_visitor) +UPB_DECLARE_TYPE(upb::VisitorPlan, upb_visitorplan) - /* If this handler is a shim, returns the corresponding upb::Shim::Data and - * stores the type in "type". Otherwise returns NULL. */ - static const Data* GetData(const Handlers* h, Handlers::Selector s, - FieldDef::Type* type); -}; +/* TODO(haberman): C++ accessors */ -} /* namespace upb */ +UPB_BEGIN_EXTERN_C -#endif +typedef void upb_msg; -UPB_BEGIN_EXTERN_C -/* C API. */ -bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset, - int32_t hasbit); -const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s, - upb_fieldtype_t *type); +/** upb_msglayout *************************************************************/ -UPB_END_EXTERN_C +/* upb_msglayout represents the memory layout of a given upb_msgdef. You get + * instances of this from a upb_msgfactory, and the factory always owns the + * msglayout. */ -#ifdef __cplusplus -/* C++ Wrappers. */ -namespace upb { -inline bool Shim::Set(Handlers* h, const FieldDef* f, size_t ofs, - int32_t hasbit) { - return upb_shim_set(h, f, ofs, hasbit); -} -inline const Shim::Data* Shim::GetData(const Handlers* h, Handlers::Selector s, - FieldDef::Type* type) { - return upb_shim_getdata(h, s, type); -} -} /* namespace upb */ -#endif +/* Gets the factory for this layout */ +upb_msgfactory *upb_msglayout_factory(const upb_msglayout *l); -#endif /* UPB_SHIM_H */ -/* -** upb::SymbolTable (upb_symtab) -** -** A symtab (symbol table) stores a name->def map of upb_defs. Clients could -** always create such tables themselves, but upb_symtab has logic for resolving -** symbolic references, and in particular, for keeping a whole set of consistent -** defs when replacing some subset of those defs. This logic is nontrivial. -** -** This is a mixed C/C++ interface that offers a full API to both languages. -** See the top-level README for more information. -*/ +/* Get the msglayout for a submessage. This requires that this field is a + * submessage, ie. upb_fielddef_issubmsg(upb_msglayout_msgdef(l)) == true. + * + * Since map entry messages don't have layouts, if upb_fielddef_ismap(f) == true + * then this function will return the layout for the map's value. It requires + * that the value type of the map field is a submessage. */ +const upb_msglayout *upb_msglayout_sublayout(const upb_msglayout *l, + const upb_fielddef *f); -#ifndef UPB_SYMTAB_H_ -#define UPB_SYMTAB_H_ +/* Returns the msgdef for this msglayout. */ +const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l); -#ifdef __cplusplus -#include -namespace upb { class SymbolTable; } -#endif +/** upb_visitor ***************************************************************/ -UPB_DECLARE_DERIVED_TYPE(upb::SymbolTable, upb::RefCounted, - upb_symtab, upb_refcounted) +/* upb_visitor will visit all the fields of a message and its submessages. It + * uses a upb_visitorplan which you can obtain from a upb_msgfactory. */ -typedef struct { - UPB_PRIVATE_FOR_CPP - upb_strtable_iter iter; - upb_deftype_t type; -} upb_symtab_iter; +upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp, + upb_sink *output); +bool upb_visitor_visitmsg(upb_visitor *v, const upb_msg *msg); -#ifdef __cplusplus -/* Non-const methods in upb::SymbolTable are NOT thread-safe. */ -class upb::SymbolTable { - public: - /* Returns a new symbol table with a single ref owned by "owner." - * Returns NULL if memory allocation failed. */ - static reffed_ptr New(); +/** upb_msgfactory ************************************************************/ - /* Include RefCounted base methods. */ - UPB_REFCOUNTED_CPPMETHODS +/* A upb_msgfactory contains a cache of upb_msglayout, upb_handlers, and + * upb_visitorplan objects. These are the objects necessary to represent, + * populate, and and visit upb_msg objects. + * + * These caches are all populated by upb_msgdef, and lazily created on demand. + */ - /* For all lookup functions, the returned pointer is not owned by the - * caller; it may be invalidated by any non-const call or unref of the - * SymbolTable! To protect against this, take a ref if desired. */ +/* Creates and destroys a msgfactory, respectively. The messages for this + * msgfactory must come from |symtab| (which should outlive the msgfactory). */ +upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab); +void upb_msgfactory_free(upb_msgfactory *f); - /* Freezes the symbol table: prevents further modification of it. - * After the Freeze() operation is successful, the SymbolTable must only be - * accessed via a const pointer. - * - * Unlike with upb::MessageDef/upb::EnumDef/etc, freezing a SymbolTable is not - * a necessary step in using a SymbolTable. If you have no need for it to be - * immutable, there is no need to freeze it ever. However sometimes it is - * useful, and SymbolTables that are statically compiled into the binary are - * always frozen by nature. */ - void Freeze(); +const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f); - /* Resolves the given symbol using the rules described in descriptor.proto, - * namely: - * - * If the name starts with a '.', it is fully-qualified. Otherwise, - * C++-like scoping rules are used to find the type (i.e. first the nested - * types within this message are searched, then within the parent, on up - * to the root namespace). - * - * If not found, returns NULL. */ - const Def* Resolve(const char* base, const char* sym) const; +/* The functions to get cached objects, lazily creating them on demand. These + * all require: + * + * - m is in upb_msgfactory_symtab(f) + * - upb_msgdef_mapentry(m) == false (since map messages can't have layouts). + * + * The returned objects will live for as long as the msgfactory does. + * + * TODO(haberman): consider making this thread-safe and take a const + * upb_msgfactory. */ +const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f, + const upb_msgdef *m); +const upb_handlers *upb_msgfactory_getmergehandlers(upb_msgfactory *f, + const upb_msgdef *m); +const upb_visitorplan *upb_msgfactory_getvisitorplan(upb_msgfactory *f, + const upb_handlers *h); - /* Finds an entry in the symbol table with this exact name. If not found, - * returns NULL. */ - const Def* Lookup(const char *sym) const; - const MessageDef* LookupMessage(const char *sym) const; - const EnumDef* LookupEnum(const char *sym) const; - /* TODO: introduce a C++ iterator, but make it nice and templated so that if - * you ask for an iterator of MessageDef the iterated elements are strongly - * typed as MessageDef*. */ +/** upb_msgval ****************************************************************/ - /* Adds the given mutable defs to the symtab, resolving all symbols - * (including enum default values) and finalizing the defs. Only one def per - * name may be in the list, but defs can replace existing defs in the symtab. - * All defs must have a name -- anonymous defs are not allowed. Anonymous - * defs can still be frozen by calling upb_def_freeze() directly. - * - * Any existing defs that can reach defs that are being replaced will - * themselves be replaced also, so that the resulting set of defs is fully - * consistent. - * - * This logic implemented in this method is a convenience; ultimately it - * calls some combination of upb_fielddef_setsubdef(), upb_def_dup(), and - * upb_freeze(), any of which the client could call themself. However, since - * the logic for doing so is nontrivial, we provide it here. - * - * The entire operation either succeeds or fails. If the operation fails, - * the symtab is unchanged, false is returned, and status indicates the - * error. The caller passes a ref on all defs to the symtab (even if the - * operation fails). - * - * TODO(haberman): currently failure will leave the symtab unchanged, but may - * leave the defs themselves partially resolved. Does this matter? If so we - * could do a prepass that ensures that all symbols are resolvable and bail - * if not, so we don't mutate anything until we know the operation will - * succeed. - * - * TODO(haberman): since the defs must be mutable, refining a frozen def - * requires making mutable copies of the entire tree. This is wasteful if - * only a few messages are changing. We may want to add a way of adding a - * tree of frozen defs to the symtab (perhaps an alternate constructor where - * you pass the root of the tree?) */ - bool Add(Def*const* defs, size_t n, void* ref_donor, Status* status); +/* A union representing all possible protobuf values. Used for generic get/set + * operations. */ - bool Add(const std::vector& defs, void *owner, Status* status) { - return Add((Def*const*)&defs[0], defs.size(), owner, status); +typedef union { + bool b; + float flt; + double dbl; + int32_t i32; + int64_t i64; + uint32_t u32; + uint64_t u64; + const upb_map* map; + const upb_msg* msg; + const upb_array* arr; + const void* ptr; + struct { + const char *ptr; + size_t len; + } str; +} upb_msgval; + +#define ACCESSORS(name, membername, ctype) \ + UPB_INLINE ctype upb_msgval_get ## name(upb_msgval v) { \ + return v.membername; \ + } \ + UPB_INLINE void upb_msgval_set ## name(upb_msgval *v, ctype cval) { \ + v->membername = cval; \ + } \ + UPB_INLINE upb_msgval upb_msgval_ ## name(ctype v) { \ + upb_msgval ret; \ + ret.membername = v; \ + return ret; \ } - /* Resolves all subdefs for messages in this file and attempts to freeze the - * file. If this succeeds, adds all the symbols to this SymbolTable - * (replacing any existing ones with the same names). */ - bool AddFile(FileDef* file, Status* s); +ACCESSORS(bool, b, bool) +ACCESSORS(float, flt, float) +ACCESSORS(double, dbl, double) +ACCESSORS(int32, i32, int32_t) +ACCESSORS(int64, i64, int64_t) +ACCESSORS(uint32, u32, uint32_t) +ACCESSORS(uint64, u64, uint64_t) +ACCESSORS(map, map, const upb_map*) +ACCESSORS(msg, msg, const upb_msg*) +ACCESSORS(ptr, ptr, const void*) +ACCESSORS(arr, arr, const upb_array*) + +#undef ACCESSORS + +UPB_INLINE upb_msgval upb_msgval_str(const char *ptr, size_t len) { + upb_msgval ret; + ret.str.ptr = ptr; + ret.str.len = len; + return ret; +} - private: - UPB_DISALLOW_POD_OPS(SymbolTable, upb::SymbolTable) -}; +UPB_INLINE const char* upb_msgval_getstr(upb_msgval val) { + return val.str.ptr; +} -#endif /* __cplusplus */ +UPB_INLINE size_t upb_msgval_getstrlen(upb_msgval val) { + return val.str.len; +} -UPB_BEGIN_EXTERN_C -/* Native C API. */ +/** upb_msg *******************************************************************/ -/* Include refcounted methods like upb_symtab_ref(). */ -UPB_REFCOUNTED_CMETHODS(upb_symtab, upb_symtab_upcast) +/* A upb_msg represents a protobuf message. It always corresponds to a specific + * upb_msglayout, which describes how it is laid out in memory. + * + * The message will have a fixed size, as returned by upb_msg_sizeof(), which + * will be used to store fixed-length fields. The upb_msg may also allocate + * dynamic memory internally to store data such as: + * + * - extensions + * - unknown fields + */ -upb_symtab *upb_symtab_new(const void *owner); -void upb_symtab_freeze(upb_symtab *s); -const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym); -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym); -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym); -const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym); -bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n, - void *ref_donor, upb_status *status); -bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status* status); +/* Returns the size of a message given this layout. */ +size_t upb_msg_sizeof(const upb_msglayout *l); -/* upb_symtab_iter i; - * for(upb_symtab_begin(&i, s, type); !upb_symtab_done(&i); - * upb_symtab_next(&i)) { - * const upb_def *def = upb_symtab_iter_def(&i); - * // ... - * } +/* upb_msg_init() / upb_msg_uninit() allow the user to use a pre-allocated + * block of memory as a message. The block's size should be upb_msg_sizeof(). + * upb_msg_uninit() must be called to release internally-allocated memory + * unless the allocator is an arena that does not require freeing. * - * For C we don't have separate iterators for const and non-const. - * It is the caller's responsibility to cast the upb_fielddef* to - * const if the upb_msgdef* is const. */ -void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s, - upb_deftype_t type); -void upb_symtab_next(upb_symtab_iter *iter); -bool upb_symtab_done(const upb_symtab_iter *iter); -const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter); + * Please note that upb_msg_uninit() does *not* free any submessages, maps, + * or arrays referred to by this message's fields. You must free them manually + * yourself. */ +void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a); +void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); + +/* Like upb_msg_init() / upb_msg_uninit(), except the message's memory is + * allocated / freed from the given upb_alloc. */ +upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a); +void upb_msg_free(upb_msg *msg, const upb_msglayout *l); + +/* Returns the upb_alloc for the given message. */ +upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l); + +/* Packs the tree of messages rooted at "msg" into a single hunk of memory, + * allocated from the given allocator. */ +void *upb_msg_pack(const upb_msg *msg, const upb_msglayout *l, + void *p, size_t *ofs, size_t size); + +/* Read-only message API. Can be safely called by anyone. */ + +/* Returns the value associated with this field: + * - for scalar fields (including strings), the value directly. + * - return upb_msg*, or upb_map* for msg/map. + * If the field is unset for these field types, returns NULL. + * + * TODO(haberman): should we let users store cached array/map/msg + * pointers here for fields that are unset? Could be useful for the + * strongly-owned submessage model (ie. generated C API that doesn't use + * arenas). + */ +upb_msgval upb_msg_get(const upb_msg *msg, + const upb_fielddef *f, + const upb_msglayout *l); -UPB_END_EXTERN_C +/* May only be called for fields where upb_fielddef_haspresence(f) == true. */ +bool upb_msg_has(const upb_msg *msg, + const upb_fielddef *f, + const upb_msglayout *l); -#ifdef __cplusplus -/* C++ inline wrappers. */ -namespace upb { -inline reffed_ptr SymbolTable::New() { - upb_symtab *s = upb_symtab_new(&s); - return reffed_ptr(s, &s); -} +/* Returns NULL if no field in the oneof is set. */ +const upb_fielddef *upb_msg_getoneofcase(const upb_msg *msg, + const upb_oneofdef *o, + const upb_msglayout *l); -inline void SymbolTable::Freeze() { - return upb_symtab_freeze(this); -} -inline const Def *SymbolTable::Resolve(const char *base, - const char *sym) const { - return upb_symtab_resolve(this, base, sym); -} -inline const Def* SymbolTable::Lookup(const char *sym) const { - return upb_symtab_lookup(this, sym); -} -inline const MessageDef *SymbolTable::LookupMessage(const char *sym) const { - return upb_symtab_lookupmsg(this, sym); -} -inline bool SymbolTable::Add( - Def*const* defs, size_t n, void* ref_donor, Status* status) { - return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status); -} -inline bool SymbolTable::AddFile(FileDef* file, Status* s) { - return upb_symtab_addfile(this, file, s); -} -} /* namespace upb */ -#endif +/* Returns true if any field in the oneof is set. */ +bool upb_msg_hasoneof(const upb_msg *msg, + const upb_oneofdef *o, + const upb_msglayout *l); + + +/* Mutable message API. May only be called by the owner of the message who + * knows its ownership scheme and how to keep it consistent. */ + +/* Sets the given field to the given value. Does not perform any memory + * management: if you overwrite a pointer to a msg/array/map/string without + * cleaning it up (or using an arena) it will leak. + */ +bool upb_msg_set(upb_msg *msg, + const upb_fielddef *f, + upb_msgval val, + const upb_msglayout *l); + +/* For a primitive field, set it back to its default. For repeated, string, and + * submessage fields set it back to NULL. This could involve releasing some + * internal memory (for example, from an extension dictionary), but it is not + * recursive in any way and will not recover any memory that may be used by + * arrays/maps/strings/msgs that this field may have pointed to. + */ +bool upb_msg_clearfield(upb_msg *msg, + const upb_fielddef *f, + const upb_msglayout *l); + +/* Clears all fields in the oneof such that none of them are set. */ +bool upb_msg_clearoneof(upb_msg *msg, + const upb_oneofdef *o, + const upb_msglayout *l); + +/* TODO(haberman): copyfrom()/mergefrom()? */ + + +/** upb_array *****************************************************************/ + +/* A upb_array stores data for a repeated field. The memory management + * semantics are the same as upb_msg. A upb_array allocates dynamic + * memory internally for the array elements. */ + +size_t upb_array_sizeof(upb_fieldtype_t type); +void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *a); +void upb_array_uninit(upb_array *arr); +upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a); +void upb_array_free(upb_array *arr); + +/* Read-only interface. Safe for anyone to call. */ + +size_t upb_array_size(const upb_array *arr); +upb_fieldtype_t upb_array_type(const upb_array *arr); +upb_msgval upb_array_get(const upb_array *arr, size_t i); + +/* Write interface. May only be called by the message's owner who can enforce + * its memory management invariants. */ + +bool upb_array_set(upb_array *arr, size_t i, upb_msgval val); + + +/** upb_map *******************************************************************/ + +/* A upb_map stores data for a map field. The memory management semantics are + * the same as upb_msg, with one notable exception. upb_map will internally + * store a copy of all string keys, but *not* any string values or submessages. + * So you must ensure that any string or message values outlive the map, and you + * must delete them manually when they are no longer required. */ + +size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype); +bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype, + upb_alloc *a); +void upb_map_uninit(upb_map *map); +upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype, upb_alloc *a); +void upb_map_free(upb_map *map); + +/* Read-only interface. Safe for anyone to call. */ + +size_t upb_map_size(const upb_map *map); +upb_fieldtype_t upb_map_keytype(const upb_map *map); +upb_fieldtype_t upb_map_valuetype(const upb_map *map); +bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val); + +/* Write interface. May only be called by the message's owner who can enforce + * its memory management invariants. */ + +/* Sets or overwrites an entry in the map. Return value indicates whether + * the operation succeeded or failed with OOM, and also whether an existing + * key was replaced or not. */ +bool upb_map_set(upb_map *map, + upb_msgval key, upb_msgval val, + upb_msgval *valremoved); + +/* Deletes an entry in the map. Returns true if the key was present. */ +bool upb_map_del(upb_map *map, upb_msgval key); + + +/** upb_mapiter ***************************************************************/ + +/* For iterating over a map. Map iterators are invalidated by mutations to the + * map, but an invalidated iterator will never return junk or crash the process. + * An invalidated iterator may return entries that were already returned though, + * and if you keep invalidating the iterator during iteration, the program may + * enter an infinite loop. */ + +size_t upb_mapiter_sizeof(); + +void upb_mapiter_begin(upb_mapiter *i, const upb_map *t); +upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a); +void upb_mapiter_free(upb_mapiter *i, upb_alloc *a); +void upb_mapiter_next(upb_mapiter *i); +bool upb_mapiter_done(const upb_mapiter *i); + +upb_msgval upb_mapiter_key(const upb_mapiter *i); +upb_msgval upb_mapiter_value(const upb_mapiter *i); +void upb_mapiter_setdone(upb_mapiter *i); +bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2); + + +/** Handlers ******************************************************************/ + +/* These are the handlers used internally by upb_msgfactory_getmergehandlers(). + * They write scalar data to a known offset from the message pointer. + * + * These would be trivial for anyone to implement themselves, but it's better + * to use these because some JITs will recognize and specialize these instead + * of actually calling the function. */ + +/* Sets a handler for the given primitive field that will write the data at the + * given offset. If hasbit > 0, also sets a hasbit at the given bit offset + * (addressing each byte low to high). */ +bool upb_msg_setscalarhandler(upb_handlers *h, + const upb_fielddef *f, + size_t offset, + int32_t hasbit); + +/* If the given handler is a msghandlers_primitive field, returns true and sets + * *type, *offset and *hasbit. Otherwise returns false. */ +bool upb_msg_getscalarhandlerdata(const upb_handlers *h, + upb_selector_t s, + upb_fieldtype_t *type, + size_t *offset, + int32_t *hasbit); + +UPB_END_EXTERN_C -#endif /* UPB_SYMTAB_H_ */ +#endif /* UPB_MSG_H_ */ /* ** upb::descriptor::Reader (upb_descreader) ** -- cgit v1.2.3