diff options
Diffstat (limited to 'js/binary/decoder.js')
-rw-r--r-- | js/binary/decoder.js | 146 |
1 files changed, 105 insertions, 41 deletions
diff --git a/js/binary/decoder.js b/js/binary/decoder.js index 9004eff0..e33bf1be 100644 --- a/js/binary/decoder.js +++ b/js/binary/decoder.js @@ -47,6 +47,7 @@ goog.provide('jspb.BinaryDecoder'); goog.provide('jspb.BinaryIterator'); goog.require('goog.asserts'); +goog.require('goog.crypt'); goog.require('jspb.utils'); @@ -57,7 +58,7 @@ goog.require('jspb.utils'); * @param {?jspb.BinaryDecoder=} opt_decoder * @param {?function(this:jspb.BinaryDecoder):(number|boolean|string)=} * opt_next The decoder method to use for next(). - * @param {?Array.<number|boolean|string>=} opt_elements + * @param {?Array<number|boolean|string>=} opt_elements * @constructor * @struct */ @@ -71,7 +72,7 @@ jspb.BinaryIterator = function(opt_decoder, opt_next, opt_elements) { */ this.nextMethod_ = null; - /** @private {Array.<number>} */ + /** @private {?Array<number|boolean|string>} */ this.elements_ = null; /** @private {number} */ @@ -91,7 +92,7 @@ jspb.BinaryIterator = function(opt_decoder, opt_next, opt_elements) { * @param {?jspb.BinaryDecoder=} opt_decoder * @param {?function(this:jspb.BinaryDecoder):(number|boolean|string)=} * opt_next The decoder method to use for next(). - * @param {?Array.<number|boolean|string>=} opt_elements + * @param {?Array<number|boolean|string>=} opt_elements * @private */ jspb.BinaryIterator.prototype.init_ = @@ -100,7 +101,7 @@ jspb.BinaryIterator.prototype.init_ = this.decoder_ = opt_decoder; this.nextMethod_ = opt_next; } - this.elements_ = opt_elements ? opt_elements : null; + this.elements_ = opt_elements || null; this.cursor_ = 0; this.nextValue_ = null; this.atEnd_ = !this.decoder_ && !this.elements_; @@ -111,7 +112,7 @@ jspb.BinaryIterator.prototype.init_ = /** * Global pool of BinaryIterator instances. - * @private {!Array.<!jspb.BinaryIterator>} + * @private {!Array<!jspb.BinaryIterator>} */ jspb.BinaryIterator.instanceCache_ = []; @@ -122,7 +123,7 @@ jspb.BinaryIterator.instanceCache_ = []; * @param {?jspb.BinaryDecoder=} opt_decoder * @param {?function(this:jspb.BinaryDecoder):(number|boolean|string)=} * opt_next The decoder method to use for next(). - * @param {?Array.<number|boolean|string>=} opt_elements + * @param {?Array<number|boolean|string>=} opt_elements * @return {!jspb.BinaryIterator} */ jspb.BinaryIterator.alloc = function(opt_decoder, opt_next, opt_elements) { @@ -223,7 +224,7 @@ jspb.BinaryIterator.prototype.next = function() { jspb.BinaryDecoder = function(opt_bytes, opt_start, opt_length) { /** * Typed byte-wise view of the source buffer. - * @private {Uint8Array} + * @private {?Uint8Array} */ this.bytes_ = null; @@ -273,7 +274,7 @@ jspb.BinaryDecoder = function(opt_bytes, opt_start, opt_length) { /** * Global pool of BinaryDecoder instances. - * @private {!Array.<!jspb.BinaryDecoder>} + * @private {!Array<!jspb.BinaryDecoder>} */ jspb.BinaryDecoder.instanceCache_ = []; @@ -335,7 +336,7 @@ jspb.BinaryDecoder.prototype.clear = function() { /** * Returns the raw buffer. - * @return {Uint8Array} The raw buffer. + * @return {?Uint8Array} The raw buffer. */ jspb.BinaryDecoder.prototype.getBuffer = function() { return this.bytes_; @@ -582,27 +583,24 @@ jspb.BinaryDecoder.prototype.readUnsignedVarint32 = function() { x |= (temp & 0x0F) << 28; if (temp < 128) { // We're reading the high bits of an unsigned varint. The byte we just read - // also contains bits 33 through 35, which we're going to discard. Those - // bits _must_ be zero, or the encoding is invalid. - goog.asserts.assert((temp & 0xF0) == 0); + // also contains bits 33 through 35, which we're going to discard. this.cursor_ += 5; goog.asserts.assert(this.cursor_ <= this.end_); return x >>> 0; } - // If we get here, we're reading the sign extension of a negative 32-bit int. - // We can skip these bytes, as we know in advance that they have to be all - // 1's if the varint is correctly encoded. Since we also know the value is - // negative, we don't have to coerce it to unsigned before we return it. - - goog.asserts.assert((temp & 0xF0) == 0xF0); - goog.asserts.assert(bytes[this.cursor_ + 5] == 0xFF); - goog.asserts.assert(bytes[this.cursor_ + 6] == 0xFF); - goog.asserts.assert(bytes[this.cursor_ + 7] == 0xFF); - goog.asserts.assert(bytes[this.cursor_ + 8] == 0xFF); - goog.asserts.assert(bytes[this.cursor_ + 9] == 0x01); + // If we get here, we need to truncate coming bytes. However we need to make + // sure cursor place is correct. + this.cursor_ += 5; + if (bytes[this.cursor_++] >= 128 && + bytes[this.cursor_++] >= 128 && + bytes[this.cursor_++] >= 128 && + bytes[this.cursor_++] >= 128 && + bytes[this.cursor_++] >= 128) { + // If we get here, the varint is too long. + goog.asserts.assert(false); + } - this.cursor_ += 10; goog.asserts.assert(this.cursor_ <= this.end_); return x; }; @@ -631,6 +629,7 @@ jspb.BinaryDecoder.prototype.readUnsignedVarint32String = function() { return value.toString(); }; + /** * Reads a 32-bit signed variant and returns its value as a string. * @@ -732,6 +731,24 @@ jspb.BinaryDecoder.prototype.readZigzagVarint64 = function() { /** + * Reads a signed, zigzag-encoded 64-bit varint from the binary stream and + * returns its valud as a string. + * + * Zigzag encoding is a modification of varint encoding that reduces the + * storage overhead for small negative integers - for more details on the + * format, see https://developers.google.com/protocol-buffers/docs/encoding + * + * @return {string} The decoded signed, zigzag-encoded 64-bit varint as a + * string. + */ +jspb.BinaryDecoder.prototype.readZigzagVarint64String = function() { + // TODO(haberman): write lossless 64-bit zig-zag math. + var value = this.readZigzagVarint64(); + return value.toString(); +}; + + +/** * Reads a raw unsigned 8-bit integer from the binary stream. * * @return {number} The unsigned 8-bit integer read from the binary stream. @@ -790,6 +807,20 @@ jspb.BinaryDecoder.prototype.readUint64 = function() { /** + * Reads a raw unsigned 64-bit integer from the binary stream. Note that since + * Javascript represents all numbers as double-precision floats, there will be + * precision lost if the absolute value of the integer is larger than 2^53. + * + * @return {string} The unsigned 64-bit integer read from the binary stream. + */ +jspb.BinaryDecoder.prototype.readUint64String = function() { + var bitsLow = this.readUint32(); + var bitsHigh = this.readUint32(); + return jspb.utils.joinUnsignedDecimalString(bitsLow, bitsHigh); +}; + + +/** * Reads a raw signed 8-bit integer from the binary stream. * * @return {number} The signed 8-bit integer read from the binary stream. @@ -848,6 +879,20 @@ jspb.BinaryDecoder.prototype.readInt64 = function() { /** + * Reads a raw signed 64-bit integer from the binary stream and returns it as a + * string. + * + * @return {string} The signed 64-bit integer read from the binary stream. + * Precision will be lost if the integer exceeds 2^53. + */ +jspb.BinaryDecoder.prototype.readInt64String = function() { + var bitsLow = this.readUint32(); + var bitsHigh = this.readUint32(); + return jspb.utils.joinSignedDecimalString(bitsLow, bitsHigh); +}; + + +/** * Reads a 32-bit floating-point number from the binary stream, using the * temporary buffer to realign the data. * @@ -894,11 +939,9 @@ jspb.BinaryDecoder.prototype.readEnum = function() { /** * Reads and parses a UTF-8 encoded unicode string from the stream. - * The code is inspired by maps.vectortown.parse.StreamedDataViewReader, with - * the exception that the implementation here does not get confused if it - * encounters characters longer than three bytes. These characters are ignored - * though, as they are extremely rare: three UTF-8 bytes cover virtually all - * characters in common use (http://en.wikipedia.org/wiki/UTF-8). + * The code is inspired by maps.vectortown.parse.StreamedDataViewReader. + * Supports codepoints from U+0000 up to U+10FFFF. + * (http://en.wikipedia.org/wiki/UTF-8). * @param {number} length The length of the string to read. * @return {string} The decoded string. */ @@ -906,30 +949,50 @@ jspb.BinaryDecoder.prototype.readString = function(length) { var bytes = this.bytes_; var cursor = this.cursor_; var end = cursor + length; - var chars = []; + var codeUnits = []; + var result = ''; while (cursor < end) { var c = bytes[cursor++]; if (c < 128) { // Regular 7-bit ASCII. - chars.push(c); + codeUnits.push(c); } else if (c < 192) { // UTF-8 continuation mark. We are out of sync. This // might happen if we attempted to read a character - // with more than three bytes. + // with more than four bytes. continue; } else if (c < 224) { // UTF-8 with two bytes. var c2 = bytes[cursor++]; - chars.push(((c & 31) << 6) | (c2 & 63)); + codeUnits.push(((c & 31) << 6) | (c2 & 63)); } else if (c < 240) { // UTF-8 with three bytes. var c2 = bytes[cursor++]; var c3 = bytes[cursor++]; - chars.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63)); + codeUnits.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63)); + } else if (c < 248) { // UTF-8 with 4 bytes. + var c2 = bytes[cursor++]; + var c3 = bytes[cursor++]; + var c4 = bytes[cursor++]; + // Characters written on 4 bytes have 21 bits for a codepoint. + // We can't fit that on 16bit characters, so we use surrogates. + var codepoint = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63); + // Surrogates formula from wikipedia. + // 1. Subtract 0x10000 from codepoint + codepoint -= 0x10000; + // 2. Split this into the high 10-bit value and the low 10-bit value + // 3. Add 0xD800 to the high value to form the high surrogate + // 4. Add 0xDC00 to the low value to form the low surrogate: + var low = (codepoint & 1023) + 0xDC00; + var high = ((codepoint >> 10) & 1023) + 0xD800; + codeUnits.push(high, low); } - } - // String.fromCharCode.apply is faster than manually appending characters on - // Chrome 25+, and generates no additional cons string garbage. - var result = String.fromCharCode.apply(null, chars); + // Avoid exceeding the maximum stack size when calling `apply`. + if (codeUnits.length >= 8192) { + result += String.fromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + result += goog.crypt.byteArrayToString(codeUnits); this.cursor_ = cursor; return result; }; @@ -950,14 +1013,15 @@ jspb.BinaryDecoder.prototype.readStringWithLength = function() { * Reads a block of raw bytes from the binary stream. * * @param {number} length The number of bytes to read. - * @return {Uint8Array} The decoded block of bytes, or null if the length was - * invalid. + * @return {!Uint8Array} The decoded block of bytes, or an empty block if the + * length was invalid. */ jspb.BinaryDecoder.prototype.readBytes = function(length) { if (length < 0 || this.cursor_ + length > this.bytes_.length) { this.error_ = true; - return null; + goog.asserts.fail('Invalid byte length!'); + return new Uint8Array(0); } var result = this.bytes_.subarray(this.cursor_, this.cursor_ + length); |