aboutsummaryrefslogtreecommitdiffhomepage
path: root/js/binary/decoder.js
diff options
context:
space:
mode:
Diffstat (limited to 'js/binary/decoder.js')
-rw-r--r--js/binary/decoder.js146
1 files changed, 105 insertions, 41 deletions
diff --git a/js/binary/decoder.js b/js/binary/decoder.js
index 9004eff0..e33bf1be 100644
--- a/js/binary/decoder.js
+++ b/js/binary/decoder.js
@@ -47,6 +47,7 @@ goog.provide('jspb.BinaryDecoder');
goog.provide('jspb.BinaryIterator');
goog.require('goog.asserts');
+goog.require('goog.crypt');
goog.require('jspb.utils');
@@ -57,7 +58,7 @@ goog.require('jspb.utils');
* @param {?jspb.BinaryDecoder=} opt_decoder
* @param {?function(this:jspb.BinaryDecoder):(number|boolean|string)=}
* opt_next The decoder method to use for next().
- * @param {?Array.<number|boolean|string>=} opt_elements
+ * @param {?Array<number|boolean|string>=} opt_elements
* @constructor
* @struct
*/
@@ -71,7 +72,7 @@ jspb.BinaryIterator = function(opt_decoder, opt_next, opt_elements) {
*/
this.nextMethod_ = null;
- /** @private {Array.<number>} */
+ /** @private {?Array<number|boolean|string>} */
this.elements_ = null;
/** @private {number} */
@@ -91,7 +92,7 @@ jspb.BinaryIterator = function(opt_decoder, opt_next, opt_elements) {
* @param {?jspb.BinaryDecoder=} opt_decoder
* @param {?function(this:jspb.BinaryDecoder):(number|boolean|string)=}
* opt_next The decoder method to use for next().
- * @param {?Array.<number|boolean|string>=} opt_elements
+ * @param {?Array<number|boolean|string>=} opt_elements
* @private
*/
jspb.BinaryIterator.prototype.init_ =
@@ -100,7 +101,7 @@ jspb.BinaryIterator.prototype.init_ =
this.decoder_ = opt_decoder;
this.nextMethod_ = opt_next;
}
- this.elements_ = opt_elements ? opt_elements : null;
+ this.elements_ = opt_elements || null;
this.cursor_ = 0;
this.nextValue_ = null;
this.atEnd_ = !this.decoder_ && !this.elements_;
@@ -111,7 +112,7 @@ jspb.BinaryIterator.prototype.init_ =
/**
* Global pool of BinaryIterator instances.
- * @private {!Array.<!jspb.BinaryIterator>}
+ * @private {!Array<!jspb.BinaryIterator>}
*/
jspb.BinaryIterator.instanceCache_ = [];
@@ -122,7 +123,7 @@ jspb.BinaryIterator.instanceCache_ = [];
* @param {?jspb.BinaryDecoder=} opt_decoder
* @param {?function(this:jspb.BinaryDecoder):(number|boolean|string)=}
* opt_next The decoder method to use for next().
- * @param {?Array.<number|boolean|string>=} opt_elements
+ * @param {?Array<number|boolean|string>=} opt_elements
* @return {!jspb.BinaryIterator}
*/
jspb.BinaryIterator.alloc = function(opt_decoder, opt_next, opt_elements) {
@@ -223,7 +224,7 @@ jspb.BinaryIterator.prototype.next = function() {
jspb.BinaryDecoder = function(opt_bytes, opt_start, opt_length) {
/**
* Typed byte-wise view of the source buffer.
- * @private {Uint8Array}
+ * @private {?Uint8Array}
*/
this.bytes_ = null;
@@ -273,7 +274,7 @@ jspb.BinaryDecoder = function(opt_bytes, opt_start, opt_length) {
/**
* Global pool of BinaryDecoder instances.
- * @private {!Array.<!jspb.BinaryDecoder>}
+ * @private {!Array<!jspb.BinaryDecoder>}
*/
jspb.BinaryDecoder.instanceCache_ = [];
@@ -335,7 +336,7 @@ jspb.BinaryDecoder.prototype.clear = function() {
/**
* Returns the raw buffer.
- * @return {Uint8Array} The raw buffer.
+ * @return {?Uint8Array} The raw buffer.
*/
jspb.BinaryDecoder.prototype.getBuffer = function() {
return this.bytes_;
@@ -582,27 +583,24 @@ jspb.BinaryDecoder.prototype.readUnsignedVarint32 = function() {
x |= (temp & 0x0F) << 28;
if (temp < 128) {
// We're reading the high bits of an unsigned varint. The byte we just read
- // also contains bits 33 through 35, which we're going to discard. Those
- // bits _must_ be zero, or the encoding is invalid.
- goog.asserts.assert((temp & 0xF0) == 0);
+ // also contains bits 33 through 35, which we're going to discard.
this.cursor_ += 5;
goog.asserts.assert(this.cursor_ <= this.end_);
return x >>> 0;
}
- // If we get here, we're reading the sign extension of a negative 32-bit int.
- // We can skip these bytes, as we know in advance that they have to be all
- // 1's if the varint is correctly encoded. Since we also know the value is
- // negative, we don't have to coerce it to unsigned before we return it.
-
- goog.asserts.assert((temp & 0xF0) == 0xF0);
- goog.asserts.assert(bytes[this.cursor_ + 5] == 0xFF);
- goog.asserts.assert(bytes[this.cursor_ + 6] == 0xFF);
- goog.asserts.assert(bytes[this.cursor_ + 7] == 0xFF);
- goog.asserts.assert(bytes[this.cursor_ + 8] == 0xFF);
- goog.asserts.assert(bytes[this.cursor_ + 9] == 0x01);
+ // If we get here, the varint is longer than five bytes. The remaining bytes
+ // are discarded, but the cursor must still be advanced past them.
+ this.cursor_ += 5;
+ if (bytes[this.cursor_++] >= 128 &&
+ bytes[this.cursor_++] >= 128 &&
+ bytes[this.cursor_++] >= 128 &&
+ bytes[this.cursor_++] >= 128 &&
+ bytes[this.cursor_++] >= 128) {
+ // If we get here, the varint is too long.
+ goog.asserts.assert(false);
+ }
- this.cursor_ += 10;
goog.asserts.assert(this.cursor_ <= this.end_);
return x;
};
@@ -631,6 +629,7 @@ jspb.BinaryDecoder.prototype.readUnsignedVarint32String = function() {
return value.toString();
};
+
/**
* Reads a 32-bit signed varint and returns its value as a string.
*
@@ -732,6 +731,24 @@ jspb.BinaryDecoder.prototype.readZigzagVarint64 = function() {
/**
+ * Reads a signed, zigzag-encoded 64-bit varint from the binary stream and
+ * returns its value as a string.
+ *
+ * Zigzag encoding is a modification of varint encoding that reduces the
+ * storage overhead for small negative integers - for more details on the
+ * format, see https://developers.google.com/protocol-buffers/docs/encoding
+ *
+ * @return {string} The decoded signed, zigzag-encoded 64-bit varint as a
+ * string.
+ */
+jspb.BinaryDecoder.prototype.readZigzagVarint64String = function() {
+ // TODO(haberman): write lossless 64-bit zig-zag math.
+ var value = this.readZigzagVarint64();
+ return value.toString();
+};
+
+
+/**
* Reads a raw unsigned 8-bit integer from the binary stream.
*
* @return {number} The unsigned 8-bit integer read from the binary stream.
@@ -790,6 +807,20 @@ jspb.BinaryDecoder.prototype.readUint64 = function() {
/**
+ * Reads a raw unsigned 64-bit integer from the binary stream and returns it as
+ * a string. The value is read as two 32-bit halves and joined by
+ * jspb.utils.joinUnsignedDecimalString rather than being collapsed into a
+ * single Javascript number (a double-precision float, exact only up to 2^53).
+ *
+ * @return {string} The unsigned 64-bit integer read from the binary stream.
+ */
+jspb.BinaryDecoder.prototype.readUint64String = function() {
+ var bitsLow = this.readUint32();
+ var bitsHigh = this.readUint32();
+ return jspb.utils.joinUnsignedDecimalString(bitsLow, bitsHigh);
+};
+
+
+/**
* Reads a raw signed 8-bit integer from the binary stream.
*
* @return {number} The signed 8-bit integer read from the binary stream.
@@ -848,6 +879,20 @@ jspb.BinaryDecoder.prototype.readInt64 = function() {
/**
+ * Reads a raw signed 64-bit integer from the binary stream and returns it as a
+ * string.
+ *
+ * @return {string} The signed 64-bit integer read from the binary stream, as
+ * a string joined from its low and high 32-bit halves.
+ */
+jspb.BinaryDecoder.prototype.readInt64String = function() {
+ var bitsLow = this.readUint32();
+ var bitsHigh = this.readUint32();
+ return jspb.utils.joinSignedDecimalString(bitsLow, bitsHigh);
+};
+
+
+/**
* Reads a 32-bit floating-point number from the binary stream, using the
* temporary buffer to realign the data.
*
@@ -894,11 +939,9 @@ jspb.BinaryDecoder.prototype.readEnum = function() {
/**
* Reads and parses a UTF-8 encoded unicode string from the stream.
- * The code is inspired by maps.vectortown.parse.StreamedDataViewReader, with
- * the exception that the implementation here does not get confused if it
- * encounters characters longer than three bytes. These characters are ignored
- * though, as they are extremely rare: three UTF-8 bytes cover virtually all
- * characters in common use (http://en.wikipedia.org/wiki/UTF-8).
+ * The code is inspired by maps.vectortown.parse.StreamedDataViewReader.
+ * Supports codepoints from U+0000 up to U+10FFFF
+ * (http://en.wikipedia.org/wiki/UTF-8).
* @param {number} length The length of the string to read.
* @return {string} The decoded string.
*/
@@ -906,30 +949,50 @@ jspb.BinaryDecoder.prototype.readString = function(length) {
var bytes = this.bytes_;
var cursor = this.cursor_;
var end = cursor + length;
- var chars = [];
+ var codeUnits = [];
+ var result = '';
while (cursor < end) {
var c = bytes[cursor++];
if (c < 128) { // Regular 7-bit ASCII.
- chars.push(c);
+ codeUnits.push(c);
} else if (c < 192) {
// UTF-8 continuation mark. We are out of sync. This
// might happen if we attempted to read a character
- // with more than three bytes.
+ // with more than four bytes.
continue;
} else if (c < 224) { // UTF-8 with two bytes.
var c2 = bytes[cursor++];
- chars.push(((c & 31) << 6) | (c2 & 63));
+ codeUnits.push(((c & 31) << 6) | (c2 & 63));
} else if (c < 240) { // UTF-8 with three bytes.
var c2 = bytes[cursor++];
var c3 = bytes[cursor++];
- chars.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
+ codeUnits.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
+ } else if (c < 248) { // UTF-8 with 4 bytes.
+ var c2 = bytes[cursor++];
+ var c3 = bytes[cursor++];
+ var c4 = bytes[cursor++];
+ // Characters encoded on 4 bytes carry 21 bits of codepoint.
+ // That does not fit in a 16-bit code unit, so we use a surrogate pair.
+ var codepoint = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
+ // Surrogates formula from wikipedia.
+ // 1. Subtract 0x10000 from codepoint
+ codepoint -= 0x10000;
+ // 2. Split this into the high 10-bit value and the low 10-bit value
+ // 3. Add 0xD800 to the high value to form the high surrogate
+ // 4. Add 0xDC00 to the low value to form the low surrogate:
+ var low = (codepoint & 1023) + 0xDC00;
+ var high = ((codepoint >> 10) & 1023) + 0xD800;
+ codeUnits.push(high, low);
}
- }
- // String.fromCharCode.apply is faster than manually appending characters on
- // Chrome 25+, and generates no additional cons string garbage.
- var result = String.fromCharCode.apply(null, chars);
+ // Avoid exceeding the maximum stack size when calling `apply`.
+ if (codeUnits.length >= 8192) {
+ result += String.fromCharCode.apply(null, codeUnits);
+ codeUnits.length = 0;
+ }
+ }
+ result += goog.crypt.byteArrayToString(codeUnits);
this.cursor_ = cursor;
return result;
};
@@ -950,14 +1013,15 @@ jspb.BinaryDecoder.prototype.readStringWithLength = function() {
* Reads a block of raw bytes from the binary stream.
*
* @param {number} length The number of bytes to read.
- * @return {Uint8Array} The decoded block of bytes, or null if the length was
- * invalid.
+ * @return {!Uint8Array} The decoded block of bytes, or an empty block if the
+ * length was invalid.
*/
jspb.BinaryDecoder.prototype.readBytes = function(length) {
if (length < 0 ||
this.cursor_ + length > this.bytes_.length) {
this.error_ = true;
- return null;
+ goog.asserts.fail('Invalid byte length!');
+ return new Uint8Array(0);
}
var result = this.bytes_.subarray(this.cursor_, this.cursor_ + length);