aboutsummaryrefslogtreecommitdiff
path: root/contexts/data/lib/closure-library/closure/goog/i18n/charlistdecompressor.js
blob: e41a5b0aef13a556f84fbd3b99e69113a59f22b3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
// Copyright 2009 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview The decompressor for Base88 compressed character lists.
 *
 * The compression is by base 88 encoding the delta between two adjacent
 * characters in ths list. The deltas can be positive or negative. Also, there
 * would be character ranges. These three types of values
 * are given enum values 0, 1 and 2 respectively. Initial 3 bits are used for
 * encoding the type and total length of the encoded value. Length enums 0, 1
 * and 2 represents lengths 1, 2 and 4. So (value * 8 + type * 3 + length enum)
 * is encoded in base 88 by following characters for numbers from 0 to 87:
 * 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ (continued in next line)
 * abcdefghijklmnopqrstuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~
 *
 * Value uses 0 based counting. That is value for the range [a, b] is 0 and
 * that of [a, c] is 1. Simillarly, the delta of "ab" is 0.
 *
 * Following python script can be used to compress character lists taken
 * standard input: http://go/charlistcompressor.py
 *
 */

goog.provide('goog.i18n.CharListDecompressor');

goog.require('goog.array');
goog.require('goog.i18n.uChar');



/**
 * Class to decompress base88 compressed character list.
 * @constructor
 */
goog.i18n.CharListDecompressor = function() {
  this.buildCharMap_('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr' +
      'stuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~');
};


/**
 * 1-1 mapping from ascii characters used in encoding to an integer in the
 * range 0 to 87.
 * @type {Object}
 * @private
 */
goog.i18n.CharListDecompressor.prototype.charMap_ = null;


/**
 * Builds the map from ascii characters used for the base88 scheme to number
 * each character represents.
 * @param {string} str The string of characters used in base88 scheme.
 * @private
 */
goog.i18n.CharListDecompressor.prototype.buildCharMap_ = function(str) {
  if (!this.charMap_) {
    this.charMap_ = {};
    for (var i = 0; i < str.length; i++) {
      this.charMap_[str.charAt(i)] = i;
    }
  }
};


/**
 * Gets the number encoded in base88 scheme by a substring of given length
 * and placed at the a given position of the string.
 * @param {string} str String containing sequence of characters encoding a
 *     number in base 88 scheme.
 * @param {number} start Starting position of substring encoding the number.
 * @param {number} leng Length of the substring encoding the number.
 * @return {number} The encoded number.
 * @private
 */
goog.i18n.CharListDecompressor.prototype.getCodeAt_ = function(str, start,
    leng) {
  var result = 0;
  for (var i = 0; i < leng; i++) {
    var c = this.charMap_[str.charAt(start + i)];
    result += c * Math.pow(88, i);
  }
  return result;
};


/**
 * Add character(s) specified by the value and type to given list and return
 * the next character in the sequence.
 * @param {Array.<string>} list The list of characters to which the specified
 *     characters are appended.
 * @param {number} lastcode The last codepoint that was added to the list.
 * @param {number} value The value component that representing the delta or
 *      range.
 * @param {number} type The type component that representing whether the value
 *      is a positive or negative delta or range.
 * @return {number} Last codepoint that is added to the list.
 * @private
 */
goog.i18n.CharListDecompressor.prototype.addChars_ = function(list, lastcode,
    value, type) {
   if (type == 0) {
     lastcode += value + 1;
     goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode));
   } else if (type == 1) {
     lastcode -= value + 1;
     goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode));
   } else if (type == 2) {
     for (var i = 0; i <= value; i++) {
       lastcode++;
       goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode));
     }
   }
  return lastcode;
};


/**
 * Gets the list of characters specified in the given string by base 88 scheme.
 * @param {string} str The string encoding character list.
 * @return {Array.<string>} The list of characters specified by the given string
 *     in base 88 scheme.
 */
goog.i18n.CharListDecompressor.prototype.toCharList = function(str) {
  var metasize = 8;
  var result = [];
  var lastcode = 0;
  var i = 0;
  while (i < str.length) {
    var c = this.charMap_[str.charAt(i)];
    var meta = c % metasize;
    var type = Math.floor(meta / 3);
    var leng = (meta % 3) + 1;
    if (leng == 3) {
      leng++;
    }
    var code = this.getCodeAt_(str, i, leng);
    var value = Math.floor(code / metasize);
    lastcode = this.addChars_(result, lastcode, value, type);

    i += leng;
  }
  return result;
};