aboutsummaryrefslogtreecommitdiff
path: root/contexts/data/lib/closure-library/third_party/closure/goog/caja/string/html/htmlparser.js
diff options
context:
space:
mode:
Diffstat (limited to 'contexts/data/lib/closure-library/third_party/closure/goog/caja/string/html/htmlparser.js')
-rw-r--r--contexts/data/lib/closure-library/third_party/closure/goog/caja/string/html/htmlparser.js611
1 files changed, 0 insertions, 611 deletions
diff --git a/contexts/data/lib/closure-library/third_party/closure/goog/caja/string/html/htmlparser.js b/contexts/data/lib/closure-library/third_party/closure/goog/caja/string/html/htmlparser.js
deleted file mode 100644
index d241d4b..0000000
--- a/contexts/data/lib/closure-library/third_party/closure/goog/caja/string/html/htmlparser.js
+++ /dev/null
@@ -1,611 +0,0 @@
-// Copyright 2006-2008, The Google Caja project.
-// Modifications Copyright 2009 The Closure Library Authors. All Rights Reserved.
-// All Rights Reserved
-
-/**
- * @license Portions of this code are from the google-caja project, received by
- * Google under the Apache license (http://code.google.com/p/google-caja/).
- * All other code is Copyright 2009 Google, Inc. All Rights Reserved.
-
-// Copyright (C) 2006 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
- */
-
-/**
- * @fileoverview An HTML SAX parser.
- *
- * Examples of usage of the {@code goog.string.html.HtmlParser}:
- * <pre>
- * var handler = new MyCustomHtmlVisitorHandlerThatExtendsHtmlSaxHandler();
- * var parser = new goog.string.html.HtmlParser();
- * parser.parse(handler, '<html><a href="google.com">link found!</a></html>');
- * </pre>
- *
- * TODO(user, msamuel): validate sanitizer regex against the HTML5 grammar at
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html
- *
- * @supported IE6, IE7, IE8, FF1.5, FF2, FF3, Chrome 3.0, Safari and Opera 10.
- */
-
-goog.provide('goog.string.html.HtmlParser');
-goog.provide('goog.string.html.HtmlParser.EFlags');
-goog.provide('goog.string.html.HtmlParser.Elements');
-goog.provide('goog.string.html.HtmlParser.Entities');
-goog.provide('goog.string.html.HtmlSaxHandler');
-
-
-/**
- * An HTML parser: {@code parse} takes a string of markup and reports what it
- * finds by calling methods on a {@code goog.string.html.HtmlSaxHandler}.
- * The constructor takes no arguments and sets no instance properties.
- *
- * @constructor
- */
-goog.string.html.HtmlParser = function() {
-};
-
-
-/**
- * Named HTML entities that the parser knows how to decode, keyed by the
- * entity name without its leading '&' and trailing ';'.
- * TODO(user): use {@code goog.string.htmlEncode} instead.
- * @enum {string}
- */
-goog.string.html.HtmlParser.Entities = {
-  lt: '<',
-  gt: '>',
-  amp: '&',
-  // U+00A0 NO-BREAK SPACE. Spelled as a hex escape because the legacy octal
-  // form '\240' is rejected in strict mode code.
-  nbsp: '\xA0',
-  quot: '"',
-  apos: '\''
-};
-
-
-/**
- * Bit flags describing how the parser treats an element. Every value is a
- * distinct power of two, so several flags can be OR-ed into one number.
- * @enum {number}
- */
-goog.string.html.HtmlParser.EFlags = {
-  OPTIONAL_ENDTAG: 1 << 0,
-  EMPTY: 1 << 1,
-  CDATA: 1 << 2,
-  RCDATA: 1 << 3,
-  UNSAFE: 1 << 4,
-  FOLDABLE: 1 << 5
-};
-
-
-/**
- * Maps each known element name (lower case) to the OR of the
- * {@code goog.string.html.HtmlParser.EFlags} that apply to it; 0 means the
- * element has no flags. {@code parse} only reports start and end tags for
- * names that are own properties of this map.
- * @type {Object}
- */
-goog.string.html.HtmlParser.Elements = (function() {
-  // Short local names for the flag bits keep the table readable.
-  var flags = goog.string.html.HtmlParser.EFlags;
-  var OPTIONAL_ENDTAG = flags.OPTIONAL_ENDTAG;
-  var EMPTY = flags.EMPTY;
-  var CDATA = flags.CDATA;
-  var RCDATA = flags.RCDATA;
-  var UNSAFE = flags.UNSAFE;
-  var FOLDABLE = flags.FOLDABLE;
-
-  return {
-    'a': 0,
-    'abbr': 0,
-    'acronym': 0,
-    'address': 0,
-    'applet': UNSAFE,
-    'area': EMPTY,
-    'b': 0,
-    'base': EMPTY | UNSAFE,
-    'basefont': EMPTY | UNSAFE,
-    'bdo': 0,
-    'big': 0,
-    'blockquote': 0,
-    'body': OPTIONAL_ENDTAG | UNSAFE | FOLDABLE,
-    'br': EMPTY,
-    'button': 0,
-    'caption': 0,
-    'center': 0,
-    'cite': 0,
-    'code': 0,
-    'col': EMPTY,
-    'colgroup': OPTIONAL_ENDTAG,
-    'dd': OPTIONAL_ENDTAG,
-    'del': 0,
-    'dfn': 0,
-    'dir': 0,
-    'div': 0,
-    'dl': 0,
-    'dt': OPTIONAL_ENDTAG,
-    'em': 0,
-    'fieldset': 0,
-    'font': 0,
-    'form': 0,
-    'frame': EMPTY | UNSAFE,
-    'frameset': UNSAFE,
-    'h1': 0,
-    'h2': 0,
-    'h3': 0,
-    'h4': 0,
-    'h5': 0,
-    'h6': 0,
-    'head': OPTIONAL_ENDTAG | UNSAFE | FOLDABLE,
-    'hr': EMPTY,
-    'html': OPTIONAL_ENDTAG | UNSAFE | FOLDABLE,
-    'i': 0,
-    'iframe': UNSAFE | CDATA,
-    'img': EMPTY,
-    'input': EMPTY,
-    'ins': 0,
-    'isindex': EMPTY | UNSAFE,
-    'kbd': 0,
-    'label': 0,
-    'legend': 0,
-    'li': OPTIONAL_ENDTAG,
-    'link': EMPTY | UNSAFE,
-    'map': 0,
-    'menu': 0,
-    'meta': EMPTY | UNSAFE,
-    'noframes': UNSAFE | CDATA,
-    'noscript': UNSAFE | CDATA,
-    'object': UNSAFE,
-    'ol': 0,
-    'optgroup': 0,
-    'option': OPTIONAL_ENDTAG,
-    'p': OPTIONAL_ENDTAG,
-    'param': EMPTY | UNSAFE,
-    'pre': 0,
-    'q': 0,
-    's': 0,
-    'samp': 0,
-    'script': UNSAFE | CDATA,
-    'select': 0,
-    'small': 0,
-    'span': 0,
-    'strike': 0,
-    'strong': 0,
-    'style': UNSAFE | CDATA,
-    'sub': 0,
-    'sup': 0,
-    'table': 0,
-    'tbody': OPTIONAL_ENDTAG,
-    'td': OPTIONAL_ENDTAG,
-    'textarea': RCDATA,
-    'tfoot': OPTIONAL_ENDTAG,
-    'th': OPTIONAL_ENDTAG,
-    'thead': OPTIONAL_ENDTAG,
-    'title': RCDATA | UNSAFE,
-    'tr': OPTIONAL_ENDTAG,
-    'tt': 0,
-    'u': 0,
-    'ul': 0,
-    'var': 0
-  };
-})();
-
-
-/**
- * Global regular expression that matches every '&' character.
- * Not referenced elsewhere in this file.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.AMP_RE_ = /&/g;
-
-
-/**
- * Global, case-insensitive regular expression that matches a "loose" '&',
- * i.e. one that does not start an entity: an '&' followed by a character
- * that is neither a letter nor '#', by a '#' that is not followed by a digit
- * or 'x', by '#x' that is not followed by a hex digit, or by the end of the
- * input. Whatever follows the '&' is captured in group 1 so that a
- * replacement can keep it.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.LOOSE_AMP_RE_ =
- /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi;
-
-
-/**
- * Global regular expression that matches every '<' character.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.LT_RE_ = /</g;
-
-
-/**
- * Global regular expression that matches every '>' character.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.GT_RE_ = />/g;
-
-
-/**
- * Global regular expression that matches every double quote character.
- * Not referenced elsewhere in this file.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.QUOTE_RE_ = /\"/g;
-
-
-/**
- * Global regular expression that matches every '=' character.
- * Not referenced elsewhere in this file.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.EQUALS_RE_ = /=/g;
-
-
-/**
- * Global regular expression that matches every NUL (U+0000) character.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.NULL_RE_ = /\0/g;
-
-
-/**
- * Global regular expression that matches a complete entity reference such as
- * '&amp;', '&#34;' or '&#x22;'. The text between the '&' and the ';' is
- * captured in group 1.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.ENTITY_RE_ = /&(#\d+|#x[0-9A-Fa-f]+|\w+);/g;
-
-
-/**
- * Regular expression that matches the body of a decimal character reference,
- * i.e. '#' followed by decimal digits and nothing else. The digits are
- * captured in group 1.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.DECIMAL_ESCAPE_RE_ = /^#(\d+)$/;
-
-
-/**
- * Regular expression that matches the body of a hexadecimal character
- * reference, i.e. '#x' followed by hex digits and nothing else. The hex
- * digits are captured in group 1.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.HEX_ESCAPE_RE_ = /^#x([0-9A-Fa-f]+)$/;
-
-
-/**
- * Case-insensitive regular expression that matches the next token while the
- * parser is inside a tag. Leading whitespace is skipped, then one of:
- * an attribute (name in group 1, the optional '=value' part in group 2 and
- * the raw, possibly quoted, value in group 3), the end of the tag ('>' or
- * '/>', in group 4), or a run of other characters that is not captured.
- * It does not match a string that consists solely of whitespace.
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.INSIDE_TAG_TOKEN_ = new RegExp(
- // Don't capture space.
- '^\\s*(?:' +
- // Capture an attribute name in group 1, and value in group 3.
- // We capture the fact that a value was given in group 2, since
- // interpreters are inconsistent in whether a group that matches nothing
- // is null, undefined, or the empty string.
- ('(?:' +
- '([a-z][a-z-]*)' + // attribute name
- ('(' + // optionally followed
- '\\s*=\\s*' +
- ('(' +
- // A double quoted string.
- '\"[^\"]*\"' +
- // A single quoted string.
- '|\'[^\']*\'' +
- // The positive lookahead is used to make sure that in
- // <foo bar= baz=boo>, the value for bar is blank, not "baz=boo".
- '|(?=[a-z][a-z-]*\\s*=)' +
- // An unquoted value that is not an attribute name.
- // We know it is not an attribute name because the previous
- // zero-width match would've eliminated that possibility.
- '|[^>\"\'\\s]*' +
- ')'
- ) +
- ')'
- ) + '?' +
- ')'
- ) +
- // End of tag captured in group 4.
- '|(/?>)' +
- // Don't capture cruft
- '|[^a-z\\s>]+)',
- 'i');
-
-
-/**
- * Case-insensitive regular expression that matches the next token while the
- * parser is outside a tag. Exactly one alternative matches at the start of
- * the input: an entity (name in group 1), a comment, doctype or processing
- * instruction (matched but not captured), the start of an open or close tag
- * ('/' in group 2 for close tags, tag name in group 3), a run of text
- * (group 4), or a single stray '<', '&' or '>' (group 5).
- * @type {RegExp}
- * @private
- */
-goog.string.html.HtmlParser.OUTSIDE_TAG_TOKEN_ = new RegExp(
- '^(?:' +
- // Entity captured in group 1.
- '&(\\#[0-9]+|\\#[x][0-9a-f]+|\\w+);' +
- // Comment, doctypes, and processing instructions not captured.
- // NOTE(review): the processing instruction class [^>*] excludes '*' as
- // well as '>'; possibly [^>]* was intended -- confirm before changing.
- '|<[!]--[\\s\\S]*?-->|<!\\w[^>]*>|<\\?[^>*]*>' +
- // '/' captured in group 2 for close tags, and name captured in group 3.
- '|<(/)?([a-z][a-z0-9]*)' +
- // Text captured in group 4.
- '|([^<&>]+)' +
- // Cruft captured in group 5.
- '|([<&>]))',
- 'i');
-
-
-/**
- * Given a SAX-like {@code goog.string.html.HtmlSaxHandler}, tokenizes
- * {@code htmlText} and reports its structure to the {@code handler} as it
- * goes.
- *
- * {@code startDoc}, {@code pcdata} and {@code endDoc} are always called;
- * {@code startTag}, {@code endTag}, {@code cdata} and {@code rcdata} are
- * only called when the handler defines them. Start and end tags are only
- * reported for element names listed in
- * {@code goog.string.html.HtmlParser.Elements}. The attribute array passed
- * to {@code startTag} is a flat [name, value, name, value, ...] list that is
- * reused and emptied after each tag, so a handler that wants to keep it must
- * copy it. A tag that is still unfinished when the input ends is dropped.
- *
- * @param {goog.string.html.HtmlSaxHandler} handler The HtmlSaxHandler that will
- *     receive the events.
- * @param {string} htmlText The html text.
- */
-goog.string.html.HtmlParser.prototype.parse = function(handler, htmlText) {
-  var inTag = false;  // True iff we're currently processing a tag.
-  var attribs = [];  // Accumulates attribute names and values.
-  var tagName;  // The name of the tag currently being processed.
-  var eflags;  // The element flags for the current tag.
-  var openTag;  // True if the current tag is an open tag.
-
-  // Lets the handler know that we are starting to parse the document.
-  handler.startDoc();
-
-  // Consumes tokens from the htmlText and stops once all tokens are processed.
-  while (htmlText) {
-    var regex = inTag ?
-        goog.string.html.HtmlParser.INSIDE_TAG_TOKEN_ :
-        goog.string.html.HtmlParser.OUTSIDE_TAG_TOKEN_;
-    // Gets the next token
-    var m = htmlText.match(regex);
-    if (!m) {
-      // INSIDE_TAG_TOKEN_ matches nothing when only whitespace is left, e.g.
-      // for the input '<a '. Drop the unfinished tag, just as happens when
-      // the input ends inside a tag without trailing whitespace, instead of
-      // throwing a TypeError on m[0].
-      break;
-    }
-    // And removes it from the string
-    htmlText = htmlText.substring(m[0].length);
-
-    // TODO(goto): cleanup this code breaking it into separate methods.
-    if (inTag) {
-      if (m[1]) {  // Attribute.
-        // SetAttribute with uppercase names doesn't work on IE6.
-        var attribName = goog.string.html.toLowerCase(m[1]);
-        var decodedValue;
-        if (m[2]) {
-          var encodedValue = m[3];
-          switch (encodedValue.charCodeAt(0)) {  // Strip quotes.
-            case 34: case 39:
-              encodedValue = encodedValue.substring(
-                  1, encodedValue.length - 1);
-              break;
-          }
-          decodedValue = this.unescapeEntities_(this.stripNULs_(encodedValue));
-        } else {
-          // Use name as value for valueless attribs, so
-          //   <input type=checkbox checked>
-          // gets attributes ['type', 'checkbox', 'checked', 'checked']
-          decodedValue = attribName;
-        }
-        attribs.push(attribName, decodedValue);
-      } else if (m[4]) {  // End of tag ('>' or '/>').
-        if (eflags !== void 0) {  // False if not in whitelist.
-          if (openTag) {
-            if (handler.startTag) {
-              handler.startTag(/** @type {string} */ (tagName), attribs);
-            }
-          } else {
-            if (handler.endTag) {
-              handler.endTag(/** @type {string} */ (tagName));
-            }
-          }
-        }
-
-        if (openTag && (eflags &
-            (goog.string.html.HtmlParser.EFlags.CDATA |
-             goog.string.html.HtmlParser.EFlags.RCDATA))) {
-          // Everything up to the matching close tag is raw content. The close
-          // tag is searched for case-insensitively in htmlText itself rather
-          // than in a lower-cased copy: String.prototype.toLowerCase can
-          // change the length of a string (U+0130 becomes two code units),
-          // which would make an offset found in the copy point at the wrong
-          // place in htmlText. tagName only contains [a-z0-9], so it is safe
-          // to embed in a RegExp source.
-          var dataEnd = htmlText.search(new RegExp('</' + tagName, 'i'));
-          if (dataEnd < 0) {
-            // No close tag: the rest of the input is content.
-            dataEnd = htmlText.length;
-          }
-          if (eflags & goog.string.html.HtmlParser.EFlags.CDATA) {
-            if (handler.cdata) {
-              handler.cdata(htmlText.substring(0, dataEnd));
-            }
-          } else if (handler.rcdata) {
-            handler.rcdata(
-                this.normalizeRCData_(htmlText.substring(0, dataEnd)));
-          }
-          htmlText = htmlText.substring(dataEnd);
-        }
-
-        // Reset the per-tag state. The attribute array is emptied in place,
-        // so the same instance is handed to every startTag call.
-        tagName = eflags = openTag = void 0;
-        attribs.length = 0;
-        inTag = false;
-      }
-      // Anything else inside a tag is cruft and is skipped.
-    } else {
-      if (m[1]) {  // Entity.
-        handler.pcdata(m[0]);
-      } else if (m[3]) {  // Tag.
-        openTag = !m[2];
-        inTag = true;
-        tagName = goog.string.html.toLowerCase(m[3]);
-        eflags = goog.string.html.HtmlParser.Elements.hasOwnProperty(tagName) ?
-            goog.string.html.HtmlParser.Elements[tagName] : void 0;
-      } else if (m[4]) {  // Text.
-        handler.pcdata(m[4]);
-      } else if (m[5]) {  // Cruft.
-        switch (m[5]) {
-          case '<': handler.pcdata('&lt;'); break;
-          case '>': handler.pcdata('&gt;'); break;
-          default: handler.pcdata('&amp;'); break;
-        }
-      }
-      // Comments, doctypes and processing instructions match without a
-      // capture group and are therefore dropped.
-    }
-  }
-
-  // Lets the handler know that we are done parsing the document.
-  handler.endDoc();
-};
-
-
-/**
- * Decodes an HTML entity.
- *
- * Named entities are looked up, case-insensitively, in
- * {@code goog.string.html.HtmlParser.Entities}. Decimal ('#34') and
- * hexadecimal ('#x22') references are converted to the character they name;
- * code points above U+FFFF are returned as a UTF-16 surrogate pair rather
- * than being truncated to 16 bits.
- *
- * @param {string} name The content between the '&' and the ';'.
- * @return {string} A single unicode code-point as a string, or the empty
- *     string if the name is not recognised or is above U+10FFFF.
- * @private
- */
-goog.string.html.HtmlParser.prototype.lookupEntity_ = function(name) {
-  // TODO(goto): use {goog.string.htmlDecode} instead ?
-  // TODO(goto): &pi; is different from &Pi;
-  name = goog.string.html.toLowerCase(name);
-  if (goog.string.html.HtmlParser.Entities.hasOwnProperty(name)) {
-    return goog.string.html.HtmlParser.Entities[name];
-  }
-
-  var codePoint;
-  var m = name.match(goog.string.html.HtmlParser.DECIMAL_ESCAPE_RE_);
-  if (m) {
-    codePoint = parseInt(m[1], 10);
-  } else if (
-      !!(m = name.match(goog.string.html.HtmlParser.HEX_ESCAPE_RE_))) {
-    codePoint = parseInt(m[1], 16);
-  } else {
-    return '';
-  }
-
-  if (codePoint <= 0xFFFF) {
-    return String.fromCharCode(codePoint);
-  }
-  if (codePoint > 0x10FFFF) {
-    // Not a Unicode code point. String.fromCharCode would silently keep only
-    // the low 16 bits, turning e.g. '#x1003c' into '<'.
-    return '';
-  }
-  // Supplementary plane: encode as a high/low surrogate pair.
-  codePoint -= 0x10000;
-  return String.fromCharCode(
-      0xD800 + (codePoint >> 10), 0xDC00 + (codePoint & 0x3FF));
-};
-
-
-/**
- * Returns a copy of the given string with every NUL (U+0000) character
- * deleted.
- * @param {string} s The string to have the null characters removed.
- * @return {string} A string without null characters.
- * @private
- */
-goog.string.html.HtmlParser.prototype.stripNULs_ = function(s) {
-  var nulPattern = goog.string.html.HtmlParser.NULL_RE_;
-  var withoutNuls = s.replace(nulPattern, '');
-  return withoutNuls;
-};
-
-
-/**
- * Returns the plain text of a chunk of HTML CDATA that may contain entity
- * references: every reference matched by {@code ENTITY_RE_} is replaced with
- * the result of {@code lookupEntity_} for its name.
- *
- * TODO(goto): use {@code goog.string.unescapeEntities} instead ?
- * @param {string} s A chunk of HTML CDATA. It must not start or end inside
- *     an HTML entity.
- * @return {string} The text with its entities unescaped.
- * @private
- */
-goog.string.html.HtmlParser.prototype.unescapeEntities_ = function(s) {
-  var self = this;
-  return s.replace(
-      goog.string.html.HtmlParser.ENTITY_RE_,
-      // String.prototype.replace passes the whole match ('&lt;') first and
-      // the capture groups after it; lookupEntity_ wants only the name
-      // between the '&' and the ';', which is group 1.
-      function(match, name) {
-        return self.lookupEntity_(name);
-      });
-};
-
-
-/**
- * Escapes the characters in RCDATA that can be escaped without changing its
- * meaning: ampersands that do not start an entity, then '<', then '>'.
- * @param {string} rcdata The RCDATA string we want to normalize.
- * @return {string} A normalized version of RCDATA.
- * @private
- */
-goog.string.html.HtmlParser.prototype.normalizeRCData_ = function(rcdata) {
-  // Ampersands go first; the '&lt;' and '&gt;' added afterwards are complete
-  // entities and must not be escaped again.
-  var normalized = rcdata.replace(
-      goog.string.html.HtmlParser.LOOSE_AMP_RE_, '&amp;$1');
-  normalized = normalized.replace(
-      goog.string.html.HtmlParser.LT_RE_, '&lt;');
-  normalized = normalized.replace(
-      goog.string.html.HtmlParser.GT_RE_, '&gt;');
-  return normalized;
-};
-
-
-/**
- * Lower-cases a string. When the native {@code toLowerCase} does not turn
- * 'SCRIPT' into 'script', only the ASCII letters A-Z are converted, by
- * setting bit 5 of their character code.
- *
- * TODO(goto): why isn't this in the string package ? does this solves any
- * real problem ? move it to the goog.string package if it does.
- *
- * @param {string} str The string to lower case.
- * @return {string} The str in lower case format.
- */
-goog.string.html.toLowerCase = function(str) {
-  // The below may not be true on browsers in the Turkish locale.
-  var nativeHandlesAscii = 'script' === 'SCRIPT'.toLowerCase();
-  if (!nativeHandlesAscii) {
-    return str.replace(/[A-Z]/g, function(upper) {
-      return String.fromCharCode(upper.charCodeAt(0) | 32);
-    });
-  }
-  return str.toLowerCase();
-};
-
-
-/**
- * The visitor that {@code goog.string.html.HtmlParser} reports to while the
- * HTML is being parsed. Every method is declared as
- * {@code goog.abstractMethod}, so concrete handlers are expected to provide
- * their own implementations.
- *
- * @constructor
- */
-goog.string.html.HtmlSaxHandler = function() {
-};
-
-
-/**
- * Handler called when the parser found a new tag. The parser skips this call
- * if the handler has no {@code startTag}.
- * @param {string} name The lower-cased name of the tag that is starting.
- * @param {Array.<string>} attributes The attributes of the tag as a flat
- * list of alternating names and values. The parser empties this array after
- * the call, so copy it if it needs to be kept.
- */
-goog.string.html.HtmlSaxHandler.prototype.startTag = goog.abstractMethod;
-
-
-/**
- * Handler called when the parser found a closing tag. The parser skips this
- * call if the handler has no {@code endTag}.
- * @param {string} name The lower-cased name of the tag that is ending.
- */
-goog.string.html.HtmlSaxHandler.prototype.endTag = goog.abstractMethod;
-
-
-/**
- * Handler called when PCDATA is found: a run of text, an entity reference
- * exactly as written, or a stray '<', '>' or '&' escaped as '&lt;', '&gt;'
- * or '&amp;'. The parser calls this unconditionally.
- * @param {string} text The PCDATA text found.
- */
-goog.string.html.HtmlSaxHandler.prototype.pcdata = goog.abstractMethod;
-
-
-/**
- * Handler called with the content of an RCDATA element after loose '&', '<'
- * and '>' have been escaped. The parser skips this call if the handler has
- * no {@code rcdata}.
- * @param {string} text The RCDATA text found.
- */
-goog.string.html.HtmlSaxHandler.prototype.rcdata = goog.abstractMethod;
-
-
-/**
- * Handler called with the raw, unescaped content of a CDATA element. The
- * parser skips this call if the handler has no {@code cdata}.
- * @param {string} text The CDATA text found.
- */
-goog.string.html.HtmlSaxHandler.prototype.cdata = goog.abstractMethod;
-
-
-/**
- * Handler called once, before any other callback, when the parser is
- * starting to parse the document. The parser calls this unconditionally.
- */
-goog.string.html.HtmlSaxHandler.prototype.startDoc = goog.abstractMethod;
-
-
-/**
- * Handler called once, after all other callbacks, when the parsing is done.
- * The parser calls this unconditionally.
- */
-goog.string.html.HtmlSaxHandler.prototype.endDoc = goog.abstractMethod;