diff options
Diffstat (limited to 'third_party/java/jopt-simple/src/site/resources/scripts/prettify.js')
-rw-r--r-- | third_party/java/jopt-simple/src/site/resources/scripts/prettify.js | 1602 |
1 files changed, 0 insertions, 1602 deletions
diff --git a/third_party/java/jopt-simple/src/site/resources/scripts/prettify.js b/third_party/java/jopt-simple/src/site/resources/scripts/prettify.js deleted file mode 100644 index de1e07ffd3..0000000000 --- a/third_party/java/jopt-simple/src/site/resources/scripts/prettify.js +++ /dev/null @@ -1,1602 +0,0 @@ -// Copyright (C) 2006 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -/** - * @fileoverview - * some functions for browser-side pretty printing of code contained in html. - * - * The lexer should work on a number of languages including C and friends, - * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles. - * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but, - * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or - * CAML-like languages. - * - * If there's a language not mentioned here, then I don't know it, and don't - * know whether it works. If it has a C-like, Bash-like, or XML-like syntax - * then it should work passably. - * - * Usage: - * 1) include this source file in an html page via - * <script type="text/javascript" src="/path/to/prettify.js"></script> - * 2) define style rules. See the example page for examples. - * 3) mark the <pre> and <code> tags in your source with class=prettyprint. 
/**
 * (usage notes, continued)
 *    You can also use the (html deprecated) <xmp> tag, but the pretty printer
 *    needs to do more substantial DOM manipulations to support that, so some
 *    css styles may not be preserved.
 * That's it.  I wanted to keep the API as simple as possible, so there's no
 * need to specify which language the code is in.
 *
 * Change log:
 * cbeust, 2006/08/22
 *   Java annotations (start with "@") are now captured as literals ("lit")
 */

/**
 * Keyword table: maps every word that should be styled as a keyword to true.
 * It is a plain object, so it also inherits Object.prototype members; readers
 * should test for own properties rather than mere truthiness.
 */
var PR_keywords = {};
/** initialize the keyword list for our target languages. */
(function () {
  // Each list is a space separated string of words.
  var CPP_KEYWORDS = "abstract bool break case catch char class const " +
    "const_cast continue default delete deprecated dllexport dllimport do " +
    "double dynamic_cast else enum explicit extern false float for friend " +
    "goto if inline int long mutable naked namespace new noinline noreturn " +
    "nothrow novtable operator private property protected public register " +
    "reinterpret_cast return selectany short signed sizeof static " +
    "static_cast struct switch template this thread throw true try typedef " +
    // "using" used to be followed by the prose fragment
    // "declaration, directive", which registered two bogus keywords.
    "typeid typename union unsigned using uuid " +
    "virtual void volatile while typeof";
  var CSHARP_KEYWORDS = "as base by byte checked decimal delegate descending " +
    "event finally fixed foreach from group implicit in interface internal " +
    "into is lock null object out override orderby params readonly ref sbyte " +
    "sealed stackalloc string select uint ulong unchecked unsafe ushort var";
  var JAVA_KEYWORDS = "package synchronized boolean implements import throws " +
    "instanceof transient extends final strictfp native super";
  var JSCRIPT_KEYWORDS = "debugger export function with NaN Infinity";
  var PERL_KEYWORDS = "require sub unless until use elsif BEGIN END";
  var PYTHON_KEYWORDS = "and assert def del elif except exec global lambda " +
    "not or pass print raise yield False True None";
  var RUBY_KEYWORDS = "then end begin rescue ensure module when undef next " +
    "redo retry alias defined";
  var SH_KEYWORDS = "done fi";

  var KEYWORDS = [CPP_KEYWORDS, CSHARP_KEYWORDS, JAVA_KEYWORDS,
                  JSCRIPT_KEYWORDS, PERL_KEYWORDS, PYTHON_KEYWORDS,
                  RUBY_KEYWORDS, SH_KEYWORDS];
  for ( var k = 0; k < KEYWORDS.length; k++ ) {
    var kw = KEYWORDS[k].split(' ');
    for ( var i = 0; i < kw.length; i++ ) {
      // split(' ') yields empty strings for doubled spaces; skip them.
      if ( kw[i] ) {
        PR_keywords[kw[i]] = true;
      }
    }
  }
}).call(this);

// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text, i.e. text with no more specific style. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/** the number of characters between tab columns */
var PR_TAB_WIDTH = 8;

/** the position of the end of a token.  A division of a string into
 * n tokens can be represented as a series of n - 1 token ends, as long as
 * runs of whitespace warrant their own token.
 * @constructor
 * @param {Number} end the position just past the token's last character.
 * @param style the style of the token that ends at end.  May be null but
 *   not undefined.
 * @throws {Error} if style is undefined or end is not a number.
 * @private
 */
function PR_TokenEnd( end, style ) {
  if ( undefined === style ) {
    throw new Error('BAD');
  }
  if ( 'number' != typeof(end) ) {
    throw new Error('BAD');
  }
  this.end = end;
  this.style = style;
}
/** debugging representation, e.g. "[PR_TokenEnd 3:com]". */
PR_TokenEnd.prototype.toString = function () {
  return '[PR_TokenEnd ' + this.end +
    (this.style ? ':' + this.style : '') + ']';
};
/** a chunk of text with a style.  These are used to represent both the output
 * from the lexing functions as well as intermediate results.
 * @constructor
 * @param token the token text
 * @param style one of the token styles defined in designdoc-template, or null
 *   for a styleless token, such as an embedded html tag.
 * @throws {Error} if style is undefined.
 * @private
 */
function PR_Token( token, style ) {
  if ( undefined === style ) {
    throw new Error('BAD');
  }
  this.token = token;
  this.style = style;
}

/** debugging representation, e.g. "[PR_Token foo:kwd]". */
PR_Token.prototype.toString = function () {
  return '[PR_Token ' + this.token + (this.style ? ':' + this.style : '') + ']';
};

/** a helper class that decodes common html entities used to escape special
 * characters in source code.  After a call to decode, ch holds the decoded
 * character and next the index of the character that follows it.
 * @constructor
 * @private
 */
function PR_DecodeHelper() {
  this.next = 0;
  this.ch = '\0';
}

var PR_NAMED_ENTITIES = {
  'lt': '<',
  'gt': '>',
  'quot': '"',
  'apos': "'",
  'amp': '&'   // reencoding requires that & always be decoded properly
};

/** the longest entity name (the text between '&' and ';') that decode will
 * consider.  8 is enough for every named entity above and for numeric
 * entities up to "#x10FFFF".  The previous limit of 3 made "quot", "apos"
 * and any numeric entity of three or more digits undecodable.
 */
var PR_MAX_ENTITY_NAME_LENGTH = 8;

/** decodes the character or html entity that starts at s[i].
 * @param {String} s html text.
 * @param {Number} i index into s of the character to decode.
 * @return {String} the decoded character, which is also stored in this.ch.
 *   An '&' that is followed within range by a ';' but does not form a known
 *   entity decodes to '\0' and only the '&' is consumed.
 */
PR_DecodeHelper.prototype.decode = function ( s, i ) {
  var next = i + 1;
  var ch = s.charAt(i);
  if ( '&' === ch ) {
    var semi = s.indexOf(';', next);
    if ( semi >= 0 && semi - next <= PR_MAX_ENTITY_NAME_LENGTH ) {
      var entityName = s.substring(next, semi);
      var decoded = null;
      if ( entityName.charAt(0) === '#' ) {  // check for numeric entity
        var ch1 = entityName.charAt(1);
        var charCode;
        if ( ch1 === 'x' || ch1 === 'X' ) {  // like &#xA0;
          charCode = parseInt(entityName.substring(2), 16);
        }
        else {  // like &#160;
          charCode = parseInt(entityName.substring(1), 10);
        }
        if ( !isNaN(charCode) ) {
          decoded = String.fromCharCode(charCode);
        }
      }
      if ( !decoded ) {
        var key = entityName.toLowerCase();
        // own-property check so that inherited members are never "entities"
        if ( Object.prototype.hasOwnProperty.call(PR_NAMED_ENTITIES, key) ) {
          decoded = PR_NAMED_ENTITIES[key];
        }
      }
      if ( decoded ) {
        ch = decoded;
        next = semi + 1;
      }
      else {  // skip over unrecognized entity
        next = i + 1;
        ch = '\0';
      }
    }
  }
  this.next = next;
  this.ch = ch;
  return this.ch;
};

// some string utilities
/** true iff ch is an ASCII letter. */
function PR_isWordChar( ch ) {
  return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

/** true iff ch can start an identifier: a letter, '_', '$' or '@'. */
function PR_isIdentifierStart( ch ) {
  return PR_isWordChar(ch) || ch == '_' || ch == '$' || ch == '@';
}

/** true iff ch can appear inside an identifier. */
function PR_isIdentifierPart( ch ) {
  return PR_isIdentifierStart(ch) || PR_isDigitChar(ch);
}

/** true iff ch is a tab, space, carriage return or newline.
 * Note that the empty string also yields true.
 */
function PR_isSpaceChar( ch ) {
  return "\t \r\n".indexOf(ch) >= 0;
}

/** true iff ch is an ASCII digit. */
function PR_isDigitChar( ch ) {
  return ch >= '0' && ch <= '9';
}

/** strips leading and trailing space characters from s. */
function PR_trim( s ) {
  var i = 0, j = s.length - 1;
  while ( i <= j && PR_isSpaceChar(s.charAt(i)) ) {
    ++i;
  }
  while ( j > i && PR_isSpaceChar(s.charAt(j)) ) {
    --j;
  }
  return s.substring(i, j + 1);
}

/** true iff s begins with prefix. */
function PR_startsWith( s, prefix ) {
  return s.length >= prefix.length && prefix == s.substring(0, prefix.length);
}

/** true iff s ends with suffix. */
function PR_endsWith( s, suffix ) {
  return s.length >= suffix.length &&
    suffix == s.substring(s.length - suffix.length, s.length);
}

/** true iff prefix matches the first prefix characters in chars[0:len].
 * @param {Array} chars an array of single character strings.
 * @param {Number} len the number of valid entries in chars.
 * @param {String} prefix
 * @private
 */
function PR_prefixMatch( chars, len, prefix ) {
  if ( len < prefix.length ) {
    return false;
  }
  for ( var i = 0, n = prefix.length; i < n; ++i ) {
    if ( prefix.charAt(i) != chars[i] ) {
      return false;
    }
  }
  return true;
}

/** like textToHtml but escapes double quotes to be attribute safe. */
function PR_attribToHtml( str ) {
  // '&' must be escaped first so the entities added below survive.
  return str.replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/\"/g, '&quot;')
    .replace(/\xa0/g, '&nbsp;');
}

/** escapes html special characters to html. */
function PR_textToHtml( str ) {
  // '&' must be escaped first so the entities added below survive.
  return str.replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/\xa0/g, '&nbsp;');
}
/** is the given node's innerHTML normally unescaped?
 * Only XMP elements are treated as raw.
 */
function PR_isRawContent( node ) {
  return 'XMP' == node.tagName;
}

/** cached result of the innerHTML sanity probe: null until the first call to
 * PR_getInnerHtml, afterwards true when innerHTML can be trusted.
 */
var PR_innerHtmlWorks = null;

/** returns the html content of node with special characters escaped.
 * @param node a DOM node.
 * @return {String} html
 */
function PR_getInnerHtml( node ) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
  // tag, so we detect that case and emulate innerHTML.
  if ( null == PR_innerHtmlWorks ) {
    var probe = document.createElement('PRE');
    var probeText =
      document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />');
    probe.appendChild(probeText);
    // a working innerHTML escapes every '<' in the text node.
    PR_innerHtmlWorks = !/</.test(probe.innerHTML);
  }

  if ( !PR_innerHtmlWorks ) {
    // emulate innerHTML by serializing the children ourselves.
    var pieces = [];
    var kid = node.firstChild;
    while ( kid ) {
      PR_normalizedHtml(kid, pieces);
      kid = kid.nextSibling;
    }
    return pieces.join('');
  }

  var html = node.innerHTML;
  // XMP tags contain unescaped entities so require special handling.
  return PR_isRawContent(node) ? PR_textToHtml(html) : html;
}
/**
 * walks the DOM returning a properly escaped version of innerHTML.
 * @param node an element (nodeType 1), attribute (2), or text/CDATA (3, 4)
 *   node.  Other node types produce no output.
 * @param {Array} out receives the html as a series of strings.
 */
function PR_normalizedHtml( node, out ) {
  switch ( node.nodeType ) {
    case 1:  // an element
      var name = node.tagName.toLowerCase();
      // '\x3c' is '<'.  The escape keeps a literal tag opener out of this
      // source; the legacy octal form '\074' is a syntax error in strict
      // mode and in modules.
      out.push('\x3c', name);
      for ( var i = 0; i < node.attributes.length; ++i ) {
        var attr = node.attributes[i];
        if ( !attr.specified ) {
          continue;
        }
        out.push(' ');
        PR_normalizedHtml(attr, out);
      }
      out.push('>');
      for ( var child = node.firstChild; child; child = child.nextSibling ) {
        PR_normalizedHtml(child, out);
      }
      // void elements take no end tag.
      if ( node.firstChild ||
           !/^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/
             .test(name) ) {
        out.push('\x3c/', name, '>');
      }
      break;
    case 2: // an attribute
      out.push(node.name.toLowerCase(), '="', PR_attribToHtml(node.value), '"');
      break;
    case 3: case 4: // text
      out.push(PR_textToHtml(node.nodeValue));
      break;
  }
}

/** expand tabs to spaces
 * @param {Array} chunks PR_Tokens possibly containing tabs
 * @param {Number} tabWidth number of spaces between tab columns
 * @return {Array} chunks with tabs replaced with spaces.  Chunks with a null
 *   style are passed through untouched.
 */
function PR_expandTabs( chunks, tabWidth ) {
  // the column of the next character; carried across chunks.
  var charInLine = 0;
  var decodeHelper = new PR_DecodeHelper();

  var chunksOut = [];
  for ( var chunkIndex = 0; chunkIndex < chunks.length; ++chunkIndex ) {
    var chunk = chunks[chunkIndex];
    if ( chunk.style == null ) {
      chunksOut.push(chunk);
      continue;
    }

    var s = chunk.token;
    // index of the first character of s not yet copied to out
    var pos = 0;
    var out = [];

    // walk over each character looking for tabs and newlines.
    // On tabs, expand them.  On newlines, reset charInLine.
    // Otherwise increment charInLine
    for ( var charIndex = 0, n = s.length; charIndex < n;
          charIndex = decodeHelper.next ) {
      decodeHelper.decode(s, charIndex);
      var ch = decodeHelper.ch;

      switch ( ch ) {
        case '\t':
          out.push(s.substring(pos, charIndex));
          // nSpaces is the amount of padding -- the number of spaces needed
          // to move us to the next column, where columns occur at multiples
          // of tabWidth.
          var nSpaces = tabWidth - (charInLine % tabWidth);
          charInLine += nSpaces;
          for ( var p = 0; p < nSpaces; ++p ) {
            out.push(' ');
          }
          pos = decodeHelper.next;
          break;
        case '\n': case '\r':
          charInLine = 0;
          break;
        default:
          ++charInLine;
      }
    }
    out.push(s.substring(pos));
    chunksOut.push(new PR_Token(out.join(''), chunk.style));
  }
  return chunksOut;
}

/** split markup into chunks of html tags (style null) and
 * plain text (style {@link #PR_PLAIN}).
 *
 * @param {String} s html.
 * @return {Array} of PR_Tokens of style PR_PLAIN, and null.
 * @private
 */
function PR_chunkify( s ) {
  // The below pattern matches one of the following
  // (1) /[^<]+/ : A run of characters other than '<'
  // (2) /<\/?[a-zA-Z][^>]*>/ : A probable tag that should not be highlighted
  // (3) /</ : A '<' that does not begin a larger chunk.  Treated as 1
  var chunkPattern = /(?:[^<]+|<\/?[a-zA-Z][^>]*>|<)/g;
  // since the pattern has the 'g' modifier and defines no capturing groups,
  // this will return a list of all chunks which we then classify and wrap as
  // PR_Tokens
  var matches = s.match(chunkPattern);
  var chunks = [];
  if ( matches ) {
    var lastChunk = null;
    for ( var i = 0, n = matches.length; i < n; ++i ) {
      var chunkText = matches[i];
      var style;
      if ( chunkText.length < 2 || chunkText.charAt(0) !== '<' ) {
        // plain text; fold it into a preceding plain chunk if there is one
        if ( lastChunk && lastChunk.style === PR_PLAIN ) {
          lastChunk.token += chunkText;
          continue;
        }
        style = PR_PLAIN;
      }
      else {  // a tag
        style = null;
      }
      lastChunk = new PR_Token(chunkText, style);
      chunks.push(lastChunk);
    }
  }
  return chunks;
}

/** walk the tokenEnds list and the chunk list in parallel to generate a list
 * of split tokens.
 * @param {Array} chunks PR_Tokens
 * @param {Array} tokenEnds PR_TokenEnds whose end positions index into the
 *   concatenation of all chunk text.
 * @return {Array} of PR_Tokens.  Text taken from a chunk with a null style
 *   keeps the null style when the whole remainder of the chunk is emitted.
 * @private
 */
function PR_splitChunks( chunks, tokenEnds ) {
  // the output
  var tokens = [];

  // index into chunks
  var ci = 0;
  // position of beginning of amount written so far in absolute space.
  var posAbs = 0;
  // position of amount written so far in chunk space
  var posChunk = 0;

  // current chunk
  var chunk = new PR_Token('', null);
  // the end of the previously processed region
  var lastEnd = 0;

  for ( var ei = 0, ne = tokenEnds.length; ei < ne; ++ei ) {
    var tokenEnd = tokenEnds[ei];
    var end = tokenEnd.end;
    if ( end === lastEnd ) {
      continue;
    }  // skip empty regions
    lastEnd = end;

    var tokLen = end - posAbs;
    var remainingInChunk = chunk.token.length - posChunk;
    while ( remainingInChunk <= tokLen ) {
      if ( remainingInChunk > 0 ) {
        tokens.push(
            new PR_Token(chunk.token.substring(posChunk, chunk.token.length),
                         null == chunk.style ? null : tokenEnd.style));
      }
      posAbs += remainingInChunk;
      posChunk = 0;
      if ( ci >= chunks.length ) {
        // Out of chunks.  Park on an empty chunk instead of revisiting the
        // last one; otherwise a trailing empty chunk would spin here forever.
        chunk = new PR_Token('', null);
        tokLen = 0;
        break;
      }
      chunk = chunks[ci++];

      tokLen = end - posAbs;
      remainingInChunk = chunk.token.length - posChunk;
    }

    // tokLen is negative only for an out of order end; emit nothing then.
    if ( tokLen > 0 ) {
      tokens.push(
          new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen),
                       tokenEnd.style));
      posAbs += tokLen;
      posChunk += tokLen;
    }
  }

  return tokens;
}

/** splits markup tokens into declarations, tags, and source chunks.
 * @param {Array} chunks PR_Tokens.  Only chunks of style PR_PLAIN are
 *   scanned; the others merely advance the absolute position.
 * @return {Array} of PR_TokenEnds in absolute space.
 * @private
 */
function PR_splitMarkup( chunks ) {
  // A state machine to split out declarations, tags, etc.
  // This state machine deals with absolute space in the text, indexed by k,
  // and position in the current chunk, indexed by i and tokenStart to
  // generate a list of the ends of tokens.
  // Absolute space is calculated by considering the chunks as appended into
  // one big string, as they were before being split.

  // Known failure cases
  // Server side scripting sections such as <?...?> in attributes.
  // i.e. <span class="<? foo ?>">
  // Handling this would require a stack, and we don't use PHP.

  // The output: a list of PR_TokenEnd instances
  var tokenEnds = [];

  // FSM state variable
  var state = 0;
  // position in absolute space of the start of the current chunk
  var k = 0;
  // the start of the current token
  var tokenStart = -1;

  // Try to find a closing tag for any open <style> or <script> tags
  // We can't do this at a later stage because then the following case
  // would fail:
  // <script>document.writeln('<!--');</script>

  // We use tokenChars[:tokenCharsI] to accumulate the tag name so that we
  // can check whether to enter into a no scripting section when the tag ends.
  // At most MAX_TOKEN_CHARS characters are recorded.
  var MAX_TOKEN_CHARS = 12;
  var tokenChars = [];
  var tokenCharsI = 0;
  // if non null, the tag prefix that we need to see to break out.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  for ( var ci = 0, nc = chunks.length; ci < nc; ++ci ) {
    var chunk = chunks[ci];
    if ( PR_PLAIN != chunk.style ) {
      k += chunk.token.length;
      continue;
    }

    var s = chunk.token;

    for ( var i = 0, n = s.length; i < n; /* i = next at bottom */ ) {
      decodeHelper.decode(s, i);
      var ch = decodeHelper.ch;
      var next = decodeHelper.next;

      // set when ch completes a token
      var tokenStyle = null;
      switch ( state ) {
        case 0:  // in text
          if ( '<' == ch ) {
            state = 1;
          }
          break;
        case 1:  // just saw '<'
          tokenCharsI = 0;
          if ( '/' == ch ) {  // only consider close tags if we're in script/style
            state = 7;
          }
          else if ( null == endScriptTag ) {
            if ( '!' == ch ) {
              state = 2;
            }
            else if ( PR_isWordChar(ch) ) {
              state = 8;
            }
            else if ( '?' == ch ) {
              state = 9;
            }
            else if ( '%' == ch ) {
              state = 11;
            }
            else if ( '<' != ch ) {
              state = 0;
            }
          }
          else if ( '<' != ch ) {
            state = 0;
          }
          break;
        case 2:  // saw '<!'
          if ( '-' == ch ) {
            state = 4;
          }
          else if ( PR_isWordChar(ch) ) {
            state = 3;
          }
          else if ( '<' == ch ) {
            state = 1;
          }
          else {
            state = 0;
          }
          break;
        case 3:  // in a declaration
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_DECLARATION;
          }
          break;
        case 4:  // in a comment
          if ( '-' == ch ) {
            state = 5;
          }
          break;
        case 5:  // in a comment, saw '-'
          if ( '-' == ch ) {
            state = 6;
          }
          break;
        case 6:  // in a comment, saw '--'
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_COMMENT;
          }
          else if ( '-' == ch ) {
            state = 6;
          }
          else {
            state = 4;
          }
          break;
        case 7:  // saw '</'
          if ( PR_isWordChar(ch) ) {
            state = 8;
          }
          else if ( '<' == ch ) {
            state = 1;
          }
          else {
            state = 0;
          }
          break;
        case 8:  // in a tag
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_TAG;
          }
          break;
        case 9:  // in a <? ... ?> section
          if ( '?' == ch ) {
            state = 10;
          }
          break;
        case 10:  // in a <? ... ?> section, saw '?'
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_SOURCE;
          }
          else if ( '?' != ch ) {
            state = 9;
          }
          break;
        case 11:  // in a <% ... %> section
          if ( '%' == ch ) {
            state = 12;
          }
          break;
        case 12:  // in a <% ... %> section, saw '%'
          if ( '>' == ch ) {
            state = 0;
            tokenStyle = PR_SOURCE;
          }
          else if ( '%' != ch ) {
            state = 11;
          }
          break;
      }

      if ( tokenCharsI < MAX_TOKEN_CHARS ) {
        tokenChars[tokenCharsI++] = ch.toLowerCase();
      }
      if ( 1 == state ) {
        tokenStart = k + i;
      }
      i = next;
      if ( null != tokenStyle ) {
        if ( endScriptTag ) {
          if ( PR_prefixMatch(tokenChars, tokenCharsI, endScriptTag) ) {
            endScriptTag = null;
          }
        }
        else {
          if ( PR_prefixMatch(tokenChars, tokenCharsI, 'script') ) {
            endScriptTag = '/script';
          }
          else if ( PR_prefixMatch(tokenChars, tokenCharsI, 'style') ) {
            endScriptTag = '/style';
          }
          else if ( PR_prefixMatch(tokenChars, tokenCharsI, 'xmp') ) {
            endScriptTag = '/xmp';
          }
        }
        // disallow the tag if endScriptTag is set and this was not an open
        // tag.
        if ( endScriptTag && tokenCharsI && '/' == tokenChars[0] ) {
          tokenStyle = null;
        }
        if ( null != tokenStyle ) {
          tokenEnds.push(new PR_TokenEnd(tokenStart, PR_PLAIN));
          tokenEnds.push(new PR_TokenEnd(k + next, tokenStyle));
        }
      }
    }
    k += chunk.token.length;
  }
  tokenEnds.push(new PR_TokenEnd(k, PR_PLAIN));

  return tokenEnds;
}
/** splits the given string into comment, string, and "other" tokens.
 * @param {Array} chunks PR_Tokens.  Only chunks of style PR_PLAIN are scanned.
 * @return {Array} of PR_Tokens with style in
 *   (PR_STRING, PR_COMMENT, PR_PLAIN, null)
 *   The result array may contain spurious zero length tokens.  Ignore them.
 *
 * @private
 */
function PR_splitStringAndCommentTokens( chunks ) {
  // a state machine to split out comments, strings, and other stuff
  // positions of ends of tokens in absolute space
  var tokenEnds = [];
  // FSM state variable
  var state = 0;
  // string delimiter
  var delim = -1;
  // absolute position of beginning of current chunk
  var k = 0;

  for ( var ci = 0, nc = chunks.length; ci < nc; ++ci ) {
    var chunk = chunks[ci];
    var s = chunk.token;
    if ( PR_PLAIN == chunk.style ) {
      var decodeHelper = new PR_DecodeHelper();
      // index in s of the previously examined character
      var last = -1;
      var next;
      for ( var i = 0, n = s.length; i < n; last = i, i = next ) {
        decodeHelper.decode(s, i);
        var ch = decodeHelper.ch;
        next = decodeHelper.next;
        switch ( state ) {
          case 0:  // in plain source
            if ( ch == '"' || ch == '\'' || ch == '`' ) {
              tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));
              state = 1;
              delim = ch;
            }
            else if ( ch == '/' ) {
              state = 3;
            }
            else if ( ch == '#' ) {
              tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));
              state = 4;
            }
            break;
          case 1:  // in a string
            if ( ch == delim ) {
              state = 0;
              tokenEnds.push(new PR_TokenEnd(k + next, PR_STRING));
            }
            else if ( ch == '\\' ) {
              state = 2;
            }
            break;
          case 2:  // in a string, after a backslash
            state = 1;
            break;
          case 3:  // saw a '/'
            if ( ch == '/' ) {
              state = 4;
              tokenEnds.push(new PR_TokenEnd(k + last, PR_PLAIN));
            }
            else if ( ch == '*' ) {
              state = 5;
              tokenEnds.push(new PR_TokenEnd(k + last, PR_PLAIN));
            }
            else {
              state = 0;
              // next loop will reenter state 0 with the same value of i, so
              // ch will be reconsidered as start of new token.
              next = i;
            }
            break;
          case 4:  // in a line comment
            if ( ch == '\r' || ch == '\n' ) {
              state = 0;
              tokenEnds.push(new PR_TokenEnd(k + i, PR_COMMENT));
            }
            break;
          case 5:  // in a block comment
            if ( ch == '*' ) {
              state = 6;
            }
            break;
          case 6:  // in a block comment, saw '*'
            if ( ch == '/' ) {
              state = 0;
              tokenEnds.push(new PR_TokenEnd(k + next, PR_COMMENT));
            }
            else if ( ch != '*' ) {
              state = 5;
            }
            break;
        }
      }
    }
    k += s.length;
  }
  var endTokenType;
  switch ( state ) {
    case 1: case 2:
      endTokenType = PR_STRING;
      break;
    case 4: case 5: case 6:
      endTokenType = PR_COMMENT;
      break;
    default:
      endTokenType = PR_PLAIN;
      break;
  }
  // handle unclosed token which can legally happen for line comments (state 4)
  // a token ends at the end
  tokenEnds.push(new PR_TokenEnd(k, endTokenType));

  return PR_splitChunks(chunks, tokenEnds);
}

/** picks the style for a word produced by PR_splitNonStringNonCommentToken.
 * @param {String} t a run of identifier, number, punctuation or space
 *   characters.
 * @return one of PR_KEYWORD, PR_LITERAL, PR_TYPE, PR_PUNCTUATION, PR_PLAIN.
 * @private
 */
function PR_styleForWord_( t ) {
  var wordDecodeHelper = new PR_DecodeHelper();
  wordDecodeHelper.decode(t, 0);
  var ch0 = wordDecodeHelper.ch;
  var isSingleCharacter = wordDecodeHelper.next == t.length;

  if ( PR_isIdentifierStart(ch0) ) {
    // Own-property lookup: a bare PR_keywords[t] is also truthy for inherited
    // members such as "constructor" and "toString".
    if ( Object.prototype.hasOwnProperty.call(PR_keywords, t) &&
         PR_keywords[t] ) {
      return PR_KEYWORD;
    }
    if ( ch0 === '@' ) {
      return PR_LITERAL;
    }
    // Treat any word that starts with an uppercase character and
    // contains at least one lowercase character as a type, or
    // ends with _t.
    // This works perfectly for Java, pretty well for C++, and
    // passably for Python.  The _t catches C structs.
    var isType = false;
    if ( ch0 >= 'A' && ch0 <= 'Z' ) {
      for ( var j = wordDecodeHelper.next;
            j < t.length; j = wordDecodeHelper.next ) {
        wordDecodeHelper.decode(t, j);
        var ch1 = wordDecodeHelper.ch;
        if ( ch1 >= 'a' && ch1 <= 'z' ) {
          isType = true;
          break;
        }
      }
    }
    // The _t test applies whatever the case of the first character, so that
    // size_t and friends qualify.  A bare "_t" does not.
    if ( !isType && !isSingleCharacter && t.length > 2 &&
         t.substring(t.length - 2) == '_t' ) {
      isType = true;
    }
    return isType ? PR_TYPE : PR_PLAIN;
  }
  if ( PR_isDigitChar(ch0) ) {
    return PR_LITERAL;
  }
  if ( !PR_isSpaceChar(ch0) ) {
    return PR_PUNCTUATION;
  }
  return PR_PLAIN;
}

/** used by lexSource to split a non string, non comment token.
 * @param {String} s the text to split.
 * @param {Array} outlist receives PR_Tokens for the words, numbers,
 *   punctuation runs and whitespace runs in s, in order.
 * @private
 */
function PR_splitNonStringNonCommentToken( s, outlist ) {
  // start of the token being accumulated
  var pos = 0;
  // 0: whitespace, 1: identifier, 2: number literal, 3: punctuation
  var state = 0;
  // the next state.
  // if set to -1 then it will cause a reentry to state 0 without consuming
  // another character.  -2 flushes the final token at the end of input.
  var nstate;

  var decodeHelper = new PR_DecodeHelper();
  var next;
  for ( var i = 0; i <= s.length; i = next ) {
    if ( i == s.length ) {
      // nstate will not be equal to state, so it will append the token
      nstate = -2;
      next = i + 1;
    }
    else {
      decodeHelper.decode(s, i);
      next = decodeHelper.next;
      var ch = decodeHelper.ch;

      nstate = state;

      switch ( state ) {
        case 0:  // whitespace state
          if ( PR_isIdentifierStart(ch) ) {
            nstate = 1;
          }
          else if ( PR_isDigitChar(ch) ) {
            nstate = 2;
          }
          else if ( !PR_isSpaceChar(ch) ) {
            nstate = 3;
          }
          if ( nstate && pos < i ) {
            // leaving whitespace: emit the run of spaces
            outlist.push(new PR_Token(s.substring(pos, i), PR_PLAIN));
            pos = i;
          }
          break;
        case 1:  // identifier state
          if ( !PR_isIdentifierPart(ch) ) {
            nstate = -1;
          }
          break;
        case 2:  // number literal state
          // handle numeric literals like
          //     0x7f 300UL 100_000

          // this does not treat floating point values as a single literal
          //     0.1 and 3e-6
          // are each split into multiple tokens
          if ( !(PR_isDigitChar(ch) || PR_isWordChar(ch) || ch == '_') ) {
            nstate = -1;
          }
          break;
        case 3:  // punctuation state
          if ( PR_isIdentifierStart(ch) || PR_isDigitChar(ch) ||
               PR_isSpaceChar(ch) ) {
            nstate = -1;
          }
          break;
      }
    }

    if ( nstate != state ) {
      if ( nstate < 0 ) {
        if ( i > pos ) {
          var word = s.substring(pos, i);
          pos = i;
          outlist.push(new PR_Token(word, PR_styleForWord_(word)));
        }

        state = 0;
        if ( nstate == -1 ) {
          // don't increment.  This allows us to use state 0 to redispatch
          // based on the current character.
          next = i;
          continue;
        }
      }
      state = nstate;
    }
  }
}

/** split a group of chunks of markup.
 * @param {Array} chunks PR_Tokens
 * @return {Array} of PR_Tokens, or chunks itself when it is empty or falsy.
 * @private
 */
function PR_tokenizeMarkup( chunks ) {
  if ( !(chunks && chunks.length) ) {
    return chunks;
  }

  var tokenEnds = PR_splitMarkup(chunks);
  return PR_splitChunks(chunks, tokenEnds);
}

/** split tags attributes and their values out from the tag name, and
 * recursively lex source chunks.
 * @param {Array} tokens PR_Tokens.  Only tokens of style PR_TAG are split;
 *   the others are copied through.
 * @return {Array} of PR_Tokens where tag text is styled as PR_TAG,
 *   PR_ATTRIB_NAME or PR_ATTRIB_VALUE.
 * @private
 */
function PR_splitTagAttributes( tokens ) {
  var tokensOut = [];
  // 0: outside a tag, 1: in the tag name, 2: between attributes,
  // 3: in an attribute name, 4: after an attribute name, 5: after '=',
  // 6: in a quoted value, 7: in an unquoted value
  var state = 0;
  // the style of the text accumulated since the last emit
  var stateStyle = PR_TAG;
  // attribute delimiter for quoted value state.
  var delim = null;
  var decodeHelper = new PR_DecodeHelper();
  for ( var ci = 0; ci < tokens.length; ++ci ) {
    var tok = tokens[ci];
    if ( PR_TAG == tok.style ) {
      var s = tok.token;
      var start = 0;
      for ( var i = 0; i < s.length; /* i = next at bottom */ ) {
        decodeHelper.decode(s, i);
        var ch = decodeHelper.ch;
        var next = decodeHelper.next;

        // null or position of end of chunk to emit.
        var emitEnd = null;
        // null or next value of stateStyle
        var nextStyle = null;
        if ( ch == '>' ) {
          if ( PR_TAG != stateStyle ) {
            emitEnd = i;
            nextStyle = PR_TAG;
          }
        }
        else {
          switch ( state ) {
            case 0:
              if ( '<' == ch ) {
                state = 1;
              }
              break;
            case 1:
              if ( PR_isSpaceChar(ch) ) {
                state = 2;
              }
              break;
            case 2:
              if ( !PR_isSpaceChar(ch) ) {
                nextStyle = PR_ATTRIB_NAME;
                emitEnd = i;
                state = 3;
              }
              break;
            case 3:
              if ( '=' == ch ) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 5;
              }
              else if ( PR_isSpaceChar(ch) ) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 4;
              }
              break;
            case 4:
              if ( '=' == ch ) {
                state = 5;
              }
              else if ( !PR_isSpaceChar(ch) ) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_NAME;
                state = 3;
              }
              break;
            case 5:
              if ( '"' == ch || '\'' == ch ) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_VALUE;
                state = 6;
                delim = ch;
              }
              else if ( !PR_isSpaceChar(ch) ) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_VALUE;
                state = 7;
              }
              break;
            case 6:
              if ( ch == delim ) {
                emitEnd = next;
                nextStyle = PR_TAG;
                state = 2;
              }
              break;
            case 7:
              if ( PR_isSpaceChar(ch) ) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 2;
              }
              break;
          }
        }
        // Compare against null: position 0 is a legitimate emit point when a
        // tag continues in a new token, and the style must still change.
        if ( null !== emitEnd ) {
          if ( emitEnd > start ) {
            tokensOut.push(
                new PR_Token(s.substring(start, emitEnd), stateStyle));
            start = emitEnd;
          }
          stateStyle = nextStyle;
        }
        i = next;
      }
      if ( s.length > start ) {
        tokensOut.push(new PR_Token(s.substring(start, s.length), stateStyle));
      }
    }
    else {
      // a styled non-tag token ends any tag in progress; a style-less one
      // (an embedded html tag) does not.
      if ( tok.style ) {
        state = 0;
        stateStyle = PR_TAG;
      }
      tokensOut.push(tok);
    }
  }
  return tokensOut;
}
/** identify regions of markup that are really source code, and recursively
 * lex them.
 * @param {Array} tokens PR_Tokens.  Not modified.
 * @return {Array} of PR_Tokens in which the content of script, style and xmp
 *   elements and of <? ... ?> and <% ... %> sections has been replaced by
 *   the result of PR_lexSource, wrapped in a span of class embsrc.
 * @private
 */
function PR_splitSourceNodes( tokens ) {
  var tokensOut = [];
  // when we see a <script> tag, store '/' here so that we know to end the
  // source processing.  For <? and <% sections it holds '?' or '%'.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  // the chunks of the source section being accumulated, if any
  var sourceChunks = null;

  for ( var ci = 0, nc = tokens.length; /* break below */; ++ci ) {
    var tok;

    if ( ci < nc ) {
      tok = tokens[ci];
      if ( null == tok.style ) {
        // An embedded html tag.  Keep it in sequence: inside the source
        // section being accumulated if there is one, else in the output.
        // (It used to be appended to the input array, which dropped it from
        // the result and mutated the caller's list.)
        (sourceChunks || tokensOut).push(tok);
        continue;
      }
    }
    else if ( !endScriptTag ) {
      break;
    }
    else {
      // else pretend there's an end tag so we can gracefully handle
      // unclosed source blocks
      tok = new PR_Token('', null);
    }

    var s = tok.token;

    if ( null == endScriptTag ) {
      if ( PR_SOURCE == tok.style ) {
        // split off any starting and trailing <?, <%
        if ( '<' == decodeHelper.decode(s, 0) ) {
          decodeHelper.decode(s, decodeHelper.next);
          if ( '%' == decodeHelper.ch || '?' == decodeHelper.ch ) {
            endScriptTag = decodeHelper.ch;
            tokensOut.push(new PR_Token(s.substring(0, decodeHelper.next),
                                        PR_TAG));
            s = s.substring(decodeHelper.next, s.length);
          }
        }
      }
      else if ( PR_TAG == tok.style ) {
        if ( '<' == decodeHelper.decode(s, 0) &&
             '/' != s.charAt(decodeHelper.next) ) {
          var tagContent = s.substring(decodeHelper.next).toLowerCase();
          // FIXME(msamuel): this does not mirror exactly the code in
          // in PR_splitMarkup that defers splitting tags inside script and
          // style blocks.
          if ( PR_startsWith(tagContent, 'script') ||
               PR_startsWith(tagContent, 'style') ||
               PR_startsWith(tagContent, 'xmp') ) {
            endScriptTag = '/';
          }
        }
      }
    }

    if ( null != endScriptTag ) {
      // the token that closes the source section, once seen
      var endTok = null;
      if ( PR_SOURCE == tok.style ) {
        if ( endScriptTag == '%' || endScriptTag == '?' ) {
          var pos = s.lastIndexOf(endScriptTag);
          if ( pos >= 0 && '>' == decodeHelper.decode(s, pos + 1) &&
               s.length == decodeHelper.next ) {
            endTok = new PR_Token(s.substring(pos, s.length), PR_TAG);
            s = s.substring(0, pos);
          }
        }
        if ( null == sourceChunks ) {
          sourceChunks = [];
        }
        sourceChunks.push(new PR_Token(s, PR_PLAIN));
      }
      else if ( PR_PLAIN == tok.style ) {
        if ( null == sourceChunks ) {
          sourceChunks = [];
        }
        sourceChunks.push(tok);
      }
      else if ( PR_TAG == tok.style ) {
        // if it starts with </ then it must be the end tag.
        if ( '<' == decodeHelper.decode(tok.token, 0) &&
             tok.token.length > decodeHelper.next &&
             '/' == decodeHelper.decode(tok.token, decodeHelper.next) ) {
          endTok = tok;
        }
        else {
          tokensOut.push(tok);
        }
      }
      else if ( ci >= nc ) {
        // force the token to close
        endTok = tok;
      }
      else {
        if ( sourceChunks ) {
          sourceChunks.push(tok);
        }
        else {
          // push remaining tag and attribute tokens from the opening tag
          tokensOut.push(tok);
        }
      }
      if ( endTok ) {
        if ( sourceChunks ) {
          var sourceTokens = PR_lexSource(sourceChunks);
          tokensOut.push(new PR_Token('<span class=embsrc>', null));
          for ( var si = 0, ns = sourceTokens.length; si < ns; ++si ) {
            tokensOut.push(sourceTokens[si]);
          }
          tokensOut.push(new PR_Token('</span>', null));
          sourceChunks = null;
        }
        // the pretend end tag has no text and is not emitted
        if ( endTok.token ) {
          tokensOut.push(endTok);
        }
        endScriptTag = null;
      }
    }
    else {
      tokensOut.push(tok);
    }
  }
  return tokensOut;
}
- * ['"foo"'] -> ['"', 'foo', '"'] - * @private - */ -function PR_splitAttributeQuotes( tokens ) { - var firstPlain = null, lastPlain = null; - for ( var i = 0; i < tokens.length; ++i ) { - if ( PR_PLAIN == tokens[i].style ) { - firstPlain = i; - break; - } - } - for ( var i = tokens.length; --i >= 0; ) { - if ( PR_PLAIN == tokens[i].style ) { - lastPlain = i; - break; - } - } - if ( null == firstPlain ) { - return tokens; - } - - var decodeHelper = new PR_DecodeHelper(); - var fs = tokens[firstPlain].token; - var fc = decodeHelper.decode(fs, 0); - if ( '"' != fc && '\'' != fc ) { - return tokens; - } - var fpos = decodeHelper.next; - - var ls = tokens[lastPlain].token; - var lpos = ls.lastIndexOf('&'); - if ( lpos < 0 ) { - lpos = ls.length - 1; - } - var lc = decodeHelper.decode(ls, lpos); - if ( lc != fc || decodeHelper.next != ls.length ) { - lc = null; - lpos = ls.length; - } - - var tokensOut = []; - for ( var i = 0; i < firstPlain; ++i ) { - tokensOut.push(tokens[i]); - } - tokensOut.push(new PR_Token(fs.substring(0, fpos), PR_ATTRIB_VALUE)); - if ( lastPlain == firstPlain ) { - tokensOut.push(new PR_Token(fs.substring(fpos, lpos), PR_PLAIN)); - } - else { - tokensOut.push(new PR_Token(fs.substring(fpos, fs.length), PR_PLAIN)); - for ( var i = firstPlain + 1; i < lastPlain; ++i ) { - tokensOut.push(tokens[i]); - } - if ( lc ) { - tokens.push(new PR_Token(ls.substring(0, lpos), PR_PLAIN)); - } - else { - tokens.push(tokens[lastPlain]); - } - } - if ( lc ) { - tokensOut.push(new PR_Token(ls.substring(lpos, ls.length), PR_PLAIN)); - } - for ( var i = lastPlain + 1; i < tokens.length; ++i ) { - tokensOut.push(tokens[i]); - } - return tokensOut; -} - -/** identify attribute values that really contain source code and recursively - * lex them. 
- * @private - */ -function PR_splitSourceAttributes( tokens ) { - var tokensOut = []; - - var sourceChunks = null; - var inSource = false; - var name = ''; - - for ( var ci = 0, nc = tokens.length; ci < nc; ++ci ) { - var tok = tokens[ci]; - var outList = tokensOut; - if ( PR_TAG == tok.style ) { - if ( inSource ) { - inSource = false; - name = ''; - if ( sourceChunks ) { - tokensOut.push(new PR_Token('<span class=embsrc>', null)); - var sourceTokens = - PR_lexSource(PR_splitAttributeQuotes(sourceChunks)); - for ( var si = 0, ns = sourceTokens.length; si < ns; ++si ) { - tokensOut.push(sourceTokens[si]); - } - tokensOut.push(new PR_Token('</span>', null)); - sourceChunks = null; - } - } - else if ( name && tok.token.indexOf('=') >= 0 ) { - var nameLower = name.toLowerCase(); - if ( PR_startsWith(nameLower, 'on') || 'style' == nameLower ) { - inSource = true; - } - } - else { - name = ''; - } - } - else if ( PR_ATTRIB_NAME == tok.style ) { - name += tok.token; - } - else if ( PR_ATTRIB_VALUE == tok.style ) { - if ( inSource ) { - if ( null == sourceChunks ) { - sourceChunks = []; - } - outList = sourceChunks; - tok = new PR_Token(tok.token, PR_PLAIN); - } - } - else { - if ( sourceChunks ) { - outList = sourceChunks; - } - } - outList.push(tok); - } - return tokensOut; -} - -/** returns a list of PR_Token objects given chunks of source code. - * - * This code treats ", ', and ` as string delimiters, and \ as a string escape. - * It does not recognize perl's qq() style strings. It has no special handling - * for double delimiter escapes as in basic, or tje tripled delimiters used in - * python, but should work on those regardless although in those cases a single - * string literal may be broken up into multiple adjacent string literals. - * - * It recognizes C, C++, and shell style comments. - * - * @param chunks PR_Tokens with style in (null, PR_PLAIN) - */ -function PR_lexSource( chunks ) { - // split into strings, comments, and other. 
- // We do this because strings and comments are easily recognizable and can - // contain stuff that looks like other tokens, so we want to mark those early - // so we don't recurse into them. - var tokens = PR_splitStringAndCommentTokens(chunks); - - // split non comment|string tokens on whitespace and word boundaries - var tokensOut = []; - for ( var i = 0; i < tokens.length; ++i ) { - var tok = tokens[i]; - if ( PR_PLAIN === tok.style ) { - PR_splitNonStringNonCommentToken(tok.token, tokensOut); - continue; - } - tokensOut.push(tok); - } - - return tokensOut; -} - -/** returns a list of PR_Token objects given a string of markup. - * - * This code assumes that < tokens are html escaped, but " are not. - * It will do a resonable job with <, but will not recognize an " - * as starting a string. - * - * This code recognizes a number of constructs. - * <!-- ... --> comment - * <!\w ... > declaration - * <\w ... > tag - * </\w ... > tag - * <?...?> embedded source - * &[#\w]...; entity - * - * It does not recognizes %foo; entities. - * - * It will recurse into any <style>, <script>, and on* attributes using - * PR_lexSource. - */ -function PR_lexMarkup( chunks ) { - // This function works as follows: - // 1) Start by splitting the markup into text and tag chunks - // Input: String s - // Output: List<PR_Token> where style in (PR_PLAIN, null) - // 2) Then split the text chunks further into comments, declarations, - // tags, etc. - // After each split, consider whether the token is the start of an - // embedded source section, i.e. is an open <script> tag. If it is, - // find the corresponding close token, and don't bother to lex in between. - // Input: List<String> - // Output: List<PR_Token> with style in (PR_TAG, PR_PLAIN, PR_SOURCE, null) - // 3) Finally go over each tag token and split out attribute names and values. 
- // Input: List<PR_Token> - // Output: List<PR_Token> where style in - // (PR_TAG, PR_PLAIN, PR_SOURCE, NAME, VALUE, null) - var tokensOut = PR_tokenizeMarkup(chunks); - tokensOut = PR_splitTagAttributes(tokensOut); - tokensOut = PR_splitSourceNodes(tokensOut); - tokensOut = PR_splitSourceAttributes(tokensOut); - return tokensOut; -} - -/** - * classify the string as either source or markup and lex appropriately. - * @param {String} html - */ -function PR_lexOne( html ) { - var chunks = PR_expandTabs(PR_chunkify(html), PR_TAB_WIDTH); - - // treat it as markup if the first non whitespace character is a < and the - // last non-whitespace character is a > - var isMarkup = false; - for ( var i = 0; i < chunks.length; ++i ) { - if ( PR_PLAIN == chunks[i].style ) { - if ( PR_startsWith(PR_trim(chunks[i].token), '<') ) { - for ( var j = chunks.length; --j >= 0; ) { - if ( PR_PLAIN == chunks[j].style ) { - isMarkup = PR_endsWith(PR_trim(chunks[j].token), '>'); - break; - } - } - } - break; - } - } - - return isMarkup ? PR_lexMarkup(chunks) : PR_lexSource(chunks); -} - -/** pretty print a chunk of code. - * - * @param s code as html - * @return code as html, but prettier - */ -function prettyPrintOne( s ) { - try { - var tokens = PR_lexOne(s); - var out = []; - var lastStyle = null; - for ( var i = 0; i < tokens.length; i++ ) { - var t = tokens[i]; - if ( t.style != lastStyle ) { - if ( lastStyle != null ) { - out.push('</span>'); - } - if ( t.style != null ) { - out.push('<span class=', t.style, '>'); - } - lastStyle = t.style; - } - var html = t.token; - if ( null != t.style ) { - // This interacts badly with some wikis which introduces paragraph tags - // into pre blocks for some strange reason. - // It's necessary for IE though which seems to lose the preformattedness - // of <pre> tags when their innerHTML is assigned. 
- // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html - html = html - .replace(/(\r\n?|\n| ) /g, '$1 ') - .replace(/\r\n?|\n/g, '<br>'); - } - out.push(html); - } - if ( lastStyle != null ) { - out.push('</span>'); - } - return out.join(''); - } - catch ( e ) { - if ( 'console' in window ) { - console.log(e); - console.trace(); - } - return s; - } -} - -/** find all the < pre > and < code > tags in the DOM with class=prettyprint and - * prettify them. - */ -function prettyPrint() { - // fetch a list of nodes to rewrite - var codeSegments = [ - document.getElementsByTagName('pre'), - document.getElementsByTagName('code'), - document.getElementsByTagName('xmp') ]; - var elements = []; - for ( var i = 0; i < codeSegments.length; ++i ) { - for ( var j = 0; j < codeSegments[i].length; ++j ) { - elements.push(codeSegments[i][j]); - } - } - codeSegments = null; - - // the loop is broken into a series of continuations to make sure that we - // don't make the browser unresponsive when rewriting a large page. - var k = 0; - - function doWork() { - var endTime = new Date().getTime() + 250; - for ( ; k < elements.length && new Date().getTime() < endTime; k++ ) { - var cs = elements[k]; - if ( cs.className && cs.className.indexOf('prettyprint') >= 0 ) { - - // make sure this is not nested in an already prettified element - var nested = false; - for ( var p = cs.parentNode; p != null; p = p.parentNode ) { - if ( (p.tagName == 'pre' || p.tagName == 'code' || - p.tagName == 'xmp') && - p.className && p.className.indexOf('prettyprint') >= 0 ) { - nested = true; - break; - } - } - if ( !nested ) { - // fetch the content as a snippet of properly escaped HTML. - // Firefox adds newlines at the end. - var content = PR_getInnerHtml(cs); - content = content.replace(/(?:\r\n?|\n)$/, ''); - - // do the pretty printing - var newContent = prettyPrintOne(content); - - // push the prettified html back into the tag. 
- if ( !PR_isRawContent(cs) ) { - // just replace the old html with the new - cs.innerHTML = newContent; - } - else { - // we need to change the tag to a <pre> since <xmp>s do not allow - // embedded tags such as the span tags used to attach styles to - // sections of source code. - var pre = document.createElement('PRE'); - for ( var i = 0; i < cs.attributes.length; ++i ) { - var a = cs.attributes[i]; - if ( a.specified ) { - pre.setAttribute(a.name, a.value); - } - } - pre.innerHTML = newContent; - // remove the old - cs.parentNode.replaceChild(pre, cs); - } - } - } - } - if ( k < elements.length ) { - // finish up in a continuation - setTimeout(doWork, 250); - } - } - - doWork(); -} |