diff options
author | 2015-02-26 13:39:28 +0000 | |
---|---|---|
committer | 2015-02-26 13:39:28 +0000 | |
commit | 89f012dd8b5c75573668c0a5a984d814da31c46f (patch) | |
tree | 74d4c305f67ab2be73d18e22eb7597e8da6ec588 /src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java | |
parent | 5a4f28664237fd5d53273c791f5f2decbf27d45b (diff) |
Open source all the tests under lib/syntax/.
--
MOS_MIGRATED_REVID=87244284
Diffstat (limited to 'src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java')
-rw-r--r-- | src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java new file mode 100644 index 0000000000..fd5385c31d --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java @@ -0,0 +1,399 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.syntax; + +import com.google.devtools.build.lib.events.Event; +import com.google.devtools.build.lib.events.EventHandler; +import com.google.devtools.build.lib.events.EventKind; +import com.google.devtools.build.lib.events.Location; +import com.google.devtools.build.lib.events.Reporter; +import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.util.FsApparatus; + +import junit.framework.TestCase; + +/** + * Tests of tokenization behavior of the {@link Lexer}. + */ +public class LexerTest extends TestCase implements EventHandler { + + private FsApparatus scratch = FsApparatus.newInMemory(); + + /** + * Create a lexer which takes input from the specified string. Resets the + * error handler beforehand. + */ + private Lexer createLexer(String input) { + Path somePath = scratch.path("/some/path.txt"); + ParserInputSource inputSource = ParserInputSource.create(input, somePath); + Reporter reporter = new Reporter(); + reporter.addHandler(this); + return new Lexer(inputSource, reporter); + } + + public Token[] tokens(String input) { + return createLexer(input).getTokens().toArray(new Token[0]); + } + + /** + * Lexes the specified input string, and returns a string containing just the + * linenumbers of each token. + */ + private String linenums(String input) { + Lexer lexer = createLexer(input); + StringBuilder buf = new StringBuilder(); + for (Token tok : lexer.getTokens()) { + if (buf.length() > 0) { + buf.append(' '); + } + int line = + lexer.createLocation(tok.left, tok.left).getStartLineAndColumn().getLine(); + buf.append(line); + } + return buf.toString(); + } + + private String lastError; + + private Location lastErrorLocation; + + @Override + public void handle(Event event) { + if (EventKind.ERRORS.contains(event.getKind())) { + lastErrorLocation = event.getLocation(); + lastError = lastErrorLocation.getPath() + ":" + + event.getLocation().getStartLineAndColumn().getLine() + ": " + + event.getMessage(); + } + } + + /** + * Returns a string containing the names of the tokens and their associated + * values. (String-literals are printed without escaping.) + */ + private static String values(Token[] tokens) { + StringBuilder buffer = new StringBuilder(); + for (Token token : tokens) { + if (buffer.length() > 0) { + buffer.append(' '); + } + buffer.append(token.kind.name()); + if (token.value != null) { + buffer.append('(').append(token.value).append(')'); + } + } + return buffer.toString(); + } + + /** + * Returns a string containing just the names of the tokens. + */ + private static String names(Token[] tokens) { + StringBuilder buf = new StringBuilder(); + for (Token tok : tokens) { + if (buf.length() > 0) { + buf.append(' '); + } + buf.append(tok.kind.name()); + } + return buf.toString(); + } + + /** + * Returns a string containing just the half-open position intervals of each + * token. e.g. "[3,4) [4,9)". + */ + private static String positions(Token[] tokens) { + StringBuilder buf = new StringBuilder(); + for (Token tok : tokens) { + if (buf.length() > 0) { + buf.append(' '); + } + buf.append('[') + .append(tok.left) + .append(',') + .append(tok.right) + .append(')'); + } + return buf.toString(); + } + + public void testBasics1() throws Exception { + assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz) "))); + assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz )"))); + assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens(" wiz)"))); + assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens(" wiz ) "))); + assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz\t)"))); + } + + public void testBasics2() throws Exception { + assertEquals("RPAREN NEWLINE EOF", names(tokens(")"))); + assertEquals("RPAREN NEWLINE EOF", names(tokens(" )"))); + assertEquals("RPAREN NEWLINE EOF", names(tokens(" ) "))); + assertEquals("RPAREN NEWLINE EOF", names(tokens(") "))); + } + + public void testBasics3() throws Exception { + assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456\n789"))); + assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123 #456\n789"))); + assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456 \n789"))); + assertEquals("INT COMMENT NEWLINE INDENT INT NEWLINE OUTDENT NEWLINE EOF", + names(tokens("123#456\n 789"))); + assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456\n789 "))); + } + + public void testBasics4() throws Exception { + assertEquals("NEWLINE EOF", names(tokens(""))); + assertEquals("COMMENT NEWLINE EOF", names(tokens("# foo"))); + assertEquals("INT INT INT INT NEWLINE EOF", names(tokens("1 2 3 4"))); + assertEquals("INT DOT INT NEWLINE EOF", names(tokens("1.234"))); + assertEquals("IDENTIFIER LPAREN IDENTIFIER COMMA IDENTIFIER RPAREN " + + "NEWLINE EOF", names(tokens("foo(bar, wiz)"))); + } + + public void testIntegers() throws Exception { + // Detection of MINUS immediately following integer constant proves we + // don't consume too many chars. + + // decimal + assertEquals("INT(12345) MINUS NEWLINE EOF", values(tokens("12345-"))); + + // octal + assertEquals("INT(5349) MINUS NEWLINE EOF", values(tokens("012345-"))); + + // octal (bad) + assertEquals("INT(0) MINUS NEWLINE EOF", values(tokens("012349-"))); + assertEquals("/some/path.txt:1: invalid base-8 integer constant: 012349", + lastError.toString()); + + // hexadecimal (uppercase) + assertEquals("INT(1193055) MINUS NEWLINE EOF", values(tokens("0X12345F-"))); + + // hexadecimal (lowercase) + assertEquals("INT(1193055) MINUS NEWLINE EOF", values(tokens("0x12345f-"))); + + // hexadecimal (lowercase) [note: "g" cause termination of token] + assertEquals("INT(74565) IDENTIFIER(g) MINUS NEWLINE EOF", + values(tokens("0x12345g-"))); + } + + public void testIntegersAndDot() throws Exception { + assertEquals("INT(1) DOT INT(2345) NEWLINE EOF", values(tokens("1.2345"))); + + assertEquals("INT(1) DOT INT(2) DOT INT(345) NEWLINE EOF", + values(tokens("1.2.345"))); + + assertEquals("INT(1) DOT INT(0) NEWLINE EOF", values(tokens("1.23E10"))); + assertEquals("/some/path.txt:1: invalid base-10 integer constant: 23E10", + lastError.toString()); + + assertEquals("INT(1) DOT INT(0) MINUS INT(10) NEWLINE EOF", + values(tokens("1.23E-10"))); + assertEquals("/some/path.txt:1: invalid base-10 integer constant: 23E", + lastError.toString()); + + assertEquals("DOT INT(123) NEWLINE EOF", values(tokens(". 123"))); + assertEquals("DOT INT(123) NEWLINE EOF", values(tokens(".123"))); + assertEquals("DOT IDENTIFIER(abc) NEWLINE EOF", values(tokens(".abc"))); + + assertEquals("IDENTIFIER(foo) DOT INT(123) NEWLINE EOF", + values(tokens("foo.123"))); + assertEquals("IDENTIFIER(foo) DOT IDENTIFIER(bcd) NEWLINE EOF", + values(tokens("foo.bcd"))); // 'b' are hex chars + assertEquals("IDENTIFIER(foo) DOT IDENTIFIER(xyz) NEWLINE EOF", + values(tokens("foo.xyz"))); + } + + public void testStringDelimiters() throws Exception { + assertEquals("STRING(foo) NEWLINE EOF", values(tokens("\"foo\""))); + assertEquals("STRING(foo) NEWLINE EOF", values(tokens("'foo'"))); + } + + public void testQuotesInStrings() throws Exception { + assertEquals("STRING(foo'bar) NEWLINE EOF", values(tokens("'foo\\'bar'"))); + assertEquals("STRING(foo'bar) NEWLINE EOF", values(tokens("\"foo'bar\""))); + assertEquals("STRING(foo\"bar) NEWLINE EOF", values(tokens("'foo\"bar'"))); + assertEquals("STRING(foo\"bar) NEWLINE EOF", + values(tokens("\"foo\\\"bar\""))); + } + + public void testStringEscapes() throws Exception { + assertEquals("STRING(a\tb\nc\rd) NEWLINE EOF", + values(tokens("'a\\tb\\nc\\rd'"))); // \t \r \n + assertEquals("STRING(x\\hx) NEWLINE EOF", + values(tokens("'x\\hx'"))); // \h is unknown => "\h" + assertEquals("STRING(\\$$) NEWLINE EOF", values(tokens("'\\$$'"))); + assertEquals("STRING(ab) NEWLINE EOF", + values(tokens("'a\\\nb'"))); // escape end of line + + assertEquals("STRING(abcd) NEWLINE EOF", + values(tokens("r'abcd'"))); + assertEquals("STRING(abcd) NEWLINE EOF", + values(tokens("r\"abcd\""))); + assertEquals("STRING(a\\tb\\nc\\rd) NEWLINE EOF", + values(tokens("r'a\\tb\\nc\\rd'"))); // r'a\tb\nc\rd' + assertEquals("STRING(a\\\") NEWLINE EOF", + values(tokens("r\"a\\\"\""))); // r"a\"" + assertEquals("STRING(a\\\\b) NEWLINE EOF", + values(tokens("r'a\\\\b'"))); // r'a\\b' + assertEquals("STRING(ab) IDENTIFIER(r) NEWLINE EOF", + values(tokens("r'ab'r"))); + + assertEquals("STRING(abcd) NEWLINE EOF", + values(tokens("\"ab\\ucd\""))); + assertEquals("/some/path.txt:1: escape sequence not implemented: \\u", + lastError.toString()); + } + + public void testOctalEscapes() throws Exception { + // Regression test for a bug. + assertEquals("STRING(\0 \1 \t \u003f I I1 \u00ff \u00ff \u00fe) NEWLINE EOF", + values(tokens("'\\0 \\1 \\11 \\77 \\111 \\1111 \\377 \\777 \\776'"))); + // Test boundaries (non-octal char, EOF). + assertEquals("STRING(\1b \1) NEWLINE EOF", values(tokens("'\\1b \\1'"))); + } + + public void testTripleQuotedStrings() throws Exception { + assertEquals("STRING(a\"b'c \n d\"\"e) NEWLINE EOF", + values(tokens("\"\"\"a\"b'c \n d\"\"e\"\"\""))); + assertEquals("STRING(a\"b'c \n d\"\"e) NEWLINE EOF", + values(tokens("'''a\"b'c \n d\"\"e'''"))); + } + + public void testBadChar() throws Exception { + assertEquals("IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF", + values(tokens("a$b"))); + assertEquals("/some/path.txt:1: invalid character: '$'", + lastError.toString()); + } + + public void testIndentation() throws Exception { + assertEquals("INT(1) NEWLINE INT(2) NEWLINE INT(3) NEWLINE EOF", + values(tokens("1\n2\n3"))); + assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " + + "INT(4) NEWLINE EOF", values(tokens("1\n 2\n 3\n4 "))); + assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " + + "NEWLINE EOF", values(tokens("1\n 2\n 3"))); + assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " + + "OUTDENT OUTDENT NEWLINE EOF", + values(tokens("1\n 2\n 3"))); + assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " + + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF", + values(tokens("1\n 2\n 3\n 4\n5"))); + + assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " + + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF", + values(tokens("1\n 2\n 3\n 4\n5"))); + assertEquals("/some/path.txt:4: indentation error", lastError.toString()); + } + + public void testIndentationInsideParens() throws Exception { + // Indentation is ignored inside parens: + assertEquals("INT(1) LPAREN INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", + values(tokens("1 (\n 2\n 3\n 4\n5"))); + assertEquals("INT(1) LBRACE INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", + values(tokens("1 {\n 2\n 3\n 4\n5"))); + assertEquals("INT(1) LBRACKET INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", + values(tokens("1 [\n 2\n 3\n 4\n5"))); + assertEquals("INT(1) LBRACKET INT(2) RBRACKET NEWLINE INDENT INT(3) " + + "NEWLINE INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF", + values(tokens("1 [\n 2]\n 3\n 4\n5"))); + } + + public void testIndentationAtEOF() throws Exception { + // Matching OUTDENTS are created at EOF: + assertEquals("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF", + values(tokens("\n 1"))); + } + + public void testBlankLineIndentation() throws Exception { + // Blank lines and comment lines should not generate any newlines indents + // (but note that every input ends with NEWLINE EOF). + assertEquals("COMMENT NEWLINE EOF", names(tokens("\n #\n"))); + assertEquals("COMMENT NEWLINE EOF", names(tokens(" #"))); + assertEquals("COMMENT NEWLINE EOF", names(tokens(" #\n"))); + assertEquals("COMMENT NEWLINE EOF", names(tokens(" #comment\n"))); + assertEquals("DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE " + + "COMMENT INDENT RETURN IDENTIFIER NEWLINE " + + "OUTDENT NEWLINE EOF", + names(tokens("def f(x):\n" + + " # comment\n" + + "\n" + + " \n" + + " return x\n"))); + } + + public void testMultipleCommentLines() throws Exception { + assertEquals("COMMENT NEWLINE COMMENT COMMENT COMMENT " + + "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE " + + "INDENT RETURN IDENTIFIER NEWLINE OUTDENT NEWLINE EOF", + names(tokens("# Copyright\n" + + "#\n" + + "# A comment line\n" + + "# An adjoining line\n" + + "def f(x):\n" + + " return x\n"))); + } + + public void testBackslash() throws Exception { + assertEquals("IDENTIFIER IDENTIFIER NEWLINE EOF", + names(tokens("a\\\nb"))); + assertEquals("IDENTIFIER ILLEGAL IDENTIFIER NEWLINE EOF", + names(tokens("a\\ b"))); + assertEquals("IDENTIFIER LPAREN INT RPAREN NEWLINE EOF", + names(tokens("a(\\\n2)"))); + } + + public void testTokenPositions() throws Exception { + // foo ( bar , { 1 : + assertEquals("[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)" + // 'quux' } ) NEWLINE EOF + + " [13,19) [19,20) [20,21) [20,21) [21,21)", + positions(tokens("foo(bar, {1: 'quux'})"))); + } + + public void testLineNumbers() throws Exception { + assertEquals("1 1 1 1 2 2 2 2 4 4 4 4 4", + linenums("foo = 1\nbar = 2\n\nwiz = 3")); + + assertEquals("IDENTIFIER(foo) EQUALS INT(1) NEWLINE " + + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE " + + "IDENTIFIER(wiz) EQUALS NEWLINE " + + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE EOF", + values(tokens("foo = 1\nbar = 2\n\nwiz = $\nbar = 2"))); + assertEquals("/some/path.txt:4: invalid character: '$'", + lastError.toString()); + + // '\\n' in string should not increment linenum: + String s = "1\n'foo\\nbar'\3"; + assertEquals("INT(1) NEWLINE STRING(foo\nbar) NEWLINE EOF", + values(tokens(s))); + assertEquals("1 1 2 2 2", linenums(s)); + } + + public void testContainsErrors() throws Exception { + Lexer lexerSuccess = createLexer("foo"); + assertFalse(lexerSuccess.containsErrors()); + + Lexer lexerFail = createLexer("f$o"); + assertTrue(lexerFail.containsErrors()); + + String s = "'unterminated"; + lexerFail = createLexer(s); + assertTrue(lexerFail.containsErrors()); + assertEquals(0, lastErrorLocation.getStartOffset()); + assertEquals(s.length(), lastErrorLocation.getEndOffset()); + assertEquals("STRING(unterminated) NEWLINE EOF", values(tokens(s))); + } +} |