aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
diff options
context:
space:
mode:
author: Ulf Adams <ulfjack@google.com> 2015-02-26 13:39:28 +0000
committer: Han-Wen Nienhuys <hanwen@google.com> 2015-02-26 13:39:28 +0000
commit 89f012dd8b5c75573668c0a5a984d814da31c46f (patch)
tree 74d4c305f67ab2be73d18e22eb7597e8da6ec588 /src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
parent 5a4f28664237fd5d53273c791f5f2decbf27d45b (diff)
Open source all the tests under lib/syntax/.
-- MOS_MIGRATED_REVID=87244284
Diffstat (limited to 'src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java')
-rw-r--r-- src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java 399
1 files changed, 399 insertions, 0 deletions
diff --git a/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
new file mode 100644
index 0000000000..fd5385c31d
--- /dev/null
+++ b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
@@ -0,0 +1,399 @@
+// Copyright 2006 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.syntax;
+
+import com.google.devtools.build.lib.events.Event;
+import com.google.devtools.build.lib.events.EventHandler;
+import com.google.devtools.build.lib.events.EventKind;
+import com.google.devtools.build.lib.events.Location;
+import com.google.devtools.build.lib.events.Reporter;
+import com.google.devtools.build.lib.vfs.Path;
+import com.google.devtools.build.lib.vfs.util.FsApparatus;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests of tokenization behavior of the {@link Lexer}.
+ */
+public class LexerTest extends TestCase implements EventHandler {
+
+ private FsApparatus scratch = FsApparatus.newInMemory();
+
+ /**
+  * Creates a lexer that reads from the given string and reports its events
+  * to this test (see {@link #handle}).
+  *
+  * <p>The recorded error state ({@code lastError} and
+  * {@code lastErrorLocation}) is cleared first, so that assertions made after
+  * lexing only observe errors produced by this input and never a stale error
+  * left over from an earlier call in the same test method.
+  *
+  * @param input the source text to tokenize
+  * @return a lexer over {@code input}, attributed to "/some/path.txt"
+  */
+ private Lexer createLexer(String input) {
+   // Reset before constructing the Lexer: construction may already report
+   // errors (testContainsErrors inspects them right after this call).
+   lastError = null;
+   lastErrorLocation = null;
+
+   Path somePath = scratch.path("/some/path.txt");
+   ParserInputSource inputSource = ParserInputSource.create(input, somePath);
+   Reporter reporter = new Reporter();
+   reporter.addHandler(this);
+   return new Lexer(inputSource, reporter);
+ }
+
+ public Token[] tokens(String input) {
+ return createLexer(input).getTokens().toArray(new Token[0]);
+ }
+
+ /**
+  * Tokenizes {@code input} and renders the line number on which each token
+  * starts, in token order, separated by single spaces (e.g. "1 1 2").
+  */
+ private String linenums(String input) {
+   Lexer lexer = createLexer(input);
+   StringBuilder out = new StringBuilder();
+   String separator = "";
+   for (Token token : lexer.getTokens()) {
+     // Only the start of the token matters, so its left offset is used for
+     // both ends of the location.
+     int line =
+         lexer.createLocation(token.left, token.left).getStartLineAndColumn().getLine();
+     out.append(separator).append(line);
+     separator = " ";
+   }
+   return out.toString();
+ }
+
+ private String lastError;
+
+ private Location lastErrorLocation;
+
+ /**
+  * Records the most recent error-level event: its location, plus a summary
+  * of the form {@code path:line: message}. Events whose kind is not in
+  * {@code EventKind.ERRORS} are ignored.
+  */
+ @Override
+ public void handle(Event event) {
+   if (!EventKind.ERRORS.contains(event.getKind())) {
+     return;
+   }
+   lastErrorLocation = event.getLocation();
+   lastError = lastErrorLocation.getPath()
+       + ":" + event.getLocation().getStartLineAndColumn().getLine()
+       + ": " + event.getMessage();
+ }
+
+ /**
+  * Renders tokens as space-separated kind names, each followed by its value
+  * in parentheses when the token carries one, e.g. "INT(1) NEWLINE EOF".
+  * String-literal values are printed raw, without re-escaping.
+  */
+ private static String values(Token[] tokens) {
+   StringBuilder out = new StringBuilder();
+   for (int i = 0; i < tokens.length; i++) {
+     Token current = tokens[i];
+     if (i > 0) {
+       out.append(' ');
+     }
+     out.append(current.kind.name());
+     if (current.value == null) {
+       // Tokens without a value are shown by kind name only.
+       continue;
+     }
+     out.append('(').append(current.value).append(')');
+   }
+   return out.toString();
+ }
+
+ /**
+  * Renders tokens as their kind names only, separated by single spaces,
+  * e.g. "IDENTIFIER RPAREN NEWLINE EOF".
+  */
+ private static String names(Token[] tokens) {
+   StringBuilder out = new StringBuilder();
+   String separator = "";
+   for (Token token : tokens) {
+     out.append(separator).append(token.kind.name());
+     separator = " ";
+   }
+   return out.toString();
+ }
+
+ /**
+  * Renders the source span of every token as a half-open interval
+  * {@code [left,right)}, separated by single spaces, e.g. "[3,4) [4,9)".
+  */
+ private static String positions(Token[] tokens) {
+   StringBuilder out = new StringBuilder();
+   for (int i = 0; i < tokens.length; i++) {
+     if (i > 0) {
+       out.append(' ');
+     }
+     Token token = tokens[i];
+     out.append('[').append(token.left);
+     out.append(',').append(token.right);
+     out.append(')');
+   }
+   return out.toString();
+ }
+
+ public void testBasics1() throws Exception {
+ assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz) ")));
+ assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz )")));
+ assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens(" wiz)")));
+ assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens(" wiz ) ")));
+ assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz\t)")));
+ }
+
+ public void testBasics2() throws Exception {
+ assertEquals("RPAREN NEWLINE EOF", names(tokens(")")));
+ assertEquals("RPAREN NEWLINE EOF", names(tokens(" )")));
+ assertEquals("RPAREN NEWLINE EOF", names(tokens(" ) ")));
+ assertEquals("RPAREN NEWLINE EOF", names(tokens(") ")));
+ }
+
+ public void testBasics3() throws Exception {
+ assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456\n789")));
+ assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123 #456\n789")));
+ assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456 \n789")));
+ assertEquals("INT COMMENT NEWLINE INDENT INT NEWLINE OUTDENT NEWLINE EOF",
+ names(tokens("123#456\n 789")));
+ assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456\n789 ")));
+ }
+
+ public void testBasics4() throws Exception {
+ assertEquals("NEWLINE EOF", names(tokens("")));
+ assertEquals("COMMENT NEWLINE EOF", names(tokens("# foo")));
+ assertEquals("INT INT INT INT NEWLINE EOF", names(tokens("1 2 3 4")));
+ assertEquals("INT DOT INT NEWLINE EOF", names(tokens("1.234")));
+ assertEquals("IDENTIFIER LPAREN IDENTIFIER COMMA IDENTIFIER RPAREN "
+ + "NEWLINE EOF", names(tokens("foo(bar, wiz)")));
+ }
+
+ public void testIntegers() throws Exception {
+ // Detection of MINUS immediately following integer constant proves we
+ // don't consume too many chars.
+
+ // decimal
+ assertEquals("INT(12345) MINUS NEWLINE EOF", values(tokens("12345-")));
+
+ // octal
+ assertEquals("INT(5349) MINUS NEWLINE EOF", values(tokens("012345-")));
+
+ // octal (bad)
+ assertEquals("INT(0) MINUS NEWLINE EOF", values(tokens("012349-")));
+ assertEquals("/some/path.txt:1: invalid base-8 integer constant: 012349",
+ lastError.toString());
+
+ // hexadecimal (uppercase)
+ assertEquals("INT(1193055) MINUS NEWLINE EOF", values(tokens("0X12345F-")));
+
+ // hexadecimal (lowercase)
+ assertEquals("INT(1193055) MINUS NEWLINE EOF", values(tokens("0x12345f-")));
+
+ // hexadecimal (lowercase) [note: "g" cause termination of token]
+ assertEquals("INT(74565) IDENTIFIER(g) MINUS NEWLINE EOF",
+ values(tokens("0x12345g-")));
+ }
+
+ public void testIntegersAndDot() throws Exception {
+ assertEquals("INT(1) DOT INT(2345) NEWLINE EOF", values(tokens("1.2345")));
+
+ assertEquals("INT(1) DOT INT(2) DOT INT(345) NEWLINE EOF",
+ values(tokens("1.2.345")));
+
+ assertEquals("INT(1) DOT INT(0) NEWLINE EOF", values(tokens("1.23E10")));
+ assertEquals("/some/path.txt:1: invalid base-10 integer constant: 23E10",
+ lastError.toString());
+
+ assertEquals("INT(1) DOT INT(0) MINUS INT(10) NEWLINE EOF",
+ values(tokens("1.23E-10")));
+ assertEquals("/some/path.txt:1: invalid base-10 integer constant: 23E",
+ lastError.toString());
+
+ assertEquals("DOT INT(123) NEWLINE EOF", values(tokens(". 123")));
+ assertEquals("DOT INT(123) NEWLINE EOF", values(tokens(".123")));
+ assertEquals("DOT IDENTIFIER(abc) NEWLINE EOF", values(tokens(".abc")));
+
+ assertEquals("IDENTIFIER(foo) DOT INT(123) NEWLINE EOF",
+ values(tokens("foo.123")));
+ assertEquals("IDENTIFIER(foo) DOT IDENTIFIER(bcd) NEWLINE EOF",
+ values(tokens("foo.bcd"))); // 'b' are hex chars
+ assertEquals("IDENTIFIER(foo) DOT IDENTIFIER(xyz) NEWLINE EOF",
+ values(tokens("foo.xyz")));
+ }
+
+ public void testStringDelimiters() throws Exception {
+ assertEquals("STRING(foo) NEWLINE EOF", values(tokens("\"foo\"")));
+ assertEquals("STRING(foo) NEWLINE EOF", values(tokens("'foo'")));
+ }
+
+ public void testQuotesInStrings() throws Exception {
+ assertEquals("STRING(foo'bar) NEWLINE EOF", values(tokens("'foo\\'bar'")));
+ assertEquals("STRING(foo'bar) NEWLINE EOF", values(tokens("\"foo'bar\"")));
+ assertEquals("STRING(foo\"bar) NEWLINE EOF", values(tokens("'foo\"bar'")));
+ assertEquals("STRING(foo\"bar) NEWLINE EOF",
+ values(tokens("\"foo\\\"bar\"")));
+ }
+
+ public void testStringEscapes() throws Exception {
+ assertEquals("STRING(a\tb\nc\rd) NEWLINE EOF",
+ values(tokens("'a\\tb\\nc\\rd'"))); // \t \r \n
+ assertEquals("STRING(x\\hx) NEWLINE EOF",
+ values(tokens("'x\\hx'"))); // \h is unknown => "\h"
+ assertEquals("STRING(\\$$) NEWLINE EOF", values(tokens("'\\$$'")));
+ assertEquals("STRING(ab) NEWLINE EOF",
+ values(tokens("'a\\\nb'"))); // escape end of line
+
+ assertEquals("STRING(abcd) NEWLINE EOF",
+ values(tokens("r'abcd'")));
+ assertEquals("STRING(abcd) NEWLINE EOF",
+ values(tokens("r\"abcd\"")));
+ assertEquals("STRING(a\\tb\\nc\\rd) NEWLINE EOF",
+ values(tokens("r'a\\tb\\nc\\rd'"))); // r'a\tb\nc\rd'
+ assertEquals("STRING(a\\\") NEWLINE EOF",
+ values(tokens("r\"a\\\"\""))); // r"a\""
+ assertEquals("STRING(a\\\\b) NEWLINE EOF",
+ values(tokens("r'a\\\\b'"))); // r'a\\b'
+ assertEquals("STRING(ab) IDENTIFIER(r) NEWLINE EOF",
+ values(tokens("r'ab'r")));
+
+ assertEquals("STRING(abcd) NEWLINE EOF",
+ values(tokens("\"ab\\ucd\"")));
+ assertEquals("/some/path.txt:1: escape sequence not implemented: \\u",
+ lastError.toString());
+ }
+
+ public void testOctalEscapes() throws Exception {
+ // Regression test for a bug.
+ assertEquals("STRING(\0 \1 \t \u003f I I1 \u00ff \u00ff \u00fe) NEWLINE EOF",
+ values(tokens("'\\0 \\1 \\11 \\77 \\111 \\1111 \\377 \\777 \\776'")));
+ // Test boundaries (non-octal char, EOF).
+ assertEquals("STRING(\1b \1) NEWLINE EOF", values(tokens("'\\1b \\1'")));
+ }
+
+ public void testTripleQuotedStrings() throws Exception {
+ assertEquals("STRING(a\"b'c \n d\"\"e) NEWLINE EOF",
+ values(tokens("\"\"\"a\"b'c \n d\"\"e\"\"\"")));
+ assertEquals("STRING(a\"b'c \n d\"\"e) NEWLINE EOF",
+ values(tokens("'''a\"b'c \n d\"\"e'''")));
+ }
+
+ public void testBadChar() throws Exception {
+ assertEquals("IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF",
+ values(tokens("a$b")));
+ assertEquals("/some/path.txt:1: invalid character: '$'",
+ lastError.toString());
+ }
+
+ /**
+  * Checks INDENT/OUTDENT generation for nested blocks, including the error
+  * reported when a line dedents to a column that matches no open block.
+  *
+  * <p>Leading whitespace inside the inputs is significant: the expected
+  * token streams can only be produced when the lines sit at distinct depths
+  * (0, 2 and 4 columns here), so these runs of spaces must not be collapsed.
+  */
+ public void testIndentation() throws Exception {
+   // No indentation at all.
+   assertEquals("INT(1) NEWLINE INT(2) NEWLINE INT(3) NEWLINE EOF",
+       values(tokens("1\n2\n3")));
+
+   // One block, closed by an explicitly dedented line.
+   assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT "
+       + "INT(4) NEWLINE EOF", values(tokens("1\n  2\n  3\n4 ")));
+
+   // One block, closed implicitly at end of input.
+   assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT "
+       + "NEWLINE EOF", values(tokens("1\n  2\n  3")));
+
+   // Two nested blocks, both closed at end of input.
+   assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
+       + "OUTDENT OUTDENT NEWLINE EOF",
+       values(tokens("1\n  2\n    3")));
+
+   // Two nested blocks, closed one level at a time.
+   assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
+       + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF",
+       values(tokens("1\n  2\n    3\n  4\n5")));
+
+   // Line 4 dedents to column 1, which matches neither open block: the
+   // token stream is the same as above, but an error is reported.
+   assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
+       + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF",
+       values(tokens("1\n  2\n    3\n 4\n5")));
+   assertEquals("/some/path.txt:4: indentation error", lastError.toString());
+ }
+
+ public void testIndentationInsideParens() throws Exception {
+ // Indentation is ignored inside parens:
+ assertEquals("INT(1) LPAREN INT(2) INT(3) INT(4) INT(5) NEWLINE EOF",
+ values(tokens("1 (\n 2\n 3\n 4\n5")));
+ assertEquals("INT(1) LBRACE INT(2) INT(3) INT(4) INT(5) NEWLINE EOF",
+ values(tokens("1 {\n 2\n 3\n 4\n5")));
+ assertEquals("INT(1) LBRACKET INT(2) INT(3) INT(4) INT(5) NEWLINE EOF",
+ values(tokens("1 [\n 2\n 3\n 4\n5")));
+ assertEquals("INT(1) LBRACKET INT(2) RBRACKET NEWLINE INDENT INT(3) "
+ + "NEWLINE INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF",
+ values(tokens("1 [\n 2]\n 3\n 4\n5")));
+ }
+
+ public void testIndentationAtEOF() throws Exception {
+ // Matching OUTDENTS are created at EOF:
+ assertEquals("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF",
+ values(tokens("\n 1")));
+ }
+
+ public void testBlankLineIndentation() throws Exception {
+ // Blank lines and comment lines should not generate any newlines indents
+ // (but note that every input ends with NEWLINE EOF).
+ assertEquals("COMMENT NEWLINE EOF", names(tokens("\n #\n")));
+ assertEquals("COMMENT NEWLINE EOF", names(tokens(" #")));
+ assertEquals("COMMENT NEWLINE EOF", names(tokens(" #\n")));
+ assertEquals("COMMENT NEWLINE EOF", names(tokens(" #comment\n")));
+ assertEquals("DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
+ + "COMMENT INDENT RETURN IDENTIFIER NEWLINE "
+ + "OUTDENT NEWLINE EOF",
+ names(tokens("def f(x):\n"
+ + " # comment\n"
+ + "\n"
+ + " \n"
+ + " return x\n")));
+ }
+
+ public void testMultipleCommentLines() throws Exception {
+ assertEquals("COMMENT NEWLINE COMMENT COMMENT COMMENT "
+ + "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
+ + "INDENT RETURN IDENTIFIER NEWLINE OUTDENT NEWLINE EOF",
+ names(tokens("# Copyright\n"
+ + "#\n"
+ + "# A comment line\n"
+ + "# An adjoining line\n"
+ + "def f(x):\n"
+ + " return x\n")));
+ }
+
+ public void testBackslash() throws Exception {
+ assertEquals("IDENTIFIER IDENTIFIER NEWLINE EOF",
+ names(tokens("a\\\nb")));
+ assertEquals("IDENTIFIER ILLEGAL IDENTIFIER NEWLINE EOF",
+ names(tokens("a\\ b")));
+ assertEquals("IDENTIFIER LPAREN INT RPAREN NEWLINE EOF",
+ names(tokens("a(\\\n2)")));
+ }
+
+ public void testTokenPositions() throws Exception {
+ // foo ( bar , { 1 :
+ assertEquals("[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)"
+ // 'quux' } ) NEWLINE EOF
+ + " [13,19) [19,20) [20,21) [20,21) [21,21)",
+ positions(tokens("foo(bar, {1: 'quux'})")));
+ }
+
+ /**
+  * Checks the line numbers attached to tokens and to error messages,
+  * across blank lines and across escaped newlines inside string literals.
+  */
+ public void testLineNumbers() throws Exception {
+   // Line 3 is blank, so the last statement (and NEWLINE, EOF) is on line 4.
+   String threeAssignments = "foo = 1\nbar = 2\n\nwiz = 3";
+   assertEquals("1 1 1 1 2 2 2 2 4 4 4 4 4", linenums(threeAssignments));
+
+   // The invalid '$' sits on line 4 and is reported there; lexing goes on.
+   String withBadChar = "foo = 1\nbar = 2\n\nwiz = $\nbar = 2";
+   assertEquals("IDENTIFIER(foo) EQUALS INT(1) NEWLINE "
+       + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE "
+       + "IDENTIFIER(wiz) EQUALS NEWLINE "
+       + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE EOF",
+       values(tokens(withBadChar)));
+   assertEquals("/some/path.txt:4: invalid character: '$'",
+       lastError.toString());
+
+   // '\\n' in string should not increment linenum.
+   // NOTE(review): the input ends in the control character written as \3;
+   // the expected stream shows no token for it - confirm it is intended.
+   String s = "1\n'foo\\nbar'\3";
+   String dump = values(tokens(s));
+   assertEquals("INT(1) NEWLINE STRING(foo\nbar) NEWLINE EOF", dump);
+   assertEquals("1 1 2 2 2", linenums(s));
+ }
+
+ public void testContainsErrors() throws Exception {
+ Lexer lexerSuccess = createLexer("foo");
+ assertFalse(lexerSuccess.containsErrors());
+
+ Lexer lexerFail = createLexer("f$o");
+ assertTrue(lexerFail.containsErrors());
+
+ String s = "'unterminated";
+ lexerFail = createLexer(s);
+ assertTrue(lexerFail.containsErrors());
+ assertEquals(0, lastErrorLocation.getStartOffset());
+ assertEquals(s.length(), lastErrorLocation.getEndOffset());
+ assertEquals("STRING(unterminated) NEWLINE EOF", values(tokens(s)));
+ }
+}