diff options
author | ridiculousfish <corydoras@ridiculousfish.com> | 2014-11-24 01:20:57 -0800 |
---|---|---|
committer | ridiculousfish <corydoras@ridiculousfish.com> | 2014-11-24 01:23:42 -0800 |
commit | eafd5776292c37d37870fc6013029f7146f34f70 (patch) | |
tree | 6d9d81452eef02560933c42734ce92562407d875 | |
parent | 196a7c9d188304cd6b189b1bcf4e2c088fcf3434 (diff) |
Hack the tokenizer to compress multiple adjacent newlines into one
This slightly reduces the size of parse trees and is otherwise a
minor optimization.
-rw-r--r-- | fish_tests.cpp | 4 | ||||
-rw-r--r-- | tokenizer.cpp | 12 |
2 files changed, 12 insertions, 4 deletions
diff --git a/fish_tests.cpp b/fish_tests.cpp index 4df8322c..9fa6a110 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -458,10 +458,10 @@ static void test_tok() say(L"Test destruction of broken tokenizer"); { - const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect"; + const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect Compress_Newlines\n \n\t\n \nInto_Just_One"; const int types[] = { - TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END + TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING, TOK_END }; say(L"Test correct tokenization"); diff --git a/tokenizer.cpp b/tokenizer.cpp index 17999356..29db04bd 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -621,14 +621,22 @@ void tok_next(tokenizer_t *tok) switch (*tok->buff) { - case L'\0': tok->last_type = TOK_END; /*fwprintf( stderr, L"End of string\n" );*/ tok->has_next = false; break; - case 13: + case 13: // carriage return case L'\n': + // Hack: when we get a newline, swallow as many as we can + // This compresses multiple subsequent newlines into a single one + while (*tok->buff == L'\n' || *tok->buff == 13 || *tok->buff == ' ' || *tok->buff == '\t') + { + tok->buff++; + } + tok->last_type = TOK_END; + break; + case L';': tok->last_type = TOK_END; tok->buff++; |