diff options
author | ridiculousfish <corydoras@ridiculousfish.com> | 2015-08-10 18:30:44 -0700 |
---|---|---|
committer | ridiculousfish <corydoras@ridiculousfish.com> | 2015-08-10 18:31:20 -0700 |
commit | e34a8da5d727ba26aeddd64e9b60a41c0c5312d3 (patch) | |
tree | 611cd6eeeee7a3dd466d80566779abc94464e2dd | |
parent | 6157a9a85890d79521e77cb4e51d4d52793ac516 (diff) |
Correct the positioning of the error caret
When an error occurs midway through a token — for example in `abc(def` —
make the caret point at the location of the error (i.e. the open paren)
instead of at the beginning of the token.
-rw-r--r-- | src/fish_tests.cpp | 20 | ||||
-rw-r--r-- | src/parse_tree.cpp | 10 | ||||
-rw-r--r-- | src/tokenizer.cpp | 79 | ||||
-rw-r--r-- | src/tokenizer.h | 9 |
4 files changed, 96 insertions, 22 deletions
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 526ccd44..60de9fca 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -472,6 +472,26 @@ static void test_tok() err(L"Too few tokens returned from tokenizer"); } } + + /* Test some errors */ + { + tok_t token; + tokenizer_t t(L"abc\\", 0); + do_test(t.next(&token)); + do_test(token.type == TOK_ERROR); + do_test(token.error == TOK_UNTERMINATED_ESCAPE); + do_test(token.error_offset == 3); + } + + { + tok_t token; + tokenizer_t t(L"abc defg(hij (klm)", 0); + do_test(t.next(&token)); + do_test(t.next(&token)); + do_test(token.type == TOK_ERROR); + do_test(token.error == TOK_UNTERMINATED_SUBSHELL); + do_test(token.error_offset == 4); + } /* Test redirection_type_for_string */ if (redirection_type_for_string(L"<") != TOK_REDIRECT_IN) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index ea84a23e..b31a6503 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -641,7 +641,7 @@ public: void accept_tokens(parse_token_t token1, parse_token_t token2); /* Report tokenizer errors */ - void report_tokenizer_error(parse_token_t token, int tok_err, const wcstring &tok_error); + void report_tokenizer_error(const tok_t &tok); /* Indicate if we hit a fatal error */ bool has_fatal_error(void) const @@ -896,10 +896,10 @@ void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &sta } } -void parse_ll_t::report_tokenizer_error(parse_token_t token, int tok_err_code, const wcstring &tok_error) +void parse_ll_t::report_tokenizer_error(const tok_t &tok) { parse_error_code_t parse_error_code; - switch (tok_err_code) + switch (tok.error) { case TOK_UNTERMINATED_QUOTE: parse_error_code = parse_error_tokenizer_unterminated_quote; @@ -919,7 +919,7 @@ void parse_ll_t::report_tokenizer_error(parse_token_t token, int tok_err_code, c break; } - this->parse_error(token, parse_error_code, L"%ls", tok_error.c_str()); + 
this->parse_error_at_location(tok.offset + tok.error_offset, parse_error_code, L"%ls", tok.text.c_str()); } void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) @@ -1336,7 +1336,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, /* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */ if (queue[1].type == parse_special_type_tokenizer_error) { - parser.report_tokenizer_error(queue[1], tokenizer_token.error, tokenizer_token.text); + parser.report_tokenizer_error(tokenizer_token); } /* Handle errors */ diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 88066b1f..e361f88b 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -21,7 +21,7 @@ segments. #include "tokenizer.h" /* Wow what a hack */ -#define TOK_CALL_ERROR(t, e, x) do { (t)->call_error((e), (t)->squash_errors ? L"" : (x)); } while (0) +#define TOK_CALL_ERROR(t, e, x, where) do { (t)->call_error((e), where, (t)->squash_errors ? L"" : (x)); } while (0) /** Error string for unexpected end of string @@ -38,6 +38,12 @@ segments. */ #define SQUARE_BRACKET_ERROR _( L"Unexpected end of string, square brackets do not match" ) +/** + Error string for unterminated escape (backslash without continuation) + */ +#define UNTERMINATED_ESCAPE_ERROR _( L"Unexpected end of string, incomplete escape sequence" ) + + /** Error string for invalid redirections @@ -52,14 +58,15 @@ segments. /** Set the latest tokens string to be the specified error message */ -void tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *error_message) +void tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *where, const wchar_t *error_message) { this->last_type = TOK_ERROR; this->error = error_type; + this->global_error_offset = where ? 
where - this->orig_buff : 0; this->last_token = error_message; } -tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(b), orig_buff(b), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), squash_errors(false), continue_line_after_comment(false) +tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(b), orig_buff(b), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), global_error_offset(-1), squash_errors(false), continue_line_after_comment(false) { assert(b != NULL); @@ -79,14 +86,23 @@ bool tokenizer_t::next(struct tok_t *result) { return false; } + + const size_t current_pos = this->buff - this->orig_buff; + result->text = this->last_token; result->type = this->last_type; - result->offset = last_pos; + result->offset = this->last_pos; result->error = this->last_type == TOK_ERROR ? this->error : TOK_ERROR_NONE; assert(this->buff >= this->orig_buff); + /* Compute error offset */ + result->error_offset = 0; + if (this->last_type == TOK_ERROR && this->global_error_offset >= this->last_pos && this->global_error_offset < current_pos) + { + result->error_offset = this->global_error_offset - this->last_pos; + } + assert(this->buff >= this->orig_buff); - size_t current_pos = this->buff - this->orig_buff; result->length = current_pos >= this->last_pos ? 
current_pos - this->last_pos : 0; this->tok_next(); @@ -140,12 +156,15 @@ static int myal(wchar_t c) */ void tokenizer_t::read_string() { - const wchar_t *start; long len; int do_loop=1; - int paran_count=0; + size_t paran_count=0; + + // up to 96 open parens, before we give up on good error reporting + const size_t paran_offsets_max = 96; + size_t paran_offsets[paran_offsets_max]; - start = this->buff; + const wchar_t * const start = this->buff; bool is_first = true; enum tok_mode_t @@ -162,12 +181,13 @@ void tokenizer_t::read_string() { if (*this->buff == L'\\') { + const wchar_t *error_location = this->buff; this->buff++; if (*this->buff == L'\0') { if ((!this->accept_unfinished)) { - TOK_CALL_ERROR(this, TOK_UNTERMINATED_ESCAPE, QUOTE_ERROR); + TOK_CALL_ERROR(this, TOK_UNTERMINATED_ESCAPE, UNTERMINATED_ESCAPE_ERROR, error_location); return; } else @@ -191,6 +211,7 @@ void tokenizer_t::read_string() case L'(': { paran_count=1; + paran_offsets[0] = this->buff - this->orig_buff; mode = mode_subshell; break; } @@ -213,11 +234,12 @@ void tokenizer_t::read_string() } else { + const wchar_t *error_loc = this->buff; this->buff += wcslen(this->buff); if (! 
this->accept_unfinished) { - TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR); + TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR, error_loc); return; } do_loop = 0; @@ -239,6 +261,7 @@ void tokenizer_t::read_string() case mode_array_brackets_and_subshell: case mode_subshell: + { switch (*this->buff) { case L'\'': @@ -251,10 +274,11 @@ void tokenizer_t::read_string() } else { + const wchar_t *error_loc = this->buff; this->buff += wcslen(this->buff); if ((!this->accept_unfinished)) { - TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR); + TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR, error_loc); return; } do_loop = 0; @@ -264,9 +288,14 @@ void tokenizer_t::read_string() } case L'(': + if (paran_count < paran_offsets_max) + { + paran_offsets[paran_count] = this->buff - this->orig_buff; + } paran_count++; break; case L')': + assert(paran_count > 0); paran_count--; if (paran_count == 0) { @@ -278,12 +307,15 @@ void tokenizer_t::read_string() break; } break; + } case mode_array_brackets: + { switch (*this->buff) { case L'(': paran_count=1; + paran_offsets[0] = this->buff - this->orig_buff; mode = mode_array_brackets_and_subshell; break; @@ -296,6 +328,7 @@ void tokenizer_t::read_string() break; } break; + } } } @@ -312,12 +345,27 @@ void tokenizer_t::read_string() switch (mode) { case mode_subshell: - TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR); + { + // Determine the innermost opening paran offset by interrogating paran_offsets + assert(paran_count > 0); + size_t offset_of_open_paran = 0; + if (paran_count <= paran_offsets_max) + { + offset_of_open_paran = paran_offsets[paran_count - 1]; + } + + TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR, this->orig_buff + offset_of_open_paran); break; + } + case mode_array_brackets: case mode_array_brackets_and_subshell: - TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, SQUARE_BRACKET_ERROR); // TOK_UNTERMINATED_SUBSHELL is a lie but nobody actually looks at it + { 
+ size_t offset_of_bracket = 0; + TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, SQUARE_BRACKET_ERROR, this->orig_buff + offset_of_bracket); // TOK_UNTERMINATED_SUBSHELL is a lie but nobody actually looks at it break; + } + default: assert(0 && "Unexpected mode in read_string"); break; @@ -612,7 +660,7 @@ void tokenizer_t::tok_next() size_t consumed = read_redirection_or_fd_pipe(this->buff, &mode, &fd); if (consumed == 0 || fd < 0) { - TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR); + TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR, this->buff); } else { @@ -626,6 +674,7 @@ void tokenizer_t::tok_next() default: { /* Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string */ + const wchar_t *error_location = this->buff; size_t consumed = 0; enum token_type mode = TOK_NONE; int fd = -1; @@ -637,7 +686,7 @@ void tokenizer_t::tok_next() /* It looks like a redirection or a pipe. But we don't support piping fd 0. Note that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer error. */ if (mode == TOK_PIPE && fd == 0) { - TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR); + TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR, error_location); } else { diff --git a/src/tokenizer.h b/src/tokenizer.h index ff774d9b..dca8e5c2 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -79,13 +79,16 @@ struct tok_t /* If an error, this is the error code */ enum tokenizer_error error; + /* If an error, this is the offset of the error within the token. 
A value of 0 means it occurred at 'offset' */ + size_t error_offset; + /* Offset of the token */ size_t offset; /* Length of the token */ size_t length; - tok_t() : type(TOK_NONE), error(TOK_ERROR_NONE), offset(-1), length(-1) {} + tok_t() : type(TOK_NONE), error(TOK_ERROR_NONE), error_offset(-1), offset(-1), length(-1) {} }; /** @@ -119,13 +122,15 @@ class tokenizer_t bool show_blank_lines; /** Last error */ tokenizer_error error; + /** Last error offset, in "global" coordinates (relative to orig_buff) */ + size_t global_error_offset; /* Whether we are squashing errors */ bool squash_errors; /* Whether to continue the previous line after the comment */ bool continue_line_after_comment; - void call_error(enum tokenizer_error error_type, const wchar_t *error_message); + void call_error(enum tokenizer_error error_type, const wchar_t *where, const wchar_t *error_message); void read_string(); void read_comment(); void tok_next(); |