diff options
author | ridiculousfish <corydoras@ridiculousfish.com> | 2015-07-25 23:05:47 -0700 |
---|---|---|
committer | ridiculousfish <corydoras@ridiculousfish.com> | 2015-07-25 23:05:47 -0700 |
commit | 618896c0436e6ca70feb2fa317b34171cc4e1a81 (patch) | |
tree | 4f6d8130f52fe7c9523e034ab85fe3b49fd0ea0b /src | |
parent | 0dbd83ffaf571dce9b1e8449c28e3ae0040d4e75 (diff) |
Early reworking of tokenizer interface
Diffstat (limited to 'src')
-rw-r--r-- | src/fish_tests.cpp | 22 | ||||
-rw-r--r-- | src/reader.cpp | 24 | ||||
-rw-r--r-- | src/tokenizer.cpp | 20 | ||||
-rw-r--r-- | src/tokenizer.h | 26 |
4 files changed, 72 insertions, 20 deletions
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index a7587575..cbece61c 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -468,22 +468,34 @@ static void test_tok() const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect Compress_Newlines\n \n\t\n \nInto_Just_One"; const int types[] = { - TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING, TOK_END + TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING }; say(L"Test correct tokenization"); tokenizer_t t(str, 0); - for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t)) + tok_t token; + size_t i = 0; + while (t.next(&token)) { - if (types[i] != tok_last_type(&t)) + if (i > sizeof types / sizeof *types) + { + err(L"Too many tokens returned from tokenizer"); + break; + } + if (types[i] != token.type) { err(L"Tokenization error:"); - wprintf(L"Token number %d of string \n'%ls'\n, got token '%ls'\n", + wprintf(L"Token number %d of string \n'%ls'\n, got token type %ld\n", i+1, str, - tok_last(&t)); + (long)token.type); } + i++; + } + if (i < sizeof types / sizeof *types) + { + err(L"Too few tokens returned from tokenizer"); } } diff --git a/src/reader.cpp b/src/reader.cpp index 5e386dc7..5828aba1 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -246,7 +246,7 @@ public: /** Saved position used by token history search */ - int token_history_pos; + size_t token_history_pos; /** Saved search string for token history search. Not handled by command_line_changed. 
@@ -2256,7 +2256,7 @@ static void handle_token_history(int forward, int reset) return; wcstring str; - long current_pos; + size_t current_pos; if (reset) { @@ -2292,7 +2292,7 @@ static void handle_token_history(int forward, int reset) } else { - if (current_pos == -1) + if (current_pos == size_t(-1)) { data->token_history_buff.clear(); @@ -2330,26 +2330,26 @@ static void handle_token_history(int forward, int reset) //debug( 3, L"new '%ls'", data->token_history_buff.c_str() ); tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED); - for (; tok_has_next(&tok); tok_next(&tok)) + tok_t token; + while (tok.next(&token)) { - switch (tok_last_type(&tok)) + switch (token.type) { case TOK_STRING: - { - if (wcsstr(tok_last(&tok), data->search_buff.c_str())) + { + if (token.text.find(data->search_buff) != wcstring::npos) { //debug( 3, L"Found token at pos %d\n", tok_get_pos( &tok ) ); - if (tok_get_pos(&tok) >= current_pos) + if (token.offset >= current_pos) { break; } //debug( 3, L"ok pos" ); - const wcstring last_tok = tok_last(&tok); - if (find(data->search_prev.begin(), data->search_prev.end(), last_tok) == data->search_prev.end()) + if (find(data->search_prev.begin(), data->search_prev.end(), token.text) == data->search_prev.end()) { - data->token_history_pos = tok_get_pos(&tok); - str = tok_last(&tok); + data->token_history_pos = token.offset; + str = token.text; } } diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 55e9dc9c..22412c47 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -55,7 +55,7 @@ segments. 
/** Set the latest tokens string to be the specified error message */ -static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message) +static void tok_call_error(tokenizer_t *tok, enum tokenizer_error error_type, const wchar_t *error_message) { tok->last_type = TOK_ERROR; tok->error = error_type; @@ -67,7 +67,7 @@ int tok_get_error(tokenizer_t *tok) { return tok->error; } -tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(0), squash_errors(false), continue_line_after_comment(false) +tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), squash_errors(false), continue_line_after_comment(false) { CHECK(b,); @@ -81,6 +81,22 @@ tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig tok_next(this); } +bool tokenizer_t::next(struct tok_t *result) +{ + assert(result != NULL); + if (! this->has_next)
+ { + return false; + } + result->text = this->last_token; + result->type = this->last_type; + result->offset = last_pos; + assert(this->buff >= this->orig_buff); + result->length = this->buff - this->orig_buff; + tok_next(this); + return true; +} + enum token_type tok_last_type(tokenizer_t *tok) { CHECK(tok, TOK_ERROR); diff --git a/src/tokenizer.h b/src/tokenizer.h index f5f0455d..14e648fd 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -36,6 +36,7 @@ enum token_type */ enum tokenizer_error { + TOK_ERROR_NONE, TOK_UNTERMINATED_QUOTE, TOK_UNTERMINATED_SUBSHELL, TOK_UNTERMINATED_ESCAPE, @@ -67,6 +68,26 @@ enum tokenizer_error typedef unsigned int tok_flags_t; +struct tok_t +{ + /* The text of the token, or an error message for type error */ + wcstring text; + + /* The type of the token */ + token_type type; + + /* Offset of the token */ + size_t offset; + + /* Length of the token */ + size_t length; + + /* If an error, this is the error code */ + enum tokenizer_error error; + + tok_t() : type(TOK_NONE), offset(-1), length(-1), error(TOK_ERROR_NONE) {} +}; + /** The tokenizer struct. */ @@ -93,7 +114,7 @@ struct tokenizer_t /** Whether all blank lines are returned */ bool show_blank_lines; /** Last error */ - int error; + tokenizer_error error; /* Whether we are squashing errors */ bool squash_errors; @@ -112,6 +133,9 @@ struct tokenizer_t */ tokenizer_t(const wchar_t *b, tok_flags_t flags); + + /** Returns the next token by reference. Returns true if we got one, false if we're at the end. */ + bool next(struct tok_t *result); }; /** |