From 4ebaa7b6bd595f37f66f8d77ee836c1e5a4ef647 Mon Sep 17 00:00:00 2001
From: ridiculousfish
Date: Sun, 26 Jul 2015 00:12:36 -0700
Subject: Continue migration to the new tokenizer interface

---
 src/tokenizer.cpp | 48 +++++++++++++-----------------------------------
 1 file changed, 13 insertions(+), 35 deletions(-)

(limited to 'src/tokenizer.cpp')

diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 22412c47..18a617b1 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -52,6 +52,10 @@ segments.
 */
 #define PIPE_ERROR _( L"Cannot use stdin (fd 0) as pipe output" )
 
+static void tok_next(tokenizer_t *tok);
+static enum token_type tok_last_type(tokenizer_t *tok);
+static const wchar_t *tok_last(tokenizer_t *tok);
+
 /**
    Set the latest tokens string to be the specified error message
 */
@@ -62,11 +66,6 @@ static void tok_call_error(tokenizer_t *tok, enum tokenizer_error error_type, co
     tok->last_token = error_message;
 }
 
-int tok_get_error(tokenizer_t *tok)
-{
-    return tok->error;
-}
-
 tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), squash_errors(false), continue_line_after_comment(false)
 {
     CHECK(b,);
@@ -91,13 +90,18 @@ bool tokenizer_t::next(struct tok_t *result)
     result->text = this->last_token;
     result->type = this->last_type;
     result->offset = last_pos;
+    result->error = this->last_type == TOK_ERROR ? this->error : TOK_ERROR_NONE;
+
+    assert(this->buff >= this->orig_buff);
     assert(this->buff >= this->orig_buff);
-    result->length = this->buff - this->orig_buff;
+    size_t current_pos = this->buff - this->orig_buff;
+    result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;
+
     tok_next(this);
     return true;
 }
 
-enum token_type tok_last_type(tokenizer_t *tok)
+static enum token_type tok_last_type(tokenizer_t *tok)
 {
     CHECK(tok, TOK_ERROR);
     CHECK(tok->buff, TOK_ERROR);
@@ -105,25 +109,13 @@ enum token_type tok_last_type(tokenizer_t *tok)
     return tok->last_type;
 }
 
-const wchar_t *tok_last(tokenizer_t *tok)
+static const wchar_t *tok_last(tokenizer_t *tok)
 {
     CHECK(tok, 0);
     return tok->last_token.c_str();
 }
 
-int tok_has_next(tokenizer_t *tok)
-{
-    /*
-      Return 1 on broken tokenizer
-    */
-    CHECK(tok, 1);
-    CHECK(tok->buff, 1);
-
-    /*  fwprintf( stderr, L"has_next is %ls \n", tok->has_next?L"true":L"false" );*/
-    return tok->has_next;
-}
-
 /**
    Tests if this character can be a part of a string. The redirect ^ is allowed
    unless it's the first character. Hash (#) starts a comment if it's the first character
   in a token; otherwise it is considered a string character.
@@ -539,7 +531,7 @@ static bool my_iswspace(wchar_t c)
     return c != L'\n' && iswspace(c);
 }
 
-void tok_next(tokenizer_t *tok)
+static void tok_next(tokenizer_t *tok)
 {
     CHECK(tok,);
 
@@ -718,20 +710,6 @@ wcstring tok_first(const wchar_t *str)
     return result;
 }
 
-int tok_get_pos(const tokenizer_t *tok)
-{
-    CHECK(tok, 0);
-    return (int)tok->last_pos;
-}
-
-size_t tok_get_extent(const tokenizer_t *tok)
-{
-    CHECK(tok, 0);
-    size_t current_pos = tok->buff - tok->orig_buff;
-    return current_pos > tok->last_pos ? current_pos - tok->last_pos : 0;
-}
-
-
 bool move_word_state_machine_t::consume_char_punctuation(wchar_t c)
 {
     enum
-- 
cgit v1.2.3
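
Usage sketch (not part of the patch): the commit continues moving callers from the
removed free functions (tok_get_error, tok_has_next, tok_get_pos, tok_get_extent) to
the tokenizer_t::next() member, which fills in a tok_t. A caller loop might look
roughly like the following; the "tokenizer.h" include, the dump_tokens name, the
flags value of 0, and the exact tok_t field types are assumptions, not taken from
the patch.

    #include <cwchar>
    #include "tokenizer.h"

    // Hypothetical illustration only: walk every token in a command line using
    // the tokenizer_t::next() interface. next() returns false once the
    // tokenizer runs out of tokens.
    static void dump_tokens(const wchar_t *cmd)
    {
        tokenizer_t tok(cmd, 0);  // 0 = no tok_flags_t flags set (assumed default)
        struct tok_t token;       // fields text/type/offset/length/error as filled in by next()
        while (tok.next(&token))
        {
            fwprintf(stderr, L"type %d at offset %lu (length %lu): %ls\n",
                     (int)token.type, (unsigned long)token.offset,
                     (unsigned long)token.length, token.text.c_str());
        }
    }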