aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--src/fish_tests.cpp22
-rw-r--r--src/reader.cpp24
-rw-r--r--src/tokenizer.cpp20
-rw-r--r--src/tokenizer.h26
4 files changed, 72 insertions, 20 deletions
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index a7587575..cbece61c 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -468,22 +468,34 @@ static void test_tok()
const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect Compress_Newlines\n \n\t\n \nInto_Just_One";
const int types[] =
{
- TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING, TOK_END
+ TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING
};
say(L"Test correct tokenization");
tokenizer_t t(str, 0);
- for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t))
+ tok_t token;
+ size_t i = 0;
+ while (t.next(&token))
{
- if (types[i] != tok_last_type(&t))
+ if (i >= sizeof types / sizeof *types)
+ {
+ err(L"Too many tokens returned from tokenizer");
+ break;
+ }
+ if (types[i] != token.type)
{
err(L"Tokenization error:");
- wprintf(L"Token number %d of string \n'%ls'\n, got token '%ls'\n",
+ wprintf(L"Token number %zu of string \n'%ls'\n, got token type %ld\n",
i+1,
str,
- tok_last(&t));
+ (long)token.type);
}
+ i++;
+ }
+ if (i < sizeof types / sizeof *types)
+ {
+ err(L"Too few tokens returned from tokenizer");
}
}
diff --git a/src/reader.cpp b/src/reader.cpp
index 5e386dc7..5828aba1 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -246,7 +246,7 @@ public:
/**
Saved position used by token history search
*/
- int token_history_pos;
+ size_t token_history_pos;
/**
Saved search string for token history search. Not handled by command_line_changed.
@@ -2256,7 +2256,7 @@ static void handle_token_history(int forward, int reset)
return;
wcstring str;
- long current_pos;
+ size_t current_pos;
if (reset)
{
@@ -2292,7 +2292,7 @@ static void handle_token_history(int forward, int reset)
}
else
{
- if (current_pos == -1)
+ if (current_pos == size_t(-1))
{
data->token_history_buff.clear();
@@ -2330,26 +2330,26 @@ static void handle_token_history(int forward, int reset)
//debug( 3, L"new '%ls'", data->token_history_buff.c_str() );
tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
- for (; tok_has_next(&tok); tok_next(&tok))
+ tok_t token;
+ while (tok.next(&token))
{
- switch (tok_last_type(&tok))
+ switch (token.type)
{
case TOK_STRING:
- {
- if (wcsstr(tok_last(&tok), data->search_buff.c_str()))
+ {
+ if (token.text.find(data->search_buff) != wcstring::npos)
{
//debug( 3, L"Found token at pos %d\n", tok_get_pos( &tok ) );
- if (tok_get_pos(&tok) >= current_pos)
+ if (token.offset >= current_pos)
{
break;
}
//debug( 3, L"ok pos" );
- const wcstring last_tok = tok_last(&tok);
- if (find(data->search_prev.begin(), data->search_prev.end(), last_tok) == data->search_prev.end())
+ if (find(data->search_prev.begin(), data->search_prev.end(), token.text) == data->search_prev.end())
{
- data->token_history_pos = tok_get_pos(&tok);
- str = tok_last(&tok);
+ data->token_history_pos = token.offset;
+ str = token.text;
}
}
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 55e9dc9c..22412c47 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -55,7 +55,7 @@ segments.
/**
Set the latest tokens string to be the specified error message
*/
-static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message)
+static void tok_call_error(tokenizer_t *tok, enum tokenizer_error error_type, const wchar_t *error_message)
{
tok->last_type = TOK_ERROR;
tok->error = error_type;
@@ -67,7 +67,7 @@ int tok_get_error(tokenizer_t *tok)
return tok->error;
}
-tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(0), squash_errors(false), continue_line_after_comment(false)
+tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), squash_errors(false), continue_line_after_comment(false)
{
CHECK(b,);
@@ -81,6 +81,22 @@ tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig
tok_next(this);
}
+bool tokenizer_t::next(struct tok_t *result)
+{
+ assert(result != NULL);
+ if (! this->has_next)
+ {
+ return false;
+ }
+ result->text = this->last_token;
+ result->type = this->last_type;
+ result->offset = last_pos;
+ assert(this->buff >= this->orig_buff);
+ result->length = (this->buff - this->orig_buff) - this->last_pos;
+ tok_next(this);
+ return true;
+}
+
enum token_type tok_last_type(tokenizer_t *tok)
{
CHECK(tok, TOK_ERROR);
diff --git a/src/tokenizer.h b/src/tokenizer.h
index f5f0455d..14e648fd 100644
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -36,6 +36,7 @@ enum token_type
*/
enum tokenizer_error
{
+ TOK_ERROR_NONE,
TOK_UNTERMINATED_QUOTE,
TOK_UNTERMINATED_SUBSHELL,
TOK_UNTERMINATED_ESCAPE,
@@ -67,6 +68,26 @@ enum tokenizer_error
typedef unsigned int tok_flags_t;
+struct tok_t
+{
+ /* The text of the token, or an error message for type error */
+ wcstring text;
+
+ /* The type of the token */
+ token_type type;
+
+ /* Offset of the token */
+ size_t offset;
+
+ /* Length of the token */
+ size_t length;
+
+ /* If an error, this is the error code */
+ enum tokenizer_error error;
+
+ tok_t() : type(TOK_NONE), offset(-1), length(-1), error(TOK_ERROR_NONE) {}
+};
+
/**
The tokenizer struct.
*/
@@ -93,7 +114,7 @@ struct tokenizer_t
/** Whether all blank lines are returned */
bool show_blank_lines;
/** Last error */
- int error;
+ tokenizer_error error;
/* Whether we are squashing errors */
bool squash_errors;
@@ -112,6 +133,9 @@ struct tokenizer_t
*/
tokenizer_t(const wchar_t *b, tok_flags_t flags);
+
+ /** Returns the next token by reference. Returns true if we got one, false if we're at the end. */
+ bool next(struct tok_t *result);
};
/**