aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar ridiculousfish <corydoras@ridiculousfish.com>2012-11-21 17:48:35 -0800
committerGravatar ridiculousfish <corydoras@ridiculousfish.com>2012-11-21 17:48:35 -0800
commitf545fb2491d36e02dc6c4005ada86be8678bba78 (patch)
tree3237ef337b1723b24dc32618cc5e03360e30e13e
parente73be48d9622d06039926fd2ba4a4b68b675f34a (diff)
Work towards refactoring tokenizer to be a real object
-rw-r--r--builtin_commandline.cpp6
-rw-r--r--complete.cpp6
-rw-r--r--fish_indent.cpp4
-rw-r--r--fish_tests.cpp13
-rw-r--r--highlight.cpp10
-rw-r--r--history.cpp6
-rw-r--r--parse_util.cpp18
-rw-r--r--parser.cpp54
-rw-r--r--parser.h10
-rw-r--r--reader.cpp7
-rw-r--r--tokenizer.cpp109
-rw-r--r--tokenizer.h47
12 files changed, 122 insertions, 168 deletions
diff --git a/builtin_commandline.cpp b/builtin_commandline.cpp
index cb3ee9e3..07dc2c96 100644
--- a/builtin_commandline.cpp
+++ b/builtin_commandline.cpp
@@ -143,7 +143,6 @@ static void write_part(const wchar_t *begin,
int cut_at_cursor,
int tokenize)
{
- tokenizer tok;
wcstring out;
wchar_t *buff;
size_t pos;
@@ -155,9 +154,8 @@ static void write_part(const wchar_t *begin,
buff = wcsndup(begin, end-begin);
// fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end );
out.clear();
-
- for (tok_init(&tok, buff, TOK_ACCEPT_UNFINISHED);
- tok_has_next(&tok);
+ tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED);
+ for (; tok_has_next(&tok);
tok_next(&tok))
{
if ((cut_at_cursor) &&
diff --git a/complete.cpp b/complete.cpp
index ce094485..c21beafa 100644
--- a/complete.cpp
+++ b/complete.cpp
@@ -1765,7 +1765,6 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
completer_t completer(cmd, type);
const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end;
- tokenizer tok;
const wchar_t *current_token=0, *prev_token=0;
wcstring current_command;
int on_command=0;
@@ -1807,9 +1806,8 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
int had_cmd=0;
int end_loop=0;
-
- tok_init(&tok, buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
-
+
+ tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
while (tok_has_next(&tok) && !end_loop)
{
diff --git a/fish_indent.cpp b/fish_indent.cpp
index dedf11f2..c50eeba6 100644
--- a/fish_indent.cpp
+++ b/fish_indent.cpp
@@ -84,7 +84,6 @@ static void insert_tabs(wcstring &out, int indent)
*/
static int indent(wcstring &out, const wcstring &in, int flags)
{
- tokenizer tok;
int res=0;
int is_command = 1;
int indent = 0;
@@ -92,8 +91,7 @@ static int indent(wcstring &out, const wcstring &in, int flags)
int prev_type = 0;
int prev_prev_type = 0;
- tok_init(&tok, in.c_str(), TOK_SHOW_COMMENTS);
-
+ tokenizer_t tok(in.c_str(), TOK_SHOW_COMMENTS);
for (; tok_has_next(&tok); tok_next(&tok))
{
int type = tok_last_type(&tok);
diff --git a/fish_tests.cpp b/fish_tests.cpp
index 3a1b382d..a2ac00c4 100644
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@@ -289,13 +289,12 @@ static void test_convert()
*/
static void test_tok()
{
- tokenizer t;
say(L"Testing tokenizer");
say(L"Testing invalid input");
- tok_init(&t, 0, 0);
+ tokenizer_t t(NULL, 0);
if (tok_last_type(&t) != TOK_ERROR)
{
@@ -326,14 +325,12 @@ static void test_tok()
const int types[] =
{
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END
- }
- ;
- size_t i;
+ };
say(L"Test correct tokenization");
-
- for (i=0, tok_init(&t, str, 0); i<(sizeof(types)/sizeof(int)); i++,tok_next(&t))
- {
+
+ tokenizer_t t(str, 0);
+ for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t)) {
if (types[i] != tok_last_type(&t))
{
err(L"Tokenization error:");
diff --git a/highlight.cpp b/highlight.cpp
index 543f33aa..77acd8da 100644
--- a/highlight.cpp
+++ b/highlight.cpp
@@ -691,8 +691,8 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command
int arg_pos = -1;
bool had_cmd = false;
- tokenizer tok;
- for (tok_init(&tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok))
+ tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
+ for (; tok_has_next(&tok); tok_next(&tok))
{
int last_type = tok_last_type(&tok);
@@ -955,10 +955,8 @@ static void tokenize(const wchar_t * const buff, std::vector<int> &color, const
std::fill(color.begin(), color.end(), -1);
- tokenizer tok;
- for (tok_init(&tok, buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
- tok_has_next(&tok);
- tok_next(&tok))
+ tokenizer_t tok(buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
+ for (; tok_has_next(&tok); tok_next(&tok))
{
int last_type = tok_last_type(&tok);
diff --git a/history.cpp b/history.cpp
index dd12eb95..0a87e8b9 100644
--- a/history.cpp
+++ b/history.cpp
@@ -1415,10 +1415,8 @@ void history_t::add_with_file_detection(const wcstring &str)
ASSERT_IS_MAIN_THREAD();
path_list_t potential_paths;
- tokenizer tokenizer;
- for (tok_init(&tokenizer, str.c_str(), TOK_SQUASH_ERRORS);
- tok_has_next(&tokenizer);
- tok_next(&tokenizer))
+ tokenizer_t tokenizer(str.c_str(), TOK_SQUASH_ERRORS);
+ for (; tok_has_next(&tokenizer); tok_next(&tokenizer))
{
int type = tok_last_type(&tokenizer);
if (type == TOK_STRING)
diff --git a/parse_util.cpp b/parse_util.cpp
index 711b22e2..10072ec8 100644
--- a/parse_util.cpp
+++ b/parse_util.cpp
@@ -326,8 +326,6 @@ static void job_or_process_extent(const wchar_t *buff,
wchar_t *buffcpy;
int finished=0;
- tokenizer tok;
-
CHECK(buff,);
if (a)
@@ -365,9 +363,8 @@ static void job_or_process_extent(const wchar_t *buff,
DIE_MEM();
}
- for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED);
- tok_has_next(&tok) && !finished;
- tok_next(&tok))
+ tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
+ for (; tok_has_next(&tok) && !finished; tok_next(&tok))
{
int tok_begin = tok_get_pos(&tok);
@@ -440,8 +437,6 @@ void parse_util_token_extent(const wchar_t *buff,
long pos;
wchar_t *buffcpy;
- tokenizer tok;
-
const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL;
CHECK(buff,);
@@ -474,9 +469,8 @@ void parse_util_token_extent(const wchar_t *buff,
DIE_MEM();
}
- for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
- tok_has_next(&tok);
- tok_next(&tok))
+ tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
+ for (; tok_has_next(&tok); tok_next(&tok))
{
size_t tok_begin = tok_get_pos(&tok);
size_t tok_end = tok_begin;
@@ -711,9 +705,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
wchar_t last_quote = '\0';
int unfinished;
- tokenizer tok;
- tok_init(&tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
-
+ tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
for (; tok_has_next(&tok); tok_next(&tok))
{
if (tok_get_pos(&tok) > pos)
diff --git a/parser.cpp b/parser.cpp
index 6713fddf..8a1d2984 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -525,7 +525,6 @@ static int parser_is_pipe_forbidden(const wcstring &word)
*/
static const wchar_t *parser_find_end(const wchar_t * buff)
{
- tokenizer tok;
int had_cmd=0;
int count = 0;
int error=0;
@@ -533,9 +532,8 @@ static const wchar_t *parser_find_end(const wchar_t * buff)
CHECK(buff, 0);
- for (tok_init(&tok, buff, 0);
- tok_has_next(&tok) && !error;
- tok_next(&tok))
+ tokenizer_t tok(buff, 0);
+ for (; tok_has_next(&tok) && !error; tok_next(&tok))
{
int last_type = tok_last_type(&tok);
switch (last_type)
@@ -796,7 +794,6 @@ void parser_t::print_errors_stderr()
int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
{
- tokenizer tok;
expand_flags_t eflags = 0;
if (! show_errors)
@@ -808,8 +805,8 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
eval_args may be called while evaluating another command, so we
save the previous tokenizer and restore it on exit
*/
- tokenizer *previous_tokenizer=current_tokenizer;
- int previous_pos=current_tokenizer_pos;
+ tokenizer_t * const previous_tokenizer = current_tokenizer;
+ const int previous_pos = current_tokenizer_pos;
int do_loop=1;
CHECK(line, 1);
@@ -819,10 +816,10 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
if (this->parser_type == PARSER_TYPE_GENERAL)
proc_push_interactive(0);
+ tokenizer_t tok(line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
current_tokenizer = &tok;
current_tokenizer_pos = 0;
- tok_init(&tok, line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
error_code=0;
for (; do_loop && tok_has_next(&tok) ; tok_next(&tok))
@@ -1319,7 +1316,7 @@ job_t *parser_t::job_get_from_pid(int pid)
*/
void parser_t::parse_job_argument_list(process_t *p,
job_t *j,
- tokenizer *tok,
+ tokenizer_t *tok,
std::vector<completion_t> &args,
bool unskip)
{
@@ -1718,7 +1715,7 @@ f
*/
int parser_t::parse_job(process_t *p,
job_t *j,
- tokenizer *tok)
+ tokenizer_t *tok)
{
std::vector<completion_t> args; // The list that will become the argc array for the program
int use_function = 1; // May functions be considered when checking what action this command represents
@@ -2185,7 +2182,6 @@ int parser_t::parse_job(process_t *p,
const wchar_t *end=parser_find_end(tok_string(tok) +
current_tokenizer_pos);
- tokenizer subtok;
int make_sub_block = j->first_process != p;
if (!end)
@@ -2202,9 +2198,8 @@ int parser_t::parse_job(process_t *p,
{
int done=0;
- for (tok_init(&subtok, end, 0);
- !done && tok_has_next(&subtok);
- tok_next(&subtok))
+ tokenizer_t subtok(end, 0);
+ for (; ! done && tok_has_next(&subtok); tok_next(&subtok))
{
switch (tok_last_type(&subtok))
@@ -2388,7 +2383,7 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc
\param tok The tokenizer to read tokens from
*/
-void parser_t::eval_job(tokenizer *tok)
+void parser_t::eval_job(tokenizer_t *tok)
{
ASSERT_IS_MAIN_THREAD();
job_t *j;
@@ -2630,7 +2625,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
const wchar_t * const cmd = cmdStr.c_str();
size_t forbid_count;
int code;
- tokenizer *previous_tokenizer=current_tokenizer;
+ tokenizer_t *previous_tokenizer=current_tokenizer;
block_t *start_current_block = current_block;
/* Record the current chain so we can put it back later */
@@ -2676,8 +2671,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
this->push_block(new scope_block_t(block_type));
- current_tokenizer = new tokenizer;
- tok_init(current_tokenizer, cmd, 0);
+ current_tokenizer = new tokenizer_t(cmd, 0);
error_code = 0;
@@ -2907,19 +2901,17 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *prefix)
{
- tokenizer tok;
- tokenizer *previous_tokenizer = current_tokenizer;
- int previous_pos = current_tokenizer_pos;
+ tokenizer_t *const previous_tokenizer = current_tokenizer;
+ const int previous_pos = current_tokenizer_pos;
int do_loop = 1;
int err = 0;
CHECK(buff, 1);
- current_tokenizer = &tok;
- for (tok_init(&tok, buff, 0);
- do_loop && tok_has_next(&tok);
- tok_next(&tok))
+ tokenizer_t tok(buff, 0);
+ current_tokenizer = &tok;
+ for (; do_loop && tok_has_next(&tok); tok_next(&tok))
{
current_tokenizer_pos = tok_get_pos(&tok);
switch (tok_last_type(&tok))
@@ -2970,7 +2962,7 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre
tok_destroy(&tok);
- current_tokenizer=previous_tokenizer;
+ current_tokenizer = previous_tokenizer;
current_tokenizer_pos = previous_pos;
error_code=0;
@@ -2985,7 +2977,6 @@ int parser_t::test(const wchar_t * buff,
{
ASSERT_IS_MAIN_THREAD();
- tokenizer tok;
/*
Set to one if a command name has been given for the currently
parsed process specification
@@ -2994,8 +2985,8 @@ int parser_t::test(const wchar_t * buff,
int err=0;
int unfinished = 0;
- tokenizer *previous_tokenizer=current_tokenizer;
- int previous_pos=current_tokenizer_pos;
+ tokenizer_t * const previous_tokenizer=current_tokenizer;
+ const int previous_pos=current_tokenizer_pos;
int block_pos[BLOCK_MAX_COUNT] = {};
block_type_t block_type[BLOCK_MAX_COUNT] = {};
@@ -3043,11 +3034,10 @@ int parser_t::test(const wchar_t * buff,
}
+ tokenizer_t tok(buff, 0);
current_tokenizer = &tok;
- for (tok_init(&tok, buff, 0);
- ;
- tok_next(&tok))
+ for (;; tok_next(&tok))
{
current_tokenizer_pos = tok_get_pos(&tok);
diff --git a/parser.h b/parser.h
index e8653983..751182c3 100644
--- a/parser.h
+++ b/parser.h
@@ -295,7 +295,7 @@ struct profile_item_t
wcstring cmd;
};
-struct tokenizer;
+struct tokenizer_t;
class parser_t
{
@@ -316,7 +316,7 @@ private:
wcstring err_buff;
/** Pointer to the current tokenizer */
- tokenizer *current_tokenizer;
+ tokenizer_t *current_tokenizer;
/** String for representing the current line */
wcstring lineinfo;
@@ -344,10 +344,10 @@ private:
parser_t(const parser_t&);
parser_t& operator=(const parser_t&);
- void parse_job_argument_list(process_t *p, job_t *j, tokenizer *tok, std::vector<completion_t>&, bool);
- int parse_job(process_t *p, job_t *j, tokenizer *tok);
+ void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector<completion_t>&, bool);
+ int parse_job(process_t *p, job_t *j, tokenizer_t *tok);
void skipped_exec(job_t * j);
- void eval_job(tokenizer *tok);
+ void eval_job(tokenizer_t *tok);
int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset);
void print_errors(wcstring &target, const wchar_t *prefix);
void print_errors_stderr();
diff --git a/reader.cpp b/reader.cpp
index 5548dabf..df39f3a2 100644
--- a/reader.cpp
+++ b/reader.cpp
@@ -1821,7 +1821,6 @@ static void handle_token_history(int forward, int reset)
const wchar_t *str=0;
long current_pos;
- tokenizer tok;
if (reset)
{
@@ -1895,10 +1894,8 @@ static void handle_token_history(int forward, int reset)
{
//debug( 3, L"new '%ls'", data->token_history_buff.c_str() );
-
- for (tok_init(&tok, data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
- tok_has_next(&tok);
- tok_next(&tok))
+ tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
+ for (; tok_has_next(&tok); tok_next(&tok))
{
switch (tok_last_type(&tok))
{
diff --git a/tokenizer.cpp b/tokenizer.cpp
index e5f131f8..1a59820e 100644
--- a/tokenizer.cpp
+++ b/tokenizer.cpp
@@ -83,7 +83,7 @@ static const wchar_t *tok_desc[] =
\return 0 if the system could not provide the memory needed, and 1 otherwise.
*/
-static int check_size(tokenizer *tok, size_t len)
+static int check_size(tokenizer_t *tok, size_t len)
{
if (tok->last_len <= len)
{
@@ -103,7 +103,7 @@ static int check_size(tokenizer *tok, size_t len)
/**
Set the latest tokens string to be the specified error message
*/
-static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_message)
+static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message)
{
tok->last_type = TOK_ERROR;
tok->error = error_type;
@@ -117,13 +117,13 @@ static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_
wcscpy(tok->last, error_message);
}
-int tok_get_error(tokenizer *tok)
+int tok_get_error(tokenizer_t *tok)
{
return tok->error;
}
-void tok_init(tokenizer *tok, const wchar_t *b, int flags)
+tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last(NULL), last_type(0), last_len(0), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0)
{
/* We can only generate error messages on the main thread due to wgettext() thread safety issues. */
@@ -132,33 +132,28 @@ void tok_init(tokenizer *tok, const wchar_t *b, int flags)
ASSERT_IS_MAIN_THREAD();
}
- CHECK(tok,);
-
- memset(tok, 0, sizeof(tokenizer));
-
CHECK(b,);
- tok->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
- tok->show_comments = !!(flags & TOK_SHOW_COMMENTS);
- tok->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
- tok->has_next=true;
+ this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
+ this->show_comments = !!(flags & TOK_SHOW_COMMENTS);
+ this->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
- tok->has_next = (*b != L'\0');
- tok->orig_buff = tok->buff = b;
- tok->cached_lineno_offset = 0;
- tok->cached_lineno_count = 0;
- tok_next(tok);
+ this->has_next = (*b != L'\0');
+ this->orig_buff = this->buff = b;
+ this->cached_lineno_offset = 0;
+ this->cached_lineno_count = 0;
+ tok_next(this);
}
-void tok_destroy(tokenizer *tok)
+void tok_destroy(tokenizer_t *tok)
{
CHECK(tok,);
free(tok->last);
}
-int tok_last_type(tokenizer *tok)
+int tok_last_type(tokenizer_t *tok)
{
CHECK(tok, TOK_ERROR);
CHECK(tok->buff, TOK_ERROR);
@@ -166,14 +161,14 @@ int tok_last_type(tokenizer *tok)
return tok->last_type;
}
-wchar_t *tok_last(tokenizer *tok)
+wchar_t *tok_last(tokenizer_t *tok)
{
CHECK(tok, 0);
return tok->last;
}
-int tok_has_next(tokenizer *tok)
+int tok_has_next(tokenizer_t *tok)
{
/*
Return 1 on broken tokenizer
@@ -185,7 +180,7 @@ int tok_has_next(tokenizer *tok)
return tok->has_next;
}
-int tokenizer::line_number_of_character_at_offset(size_t offset)
+int tokenizer_t::line_number_of_character_at_offset(size_t offset)
{
// we want to return (one plus) the number of newlines at offsets less than the given offset
// cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset
@@ -265,24 +260,28 @@ static int myal(wchar_t c)
/**
Read the next token as a string
*/
-static void read_string(tokenizer *tok)
+static void read_string(tokenizer_t *tok)
{
const wchar_t *start;
long len;
- int mode=0;
int do_loop=1;
int paran_count=0;
start = tok->buff;
bool is_first = true;
+ enum tok_mode_t {
+ mode_regular_text = 0, // regular text
+ mode_subshell = 1, // inside of subshell
+ mode_array_brackets = 2, // inside of array brackets
+ mode_array_brackets_and_subshell = 3 // inside of array brackets and subshell, like in '$foo[(ech'
+ } mode = mode_regular_text;
+
while (1)
{
if (!myal(*tok->buff))
{
-// debug(1, L"%lc", *tok->buff );
-
if (*tok->buff == L'\\')
{
tok->buff++;
@@ -296,13 +295,13 @@ static void read_string(tokenizer *tok)
else
{
/* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */
- do_loop = 0;
tok->buff--;
+ do_loop = 0;
}
}
- else if (*tok->buff == L'\n' && mode == 0)
+ else if (*tok->buff == L'\n' && mode == mode_regular_text)
{
tok->buff--;
do_loop = 0;
@@ -312,33 +311,24 @@ static void read_string(tokenizer *tok)
tok->buff++;
continue;
}
-
-
- /*
- The modes are as follows:
-
- 0: regular text
- 1: inside of subshell
- 2: inside of array brackets
- 3: inside of array brackets and subshell, like in '$foo[(ech'
- */
+
switch (mode)
{
- case 0:
+ case mode_regular_text:
{
switch (*tok->buff)
{
case L'(':
{
paran_count=1;
- mode = 1;
+ mode = mode_subshell;
break;
}
case L'[':
{
if (tok->buff != start)
- mode=2;
+ mode = mode_array_brackets;
break;
}
@@ -356,7 +346,7 @@ static void read_string(tokenizer *tok)
{
tok->buff += wcslen(tok->buff);
- if ((!tok->accept_unfinished))
+ if (! tok->accept_unfinished)
{
TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR);
return;
@@ -369,7 +359,7 @@ static void read_string(tokenizer *tok)
default:
{
- if (!tok_is_string_character(*(tok->buff), is_first))
+ if (! tok_is_string_character(*(tok->buff), is_first))
{
do_loop=0;
}
@@ -378,8 +368,8 @@ static void read_string(tokenizer *tok)
break;
}
- case 3:
- case 1:
+ case mode_array_brackets_and_subshell:
+ case mode_subshell:
switch (*tok->buff)
{
case L'\'':
@@ -411,7 +401,7 @@ static void read_string(tokenizer *tok)
paran_count--;
if (paran_count == 0)
{
- mode--;
+ mode = (mode == mode_array_brackets_and_subshell ? mode_array_brackets : mode_regular_text);
}
break;
case L'\0':
@@ -419,16 +409,17 @@ static void read_string(tokenizer *tok)
break;
}
break;
- case 2:
+
+ case mode_array_brackets:
switch (*tok->buff)
{
case L'(':
paran_count=1;
- mode = 3;
+ mode = mode_array_brackets_and_subshell;
break;
case L']':
- mode=0;
+ mode = mode_regular_text;
break;
case L'\0':
@@ -447,7 +438,7 @@ static void read_string(tokenizer *tok)
is_first = false;
}
- if ((!tok->accept_unfinished) && (mode!=0))
+ if ((!tok->accept_unfinished) && (mode != mode_regular_text))
{
TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR);
return;
@@ -467,7 +458,7 @@ static void read_string(tokenizer *tok)
/**
Read the next token as a comment.
*/
-static void read_comment(tokenizer *tok)
+static void read_comment(tokenizer_t *tok)
{
const wchar_t *start;
@@ -487,7 +478,7 @@ static void read_comment(tokenizer *tok)
/**
Read a FD redirection.
*/
-static void read_redirect(tokenizer *tok, int fd)
+static void read_redirect(tokenizer_t *tok, int fd)
{
int mode = -1;
@@ -552,7 +543,7 @@ static void read_redirect(tokenizer *tok, int fd)
}
}
-wchar_t tok_last_quote(tokenizer *tok)
+wchar_t tok_last_quote(tokenizer_t *tok)
{
CHECK(tok, 0);
@@ -582,7 +573,7 @@ const wchar_t *tok_get_desc(int type)
}
-void tok_next(tokenizer *tok)
+void tok_next(tokenizer_t *tok)
{
CHECK(tok,);
@@ -705,20 +696,18 @@ void tok_next(tokenizer *tok)
}
-const wchar_t *tok_string(tokenizer *tok)
+const wchar_t *tok_string(tokenizer_t *tok)
{
return tok?tok->orig_buff:0;
}
wchar_t *tok_first(const wchar_t *str)
{
- tokenizer t;
wchar_t *res=0;
CHECK(str, 0);
- tok_init(&t, str, TOK_SQUASH_ERRORS);
-
+ tokenizer_t t(str, TOK_SQUASH_ERRORS);
switch (tok_last_type(&t))
{
case TOK_STRING:
@@ -733,7 +722,7 @@ wchar_t *tok_first(const wchar_t *str)
return res;
}
-int tok_get_pos(tokenizer *tok)
+int tok_get_pos(tokenizer_t *tok)
{
CHECK(tok, 0);
@@ -741,7 +730,7 @@ int tok_get_pos(tokenizer *tok)
}
-void tok_set_pos(tokenizer *tok, int pos)
+void tok_set_pos(tokenizer_t *tok, int pos)
{
CHECK(tok,);
diff --git a/tokenizer.h b/tokenizer.h
index ae6b6ecc..4d4deacc 100644
--- a/tokenizer.h
+++ b/tokenizer.h
@@ -61,11 +61,12 @@ enum tokenizer_error
*/
#define TOK_SQUASH_ERRORS 4
+typedef unsigned int tok_flags_t;
/**
The tokenizer struct.
*/
-struct tokenizer
+struct tokenizer_t
{
/** A pointer into the original string, showing where the next token begins */
const wchar_t *buff;
@@ -100,62 +101,60 @@ struct tokenizer
/** Return the line number of the character at the given offset */
int line_number_of_character_at_offset(size_t offset);
-};
-
-/**
- Initialize the tokenizer. b is the string that is to be
- tokenized. It is not copied, and should not be freed by the caller
- until after the tokenizer is destroyed.
+ /**
+ Constructor for a tokenizer. b is the string that is to be
+ tokenized. It is not copied, and should not be freed by the caller
+ until after the tokenizer is destroyed.
- \param tok The tokenizer to initialize
- \param b The string to tokenize
- \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
- to accept incomplete tokens, such as a subshell without a closing
- parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
+ \param b The string to tokenize
+ \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
+ to accept incomplete tokens, such as a subshell without a closing
+ parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
-*/
-void tok_init(tokenizer *tok, const wchar_t *b, int flags);
+ */
+ tokenizer_t(const wchar_t *b, tok_flags_t flags);
+};
/**
Jump to the next token.
*/
-void tok_next(tokenizer *tok);
+void tok_next(tokenizer_t *tok);
/**
Returns the type of the last token. Must be one of the values in the token_type enum.
*/
-int tok_last_type(tokenizer *tok);
+int tok_last_type(tokenizer_t *tok);
/**
Returns the last token string. The string should not be freed by the caller.
*/
-wchar_t *tok_last(tokenizer *tok);
+wchar_t *tok_last(tokenizer_t *tok);
/**
Returns the type of quote from the last TOK_QSTRING
*/
-wchar_t tok_last_quote(tokenizer *tok);
+wchar_t tok_last_quote(tokenizer_t *tok);
/**
Returns true as long as there are more tokens left
*/
-int tok_has_next(tokenizer *tok);
+int tok_has_next(tokenizer_t *tok);
/**
Returns the position of the beginning of the current token in the original string
*/
-int tok_get_pos(tokenizer *tok);
+int tok_get_pos(tokenizer_t *tok);
/**
Destroy the tokenizer and free associated memory
*/
-void tok_destroy(tokenizer *tok);
+void tok_destroy(tokenizer_t *tok);
/**
Returns the original string that was passed to the tokenizer
*/
-const wchar_t *tok_string(tokenizer *tok);
+const wchar_t *tok_string(tokenizer_t *tok);
/**
@@ -178,7 +177,7 @@ bool tok_is_string_character(wchar_t c, bool is_first);
/**
Move tokenizer position
*/
-void tok_set_pos(tokenizer *tok, int pos);
+void tok_set_pos(tokenizer_t *tok, int pos);
/**
Returns a string description of the specified token type
@@ -188,7 +187,7 @@ const wchar_t *tok_get_desc(int type);
/**
Get tokenizer error type. Should only be called if tok_last_type returns TOK_ERROR.
*/
-int tok_get_error(tokenizer *tok);
+int tok_get_error(tokenizer_t *tok);
#endif