aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar ridiculousfish <corydoras@ridiculousfish.com>2014-01-12 22:39:12 -0800
committerGravatar ridiculousfish <corydoras@ridiculousfish.com>2014-01-12 22:39:12 -0800
commit096f8504335577d05392436ddcffd5bceabef6d4 (patch)
tree6d7f48aa9d76cc03a14cf51eb78437036b66cd74
parentec469782c8f146476de28453e971642095e4f381 (diff)
Eliminate class parse_t
-rw-r--r--builtin.cpp4
-rw-r--r--complete.cpp2
-rw-r--r--fish_tests.cpp145
-rw-r--r--highlight.cpp6
-rw-r--r--parse_tree.cpp63
-rw-r--r--parse_tree.h25
-rw-r--r--parse_util.cpp4
-rw-r--r--parser.cpp2
-rw-r--r--reader.cpp2
9 files changed, 86 insertions, 167 deletions
diff --git a/builtin.cpp b/builtin.cpp
index e4a3bf6f..4aab71d4 100644
--- a/builtin.cpp
+++ b/builtin.cpp
@@ -4298,7 +4298,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
const wcstring src = str2wcstring(&txt.at(0), txt.size());
parse_node_tree_t parse_tree;
parse_error_list_t errors;
- bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors, true);
+ bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors, true);
if (! success)
{
stdout_buffer.append(L"Parsing failed:\n");
@@ -4311,7 +4311,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
stdout_buffer.append(L"(Reparsed with continue after error)\n");
parse_tree.clear();
errors.clear();
- parse_t::parse(src, parse_flag_continue_after_error, &parse_tree, &errors, true);
+ parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors, true);
}
const wcstring dump = parse_dump_tree(parse_tree, src);
stdout_buffer.append(dump);
diff --git a/complete.cpp b/complete.cpp
index fdc62e1e..33e0536b 100644
--- a/complete.cpp
+++ b/complete.cpp
@@ -1839,7 +1839,7 @@ void complete(const wcstring &cmd_with_subcmds, std::vector<completion_t> &comps
//const wcstring prev_token(prev_begin, prev_token_len);
parse_node_tree_t tree;
- parse_t::parse(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL);
+ parse_tree_from_string(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL);
/* Find the plain statement that contains the position */
const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL);
diff --git a/fish_tests.cpp b/fish_tests.cpp
index 0ce1092a..070004b1 100644
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@@ -660,11 +660,11 @@ static void test_parser()
err(L"Invalid block mode when evaluating undetected");
}
- /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
-#if 1
/* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */
say(L"Testing recursion detection");
parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP);
+#if 0
+ /* This is disabled since it produces a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP);
#endif
}
@@ -720,10 +720,9 @@ static void test_cancellation()
test_1_cancellation(L"while true ; end");
fprintf(stderr, ".");
- test_1_cancellation(L"for i in (whiel true ; end) ; end");
+ test_1_cancellation(L"for i in (while true ; end) ; end");
fprintf(stderr, ".");
-
fprintf(stderr, "\n");
/* Restore signal handling */
@@ -2234,7 +2233,7 @@ static void test_new_parser_correctness(void)
const parser_test_t *test = &parser_tests[i];
parse_node_tree_t parse_tree;
- bool success = parse_t::parse(test->src, parse_flag_none, &parse_tree, NULL);
+ bool success = parse_tree_from_string(test->src, parse_flag_none, &parse_tree, NULL);
say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no");
if (success && ! test->ok)
{
@@ -2248,95 +2247,75 @@ static void test_new_parser_correctness(void)
say(L"Parse tests complete");
}
-struct parser_fuzz_token_t
-{
- parse_token_type_t token_type;
- parse_keyword_t keyword;
-
- parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none)
- {
- }
-};
-
-static bool increment(std::vector<parser_fuzz_token_t> &tokens)
+/* Given that we have an array of 'fuzz_count' strings, we wish to enumerate all permutations of 'len' values. We do this by incrementing an integer, interpreting it as "base fuzz_count". */
+static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_count, size_t len, size_t permutation, wcstring *out_str)
{
- size_t i, end = tokens.size();
- for (i=0; i < end; i++)
+ out_str->clear();
+
+ size_t remaining_permutation = permutation;
+ for (size_t i=0; i < len; i++)
{
- bool wrapped = false;
-
- struct parser_fuzz_token_t &token = tokens[i];
- bool incremented_in_keyword = false;
- if (token.token_type == parse_token_type_string)
- {
- // try incrementing the keyword
- token.keyword++;
- if (token.keyword <= LAST_KEYWORD)
- {
- incremented_in_keyword = true;
- }
- else
- {
- token.keyword = parse_keyword_none;
- incremented_in_keyword = false;
- }
- }
-
- if (! incremented_in_keyword)
- {
- token.token_type++;
- // Skip the very special parse_token_type_terminate, since that's always the last thing delivered
- if (token.token_type == parse_token_type_terminate)
- {
- token.token_type++;
- }
-
- if (token.token_type > LAST_TERMINAL_TYPE)
- {
- token.token_type = FIRST_TERMINAL_TYPE;
- wrapped = true;
- }
- }
-
- if (! wrapped)
- {
- break;
- }
+ size_t idx = remaining_permutation % fuzz_count;
+ remaining_permutation /= fuzz_count;
+
+ out_str->append(fuzzes[idx]);
+ out_str->push_back(L' ');
}
- return i == end;
+ // Return false if we wrapped
+ return remaining_permutation == 0;
}
static void test_new_parser_fuzzing(void)
{
say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
+ const wcstring fuzzes[] =
+ {
+ L"if",
+ L"else",
+ L"for",
+ L"in",
+ L"while",
+ L"begin",
+ L"function",
+ L"switch",
+ L"case",
+ L"end",
+ L"and",
+ L"or",
+ L"not",
+ L"command",
+ L"builtin",
+ L"foo",
+ L"|",
+ L"^",
+ L"&",
+ L";",
+ };
+
+ /* Generate a list of strings of all keyword / token combinations. */
+ wcstring src;
+ src.reserve(128);
+
+ parse_node_tree_t node_tree;
+ parse_error_list_t errors;
+
double start = timef();
- bool log_it = false;
- // ensure nothing crashes
- size_t max = 4;
- for (size_t len=1; len <= max; len++)
+ bool log_it = true;
+ size_t max_len = 5;
+ for (size_t len = 0; len < max_len; len++)
{
if (log_it)
- fprintf(stderr, "%lu / %lu...", len, max);
- std::vector<parser_fuzz_token_t> tokens(len);
- size_t count = 0;
- parse_t parser;
- parse_node_tree_t parse_tree;
- do
- {
- parser.clear();
- parse_tree.clear();
- count++;
- for (size_t i=0; i < len; i++)
- {
- const parser_fuzz_token_t &token = tokens[i];
- parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL);
- }
+ fprintf(stderr, "%lu / %lu...", len, max_len);
- // keep going until we wrap
+ /* We wish to look at all permutations of 4 elements of 'fuzzes' (with replacement). Construct an int and keep incrementing it. */
+ size_t permutation = 0;
+ while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src))
+ {
+ parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors);
}
- while (! increment(tokens));
if (log_it)
- fprintf(stderr, "done (%lu)\n", count);
+ fprintf(stderr, "done (%lu)\n", permutation);
+
}
double end = timef();
if (log_it)
@@ -2352,7 +2331,7 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
bool result = false;
parse_node_tree_t tree;
- if (parse_t::parse(src, parse_flag_none, &tree, NULL))
+ if (parse_tree_from_string(src, parse_flag_none, &tree, NULL))
{
/* Get the statement. Should only have one */
const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement);
@@ -2432,7 +2411,7 @@ static void test_new_parser_ad_hoc()
/* Ensure that 'case' terminates a job list */
const wcstring src = L"switch foo ; case bar; case baz; end";
parse_node_tree_t parse_tree;
- bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL);
+ bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL);
if (! success)
{
err(L"Parsing failed");
@@ -2476,7 +2455,7 @@ static void test_new_parser_errors(void)
parse_error_list_t errors;
parse_node_tree_t parse_tree;
- bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors);
+ bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
if (success)
{
err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
diff --git a/highlight.cpp b/highlight.cpp
index e9923fb0..f24bd6f1 100644
--- a/highlight.cpp
+++ b/highlight.cpp
@@ -714,7 +714,7 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand
/* Parse the buffer */
parse_node_tree_t parse_tree;
- parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
+ parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
/* Find the last statement */
const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL);
@@ -1716,7 +1716,7 @@ class highlighter_t
{
/* Parse the tree */
this->parse_tree.clear();
- parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
+ parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
}
/* Perform highlighting, returning an array of colors */
@@ -2062,7 +2062,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
/* Parse the buffer */
parse_node_tree_t parse_tree;
- parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);
+ parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);
#if 0
const wcstring dump = parse_dump_tree(parse_tree, buff);
diff --git a/parse_tree.cpp b/parse_tree.cpp
index 536520fc..b55f43bb 100644
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@@ -973,15 +973,6 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
}
}
-parse_t::parse_t() : parser(new parse_ll_t())
-{
-}
-
-parse_t::~parse_t()
-{
- delete parser;
-}
-
static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
{
parse_keyword_t result = parse_keyword_none;
@@ -1056,9 +1047,10 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok)
return result;
}
-bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
+bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
{
- this->parser->set_should_generate_error_messages(errors != NULL);
+ parse_ll_t parser;
+ parser.set_should_generate_error_messages(errors != NULL);
/* Construct the tokenizer */
tok_flags_t tok_options = 0;
@@ -1090,16 +1082,16 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
}
/* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */
- this->parser->accept_tokens(queue[0], queue[1]);
+ parser.accept_tokens(queue[0], queue[1]);
/* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */
if (queue[1].type == parse_special_type_tokenizer_error)
{
- this->parser->report_tokenizer_error(queue[1], tok_last(&tok));
+ parser.report_tokenizer_error(queue[1], tok_last(&tok));
}
/* Handle errors */
- if (this->parser->has_fatal_error())
+ if (parser.has_fatal_error())
{
if (parse_flags & parse_flag_continue_after_error)
{
@@ -1108,8 +1100,8 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
/* Mark a special error token, and then keep going */
const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[error_token_idx].source_start, queue[error_token_idx].source_length};
- this->parser->accept_tokens(token, kInvalidToken);
- this->parser->reset_symbols();
+ parser.accept_tokens(token, kInvalidToken);
+ parser.reset_symbols();
}
else
{
@@ -1123,10 +1115,10 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
// Teach each node where its source range is
- this->parser->determine_node_ranges();
+ parser.determine_node_ranges();
// Acquire the output from the parser
- this->parser->acquire_output(output, errors);
+ parser.acquire_output(output, errors);
#if 0
//wcstring result = dump_tree(this->parser->nodes, str);
@@ -1135,40 +1127,7 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
#endif
// Indicate if we had a fatal error
- return ! this->parser->has_fatal_error();
-}
-
-bool parse_t::parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
-{
- parse_t parse;
- return parse.parse_internal(str, flags, output, errors, log_it);
-}
-
-bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
-{
- const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1};
-
- // Only strings can have keywords. So if we have a keyword, the type must be a string
- assert(keyword == parse_keyword_none || token_type == parse_token_type_string);
-
- parse_token_t token;
- token.type = token_type;
- token.keyword = keyword;
- token.source_start = -1;
- token.source_length = 0;
-
- bool wants_errors = (errors != NULL);
- this->parser->set_should_generate_error_messages(wants_errors);
-
- /* Passing invalid_token here is totally wrong. This code is only used in testing however. */
- this->parser->accept_tokens(token, invalid_token);
-
- return ! this->parser->has_fatal_error();
-}
-
-void parse_t::clear()
-{
- this->parser->reset_symbols_and_nodes();
+ return ! parser.has_fatal_error();
}
const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
diff --git a/parse_tree.h b/parse_tree.h
index 76cd80ec..6ce82299 100644
--- a/parse_tree.h
+++ b/parse_tree.h
@@ -74,28 +74,6 @@ enum
};
typedef unsigned int parse_tree_flags_t;
-class parse_ll_t;
-class parse_t
-{
- parse_ll_t * const parser;
-
- bool parse_internal(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
-
-public:
- parse_t();
- ~parse_t();
-
- /* Parse a string all at once */
- static bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
-
- /* Parse a single token */
- bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
-
- /* Reset, ready to parse something else */
- void clear();
-
-};
-
wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
wcstring token_type_description(parse_token_type_t type);
@@ -218,6 +196,9 @@ public:
parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
};
+/* The big entry point. Parse a string! */
+bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
+
/* Fish grammar:
# A job_list is a list of jobs, separated by semicolons or newlines
diff --git a/parse_util.cpp b/parse_util.cpp
index cf196db1..027a0e9b 100644
--- a/parse_util.cpp
+++ b/parse_util.cpp
@@ -878,7 +878,7 @@ std::vector<int> parse_util_compute_indents(const wcstring &src)
/* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */
parse_node_tree_t tree;
- parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
+ parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
/* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */
node_offset_t start_node_idx = 0;
@@ -994,7 +994,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars
// Parse the input string into a parse tree
// Some errors are detected here
- bool parsed = parse_t::parse(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors);
+ bool parsed = parse_tree_from_string(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors);
if (! parsed)
{
errored = true;
diff --git a/parser.cpp b/parser.cpp
index 5f3f5dc1..e2259689 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -2581,7 +2581,7 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl
/* Parse the source into a tree, if we can */
parse_node_tree_t tree;
- if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL))
+ if (! parse_tree_from_string(cmd, parse_flag_none, &tree, NULL))
{
return 1;
}
diff --git a/reader.cpp b/reader.cpp
index a2bda51e..6cf77ae4 100644
--- a/reader.cpp
+++ b/reader.cpp
@@ -664,7 +664,7 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso
/* Parse this subcmd */
parse_node_tree_t parse_tree;
- parse_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
+ parse_tree_from_string(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
/* Look for plain statements where the cursor is at the end of the command */
const parse_node_t *matching_cmd_node = NULL;