From 096f8504335577d05392436ddcffd5bceabef6d4 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 Jan 2014 22:39:12 -0800 Subject: Eliminate class parse_t --- builtin.cpp | 4 +- complete.cpp | 2 +- fish_tests.cpp | 145 ++++++++++++++++++++++++--------------------------------- highlight.cpp | 6 +-- parse_tree.cpp | 63 +++++-------------------- parse_tree.h | 25 ++-------- parse_util.cpp | 4 +- parser.cpp | 2 +- reader.cpp | 2 +- 9 files changed, 86 insertions(+), 167 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index e4a3bf6f..4aab71d4 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4298,7 +4298,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) const wcstring src = str2wcstring(&txt.at(0), txt.size()); parse_node_tree_t parse_tree; parse_error_list_t errors; - bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors, true); + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors, true); if (! success) { stdout_buffer.append(L"Parsing failed:\n"); @@ -4311,7 +4311,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) stdout_buffer.append(L"(Reparsed with continue after error)\n"); parse_tree.clear(); errors.clear(); - parse_t::parse(src, parse_flag_continue_after_error, &parse_tree, &errors, true); + parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors, true); } const wcstring dump = parse_dump_tree(parse_tree, src); stdout_buffer.append(dump); diff --git a/complete.cpp b/complete.cpp index fdc62e1e..33e0536b 100644 --- a/complete.cpp +++ b/complete.cpp @@ -1839,7 +1839,7 @@ void complete(const wcstring &cmd_with_subcmds, std::vector &comps //const wcstring prev_token(prev_begin, prev_token_len); parse_node_tree_t tree; - parse_t::parse(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL); + parse_tree_from_string(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL); /* Find the plain statement that contains the position */ const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL); diff --git a/fish_tests.cpp b/fish_tests.cpp index 0ce1092a..070004b1 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -660,11 +660,11 @@ static void test_parser() err(L"Invalid block mode when evaluating undetected"); } - /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */ -#if 1 /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */ say(L"Testing recursion detection"); parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP); +#if 0 + /* This is disabled since it produces a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */ parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP); #endif } @@ -720,10 +720,9 @@ static void test_cancellation() test_1_cancellation(L"while true ; end"); fprintf(stderr, "."); - test_1_cancellation(L"for i in (whiel true ; end) ; end"); + test_1_cancellation(L"for i in (while true ; end) ; end"); fprintf(stderr, "."); - fprintf(stderr, "\n"); /* Restore signal handling */ @@ -2234,7 +2233,7 @@ static void test_new_parser_correctness(void) const parser_test_t *test = &parser_tests[i]; parse_node_tree_t parse_tree; - bool success = parse_t::parse(test->src, parse_flag_none, &parse_tree, NULL); + bool success = parse_tree_from_string(test->src, parse_flag_none, &parse_tree, NULL); say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no"); if (success && ! test->ok) { @@ -2248,95 +2247,75 @@ static void test_new_parser_correctness(void) say(L"Parse tests complete"); } -struct parser_fuzz_token_t -{ - parse_token_type_t token_type; - parse_keyword_t keyword; - - parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none) - { - } -}; - -static bool increment(std::vector &tokens) +/* Given that we have an array of 'fuzz_count' strings, we wish to enumerate all permutations of 'len' values. We do this by incrementing an integer, interpreting it as "base fuzz_count". */ +static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_count, size_t len, size_t permutation, wcstring *out_str) { - size_t i, end = tokens.size(); - for (i=0; i < end; i++) + out_str->clear(); + + size_t remaining_permutation = permutation; + for (size_t i=0; i < len; i++) { - bool wrapped = false; - - struct parser_fuzz_token_t &token = tokens[i]; - bool incremented_in_keyword = false; - if (token.token_type == parse_token_type_string) - { - // try incrementing the keyword - token.keyword++; - if (token.keyword <= LAST_KEYWORD) - { - incremented_in_keyword = true; - } - else - { - token.keyword = parse_keyword_none; - incremented_in_keyword = false; - } - } - - if (! incremented_in_keyword) - { - token.token_type++; - // Skip the very special parse_token_type_terminate, since that's always the last thing delivered - if (token.token_type == parse_token_type_terminate) - { - token.token_type++; - } - - if (token.token_type > LAST_TERMINAL_TYPE) - { - token.token_type = FIRST_TERMINAL_TYPE; - wrapped = true; - } - } - - if (! wrapped) - { - break; - } + size_t idx = remaining_permutation % fuzz_count; + remaining_permutation /= fuzz_count; + + out_str->append(fuzzes[idx]); + out_str->push_back(L' '); } - return i == end; + // Return false if we wrapped + return remaining_permutation == 0; } static void test_new_parser_fuzzing(void) { say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t)); + const wcstring fuzzes[] = + { + L"if", + L"else", + L"for", + L"in", + L"while", + L"begin", + L"function", + L"switch", + L"case", + L"end", + L"and", + L"or", + L"not", + L"command", + L"builtin", + L"foo", + L"|", + L"^", + L"&", + L";", + }; + + /* Generate a list of strings of all keyword / token combinations. */ + wcstring src; + src.reserve(128); + + parse_node_tree_t node_tree; + parse_error_list_t errors; + double start = timef(); - bool log_it = false; - // ensure nothing crashes - size_t max = 4; - for (size_t len=1; len <= max; len++) + bool log_it = true; + size_t max_len = 5; + for (size_t len = 0; len < max_len; len++) { if (log_it) - fprintf(stderr, "%lu / %lu...", len, max); - std::vector tokens(len); - size_t count = 0; - parse_t parser; - parse_node_tree_t parse_tree; - do - { - parser.clear(); - parse_tree.clear(); - count++; - for (size_t i=0; i < len; i++) - { - const parser_fuzz_token_t &token = tokens[i]; - parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL); - } + fprintf(stderr, "%lu / %lu...", len, max_len); - // keep going until we wrap + /* We wish to look at all permutations of 4 elements of 'fuzzes' (with replacement). Construct an int and keep incrementing it. */ + size_t permutation = 0; + while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src)) + { + parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors); } - while (! increment(tokens)); if (log_it) - fprintf(stderr, "done (%lu)\n", count); + fprintf(stderr, "done (%lu)\n", permutation); + } double end = timef(); if (log_it) @@ -2352,7 +2331,7 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o bool result = false; parse_node_tree_t tree; - if (parse_t::parse(src, parse_flag_none, &tree, NULL)) + if (parse_tree_from_string(src, parse_flag_none, &tree, NULL)) { /* Get the statement. Should only have one */ const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement); @@ -2432,7 +2411,7 @@ static void test_new_parser_ad_hoc() /* Ensure that 'case' terminates a job list */ const wcstring src = L"switch foo ; case bar; case baz; end"; parse_node_tree_t parse_tree; - bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL); + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL); if (! success) { err(L"Parsing failed"); @@ -2476,7 +2455,7 @@ static void test_new_parser_errors(void) parse_error_list_t errors; parse_node_tree_t parse_tree; - bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors); + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors); if (success) { err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); diff --git a/highlight.cpp b/highlight.cpp index e9923fb0..f24bd6f1 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -714,7 +714,7 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand /* Parse the buffer */ parse_node_tree_t parse_tree; - parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Find the last statement */ const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); @@ -1716,7 +1716,7 @@ class highlighter_t { /* Parse the tree */ this->parse_tree.clear(); - parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); } /* Perform highlighting, returning an array of colors */ @@ -2062,7 +2062,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() /* Parse the buffer */ parse_node_tree_t parse_tree; - parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); #if 0 const wcstring dump = parse_dump_tree(parse_tree, buff); diff --git a/parse_tree.cpp b/parse_tree.cpp index 536520fc..b55f43bb 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -973,15 +973,6 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) } } -parse_t::parse_t() : parser(new parse_ll_t()) -{ -} - -parse_t::~parse_t() -{ - delete parser; -} - static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) { parse_keyword_t result = parse_keyword_none; @@ -1056,9 +1047,10 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) return result; } -bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) +bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { - this->parser->set_should_generate_error_messages(errors != NULL); + parse_ll_t parser; + parser.set_should_generate_error_messages(errors != NULL); /* Construct the tokenizer */ tok_flags_t tok_options = 0; @@ -1090,16 +1082,16 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags } /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */ - this->parser->accept_tokens(queue[0], queue[1]); + parser.accept_tokens(queue[0], queue[1]); /* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */ if (queue[1].type == parse_special_type_tokenizer_error) { - this->parser->report_tokenizer_error(queue[1], tok_last(&tok)); + parser.report_tokenizer_error(queue[1], tok_last(&tok)); } /* Handle errors */ - if (this->parser->has_fatal_error()) + if (parser.has_fatal_error()) { if (parse_flags & parse_flag_continue_after_error) { @@ -1108,8 +1100,8 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags /* Mark a special error token, and then keep going */ const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[error_token_idx].source_start, queue[error_token_idx].source_length}; - this->parser->accept_tokens(token, kInvalidToken); - this->parser->reset_symbols(); + parser.accept_tokens(token, kInvalidToken); + parser.reset_symbols(); } else { @@ -1123,10 +1115,10 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags // Teach each node where its source range is - this->parser->determine_node_ranges(); + parser.determine_node_ranges(); // Acquire the output from the parser - this->parser->acquire_output(output, errors); + parser.acquire_output(output, errors); #if 0 //wcstring result = dump_tree(this->parser->nodes, str); @@ -1135,40 +1127,7 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags #endif // Indicate if we had a fatal error - return ! this->parser->has_fatal_error(); -} - -bool parse_t::parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) -{ - parse_t parse; - return parse.parse_internal(str, flags, output, errors, log_it); -} - -bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors) -{ - const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1}; - - // Only strings can have keywords. So if we have a keyword, the type must be a string - assert(keyword == parse_keyword_none || token_type == parse_token_type_string); - - parse_token_t token; - token.type = token_type; - token.keyword = keyword; - token.source_start = -1; - token.source_length = 0; - - bool wants_errors = (errors != NULL); - this->parser->set_should_generate_error_messages(wants_errors); - - /* Passing invalid_token here is totally wrong. This code is only used in testing however. */ - this->parser->accept_tokens(token, invalid_token); - - return ! this->parser->has_fatal_error(); -} - -void parse_t::clear() -{ - this->parser->reset_symbols_and_nodes(); + return ! parser.has_fatal_error(); } const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const diff --git a/parse_tree.h b/parse_tree.h index 76cd80ec..6ce82299 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -74,28 +74,6 @@ enum }; typedef unsigned int parse_tree_flags_t; -class parse_ll_t; -class parse_t -{ - parse_ll_t * const parser; - - bool parse_internal(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); - -public: - parse_t(); - ~parse_t(); - - /* Parse a string all at once */ - static bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); - - /* Parse a single token */ - bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors); - - /* Reset, ready to parse something else */ - void clear(); - -}; - wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); wcstring token_type_description(parse_token_type_t type); @@ -218,6 +196,9 @@ public: parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; }; +/* The big entry point. Parse a string! */ +bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + /* Fish grammar: # A job_list is a list of jobs, separated by semicolons or newlines diff --git a/parse_util.cpp b/parse_util.cpp index cf196db1..027a0e9b 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -878,7 +878,7 @@ std::vector parse_util_compute_indents(const wcstring &src) /* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */ parse_node_tree_t tree; - parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); + parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */ node_offset_t start_node_idx = 0; @@ -994,7 +994,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars // Parse the input string into a parse tree // Some errors are detected here - bool parsed = parse_t::parse(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors); + bool parsed = parse_tree_from_string(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors); if (! parsed) { errored = true; diff --git a/parser.cpp b/parser.cpp index 5f3f5dc1..e2259689 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2581,7 +2581,7 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl /* Parse the source into a tree, if we can */ parse_node_tree_t tree; - if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL)) + if (! parse_tree_from_string(cmd, parse_flag_none, &tree, NULL)) { return 1; } diff --git a/reader.cpp b/reader.cpp index a2bda51e..6cf77ae4 100644 --- a/reader.cpp +++ b/reader.cpp @@ -664,7 +664,7 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso /* Parse this subcmd */ parse_node_tree_t parse_tree; - parse_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + parse_tree_from_string(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Look for plain statements where the cursor is at the end of the command */ const parse_node_t *matching_cmd_node = NULL; -- cgit v1.2.3