From 096f8504335577d05392436ddcffd5bceabef6d4 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 12 Jan 2014 22:39:12 -0800
Subject: Eliminate class parse_t

---
 builtin.cpp    |   4 +-
 complete.cpp   |   2 +-
 fish_tests.cpp | 145 ++++++++++++++++++++++++---------------------------------
 highlight.cpp  |   6 +--
 parse_tree.cpp |  63 +++++--------------------
 parse_tree.h   |  25 ++--------
 parse_util.cpp |   4 +-
 parser.cpp     |   2 +-
 reader.cpp     |   2 +-
 9 files changed, 86 insertions(+), 167 deletions(-)

diff --git a/builtin.cpp b/builtin.cpp
index e4a3bf6f..4aab71d4 100644
--- a/builtin.cpp
+++ b/builtin.cpp
@@ -4298,7 +4298,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
         const wcstring src = str2wcstring(&txt.at(0), txt.size());
         parse_node_tree_t parse_tree;
         parse_error_list_t errors;
-        bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors, true);
+        bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors, true);
         if (! success)
         {
             stdout_buffer.append(L"Parsing failed:\n");
@@ -4311,7 +4311,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
             stdout_buffer.append(L"(Reparsed with continue after error)\n");
             parse_tree.clear();
             errors.clear();
-            parse_t::parse(src, parse_flag_continue_after_error, &parse_tree, &errors, true);
+            parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors, true);
         }
         const wcstring dump = parse_dump_tree(parse_tree, src);
         stdout_buffer.append(dump);
diff --git a/complete.cpp b/complete.cpp
index fdc62e1e..33e0536b 100644
--- a/complete.cpp
+++ b/complete.cpp
@@ -1839,7 +1839,7 @@ void complete(const wcstring &cmd_with_subcmds, std::vector<completion_t> &comps
         //const wcstring prev_token(prev_begin, prev_token_len);
         
         parse_node_tree_t tree;
-        parse_t::parse(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL);
+        parse_tree_from_string(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL);
         
         /* Find the plain statement that contains the position */
         const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL);
diff --git a/fish_tests.cpp b/fish_tests.cpp
index 0ce1092a..070004b1 100644
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@@ -660,11 +660,11 @@ static void test_parser()
         err(L"Invalid block mode when evaluating undetected");
     }
     
-    /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
-#if 1
     /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */
     say(L"Testing recursion detection");
     parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP);
+#if 0
+    /* This is disabled since it produces a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
     parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP);
 #endif
 }
@@ -720,10 +720,9 @@ static void test_cancellation()
     test_1_cancellation(L"while true ; end");
     fprintf(stderr, ".");
     
-    test_1_cancellation(L"for i in (whiel true ; end) ; end");
+    test_1_cancellation(L"for i in (while true ; end) ; end");
     fprintf(stderr, ".");
 
-    
     fprintf(stderr, "\n");
     
     /* Restore signal handling */
@@ -2234,7 +2233,7 @@ static void test_new_parser_correctness(void)
         const parser_test_t *test = &parser_tests[i];
 
         parse_node_tree_t parse_tree;
-        bool success = parse_t::parse(test->src, parse_flag_none, &parse_tree, NULL);
+        bool success = parse_tree_from_string(test->src, parse_flag_none, &parse_tree, NULL);
         say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no");
         if (success && ! test->ok)
         {
@@ -2248,95 +2247,75 @@ static void test_new_parser_correctness(void)
     say(L"Parse tests complete");
 }
 
-struct parser_fuzz_token_t
-{
-    parse_token_type_t token_type;
-    parse_keyword_t keyword;
-
-    parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none)
-    {
-    }
-};
-
-static bool increment(std::vector<parser_fuzz_token_t> &tokens)
+/* Given that we have an array of 'fuzz_count' strings, we wish to enumerate all permutations of 'len' values. We do this by incrementing an integer, interpreting it as "base fuzz_count". */
+static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_count, size_t len, size_t permutation, wcstring *out_str)
 {
-    size_t i, end = tokens.size();
-    for (i=0; i < end; i++)
+    out_str->clear();
+    
+    size_t remaining_permutation = permutation;
+    for (size_t i=0; i < len; i++)
     {
-        bool wrapped = false;
-
-        struct parser_fuzz_token_t &token = tokens[i];
-        bool incremented_in_keyword = false;
-        if (token.token_type == parse_token_type_string)
-        {
-            // try incrementing the keyword
-            token.keyword++;
-            if (token.keyword <= LAST_KEYWORD)
-            {
-                incremented_in_keyword = true;
-            }
-            else
-            {
-                token.keyword = parse_keyword_none;
-                incremented_in_keyword = false;
-            }
-        }
-
-        if (! incremented_in_keyword)
-        {
-            token.token_type++;
-            // Skip the very special parse_token_type_terminate, since that's always the last thing delivered
-            if (token.token_type == parse_token_type_terminate)
-            {
-                token.token_type++;
-            }
-            
-            if (token.token_type > LAST_TERMINAL_TYPE)
-            {
-                token.token_type = FIRST_TERMINAL_TYPE;
-                wrapped = true;
-            }
-        }
-
-        if (! wrapped)
-        {
-            break;
-        }
+        size_t idx = remaining_permutation % fuzz_count;
+        remaining_permutation /= fuzz_count;
+        
+        out_str->append(fuzzes[idx]);
+        out_str->push_back(L' ');
     }
-    return i == end;
+    // Return false if we wrapped
+    return remaining_permutation == 0;
 }
 
 static void test_new_parser_fuzzing(void)
 {
     say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
+    const wcstring fuzzes[] =
+    {
+        L"if",
+        L"else",
+        L"for",
+        L"in",
+        L"while",
+        L"begin",
+        L"function",
+        L"switch",
+        L"case",
+        L"end",
+        L"and",
+        L"or",
+        L"not",
+        L"command",
+        L"builtin",
+        L"foo",
+        L"|",
+        L"^",
+        L"&",
+        L";",
+    };
+    
+    /* Generate a list of strings of all keyword / token combinations. */
+    wcstring src;
+    src.reserve(128);
+    
+    parse_node_tree_t node_tree;
+    parse_error_list_t errors;
+
     double start = timef();
-    bool log_it = false;
-    // ensure nothing crashes
-    size_t max = 4;
-    for (size_t len=1; len <= max; len++)
+    bool log_it = true;
+    size_t max_len = 5;
+    for (size_t len = 0; len < max_len; len++)
     {
         if (log_it)
-            fprintf(stderr, "%lu / %lu...", len, max);
-        std::vector<parser_fuzz_token_t> tokens(len);
-        size_t count = 0;
-        parse_t parser;
-        parse_node_tree_t parse_tree;
-        do
-        {
-            parser.clear();
-            parse_tree.clear();
-            count++;
-            for (size_t i=0; i < len; i++)
-            {
-                const parser_fuzz_token_t &token = tokens[i];
-                parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL);
-            }
+            fprintf(stderr, "%lu / %lu...", len, max_len);
 
-            // keep going until we wrap
+        /* We wish to look at all permutations of 4 elements of 'fuzzes' (with replacement). Construct an int and keep incrementing it. */
+        size_t permutation = 0;
+        while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src))
+        {
+            parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors);
         }
-        while (! increment(tokens));
         if (log_it)
-            fprintf(stderr, "done (%lu)\n", count);
+            fprintf(stderr, "done (%lu)\n", permutation);
+        
     }
     double end = timef();
     if (log_it)
@@ -2352,7 +2331,7 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
     
     bool result = false;
     parse_node_tree_t tree;
-    if (parse_t::parse(src, parse_flag_none, &tree, NULL))
+    if (parse_tree_from_string(src, parse_flag_none, &tree, NULL))
     {
         /* Get the statement. Should only have one */
         const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement);
@@ -2432,7 +2411,7 @@ static void test_new_parser_ad_hoc()
     /* Ensure that 'case' terminates a job list */
     const wcstring src = L"switch foo ; case bar; case baz; end";
     parse_node_tree_t parse_tree;
-    bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL);
+    bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL);
     if (! success)
     {
         err(L"Parsing failed");
@@ -2476,7 +2455,7 @@ static void test_new_parser_errors(void)
         
         parse_error_list_t errors;
         parse_node_tree_t parse_tree;
-        bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors);
+        bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
         if (success)
         {
             err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
diff --git a/highlight.cpp b/highlight.cpp
index e9923fb0..f24bd6f1 100644
--- a/highlight.cpp
+++ b/highlight.cpp
@@ -714,7 +714,7 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand
     
     /* Parse the buffer */
     parse_node_tree_t parse_tree;
-    parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
+    parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
     
     /* Find the last statement */
     const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL);
@@ -1716,7 +1716,7 @@ class highlighter_t
     {
         /* Parse the tree */
         this->parse_tree.clear();
-        parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
+        parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
     }
     
     /* Perform highlighting, returning an array of colors */
@@ -2062,7 +2062,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
     
     /* Parse the buffer */
     parse_node_tree_t parse_tree;
-    parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);
+    parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);
 
 #if 0
     const wcstring dump = parse_dump_tree(parse_tree, buff);
diff --git a/parse_tree.cpp b/parse_tree.cpp
index 536520fc..b55f43bb 100644
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@@ -973,15 +973,6 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
     }
 }
 
-parse_t::parse_t() : parser(new parse_ll_t())
-{
-}
-
-parse_t::~parse_t()
-{
-    delete parser;
-}
-
 static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
 {
     parse_keyword_t result = parse_keyword_none;
@@ -1056,9 +1047,10 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok)
     return result;
 }
 
-bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
+bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
 {
-    this->parser->set_should_generate_error_messages(errors != NULL);
+    parse_ll_t parser;
+    parser.set_should_generate_error_messages(errors != NULL);
 
     /* Construct the tokenizer */
     tok_flags_t tok_options = 0;
@@ -1090,16 +1082,16 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
         }
         
         /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */
-        this->parser->accept_tokens(queue[0], queue[1]);
+        parser.accept_tokens(queue[0], queue[1]);
         
         /* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */
         if (queue[1].type == parse_special_type_tokenizer_error)
         {
-            this->parser->report_tokenizer_error(queue[1], tok_last(&tok));
+            parser.report_tokenizer_error(queue[1], tok_last(&tok));
         }
         
         /* Handle errors */
-        if (this->parser->has_fatal_error())
+        if (parser.has_fatal_error())
         {
             if (parse_flags & parse_flag_continue_after_error)
             {
@@ -1108,8 +1100,8 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
 
                 /* Mark a special error token, and then keep going */
                 const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[error_token_idx].source_start, queue[error_token_idx].source_length};
-                this->parser->accept_tokens(token, kInvalidToken);
-                this->parser->reset_symbols();
+                parser.accept_tokens(token, kInvalidToken);
+                parser.reset_symbols();
             }
             else
             {
@@ -1123,10 +1115,10 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
 
 
     // Teach each node where its source range is
-    this->parser->determine_node_ranges();
+    parser.determine_node_ranges();
     
     // Acquire the output from the parser
-    this->parser->acquire_output(output, errors);
+    parser.acquire_output(output, errors);
 
 #if 0
     //wcstring result = dump_tree(this->parser->nodes, str);
@@ -1135,40 +1127,7 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
 #endif
     
     // Indicate if we had a fatal error
-    return ! this->parser->has_fatal_error();
-}
-
-bool parse_t::parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
-{
-    parse_t parse;
-    return parse.parse_internal(str, flags, output, errors, log_it);
-}
-
-bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
-{
-    const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1};
-    
-    // Only strings can have keywords. So if we have a keyword, the type must be a string
-    assert(keyword == parse_keyword_none || token_type == parse_token_type_string);
-
-    parse_token_t token;
-    token.type = token_type;
-    token.keyword = keyword;
-    token.source_start = -1;
-    token.source_length = 0;
-    
-    bool wants_errors = (errors != NULL);
-    this->parser->set_should_generate_error_messages(wants_errors);
-
-    /* Passing invalid_token here is totally wrong. This code is only used in testing however. */
-    this->parser->accept_tokens(token, invalid_token);
-
-    return ! this->parser->has_fatal_error();
-}
-
-void parse_t::clear()
-{
-    this->parser->reset_symbols_and_nodes();
+    return ! parser.has_fatal_error();
 }
 
 const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
diff --git a/parse_tree.h b/parse_tree.h
index 76cd80ec..6ce82299 100644
--- a/parse_tree.h
+++ b/parse_tree.h
@@ -74,28 +74,6 @@ enum
 };
 typedef unsigned int parse_tree_flags_t;
 
-class parse_ll_t;
-class parse_t
-{
-    parse_ll_t * const parser;
-
-    bool parse_internal(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
-
-public:
-    parse_t();
-    ~parse_t();
-
-    /* Parse a string all at once */
-    static bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
-
-    /* Parse a single token */
-    bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
-    
-    /* Reset, ready to parse something else */
-    void clear();
-
-};
-
 wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
 
 wcstring token_type_description(parse_token_type_t type);
@@ -218,6 +196,9 @@ public:
     parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
 };
 
+/* The big entry point. Parse a string! */
+bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
+
 /* Fish grammar:
 
 # A job_list is a list of jobs, separated by semicolons or newlines
diff --git a/parse_util.cpp b/parse_util.cpp
index cf196db1..027a0e9b 100644
--- a/parse_util.cpp
+++ b/parse_util.cpp
@@ -878,7 +878,7 @@ std::vector<int> parse_util_compute_indents(const wcstring &src)
     
     /* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */
     parse_node_tree_t tree;
-    parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
+    parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
     
     /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */
     node_offset_t start_node_idx = 0;
@@ -994,7 +994,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars
     
     // Parse the input string into a parse tree
     // Some errors are detected here
-    bool parsed = parse_t::parse(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors);
+    bool parsed = parse_tree_from_string(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors);
     if (! parsed)
     {
         errored = true;
diff --git a/parser.cpp b/parser.cpp
index 5f3f5dc1..e2259689 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -2581,7 +2581,7 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl
     
     /* Parse the source into a tree, if we can */
     parse_node_tree_t tree;
-    if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL))
+    if (! parse_tree_from_string(cmd, parse_flag_none, &tree, NULL))
     {
         return 1;
     }
diff --git a/reader.cpp b/reader.cpp
index a2bda51e..6cf77ae4 100644
--- a/reader.cpp
+++ b/reader.cpp
@@ -664,7 +664,7 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso
     
     /* Parse this subcmd */
     parse_node_tree_t parse_tree;
-    parse_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
+    parse_tree_from_string(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
     
     /* Look for plain statements where the cursor is at the end of the command */
     const parse_node_t *matching_cmd_node = NULL;
-- 
cgit v1.2.3