Finish rewriting detect_errors to use new parser. All tests now pass (!)

author: ridiculousfish <corydoras@ridiculousfish.com> 2013-12-12 18:18:07 -0800
committer: ridiculousfish <corydoras@ridiculousfish.com> 2013-12-12 18:18:07 -0800
commit: 5cf59de6763a0000fdc87f0101ca78bd137dffcc (patch)
tree: b9700e9650a4a06af91ec24dad0c621f8381c3d5
parent: e25d49b80b0668b55a58e1445aa047a68a1043d3 (diff)
10 files changed, 133 insertions, 94 deletions
diff --git a/builtin_complete.cpp b/builtin_complete.cpp
index 14b3a4b7..0cc3b7e7 100644
--- a/builtin_complete.cpp
+++ b/builtin_complete.cpp
@@ -497,15 +497,19 @@ static int builtin_complete(parser_t &parser, wchar_t **argv)
     {
         if (condition && wcslen(condition))
         {
-            if (parser.detect_errors(condition))
+            const wcstring condition_string = condition;
+            parse_error_list_t errors;
+            if (parser.detect_errors(condition_string, &errors))
             {
                 append_format(stderr_buffer,
-                              L"%ls: Condition '%ls' contained a syntax error\n",
+                              L"%ls: Condition '%ls' contained a syntax error",
                               argv[0],
                               condition);
-
-                parser.detect_errors(condition, &stderr_buffer, argv[0]);
-
+                for (size_t i=0; i < errors.size(); i++)
+                {
+                    append_format(stderr_buffer, L"\n%s: ", argv[0]);
+                    stderr_buffer.append(errors.at(i).describe(condition_string));
+                }
                 res = true;
             }
         }
diff --git a/fish_tests.cpp b/fish_tests.cpp
index c43381b3..0c273643 100644
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@@ -584,12 +584,6 @@ static void test_parser()
 
     parser_t parser(PARSER_TYPE_GENERAL, true);
 
-    say(L"Testing null input to parser");
-    if (!parser.detect_errors(NULL))
-    {
-        err(L"Null input to parser.detect_errors undetected");
-    }
-
     say(L"Testing block nesting");
     if (!parser.detect_errors(L"if; end"))
     {
@@ -630,10 +624,28 @@ static void test_parser()
     {
         err(L"'break' command outside of loop block context undetected");
     }
+    
+    if (parser.detect_errors(L"break --help"))
+    {
+        err(L"'break --help' incorrectly marked as error");
+    }
+    
+    if (! parser.detect_errors(L"while false ; function foo ; break ; end ; end "))
+    {
+        err(L"'break' command inside function allowed to break from loop outside it");
+    }
+
+    
     if (!parser.detect_errors(L"exec ls|less") || !parser.detect_errors(L"echo|return"))
     {
         err(L"Invalid pipe command undetected");
     }
+    
+    if (parser.detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end "))
+    {
+        err(L"'break' command inside switch falsely reported as error");
+    }
+    
 
     say(L"Testing basic evaluation");
 #if 0
diff --git a/parse_constants.h b/parse_constants.h
index 706c31b8..7ccc962c 100644
--- a/parse_constants.h
+++ b/parse_constants.h
@@ -104,7 +104,13 @@ enum parse_statement_decoration_t
 enum parse_error_code_t
 {
     parse_error_none,
-    parse_error_generic, //unknown type
+    
+    /* matching values from enum parser_error */
+    parse_error_syntax,
+    parse_error_eval,
+    parse_error_cmdsubst,
+    
+    parse_error_generic, // unclassified error types
     
     parse_error_tokenizer, //tokenizer error
     
diff --git a/parse_tree.cpp b/parse_tree.cpp
index b8cb348c..ad83a0d6 100644
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@@ -1,6 +1,8 @@
 #include "parse_productions.h"
 #include "tokenizer.h"
+#include "fallback.h"
 #include <vector>
+#include <algorithm>
 
 using namespace parse_productions;
 
@@ -32,21 +34,58 @@ wcstring parse_error_t::describe(const wcstring &src) const
             line_end = src.size();
         }
         assert(line_end >= line_start);
-        //fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start);
+        //fprintf(stderr, "source start: %lu, source_length %lu, line start %lu, line end %lu\n", source_start, source_length, line_start, line_end);
         assert(source_start >= line_start);
 
         // Append the line of text
         result.push_back(L'\n');
         result.append(src, line_start, line_end - line_start);
 
-        // Append the caret line
+        // Append the caret line. The input source may include tabs; for that reason we construct a "caret line" that has tabs in corresponding positions
+        wcstring caret_space_line;
+        caret_space_line.reserve(source_start - line_start);
+        for (size_t i=line_start; i < source_start; i++)
+        {
+            wchar_t wc = src.at(i);
+            if (wc == L'\t')
+            {
+                caret_space_line.push_back(L'\t');
+            }
+            else
+            {
+                int width = fish_wcwidth(wc);
+                if (width > 0)
+                {
+                    caret_space_line.append(static_cast<size_t>(width), L' ');
+                }
+            }
+        }
         result.push_back(L'\n');
-        result.append(source_start - line_start, L' ');
+        result.append(caret_space_line);
         result.push_back(L'^');
     }
     return result;
 }
 
+wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix)
+{
+    wcstring target;
+    for (size_t i=0; i < errors.size(); i++)
+    {
+        if (i > 0)
+        {
+            target.push_back(L'\n');
+        }
+        if (prefix != NULL)
+        {
+            target.append(prefix);
+            target.append(L": ");
+        }
+        target.append(errors.at(i).describe(src));
+    }
+    return target;
+}
+
 /** Returns a string description of the given token type */
 wcstring token_type_description(parse_token_type_t type)
 {
diff --git a/parse_tree.h b/parse_tree.h
index e65d1baf..8a0b3eed 100644
--- a/parse_tree.h
+++ b/parse_tree.h
@@ -38,6 +38,9 @@ struct parse_error_t
 };
 typedef std::vector<parse_error_t> parse_error_list_t;
 
+/* Returns a description of a list of parse errors */
+wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix = NULL);
+
 /** A struct representing the token type that we use internally */
 struct parse_token_t
 {
diff --git a/parser.cpp b/parser.cpp
index 6053b4a3..136bc74a 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -565,7 +565,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...)
     va_start(va, str);
     err_buff = vformat_string(str, va);
     va_end(va);
-
 }
 
 /**
@@ -2753,7 +2752,7 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
             case 1:
             {
 
-                wchar_t *subst = wcsndup(paran_begin+1, paran_end-paran_begin-1);
+                const wcstring subst(paran_begin + 1, paran_end);
                 wcstring tmp;
 
                 tmp.append(arg_cpy, paran_begin - arg_cpy);
@@ -2762,17 +2761,16 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
 
 //        debug( 1, L"%ls -> %ls %ls", arg_cpy, subst, tmp.buff );
 
-                err |= parser_t::detect_errors(subst, out, prefix);
+                parse_error_list_t errors;
+                err |= parser_t::detect_errors(subst, &errors);
+                if (out && ! errors.empty())
+                {
+                    out->append(parse_errors_description(errors, subst, prefix));
+                }
 
-                free(subst);
                 free(arg_cpy);
                 arg_cpy = wcsdup(tmp.c_str());
 
-                /*
-                  Do _not_ call sb_destroy on this stringbuffer - it's
-                  buffer is used as the new 'arg_cpy'. It is free'd at
-                  the end of the loop.
-                */
                 break;
             }
         }
@@ -2914,39 +2912,43 @@ struct block_info_t
     block_type_t type; //type of the block
 };
 
-parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix)
+/* Append a syntax error to the given error list */
+static bool append_syntax_error(parse_error_list_t *errors, const parse_node_t &node, const wchar_t *fmt, ...)
 {
-    ASSERT_IS_MAIN_THREAD();
+    parse_error_t error;
+    error.source_start = node.source_start;
+    error.source_length = node.source_length;
+    error.code = parse_error_syntax;
+    
+    va_list va;
+    va_start(va, fmt);
+    error.text = vformat_string(fmt, va);
+    va_end(va);
     
-    if (! buff)
-        return PARSER_TEST_ERROR;
+    errors->push_back(error);
+    return true;
+}
+
+parser_test_error_bits_t parser_t::detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors, const wchar_t *prefix)
+{
+    ASSERT_IS_MAIN_THREAD();
     
-    const wcstring buff_src = buff;
     parse_node_tree_t node_tree;
     parse_error_list_t parse_errors;
     
     // Whether we encountered a parse error
     bool errored = false;
-    long error_line = -1;
     
     // Whether we encountered an unclosed block
     // We detect this via an 'end_command' block without source
     bool has_unclosed_block = false;
     
+    // Parse the input string into a parse tree
+    // Some errors are detected here
     bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors);
     if (! parsed)
     {
-        // report errors
-        if (out)
-        {
-            for (size_t i=0; i < parse_errors.size(); i++)
-            {
-                const parse_error_t &error = parse_errors.at(i);
-                this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str());
-            }
-        }
         errored = true;
-        error_line = __LINE__;
     }
     
     // Expand all commands
@@ -2973,9 +2975,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
                     // Check that we can expand the command
                     if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS))
                     {
-                        error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str());
-                        errored = true;
-                                error_line = __LINE__;
+                        errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG, command.c_str());
                     }
                     
                     // Check that pipes are sound
@@ -2986,9 +2986,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
                         // 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all
                         if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden))
                         {
-                            error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG);
-                            errored = true;
-                                    error_line = __LINE__;
+                            errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG);
                         }
                     }
                     
@@ -3011,9 +3009,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
                         }
                         if (! found_function && ! first_argument_is_help(node_tree, node, buff_src))
                         {
-                            error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
-                            errored = true;
-                                    error_line = __LINE__;
+                            errored = append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG);
                         }
                     }
                     
@@ -3026,7 +3022,6 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
                         const parse_node_t *ancestor = &node;
                         while (ancestor != NULL && ! end_search)
                         {
-                            bool end_search = false;
                             const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor);
                             if (loop_or_function_header != NULL)
                             {
@@ -3037,6 +3032,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
                                         // this is a loop header, so we can break or continue
                                         found_loop = true;
                                         end_search = true;
+                                        break;
                                         
                                     case symbol_function_header:
                                         // this is a function header, so we cannot break or continue. We stop our search here.
@@ -3052,31 +3048,9 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
                             ancestor = node_tree.get_parent(*ancestor);
                         }
                         
-                        
-                        
-                        const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header);
-                        if (function_node == NULL)
+                        if (! found_loop && ! first_argument_is_help(node_tree, node, buff_src))
                         {
-                            // Ok, this looks bad: return not in a function!
-                            // But we allow it if it's 'return --help'
-                            // Get the arguments
-                            bool is_help = false;
-                            const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument);
-                            if (! arg_nodes.empty())
-                            {
-                                // Check the first argument only
-                                const parse_node_t &arg = *arg_nodes.at(0);
-                                const wcstring first_arg_src = arg.get_source(buff_src);
-                                is_help = parser_t::is_help(first_arg_src.c_str(), 3);
-                            }
-                            
-                            // If it's not help, then it's an invalid return
-                            if (! is_help)
-                            {
-                                error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG);
-                                errored = true;
-                                        error_line = __LINE__;
-                            }
+                            errored = append_syntax_error(&parse_errors, node, INVALID_LOOP_ERR_MSG);
                         }
                     }
                 }
@@ -3092,6 +3066,11 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *
     if (has_unclosed_block)
         res |= PARSER_TEST_INCOMPLETE;
     
+    if (out_errors)
+    {
+        out_errors->swap(parse_errors);
+    }
+    
     error_code=0;
 
 
diff --git a/parser.h b/parser.h
index b2fbfe13..fb3efad8 100644
--- a/parser.h
+++ b/parser.h
@@ -11,6 +11,7 @@
 #include "util.h"
 #include "event.h"
 #include "function.h"
+#include "parse_tree.h"
 #include <vector>
 
 enum {
@@ -487,7 +488,7 @@ public:
        \param out if non-null, any errors in the command will be filled out into this buffer
        \param prefix the prefix string to prepend to each error message written to the \c out buffer
     */
-    parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
+    parser_test_error_bits_t detect_errors(const wcstring &buff, parse_error_list_t *out_errors = NULL, const wchar_t *prefix = NULL);
     parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL);
 
     /**
diff --git a/reader.cpp b/reader.cpp
index df7f070e..3eeae627 100644
--- a/reader.cpp
+++ b/reader.cpp
@@ -2472,12 +2472,11 @@ void reader_run_command(parser_t &parser, const wcstring &cmd)
 
 int reader_shell_test(const wchar_t *b)
 {
-    int res = parser_t::principal_parser().detect_errors(b);
+    wcstring bstr = b;
+    int res = parser_t::principal_parser().detect_errors(bstr);
 
     if (res & PARSER_TEST_ERROR)
     {
-        wcstring sb;
-
         const int tmp[1] = {0};
         const int tmp2[1] = {0};
         const wcstring empty;
@@ -2490,10 +2489,15 @@ int reader_shell_test(const wchar_t *b)
                 tmp,
                 tmp2,
                 0);
-
-
-        parser_t::principal_parser().detect_errors(b, &sb, L"fish");
-        fwprintf(stderr, L"%ls", sb.c_str());
+        
+        parse_error_list_t errors;
+        parser_t::principal_parser().detect_errors(bstr, &errors, L"fish");
+        
+        if (! errors.empty())
+        {
+            const wcstring sb = parse_errors_description(errors, b, L"fish");
+            fwprintf(stderr, L"%ls", sb.c_str());
+        }
     }
     return res;
 }
@@ -3903,13 +3907,14 @@ static int read_ni(int fd, const io_chain_t &io)
             res = 1;
         }
 
-        wcstring sb;
-        if (! parser.detect_errors(str.c_str(), &sb, L"fish"))
+        parse_error_list_t errors;
+        if (! parser.detect_errors(str, &errors, L"fish"))
         {
             parser.eval(str, io, TOP);
         }
         else
         {
+            const wcstring sb = parse_errors_description(errors, str);
             fwprintf(stderr, L"%ls", sb.c_str());
             res = 1;
         }
diff --git a/tests/test7.in b/tests/test7.in
index 22f5d92c..a3ae8360 100644
--- a/tests/test7.in
+++ b/tests/test7.in
@@ -20,15 +20,6 @@ case one
 	echo $status
 end
 
-# Test that non-case tokens inside `switch` don't blow away status
-# (why are these even allowed?)
-false
-switch one
-true
-case one
-	echo $status
-end
-
 #test contains -i
 echo test contains -i
 contains -i string a b c string d
diff --git a/tests/test7.out b/tests/test7.out
index fd3b8a70..bbe2ab1a 100644
--- a/tests/test7.out
+++ b/tests/test7.out
@@ -4,7 +4,6 @@
 0
 
 1
-1
 test contains -i
 4
 nothing
author	ridiculousfish <corydoras@ridiculousfish.com>	2013-12-12 18:18:07 -0800
committer	ridiculousfish <corydoras@ridiculousfish.com>	2013-12-12 18:18:07 -0800
commit	5cf59de6763a0000fdc87f0101ca78bd137dffcc (patch)
tree	b9700e9650a4a06af91ec24dad0c621f8381c3d5
parent	e25d49b80b0668b55a58e1445aa047a68a1043d3 (diff)