Fix to indent comments in blocks properly (#1710)

This makes two changes to parse trees: 1. Unmaterialized nodes no longer have an invalid source location. For example, with the code `while false;end` there are no tokens associated with the while loop's job_list, and therefore it is unmaterialized. Previously its source offset would have been SOURCE_OFFSET_INVALID. Now it has a zero source length and an offset equal to the end of the while loop (i.e. the semicolon). Correspondingly, the has_source function now checks the length instead of the offset. 2. Special (comment and error) nodes have always been "disconnected," meaning they are not the child of any other node. However, they now have their parent offsets set to whatever the top of the node stack was when the node was encountered. This gives us a sense of which node the comment is "in", e.g. if we are constructing a job list then the comment's parent will be the job list. This lets us determine the comment's indent.
author: ridiculousfish <corydoras@ridiculousfish.com> 2014-09-29 11:29:50 -0700
committer: ridiculousfish <corydoras@ridiculousfish.com> 2014-09-29 11:30:21 -0700
commit: 27dd37ebb4c660ab122ece69bc187b36e390b25b (patch)
tree: 9e89fdd3c6e8692bba41f398ca3388673101a831 /parse_util.cpp
parent: a186f04f15773b4595d0e359d1e27a903a68d3f7 (diff)
1 files changed, 50 insertions, 5 deletions
diff --git a/parse_util.cpp b/parse_util.cpp
index 13eca166..a941113f 100644
--- a/parse_util.cpp
+++ b/parse_util.cpp
@@ -840,7 +840,7 @@ static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset
     if (node_idx > *max_visited_node_idx)
         *max_visited_node_idx = node_idx;
 
-    /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch; the other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */
+    /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch, which wraps a case_item_list instead of a job_list. The other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */
 
     const parse_node_t &node = tree.at(node_idx);
     const parse_token_type_t node_type = node.type;
@@ -877,10 +877,39 @@ static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset
 
 
     /* Store the indent into the indent array */
-    if (node.has_source())
+    if (node.source_start != SOURCE_OFFSET_INVALID && node.source_start < indents->size())
     {
-        assert(node.source_start < indents->size());
-        indents->at(node.source_start) = node_indent;
+        if (node.has_source())
+        {
+            /* A normal non-empty node. Store the indent unconditionally. */
+            indents->at(node.source_start) = node_indent;
+        }
+        else
+        {
+            /* An empty node. We have a source offset but no source length. This can come about when a node is legitimately empty:
+
+                  while true; end
+
+               The job_list inside the while loop is empty. It still has a source offset (at the end of the while statement) but no source extent.
+               We still need to capture that indent, because there may be comments inside:
+                    while true
+                       # loop forever
+                    end
+
+               The 'loop forever' comment must be indented, by virtue of storing the indent.
+
+               Now consider what happens if we remove the end:
+
+                   while true
+                     # loop forever
+
+                Now both the job_list and end_command are unmaterialized. However, we want the indent to be of the job_list and not the end_command.  Therefore, we only store the indent if it's bigger.
+            */
+            if (node_indent > indents->at(node.source_start))
+            {
+                indents->at(node.source_start) = node_indent;
+            }
+        }
     }
 
 
@@ -900,7 +929,7 @@ std::vector<int> parse_util_compute_indents(const wcstring &src)
 
     /* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */
     parse_node_tree_t tree;
-    parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
+    parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_include_comments | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
 
     /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */
     node_offset_t start_node_idx = 0;
@@ -922,6 +951,22 @@ std::vector<int> parse_util_compute_indents(const wcstring &src)
         start_node_idx = max_visited_node_idx + 1;
     }
 
+    /* Handle comments. Each comment node has a parent (which is whatever the top of the symbol stack was when the comment was encountered). So the source range of the comment has the same indent as its parent. */
+    const size_t tree_size = tree.size();
+    for (node_offset_t i=0; i < tree_size; i++)
+    {
+        const parse_node_t &node = tree.at(i);
+        if (node.type == parse_special_type_comment && node.has_source() && node.parent < tree_size)
+        {
+            const parse_node_t &parent = tree.at(node.parent);
+            if (parent.source_start != SOURCE_OFFSET_INVALID)
+            {
+                indents.at(node.source_start) = indents.at(parent.source_start);
+            }
+        }
+    }
+
+    /* Now apply the indents. The indents array has -1 for places where the indent does not change, so start at each value and extend it along the run of -1s */
     int last_indent = 0;
     for (size_t i=0; i<src_size; i++)
     {
author	ridiculousfish <corydoras@ridiculousfish.com>	2014-09-29 11:29:50 -0700
committer	ridiculousfish <corydoras@ridiculousfish.com>	2014-09-29 11:30:21 -0700
commit	27dd37ebb4c660ab122ece69bc187b36e390b25b (patch)
tree	9e89fdd3c6e8692bba41f398ca3388673101a831 /parse_util.cpp
parent	a186f04f15773b4595d0e359d1e27a903a68d3f7 (diff)