Fix to indent comments in blocks properly (#1710)

This makes two changes to parse trees: 1. Unmaterialized nodes no longer have an invalid source location For example, with the code `while false;end` there are no tokens associated with the while loop's job_list, and therefore it is unmaterialized. Previously it would have had a SOURCE_OFFSET_INVALID. But now it has a zero source length, but an offset equal to the end of the while loop (i.e. the semicolon), and a zero length. Correspondingly, the has_source function now checks the length instead of the offset. 2. Special (comment and error) nodes have always been "disconnected," meaning they are not the child of any other node. However, they now have their parent offsets set to whatever the top of the node stack was when the node was encountered. This gives us a sense of which node the comment is "in", e.g. if we are constructing a job list then the comment's parent will be the job list. This lets us determine the comment's indent.
author: ridiculousfish <corydoras@ridiculousfish.com> 2014-09-29 11:29:50 -0700
committer: ridiculousfish <corydoras@ridiculousfish.com> 2014-09-29 11:30:21 -0700
commit: 27dd37ebb4c660ab122ece69bc187b36e390b25b (patch)
tree: 9e89fdd3c6e8692bba41f398ca3388673101a831 /parse_tree.cpp
parent: a186f04f15773b4595d0e359d1e27a903a68d3f7 (diff)
1 files changed, 32 insertions, 11 deletions
diff --git a/parse_tree.cpp b/parse_tree.cpp
index 4825c6eb..ea0b0abd 100644
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@@ -13,13 +13,6 @@ static bool production_is_empty(const production_t *production)
     return (*production)[0] == token_type_invalid;
 }
 
-void swap2(parse_node_tree_t &a, parse_node_tree_t &b)
-{
-    fprintf(stderr, "Swapping!\n");
-    // This uses the base vector implementation
-    a.swap(b);
-}
-
 /** Returns a string description of this parse error */
 wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring &prefix, bool is_interactive, bool skip_caret) const
 {
@@ -682,6 +675,8 @@ void parse_ll_t::dump_stack(void) const
 // Give each node a source range equal to the union of the ranges of its children
 // Terminal nodes already have source ranges (and no children)
 // Since children always appear after their parents, we can implement this very simply by walking backwards
+// We then do a second pass to give empty nodes an empty source range (but with a valid offset)
+// We do this by walking forward. If a child of a node has an invalid source range, we set it equal to the end of the source range of its previous child
 void parse_ll_t::determine_node_ranges(void)
 {
     size_t idx = nodes.size();
@@ -712,6 +707,30 @@ void parse_ll_t::determine_node_ranges(void)
             parent->source_length = max_end - min_start;
         }
     }
+
+    /* Forwards pass */
+    size_t size = nodes.size();
+    for (idx = 0; idx < size; idx++)
+    {
+        /* Since we populate the source range based on the sibling node, it's simpler to walk over the children of each node.
+        We keep a running "child_source_cursor" which is meant to be the end of the child's source range. It's initially set to the beginning of the parent' source range. */
+        parse_node_t *parent = &nodes[idx];
+        // If the parent doesn't have a valid source range, then none of its children will either; skip it entirely
+        if (parent->source_start == SOURCE_OFFSET_INVALID)
+        {
+            continue;
+        }
+        source_offset_t child_source_cursor = parent->source_start;
+        for (size_t child_idx = 0; child_idx < parent->child_count; child_idx++)
+        {
+            parse_node_t *child = &nodes[parent->child_start + child_idx];
+            if (child->source_start == SOURCE_OFFSET_INVALID)
+            {
+                child->source_start = child_source_cursor;
+            }
+            child_source_cursor = child->source_start + child->source_length;
+        }
+    }
 }
 
 void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors)
@@ -1002,10 +1021,12 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
     // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty.
     if (token1.type == parse_special_type_parse_error || token1.type == parse_special_type_tokenizer_error || token1.type == parse_special_type_comment)
     {
-        parse_node_t err_node(token1.type);
-        err_node.source_start = token1.source_start;
-        err_node.source_length = token1.source_length;
-        nodes.push_back(err_node);
+        /* We set the special node's parent to the top of the stack. This means that we have an asymmetric relationship: the special node has a parent (which is the node we were trying to generate when we encountered the special node), but the parent node does not have the special node as a child. This means for example that parents don't have to worry about tracking any comment nodes, but we can still recover the parent from the comment. */
+        parse_node_t special_node(token1.type);
+        special_node.parent = symbol_stack.back().node_idx;
+        special_node.source_start = token1.source_start;
+        special_node.source_length = token1.source_length;
+        nodes.push_back(special_node);
         consumed = true;
 
         /* tokenizer errors are fatal */
author	ridiculousfish <corydoras@ridiculousfish.com>	2014-09-29 11:29:50 -0700
committer	ridiculousfish <corydoras@ridiculousfish.com>	2014-09-29 11:30:21 -0700
commit	27dd37ebb4c660ab122ece69bc187b36e390b25b (patch)
tree	9e89fdd3c6e8692bba41f398ca3388673101a831 /parse_tree.cpp
parent	a186f04f15773b4595d0e359d1e27a903a68d3f7 (diff)