diff options
Diffstat (limited to 'src/parse_tree.h')
-rw-r--r-- | src/parse_tree.h | 422 |
1 files changed, 213 insertions, 209 deletions
diff --git a/src/parse_tree.h b/src/parse_tree.h index 9fd17645..4ec98ae8 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -1,20 +1,17 @@ -/**\file parse_tree.h - - Programmatic representation of fish code. -*/ +// Programmatic representation of fish code. #ifndef FISH_PARSE_PRODUCTIONS_H #define FISH_PARSE_PRODUCTIONS_H #include <assert.h> +#include <stdbool.h> #include <stddef.h> -#include <vector> -#include <memory> #include <sys/types.h> -#include <stdbool.h> +#include <memory> +#include <vector> #include "common.h" -#include "tokenizer.h" #include "parse_constants.h" +#include "tokenizer.h" class parse_node_tree_t; @@ -26,13 +23,12 @@ typedef uint32_t source_offset_t; #define SOURCE_OFFSET_INVALID (static_cast<source_offset_t>(-1)) -/** A struct representing the token type that we use internally */ -struct parse_token_t -{ - enum parse_token_type_t type; // The type of the token as represented by the parser - enum parse_keyword_t keyword; // Any keyword represented by this token - bool has_dash_prefix; // Hackish: whether the source contains a dash prefix - bool is_help_argument; // Hackish: whether the source looks like '-h' or '--help' +/// A struct representing the token type that we use internally. +struct parse_token_t { + enum parse_token_type_t type; // The type of the token as represented by the parser + enum parse_keyword_t keyword; // Any keyword represented by this token + bool has_dash_prefix; // Hackish: whether the source contains a dash prefix + bool is_help_argument; // Hackish: whether the source looks like '-h' or '--help' source_offset_t source_start; source_offset_t source_length; @@ -40,24 +36,20 @@ struct parse_token_t wcstring user_presentable_description() const; }; - -enum -{ +enum { parse_flag_none = 0, - /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */ + /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of + /// disconnected trees. This is intended to be used by syntax highlighting. parse_flag_continue_after_error = 1 << 0, - - /* Include comment tokens */ + /// Include comment tokens. parse_flag_include_comments = 1 << 1, - - /* Indicate that the tokenizer should accept incomplete tokens */ + /// Indicate that the tokenizer should accept incomplete tokens */ parse_flag_accept_incomplete_tokens = 1 << 2, - - /* Indicate that the parser should not generate the terminate token, allowing an 'unfinished' tree where some nodes may have no productions. */ + /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished' + /// tree where some nodes may have no productions. parse_flag_leave_unterminated = 1 << 3, - - /* Indicate that the parser should generate job_list entries for blank lines. */ + /// Indicate that the parser should generate job_list entries for blank lines. parse_flag_show_blank_lines = 1 << 4 }; typedef unsigned int parse_tree_flags_t; @@ -67,247 +59,259 @@ wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); const wchar_t *token_type_description(parse_token_type_t type); const wchar_t *keyword_description(parse_keyword_t type); -/* Node flags */ -enum -{ - /* Flag indicating that the node has associated comment nodes */ +// Node flags. +enum { + /// Flag indicating that the node has associated comment nodes. parse_node_flag_has_comments = 1 << 0, }; typedef uint8_t parse_node_flags_t; -/* Node-type specific tag value */ +/// Node-type specific tag value. typedef uint8_t parse_node_tag_t; -/** Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields is important. */ -class parse_node_t -{ -public: - /* Start in the source code */ +/// Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields +/// is important. +class parse_node_t { + public: + // Start in the source code. source_offset_t source_start; - - /* Length of our range in the source code */ + // Length of our range in the source code. source_offset_t source_length; - - /* Parent */ + // Parent node_offset_t parent; - - /* Children */ + // Children node_offset_t child_start; - - /* Number of children */ + // Number of children. uint8_t child_count; - - /* Type of the node */ + // Type of the node. enum parse_token_type_t type; - - /* Keyword associated with node */ + // Keyword associated with node. enum parse_keyword_t keyword; - - /* Node flags */ - parse_node_flags_t flags:4; - - /* This is used to store e.g. the statement decoration. */ - parse_node_tag_t tag:4; - - /* Description */ + // Node flags. + parse_node_flags_t flags : 4; + // This is used to store e.g. the statement decoration. + parse_node_tag_t tag : 4; + // Description wcstring describe(void) const; - /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : - source_start(SOURCE_OFFSET_INVALID), - source_length(0), - parent(NODE_OFFSET_INVALID), - child_start(0), - child_count(0), - type(ty), - keyword(parse_keyword_none), - flags(0), - tag(0) - { - } - - node_offset_t child_offset(node_offset_t which) const - { + // Constructor + explicit parse_node_t(parse_token_type_t ty) + : source_start(SOURCE_OFFSET_INVALID), + source_length(0), + parent(NODE_OFFSET_INVALID), + child_start(0), + child_count(0), + type(ty), + keyword(parse_keyword_none), + flags(0), + tag(0) {} + + node_offset_t child_offset(node_offset_t which) const { PARSE_ASSERT(which < child_count); return child_start + which; } - /* Indicate if this node has a range of source code associated with it */ - bool has_source() const - { - /* Should never have a nonempty range with an invalid offset */ + /// Indicate if this node has a range of source code associated with it. + bool has_source() const { + // Should never have a nonempty range with an invalid offset. assert(this->source_start != SOURCE_OFFSET_INVALID || this->source_length == 0); return this->source_length > 0; } - /* Indicate if the node has comment nodes */ - bool has_comments() const - { - return !! (this->flags & parse_node_flag_has_comments); - } + /// Indicate if the node has comment nodes. + bool has_comments() const { return !!(this->flags & parse_node_flag_has_comments); } - /* Gets source for the node, or the empty string if it has no source */ - wcstring get_source(const wcstring &str) const - { - if (! has_source()) + /// Gets source for the node, or the empty string if it has no source. + wcstring get_source(const wcstring &str) const { + if (!has_source()) return wcstring(); else return wcstring(str, this->source_start, this->source_length); } - /* Returns whether the given location is within the source range or at its end */ - bool location_in_or_at_end_of_source_range(size_t loc) const - { + /// Returns whether the given location is within the source range or at its end. + bool location_in_or_at_end_of_source_range(size_t loc) const { return has_source() && source_start <= loc && loc - source_start <= source_length; } }; -/* The parse tree itself */ -class parse_node_tree_t : public std::vector<parse_node_t> -{ -public: - +/// The parse tree itself. +class parse_node_tree_t : public std::vector<parse_node_t> { + public: parse_node_tree_t() {} - - parse_node_tree_t(moved_ref<parse_node_tree_t> t) - { - this->swap(t.val); - } - /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. - */ - const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + parse_node_tree_t(moved_ref<parse_node_tree_t> t) { this->swap(t.val); } + + // Get the node corresponding to a child of the given node, or NULL if there is no such child. + // If expected_type is provided, assert that the node has that type. + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, + parse_token_type_t expected_type = token_type_invalid) const; - /* Find the first direct child of the given node of the given type. asserts on failure - */ + // Find the first direct child of the given node of the given type. asserts on failure. const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const; - /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ - const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; + // Get the node corresponding to the parent of the given node, or NULL if there is no such + // child. If expected_type is provided, only returns the parent if it is of that type. Note the + // asymmetry: get_child asserts since the children are known, but get_parent does not, since the + // parent may not be known. + const parse_node_t *get_parent(const parse_node_t &node, + parse_token_type_t expected_type = token_type_invalid) const; - /* Find all the nodes of a given type underneath a given node, up to max_count of them */ + // Find all the nodes of a given type underneath a given node, up to max_count of them. typedef std::vector<const parse_node_t *> parse_node_list_t; - parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count = (size_t)(-1)) const; - - /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */ - const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; - - /* Finds a node containing the given source location. If 'parent' is not NULL, it must be an ancestor. */ - const parse_node_t *find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const; - - /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ + parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, + size_t max_count = (size_t)(-1)) const; + + // Finds the last node of a given type underneath a given node, or NULL if it could not be + // found. If parent is NULL, this finds the last node in the tree of that type. + const parse_node_t *find_last_node_of_type(parse_token_type_t type, + const parse_node_t *parent = NULL) const; + + // Finds a node containing the given source location. If 'parent' is not NULL, it must be an + // ancestor. + const parse_node_t *find_node_matching_source_location(parse_token_type_t type, + size_t source_loc, + const parse_node_t *parent) const; + + // Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has + // a parent. bool argument_list_is_root(const parse_node_t &node) const; - /* Utilities */ + // Utilities - /* Given a plain statement, get the decoration (from the parent node), or none if there is no decoration */ - enum parse_statement_decoration_t decoration_for_plain_statement(const parse_node_t &node) const; + /// Given a plain statement, get the decoration (from the parent node), or none if there is no + /// decoration. + enum parse_statement_decoration_t decoration_for_plain_statement( + const parse_node_t &node) const; - /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */ - bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; + /// Given a plain statement, get the command by reference (from the child node). Returns true if + /// successful. Clears the command on failure. + bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, + wcstring *out_cmd) const; - /* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */ + /// Given a plain statement, return true if the statement is part of a pipeline. If + /// include_first is set, the first command in a pipeline is considered part of it; otherwise + /// only the second or additional commands are. bool statement_is_in_pipeline(const parse_node_t &node, bool include_first) const; - /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ - enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const; + /// Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd). + enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, + wcstring *out_target) const; - /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ + /// If the given node is a block statement, returns the header node (for_header, while_header, + /// begin_header, or function_header). Otherwise returns NULL. const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const; - /* Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return the next element of the given type in that list, and the tail (by reference). Returns NULL if we've exhausted the list. */ - const parse_node_t *next_node_in_node_list(const parse_node_t &node_list, parse_token_type_t item_type, const parse_node_t **list_tail) const; + /// Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return + /// the next element of the given type in that list, and the tail (by reference). Returns NULL + /// if we've exhausted the list. + const parse_node_t *next_node_in_node_list(const parse_node_t &node_list, + parse_token_type_t item_type, + const parse_node_t **list_tail) const; - /* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */ + /// Given a job, return all of its statements. These are 'specific statements' (e.g. + /// symbol_decorated_statement, not symbol_statement). parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; - /* Given a node, return all of its comment nodes. */ + /// Given a node, return all of its comment nodes. parse_node_list_t comment_nodes_for_node(const parse_node_t &node) const; - /* Returns the boolean type for a boolean node */ + /// Returns the boolean type for a boolean node. static enum parse_bool_statement_type_t statement_boolean_type(const parse_node_t &node); - /* Given a job, return whether it should be backgrounded, because it has a & specifier */ + /// Given a job, return whether it should be backgrounded, because it has a & specifier. bool job_should_be_backgrounded(const parse_node_t &job) const; }; -/* The big entry point. Parse a string, attempting to produce a tree for the given goal type */ -bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, parse_token_type_t goal = symbol_job_list); - -/* Fish grammar: - -# A job_list is a list of jobs, separated by semicolons or newlines - - job_list = <empty> | - job job_list | - <TOK_END> job_list - -# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation, and then optionally a background specifier '&' - - job = statement job_continuation optional_background - job_continuation = <empty> | - <TOK_PIPE> statement job_continuation - -# A statement is a normal command, or an if / while / and etc - - statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement - -# A block is a conditional, loop, or begin/end - - if_statement = if_clause else_clause end_command arguments_or_redirections_list - if_clause = <IF> job <TOK_END> andor_job_list job_list - else_clause = <empty> | - <ELSE> else_continuation - else_continuation = if_clause else_clause | - <TOK_END> job_list - - switch_statement = SWITCH argument <TOK_END> case_item_list end_command arguments_or_redirections_list - case_item_list = <empty> | - case_item case_item_list | - <TOK_END> case_item_list - - case_item = CASE argument_list <TOK_END> job_list - - block_statement = block_header job_list end_command arguments_or_redirections_list - block_header = for_header | while_header | function_header | begin_header - for_header = FOR var_name IN argument_list <TOK_END> - while_header = WHILE job <TOK_END> andor_job_list - begin_header = BEGIN - -# Functions take arguments, and require at least one (the name). No redirections allowed. - function_header = FUNCTION argument argument_list <TOK_END> - -# A boolean statement is AND or OR or NOT - boolean_statement = AND statement | OR statement | NOT statement - -# An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean statement - andor_job_list = <empty> | - job andor_job_list | - <TOK_END> andor_job_list - -# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" or "exec" - - decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement | EXEC plain_statement - plain_statement = <TOK_STRING> arguments_or_redirections_list - - argument_list = <empty> | argument argument_list - - arguments_or_redirections_list = <empty> | - argument_or_redirection arguments_or_redirections_list - argument_or_redirection = argument | redirection - argument = <TOK_STRING> - - redirection = <TOK_REDIRECTION> <TOK_STRING> - - optional_background = <empty> | <TOK_BACKGROUND> - - end_command = END - - # A freestanding_argument_list is equivalent to a normal argument list, except it may contain TOK_END (newlines, and even semicolons, for historical reasons - - freestanding_argument_list = <empty> | - argument freestanding_argument_list | - <TOK_END> freestanding_argument_list -*/ +/// The big entry point. Parse a string, attempting to produce a tree for the given goal type. +bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, + parse_node_tree_t *output, parse_error_list_t *errors, + parse_token_type_t goal = symbol_job_list); + +// Fish grammar: +// +// # A job_list is a list of jobs, separated by semicolons or newlines +// +// job_list = <empty> | +// job job_list | +// <TOK_END> job_list +// +// # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases +// like if statements, where we require a command). To represent "non-empty", we require a +// statement, followed by a possibly empty job_continuation, and then optionally a background +// specifier '&' +// +// job = statement job_continuation optional_background +// job_continuation = <empty> | +// <TOK_PIPE> statement job_continuation +// +// # A statement is a normal command, or an if / while / and etc +// +// statement = boolean_statement | block_statement | if_statement | switch_statement | +// decorated_statement +// +// # A block is a conditional, loop, or begin/end +// +// if_statement = if_clause else_clause end_command arguments_or_redirections_list +// if_clause = <IF> job <TOK_END> andor_job_list job_list +// else_clause = <empty> | +// <ELSE> else_continuation +// else_continuation = if_clause else_clause | +// <TOK_END> job_list +// +// switch_statement = SWITCH argument <TOK_END> case_item_list end_command +// arguments_or_redirections_list +// case_item_list = <empty> | +// case_item case_item_list | +// <TOK_END> case_item_list +// +// case_item = CASE argument_list <TOK_END> job_list +// +// block_statement = block_header job_list end_command arguments_or_redirections_list +// block_header = for_header | while_header | function_header | begin_header +// for_header = FOR var_name IN argument_list <TOK_END> +// while_header = WHILE job <TOK_END> andor_job_list +// begin_header = BEGIN +// +// # Functions take arguments, and require at least one (the name). No redirections allowed. +// function_header = FUNCTION argument argument_list <TOK_END> +// +// # A boolean statement is AND or OR or NOT +// boolean_statement = AND statement | OR statement | NOT statement +// +// # An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean +// statement +// andor_job_list = <empty> | +// job andor_job_list | +// <TOK_END> andor_job_list +// +// # A decorated_statement is a command with a list of arguments_or_redirections, possibly with +// "builtin" or "command" or "exec" +// +// decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement | +// EXEC +// +// plain_statement +// plain_statement = <TOK_STRING> arguments_or_redirections_list +// +// argument_list = <empty> | argument argument_list +// +// arguments_or_redirections_list = <empty> | +// argument_or_redirection arguments_or_redirections_list +// argument_or_redirection = argument | redirection +// argument = <TOK_STRING> +// +// redirection = <TOK_REDIRECTION> <TOK_STRING> +// +// optional_background = <empty> | <TOK_BACKGROUND> +// +// end_command = END +// +// # A freestanding_argument_list is equivalent to a normal argument list, except it may contain +// TOK_END (newlines, and even semicolons, for historical reasons +// +// freestanding_argument_list = <empty> | +// argument freestanding_argument_list | +// <TOK_END> freestanding_argument_list #endif |