diff options
author | ridiculousfish <corydoras@ridiculousfish.com> | 2013-07-22 18:26:15 -0700 |
---|---|---|
committer | ridiculousfish <corydoras@ridiculousfish.com> | 2013-07-22 18:26:15 -0700 |
commit | 3e3eefc2dcb2e0e31b224703a063e05dc8c67996 (patch) | |
tree | 9535fc9f721b7e184bab85a3476b09869d215e4c /parse_tree.h | |
parent | 77b6b0a9b2e7260884064dbac72f17bb5e86431f (diff) |
Improvements to new parser. All functions and completions now parse.
Diffstat (limited to 'parse_tree.h')
-rw-r--r-- | parse_tree.h | 414 |
1 files changed, 32 insertions, 382 deletions
diff --git a/parse_tree.h b/parse_tree.h index 39e370af..6b1fc0d1 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -26,11 +26,11 @@ struct parse_error_t { /** Text of the error */ wcstring text; - + /** Offset and length of the token in the source code that triggered this error */ size_t source_start; size_t source_length; - + /** Return a string describing the error, suitable for presentation to the user */ wcstring describe(const wcstring &src) const; }; @@ -40,8 +40,8 @@ class parse_ll_t; class parse_t { parse_ll_t * const parser; - - public: + +public: parse_t(); bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); }; @@ -49,7 +49,7 @@ class parse_t enum parse_token_type_t { token_type_invalid, - + // Non-terminal tokens symbol_job_list, symbol_job, @@ -61,33 +61,35 @@ enum parse_token_type_t symbol_while_header, symbol_begin_header, symbol_function_header, - + symbol_if_statement, symbol_if_clause, symbol_else_clause, symbol_else_continuation, - + symbol_switch_statement, symbol_case_item_list, symbol_case_item, - + symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - + symbol_argument_list_nonempty, symbol_argument_list, + + symbol_optional_background, // Terminal types parse_token_type_string, parse_token_type_pipe, parse_token_type_redirection, - parse_token_background, + parse_token_type_background, parse_token_type_end, parse_token_type_terminate, - + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; @@ -117,32 +119,32 @@ wcstring keyword_description(parse_keyword_t type); /** Base class for nodes of a parse tree */ class parse_node_t { - public: - +public: + /* Type of the node */ enum parse_token_type_t type; - + /* Start in the source code */ size_t source_start; - + /* Length of our range in the source code */ size_t source_length; /* Children */ node_offset_t child_start; node_offset_t child_count; - + /* Type-dependent data */ uint32_t tag; - + /* Description */ wcstring describe(void) const; - + /* Constructor */ explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) { } - + node_offset_t child_offset(node_offset_t which) const { PARSE_ASSERT(which < child_count); @@ -154,360 +156,6 @@ class parse_node_tree_t : public std::vector<parse_node_t> { }; -namespace parse_symbols -{ - - #define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } - - #define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } - - #define NO_PRODUCTION (-1) - - - template<parse_token_type_t WHICH> - struct Token - { - SYMBOL(WHICH); - - typedef Token<WHICH> t0; - typedef Token<token_type_invalid> t1; - typedef Token<token_type_invalid> t2; - typedef Token<token_type_invalid> t3; - typedef Token<token_type_invalid> t4; - typedef Token<token_type_invalid> t5; - }; - - /* Placeholder */ - typedef Token<token_type_invalid> none; - - struct EMPTY - { - typedef none t0; - typedef none t1; - typedef none t2; - typedef none t3; - typedef none t4; - typedef none t5; - }; - - template<typename T0, typename T1, typename T2 = none, typename T3 = none, typename T4 = none, typename T5 = none> - struct Seq - { - typedef T0 t0; - typedef T1 t1; - typedef T2 t2; - typedef T3 t3; - typedef T4 t4; - typedef T5 t5; - }; - - template<typename P0, typename P1, typename P2 = none, typename P3 = none, typename P4 = none, typename P5 = none> - struct OR - { - typedef P0 p0; - typedef P1 p1; - typedef P2 p2; - typedef P3 p3; - typedef P4 p4; - typedef P5 p5; - }; - - template<parse_keyword_t WHICH> - struct Keyword - { - static inline parse_keyword_t get_token() { return WHICH; } - }; - - struct job; - struct statement; - struct job_continuation; - struct boolean_statement; - struct block_statement; - struct if_statement; - struct if_clause; - struct else_clause; - struct else_continuation; - struct switch_statement; - struct decorated_statement; - struct switch_statement; - struct case_item_list; - struct case_item; - struct argument_list_nonempty; - struct argument_list; - struct block_statement; - struct block_header; - struct for_header; - struct while_header; - struct begin_header; - struct function_header; - struct boolean_statement; - struct decorated_statement; - struct plain_statement; - struct arguments_or_redirections_list; - struct argument_or_redirection; - struct redirection; - struct statement_terminator; - - /* A job_list is a list of jobs, separated by semicolons or newlines */ - struct job_list : OR< - EMPTY, - Seq<job, job_list>, - Seq<Token<parse_token_type_end>, job_list> - > - { - SYMBOL(symbol_job_list) - }; - - /* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ - struct job : Seq<statement, job_continuation> - { - SYMBOL(symbol_job); - }; - - struct job_continuation : OR< - EMPTY, - Seq<Token<parse_token_type_pipe>, statement, job_continuation> - > - { - SYMBOL(symbol_job_continuation); - }; - - /* A statement is a normal command, or an if / while / and etc */ - struct statement : OR< - boolean_statement, - block_statement, - if_statement, - switch_statement, - decorated_statement - > - { - SYMBOL(symbol_statement); - }; - - struct if_statement : Seq<if_clause, else_clause, Keyword<parse_keyword_end> > - { - SYMBOL(symbol_if_statement); - PRODUCE(0) - }; - - struct if_clause : Seq<Keyword<parse_keyword_if>, job, statement_terminator, job_list> - { - SYMBOL(symbol_if_clause); - PRODUCE(0) - }; - - struct else_clause : OR< - EMPTY, - Keyword<parse_keyword_else>, else_continuation - > - { - SYMBOL(symbol_else_clause); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_else: return 1; - default: return 0; - } - } - }; - - struct else_continuation : OR< - Seq<if_clause, else_clause>, - Seq<statement_terminator, job_list> - > - { - SYMBOL(symbol_else_continuation); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_if: return 0; - default: return 1; - } - } - }; - - struct switch_statement : Seq<Keyword<parse_keyword_switch>, Token<parse_token_type_string>, statement_terminator, case_item_list, Keyword<parse_keyword_end> - > - { - SYMBOL(symbol_switch_statement); - }; - - struct case_item_list : OR - < - EMPTY, - case_item, case_item_list - > - { - SYMBOL(symbol_case_item_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_case: return 1; - default: return 0; - } - } - }; - - struct case_item : Seq<Keyword<parse_keyword_case>, argument_list, statement_terminator, job_list> - { - SYMBOL(symbol_case_item); - }; - - struct argument_list_nonempty : Seq<Token<parse_token_type_string>, argument_list> - { - SYMBOL(symbol_argument_list_nonempty); - }; - - struct argument_list : OR<EMPTY, argument_list_nonempty> - { - SYMBOL(symbol_argument_list); - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: return 1; - default: return 0; - } - } - }; - - struct block_statement : Seq<block_header, statement_terminator, job_list, Keyword<parse_keyword_end>, arguments_or_redirections_list> - { - SYMBOL(symbol_block_statement); - PRODUCE(0) - }; - - struct block_header : OR<for_header, while_header, function_header, begin_header> - { - SYMBOL(symbol_block_header); - }; - - struct for_header : Seq<Keyword<parse_keyword_for>, Token<parse_token_type_string>, Keyword<parse_keyword_in>, arguments_or_redirections_list> - { - SYMBOL(symbol_for_header); - }; - - struct while_header : Seq<Keyword<parse_keyword_while>, statement> - { - SYMBOL(symbol_while_header); - }; - - struct begin_header : Keyword<parse_keyword_begin> - { - SYMBOL(symbol_begin_header); - }; - - struct function_header : Keyword<parse_keyword_function> - { - SYMBOL(symbol_function_header); - }; - - /* A boolean statement is AND or OR or NOT */ - struct boolean_statement : OR< - Seq<Keyword<parse_keyword_and>, statement>, - Seq<Keyword<parse_keyword_or>, statement>, - Seq<Keyword<parse_keyword_not>, statement> - > - { - SYMBOL(symbol_boolean_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_and: return 0; - case parse_keyword_or: return 1; - case parse_keyword_not: return 2; - default: return NO_PRODUCTION; - } - } - }; - - /* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ - struct decorated_statement : OR< - Seq<Keyword<parse_keyword_command>, plain_statement>, - Seq<Keyword<parse_keyword_builtin>, plain_statement>, - plain_statement - > - { - SYMBOL(symbol_decorated_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_command: return 0; - case parse_keyword_builtin: return 1; - default: return 2; - } - } - }; - - struct plain_statement : Seq<Token<parse_token_type_string>, arguments_or_redirections_list> - { - SYMBOL(symbol_plain_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - return 0; - } - - }; - - struct arguments_or_redirections_list : OR< - EMPTY, - Seq<argument_or_redirection, arguments_or_redirections_list> > - { - SYMBOL(symbol_arguments_or_redirections_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - case parse_token_type_redirection: - return 1; - default: - return 0; - } - } - }; - - struct argument_or_redirection : OR< - Token<parse_token_type_string>, - redirection - > - { - SYMBOL(symbol_argument_or_redirection); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: return 0; - case parse_token_type_redirection: return 1; - default: return NO_PRODUCTION; - } - } - }; - - struct redirection : Token<parse_token_type_redirection> - { - SYMBOL(parse_token_type_redirection); - }; - - struct statement_terminator : Token<parse_token_type_end> - { - SYMBOL(parse_token_type_end); - }; -} - /* Fish grammar: @@ -520,45 +168,45 @@ namespace parse_symbols # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation job = statement job_continuation - job_continuation = <empty> | + job_continuation = <empty> | <TOK_PIPE> statement job_continuation # A statement is a normal command, or an if / while / and etc statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement - + # A block is a conditional, loop, or begin/end - if_statement = if_clause else_clause <END> + if_statement = if_clause else_clause <END> arguments_or_redirections_list if_clause = <IF> job STATEMENT_TERMINATOR job_list else_clause = <empty> | <ELSE> else_continuation else_continuation = if_clause else_clause | STATEMENT_TERMINATOR job_list - + switch_statement = SWITCH <TOK_STRING> STATEMENT_TERMINATOR case_item_list <END> case_item_list = <empty> | case_item case_item_list case_item = CASE argument_list STATEMENT_TERMINATOR job_list - + argument_list_nonempty = <TOK_STRING> argument_list argument_list = <empty> | argument_list_nonempty - block_statement = block_header STATEMENT_TERMINATOR job_list <END> arguments_or_redirections_list + block_statement = block_header <TOK_END> job_list <END> arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN function_header = FUNCTION function_name argument_list - + # A boolean statement is AND or OR or NOT boolean_statement = AND statement | OR statement | NOT statement - + # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = COMMAND arguments_or_redirections_list + plain_statement = COMMAND arguments_or_redirections_list optional_background arguments_or_redirections_list = <empty> | argument_or_redirection arguments_or_redirections_list @@ -567,6 +215,8 @@ namespace parse_symbols terminator = <TOK_END> | <TOK_BACKGROUND> + optional_background = <empty> | <TOK_BACKGROUND> + */ #endif |