aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/parse_productions.cpp
diff options
context:
space:
mode:
authorGravatar David Adam <zanchey@ucc.gu.uwa.edu.au>2015-07-26 10:20:13 +0800
committerGravatar David Adam <zanchey@ucc.gu.uwa.edu.au>2015-07-26 10:20:13 +0800
commit3929e9de0e69666b37df87347d5ce15663e81347 (patch)
treeb2701c439c0260840ce1c68beaebf7de1178cc53 /src/parse_productions.cpp
parent793e1afa084982dac92c4fe19e50c25e326a79c2 (diff)
parentf4d1657c22c81a7720a91026f915b80d2d6aa6e8 (diff)
Merge branch 'master' into iwyu
Diffstat (limited to 'src/parse_productions.cpp')
-rw-r--r--src/parse_productions.cpp579
1 files changed, 579 insertions, 0 deletions
diff --git a/src/parse_productions.cpp b/src/parse_productions.cpp
new file mode 100644
index 00000000..0d28c6a1
--- /dev/null
+++ b/src/parse_productions.cpp
@@ -0,0 +1,579 @@
+#include "parse_productions.h"
+#include <assert.h>
+#include <stdio.h>
+#include "parse_tree.h"
+
+using namespace parse_productions;
+#define NO_PRODUCTION ((production_option_idx_t)(-1))
+
+static bool production_is_empty(const production_t production)
+{
+ return production[0] == token_type_invalid;
+}
+
+/* Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it */
+static bool production_is_valid(const production_options_t production_list, production_option_idx_t which)
+{
+ assert(which >= 0);
+ if (which >= MAX_PRODUCTIONS)
+ return false;
+
+ bool nonempty_found = false;
+ for (int i=which; i < MAX_PRODUCTIONS; i++)
+ {
+ if (! production_is_empty(production_list[i]))
+ {
+ nonempty_found = true;
+ break;
+ }
+ }
+ return nonempty_found;
+}
+
+#define PRODUCTIONS(sym) static const production_options_t productions_##sym
+#define RESOLVE(sym) static production_option_idx_t resolve_##sym (const parse_token_t &token1, const parse_token_t &token2)
+#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (const parse_token_t &input1, const parse_token_t &input2) { return 0; }
+
+#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1)
+
+
+/* A job_list is a list of jobs, separated by semicolons or newlines */
+PRODUCTIONS(job_list) =
+{
+ {},
+ {symbol_job, symbol_job_list},
+ {parse_token_type_end, symbol_job_list}
+};
+
+RESOLVE(job_list)
+{
+ switch (token1.type)
+ {
+ case parse_token_type_string:
+ // some keywords are special
+ switch (token1.keyword)
+ {
+ case parse_keyword_end:
+ case parse_keyword_else:
+ case parse_keyword_case:
+ // End this job list
+ return 0;
+
+ default:
+ // Normal string
+ return 1;
+ }
+
+ case parse_token_type_pipe:
+ case parse_token_type_redirection:
+ case parse_token_type_background:
+ return 1;
+
+ case parse_token_type_end:
+ // Empty line
+ return 2;
+
+ case parse_token_type_terminate:
+ // no more commands, just transition to empty
+ return 0;
+
+ default:
+ return NO_PRODUCTION;
+ }
+}
+
+/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */
+
+PRODUCTIONS(job) =
+{
+ {symbol_statement, symbol_job_continuation, symbol_optional_background}
+};
+RESOLVE_ONLY(job)
+
+PRODUCTIONS(job_continuation) =
+{
+ {},
+ {parse_token_type_pipe, symbol_statement, symbol_job_continuation}
+};
+RESOLVE(job_continuation)
+{
+ switch (token1.type)
+ {
+ case parse_token_type_pipe:
+ // Pipe, continuation
+ return 1;
+
+ default:
+ // Not a pipe, no job continuation
+ return 0;
+ }
+}
+
+/* A statement is a normal command, or an if / while / and etc */
+PRODUCTIONS(statement) =
+{
+ {symbol_boolean_statement},
+ {symbol_block_statement},
+ {symbol_if_statement},
+ {symbol_switch_statement},
+ {symbol_decorated_statement}
+};
+RESOLVE(statement)
+{
+ /* The only block-like builtin that takes any parameters is 'function' So go to decorated statements if the subsequent token looks like '--'.
+ The logic here is subtle:
+ If we are 'begin', then we expect to be invoked with no arguments.
+ If we are 'function', then we are a non-block if we are invoked with -h or --help
+ If we are anything else, we require an argument, so do the same thing if the subsequent token is a statement terminator.
+ */
+
+ if (token1.type == parse_token_type_string)
+ {
+ // If we are a function, then look for help arguments
+ // Otherwise, if the next token looks like an option (starts with a dash), then parse it as a decorated statement
+ if (token1.keyword == parse_keyword_function && token2.is_help_argument)
+ {
+ return 4;
+ }
+ else if (token1.keyword != parse_keyword_function && token2.has_dash_prefix)
+ {
+ return 4;
+ }
+
+ // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. a "naked if".
+ bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end);
+ if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate))
+ {
+ return 4;
+ }
+
+ }
+
+ switch (token1.type)
+ {
+ case parse_token_type_string:
+ switch (token1.keyword)
+ {
+ case parse_keyword_and:
+ case parse_keyword_or:
+ case parse_keyword_not:
+ return 0;
+
+ case parse_keyword_for:
+ case parse_keyword_while:
+ case parse_keyword_function:
+ case parse_keyword_begin:
+ return 1;
+
+ case parse_keyword_if:
+ return 2;
+
+ case parse_keyword_else:
+ return NO_PRODUCTION;
+
+ case parse_keyword_switch:
+ return 3;
+
+ case parse_keyword_end:
+ return NO_PRODUCTION;
+
+ // All other keywords fall through to decorated statement
+ default:
+ return 4;
+ }
+ break;
+
+ case parse_token_type_pipe:
+ case parse_token_type_redirection:
+ case parse_token_type_background:
+ case parse_token_type_terminate:
+ return NO_PRODUCTION;
+ //parse_error(L"statement", token);
+
+ default:
+ return NO_PRODUCTION;
+ }
+}
+
+PRODUCTIONS(if_statement) =
+{
+ {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list}
+};
+RESOLVE_ONLY(if_statement)
+
+PRODUCTIONS(if_clause) =
+{
+ { KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list }
+};
+RESOLVE_ONLY(if_clause)
+
+PRODUCTIONS(else_clause) =
+{
+ { },
+ { KEYWORD(parse_keyword_else), symbol_else_continuation }
+};
+RESOLVE(else_clause)
+{
+ switch (token1.keyword)
+ {
+ case parse_keyword_else:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+PRODUCTIONS(else_continuation) =
+{
+ {symbol_if_clause, symbol_else_clause},
+ {parse_token_type_end, symbol_job_list}
+};
+RESOLVE(else_continuation)
+{
+ switch (token1.keyword)
+ {
+ case parse_keyword_if:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+PRODUCTIONS(switch_statement) =
+{
+ { KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end, symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list}
+};
+RESOLVE_ONLY(switch_statement)
+
+PRODUCTIONS(case_item_list) =
+{
+ {},
+ {symbol_case_item, symbol_case_item_list},
+ {parse_token_type_end, symbol_case_item_list}
+};
+RESOLVE(case_item_list)
+{
+ if (token1.keyword == parse_keyword_case) return 1;
+ else if (token1.type == parse_token_type_end) return 2; //empty line
+ else return 0;
+}
+
+PRODUCTIONS(case_item) =
+{
+ {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list}
+};
+RESOLVE_ONLY(case_item)
+
+PRODUCTIONS(argument_list) =
+{
+ {},
+ {symbol_argument, symbol_argument_list}
+};
+RESOLVE(argument_list)
+{
+ switch (token1.type)
+ {
+ case parse_token_type_string:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+PRODUCTIONS(freestanding_argument_list) =
+{
+ {},
+ {symbol_argument, symbol_freestanding_argument_list},
+ {parse_token_type_end, symbol_freestanding_argument_list},
+};
+RESOLVE(freestanding_argument_list)
+{
+ switch (token1.type)
+ {
+ case parse_token_type_string:
+ return 1;
+ case parse_token_type_end:
+ return 2;
+ default:
+ return 0;
+ }
+}
+
+
+PRODUCTIONS(block_statement) =
+{
+ {symbol_block_header, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list}
+};
+RESOLVE_ONLY(block_statement)
+
+PRODUCTIONS(block_header) =
+{
+ {symbol_for_header},
+ {symbol_while_header},
+ {symbol_function_header},
+ {symbol_begin_header}
+};
+RESOLVE(block_header)
+{
+ switch (token1.keyword)
+ {
+ case parse_keyword_for:
+ return 0;
+ case parse_keyword_while:
+ return 1;
+ case parse_keyword_function:
+ return 2;
+ case parse_keyword_begin:
+ return 3;
+ default:
+ return NO_PRODUCTION;
+ }
+}
+
+PRODUCTIONS(for_header) =
+{
+ {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD
+ (parse_keyword_in), symbol_argument_list, parse_token_type_end}
+};
+RESOLVE_ONLY(for_header)
+
+PRODUCTIONS(while_header) =
+{
+ {KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end}
+};
+RESOLVE_ONLY(while_header)
+
+PRODUCTIONS(begin_header) =
+{
+ {KEYWORD(parse_keyword_begin)}
+};
+RESOLVE_ONLY(begin_header)
+
+PRODUCTIONS(function_header) =
+{
+ {KEYWORD(parse_keyword_function), symbol_argument, symbol_argument_list, parse_token_type_end}
+};
+RESOLVE_ONLY(function_header)
+
+/* A boolean statement is AND or OR or NOT */
+PRODUCTIONS(boolean_statement) =
+{
+ {KEYWORD(parse_keyword_and), symbol_statement},
+ {KEYWORD(parse_keyword_or), symbol_statement},
+ {KEYWORD(parse_keyword_not), symbol_statement}
+};
+RESOLVE(boolean_statement)
+{
+ switch (token1.keyword)
+ {
+ case parse_keyword_and:
+ return 0;
+ case parse_keyword_or:
+ return 1;
+ case parse_keyword_not:
+ return 2;
+ default:
+ return NO_PRODUCTION;
+ }
+}
+
+PRODUCTIONS(decorated_statement) =
+{
+ {symbol_plain_statement},
+ {KEYWORD(parse_keyword_command), symbol_plain_statement},
+ {KEYWORD(parse_keyword_builtin), symbol_plain_statement},
+ {KEYWORD(parse_keyword_exec), symbol_plain_statement}
+};
+RESOLVE(decorated_statement)
+{
+ /* If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the second token is not a string, then this is a naked 'command' and we should execute it as undecorated. */
+ if (token2.type != parse_token_type_string || token2.has_dash_prefix)
+ {
+ return 0;
+ }
+
+ switch (token1.keyword)
+ {
+ default:
+ return 0;
+ case parse_keyword_command:
+ return 1;
+ case parse_keyword_builtin:
+ return 2;
+ case parse_keyword_exec:
+ return 3;
+ }
+}
+
+PRODUCTIONS(plain_statement) =
+{
+ {parse_token_type_string, symbol_arguments_or_redirections_list}
+};
+RESOLVE_ONLY(plain_statement)
+
+PRODUCTIONS(arguments_or_redirections_list) =
+{
+ {},
+ {symbol_argument_or_redirection, symbol_arguments_or_redirections_list}
+};
+RESOLVE(arguments_or_redirections_list)
+{
+ switch (token1.type)
+ {
+ case parse_token_type_string:
+ case parse_token_type_redirection:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+PRODUCTIONS(argument_or_redirection) =
+{
+ {symbol_argument},
+ {symbol_redirection}
+};
+RESOLVE(argument_or_redirection)
+{
+ switch (token1.type)
+ {
+ case parse_token_type_string:
+ return 0;
+ case parse_token_type_redirection:
+ return 1;
+ default:
+ return NO_PRODUCTION;
+ }
+}
+
+PRODUCTIONS(argument) =
+{
+ {parse_token_type_string}
+};
+RESOLVE_ONLY(argument)
+
+PRODUCTIONS(redirection) =
+{
+ {parse_token_type_redirection, parse_token_type_string}
+};
+RESOLVE_ONLY(redirection)
+
+PRODUCTIONS(optional_background) =
+{
+ {},
+ { parse_token_type_background }
+};
+
+RESOLVE(optional_background)
+{
+ switch (token1.type)
+ {
+ case parse_token_type_background:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+PRODUCTIONS(end_command) =
+{
+ {KEYWORD(parse_keyword_end)}
+};
+RESOLVE_ONLY(end_command)
+
+#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break;
+const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text)
+{
+ const bool log_it = false;
+ if (log_it)
+ {
+ fprintf(stderr, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type).c_str(), input1.describe().c_str());
+ }
+
+ /* Fetch the list of productions and the function to resolve them */
+ const production_options_t *production_list = NULL;
+ production_option_idx_t (*resolver)(const parse_token_t &input1, const parse_token_t &input2) = NULL;
+ switch (node_type)
+ {
+ TEST(job_list)
+ TEST(job)
+ TEST(statement)
+ TEST(job_continuation)
+ TEST(boolean_statement)
+ TEST(block_statement)
+ TEST(if_statement)
+ TEST(if_clause)
+ TEST(else_clause)
+ TEST(else_continuation)
+ TEST(switch_statement)
+ TEST(decorated_statement)
+ TEST(case_item_list)
+ TEST(case_item)
+ TEST(argument_list)
+ TEST(freestanding_argument_list)
+ TEST(block_header)
+ TEST(for_header)
+ TEST(while_header)
+ TEST(begin_header)
+ TEST(function_header)
+ TEST(plain_statement)
+ TEST(arguments_or_redirections_list)
+ TEST(argument_or_redirection)
+ TEST(argument)
+ TEST(redirection)
+ TEST(optional_background)
+ TEST(end_command)
+
+ case parse_token_type_string:
+ case parse_token_type_pipe:
+ case parse_token_type_redirection:
+ case parse_token_type_background:
+ case parse_token_type_end:
+ case parse_token_type_terminate:
+ fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__);
+ PARSER_DIE();
+ break;
+
+ case parse_special_type_parse_error:
+ case parse_special_type_tokenizer_error:
+ case parse_special_type_comment:
+ fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__);
+ PARSER_DIE();
+ break;
+
+
+ case token_type_invalid:
+ fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__);
+ PARSER_DIE();
+ break;
+
+ }
+ PARSE_ASSERT(production_list != NULL);
+ PARSE_ASSERT(resolver != NULL);
+
+ const production_t *result = NULL;
+ production_option_idx_t which = resolver(input1, input2);
+
+ if (log_it)
+ {
+ fprintf(stderr, "\tresolved to %u\n", (unsigned)which);
+ }
+
+
+ if (which == NO_PRODUCTION)
+ {
+ if (log_it)
+ {
+ fprintf(stderr, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type).c_str(), input1.describe().c_str(), __FUNCTION__);
+ }
+ result = NULL;
+ }
+ else
+ {
+ PARSE_ASSERT(production_is_valid(*production_list, which));
+ result = &((*production_list)[which]);
+ }
+ *out_which_production = which;
+ return result;
+}
+