aboutsummaryrefslogtreecommitdiffhomepage
path: root/parse_tree.h
blob: 4530a6326dc7f3714e86ab55b72625add6436e2b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/**\file parse_tree.h

    Programmatic representation of fish code.
*/

#ifndef FISH_PARSE_TREE_H
#define FISH_PARSE_TREE_H

#include <wchar.h>

#include "config.h"
#include "util.h"
#include "common.h"
#include "tokenizer.h"
#include <vector>

#define PARSE_ASSERT(a) assert(a)
#define PARSER_DIE() exit_without_destructors(-1)

class parse_node_t;
class parse_node_tree_t;
typedef size_t node_offset_t;
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))

struct parse_error_t
{
    /** Text of the error */
    wcstring text;
    
    /** Offset and length of the token in the source code that triggered this error */
    size_t source_start;
    size_t source_length;
    
    /** Return a string describing the error, suitable for presentation to the user */
    wcstring describe(const wcstring &src) const;
};
typedef std::vector<parse_error_t> parse_error_list_t;

class parse_ll_t;
class parse_t
{
    parse_ll_t * const parser;
    
    public:
    parse_t();
    bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors);
};


enum parse_token_type_t
{
    token_type_invalid,
    
    // Non-terminal tokens
    symbol_job_list,
    symbol_job,
    symbol_job_continuation,
    symbol_statement,
    symbol_block_statement,
    symbol_block_header,
    symbol_for_header,
    symbol_while_header,
    symbol_begin_header,
    symbol_function_header,
    
    symbol_if_statement,
    symbol_if_clause,
    symbol_else_clause,
    symbol_else_continuation,
    
    symbol_boolean_statement,
    symbol_decorated_statement,
    symbol_plain_statement,
    symbol_arguments_or_redirections_list,
    symbol_argument_or_redirection,

    // Terminal types
    parse_token_type_string,
    parse_token_type_pipe,
    parse_token_type_redirection,
    parse_token_background,
    parse_token_type_end,
    parse_token_type_terminate,
    
    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
};

enum parse_keyword_t
{
    parse_keyword_none,
    parse_keyword_if,
    parse_keyword_else,
    parse_keyword_for,
    parse_keyword_in,
    parse_keyword_while,
    parse_keyword_begin,
    parse_keyword_function,
    parse_keyword_switch,
    parse_keyword_end,
    parse_keyword_and,
    parse_keyword_or,
    parse_keyword_not,
    parse_keyword_command,
    parse_keyword_builtin
};

wcstring token_type_description(parse_token_type_t type);
wcstring keyword_description(parse_keyword_t type);

/** Base class for nodes of a parse tree */
class parse_node_t
{
    public:
    
    /* Type of the node */
    enum parse_token_type_t type;
        
    /* Start in the source code */
    size_t source_start;
    
    /* Length of our range in the source code */
    size_t source_length;

    /* Children */
    node_offset_t child_start;
    node_offset_t child_count;
    
    /* Type-dependent data */
    uint32_t tag;
    
    /* Description */
    wcstring describe(void) const;
    
    /* Constructor */
    explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0)
    {
    }
    
    node_offset_t child_offset(node_offset_t which) const
    {
        PARSE_ASSERT(which < child_count);
        return child_start + which;
    }
};

class parse_node_tree_t : public std::vector<parse_node_t>
{
};


/* Fish grammar:

# A job_list is a list of jobs, separated by semicolons or newlines

    job_list = <empty> |
                <TOK_END> job_list |
                job job_list

# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation

    job = statement job_continuation
    job_continuation = <empty> | 
                       <TOK_PIPE> statement job_continuation

# A statement is a normal command, or an if / while / and etc

    statement = boolean_statement | block_statement | if_statement | decorated_statement
    
# A block is a conditional, loop, or begin/end

    if_statement = if_clause else_clause <END>
    if_clause = <IF> job STATEMENT_TERMINATOR job_list
    else_clause = <empty> |
                 <ELSE> else_continuation
    else_continuation = if_clause else_clause |
                        STATEMENT_TERMINATOR job_list

    block_statement = block_header STATEMENT_TERMINATOR job_list <END> arguments_or_redirections_list
    block_header = for_header | while_header | function_header | begin_header
    for_header = FOR var_name IN arguments_or_redirections_list
    while_header = WHILE statement
    begin_header = BEGIN
    function_header = FUNCTION function_name arguments_or_redirections_list
 
    
#(TODO: functions should not support taking redirections in their arguments)
 
# A boolean statement is AND or OR or NOT

    boolean_statement = AND statement | OR statement | NOT statement
 
# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command"

    decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement
    plain_statement = command arguments_or_redirections_list 

    arguments_or_redirections_list = <empty> |
                                     argument_or_redirection arguments_or_redirections_list
    argument_or_redirection = redirection | <TOK_STRING>
    redirection = <TOK_REDIRECTION>
    
    terminator = <TOK_END> | <TOK_BACKGROUND>
 
*/

#endif