aboutsummaryrefslogtreecommitdiffhomepage
path: root/tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'tokenizer.h')
-rw-r--r--tokenizer.h146
1 files changed, 146 insertions, 0 deletions
diff --git a/tokenizer.h b/tokenizer.h
new file mode 100644
index 00000000..45bbb5cd
--- /dev/null
+++ b/tokenizer.h
@@ -0,0 +1,146 @@
+/** \file tokenizer.h
+
+ A specialized tokenizer for tokenizing the fish language. In the
+ future, the tokenizer should be extended to support marks,
+ tokenizing multiple strings and disposing of unused string
+ segments.
+*/
+
+/**
+ Token types. These are the values reported by tok_last_type().
+*/
+enum token_type
+{
+ TOK_NONE, /**< Tokenizer not yet constructed */
+ TOK_ERROR, /**< Error reading token */
+ TOK_INVALID,/**< Invalid token */
+ TOK_STRING,/**< String token */
+ TOK_PIPE,/**< Pipe token */
+ TOK_END,/**< End token */
+ TOK_REDIRECT_OUT, /**< Output redirection token */
+ TOK_REDIRECT_APPEND,/**< Appending output redirection token */
+ TOK_REDIRECT_IN,/**< Input redirection token */
+ TOK_REDIRECT_FD,/**< Redirection to new file descriptor token */
+ TOK_BACKGROUND,/**< Send job to background token */
+ TOK_COMMENT/**< Comment token (see the TOK_SHOW_COMMENTS flag) */
+}
+;
+
+/**
+ Flag telling the tokenizer to accept incomplete parameters,
+ i.e. parameters with mismatched parentheses, etc. This is useful
+ for tab-completion.
+*/
+#define TOK_ACCEPT_UNFINISHED 1
+
+/**
+ Flag telling the tokenizer to return comments as tokens instead of
+ removing them. Useful for syntax highlighting.
+*/
+#define TOK_SHOW_COMMENTS 2
+
+
+/**
+ The tokenizer struct. Holds the state of one tokenization run.
+*/
+typedef struct
+{
+ /** A pointer into the original string, showing where the next token begins */
+ wchar_t *buff;
+ /** The original string. NOTE(review): previously described as a copy, but tok_init's documentation says the string is not copied - confirm */
+ wchar_t *orig_buff;
+ /** A pointer to the last token */
+ wchar_t *last;
+
+ /** Type of the last token */
+ int last_type;
+ /** Length of the last token */
+ int last_len;
+ /** Offset of the last token */
+ int last_pos;
+ /** Whether there are more tokens */
+ int has_next;
+ /** Whether incomplete tokens are accepted */
+ int accept_unfinished;
+ /** Whether comments should be returned */
+ int show_comments;
+ /** Type of last quote, can be either ' or ". */
+ wchar_t last_quote;
+}
+tokenizer;
+
+/**
+ Initialize the tokenizer. b is the string that is to be
+ tokenized. It is not copied, so the caller must not free it
+ until after the tokenizer has been destroyed.
+
+ \param tok The tokenizer to initialize
+ \param b The string to tokenize
+ \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
+ to accept incomplete tokens, such as a subshell without a closing
+ parenthesis, as valid tokens. Setting TOK_SHOW_COMMENTS will return comments as tokens.
+
+*/
+void tok_init( tokenizer *tok, const wchar_t *b, int flags );
+
+/**
+ Jump to the next token of the specified tokenizer.
+*/
+void tok_next( tokenizer *tok );
+
+/**
+ Returns the type of the last token, as an int. The value is one of the values in the token_type enum.
+*/
+int tok_last_type( tokenizer *tok );
+
+/**
+ Returns the last token string. The returned string must not be freed by the caller.
+*/
+wchar_t *tok_last( tokenizer *tok );
+
+/**
+ Returns the type of the last quote, either ' or ". NOTE(review): this comment used to refer to TOK_QSTRING, which is not a member of the token_type enum - confirm which token type is meant.
+*/
+wchar_t tok_last_quote( tokenizer *tok );
+
+/**
+ Returns true (non-zero) as long as there are more tokens left.
+*/
+int tok_has_next( tokenizer *tok );
+
+/**
+ Returns the position of the beginning of the current token in the original string.
+*/
+int tok_get_pos( tokenizer *tok );
+
+/**
+ Destroy the tokenizer and free associated memory.
+*/
+void tok_destroy( tokenizer *tok );
+
+
+/**
+ Returns the original string that the tokenizer is tokenizing.
+ */
+wchar_t *tok_string( tokenizer *tok );
+
+
+/**
+ Returns only the first token from the specified string. This is a
+ convenience function, used to retrieve the first token of a
+ string. This can be useful for error messages, etc.
+
+ The returned string should be freed by the caller after use.
+*/
+wchar_t *tok_first( const wchar_t *str );
+
+/**
+ Move the tokenizer position to pos (presumably an offset into the original string, as reported by tok_get_pos - TODO confirm).
+*/
+void tok_set_pos( tokenizer *tok, int pos );
+
+/**
+ Returns a string description of the specified token type (presumably one of the token_type values - TODO confirm).
+*/
+const wchar_t *tok_get_desc( int type );
+