about | summary | refs | log | tree | commit | diff | homepage
path: root/src/tokenizer.h
diff options
context:
space:
mode:
author: Kurtis Rader <krader@skepticism.us> 2016-05-03 14:35:12 -0700
committer: Kurtis Rader <krader@skepticism.us> 2016-05-03 15:00:44 -0700
commit: c14bac42846506ab01090e1f6384a60126d7c596 (patch)
tree: 4fc4aa1a75c279a117319933b107c00e4ee86042 /src/tokenizer.h
parent: 0aa7fd95b82187e08caba461e7a5f4da7fc908e1 (diff)
restyle tokenizer module to match project style
Reduces lint errors from 70 to 46 (-34%). Line count from 1158 to 936 (-19%). Another step in resolving issue #2902.
Diffstat (limited to 'src/tokenizer.h')
-rw-r--r-- src/tokenizer.h 208
1 files changed, 83 insertions, 125 deletions
diff --git a/src/tokenizer.h b/src/tokenizer.h
index 3c3b6236..c223e438 100644
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -1,191 +1,151 @@
-/** \file tokenizer.h
-
- A specialized tokenizer for tokenizing the fish language. In the
- future, the tokenizer should be extended to support marks,
- tokenizing multiple strings and disposing of unused string
- segments.
-*/
+// A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be
+// extended to support marks, tokenizing multiple strings and disposing of unused string segments.
#ifndef FISH_TOKENIZER_H
#define FISH_TOKENIZER_H
-#include <stddef.h>
#include <stdbool.h>
+#include <stddef.h>
#include "common.h"
-/**
- Token types
-*/
-enum token_type
-{
- TOK_NONE, /**< Tokenizer not yet constructed */
- TOK_ERROR, /**< Error reading token */
- TOK_STRING,/**< String token */
- TOK_PIPE,/**< Pipe token */
- TOK_END,/**< End token (semicolon or newline, not literal end) */
- TOK_REDIRECT_OUT, /**< redirection token */
- TOK_REDIRECT_APPEND,/**< redirection append token */
- TOK_REDIRECT_IN,/**< input redirection token */
- TOK_REDIRECT_FD,/**< redirection to new fd token */
- TOK_REDIRECT_NOCLOB, /**<? redirection token */
- TOK_BACKGROUND,/**< send job to bg token */
- TOK_COMMENT/**< comment token */
+/// Token types.
+enum token_type {
+ TOK_NONE, /// Tokenizer not yet constructed
+ TOK_ERROR, /// Error reading token
+ TOK_STRING, /// String token
+ TOK_PIPE, /// Pipe token
+ TOK_END, /// End token (semicolon or newline, not literal end)
+ TOK_REDIRECT_OUT, /// redirection token
+ TOK_REDIRECT_APPEND, /// redirection append token
+ TOK_REDIRECT_IN, /// input redirection token
+ TOK_REDIRECT_FD, /// redirection to new fd token
+ TOK_REDIRECT_NOCLOB, /// redirection token
+ TOK_BACKGROUND, /// send job to bg token
+ TOK_COMMENT /// comment token
};
-/**
- Tokenizer error types
-*/
-enum tokenizer_error
-{
+/// Tokenizer error types.
+enum tokenizer_error {
TOK_ERROR_NONE,
TOK_UNTERMINATED_QUOTE,
TOK_UNTERMINATED_SUBSHELL,
TOK_UNTERMINATED_SLICE,
TOK_UNTERMINATED_ESCAPE,
TOK_OTHER
-}
-;
-
+};
-/**
- Flag telling the tokenizer to accept incomplete parameters,
- i.e. parameters with mismatching paranthesis, etc. This is useful
- for tab-completion.
-*/
+/// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching
+/// parentheses, etc. This is useful for tab-completion.
#define TOK_ACCEPT_UNFINISHED 1
-/**
- Flag telling the tokenizer not to remove comments. Useful for
- syntax highlighting.
-*/
+/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting.
#define TOK_SHOW_COMMENTS 2
-/** Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing off of the main thread (since wgettext is not thread safe).
-*/
+/// Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing
+/// off of the main thread (since wgettext is not thread safe).
#define TOK_SQUASH_ERRORS 4
-/** Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon.
- This flag tells the tokenizer to return each of them as a separate END. */
+/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells
+/// the tokenizer to return each of them as a separate END.
#define TOK_SHOW_BLANK_LINES 8
typedef unsigned int tok_flags_t;
-struct tok_t
-{
- /* The text of the token, or an error message for type error */
+struct tok_t {
+ // The text of the token, or an error message for type error.
wcstring text;
-
- /* The type of the token */
+ // The type of the token.
token_type type;
-
- /* If an error, this is the error code */
+ // If an error, this is the error code.
enum tokenizer_error error;
-
- /* If an error, this is the offset of the error within the token. A value of 0 means it occurred at 'offset' */
+ // If an error, this is the offset of the error within the token. A value of 0 means it occurred
+ // at 'offset'.
size_t error_offset;
-
- /* Offset of the token */
+ // Offset of the token.
size_t offset;
-
- /* Length of the token */
+ // Length of the token.
size_t length;
-
+
tok_t() : type(TOK_NONE), error(TOK_ERROR_NONE), error_offset(-1), offset(-1), length(-1) {}
};
-/**
- The tokenizer struct.
-*/
-class tokenizer_t
-{
- /* No copying, etc. */
- tokenizer_t(const tokenizer_t&);
- void operator=(const tokenizer_t&);
+/// The tokenizer struct.
+class tokenizer_t {
+ // No copying, etc.
+ tokenizer_t(const tokenizer_t &);
+ void operator=(const tokenizer_t &);
- /** A pointer into the original string, showing where the next token begins */
+ /// A pointer into the original string, showing where the next token begins.
const wchar_t *buff;
- /** A copy of the original string */
+ /// A copy of the original string.
const wchar_t *orig_buff;
- /** The last token */
+ /// The last token.
wcstring last_token;
-
- /** Type of last token*/
+ /// Type of last token.
enum token_type last_type;
-
- /** Offset of last token*/
+ /// Offset of last token.
size_t last_pos;
- /** Whether there are more tokens*/
+ /// Whether there are more tokens.
bool has_next;
- /** Whether incomplete tokens are accepted*/
+ /// Whether incomplete tokens are accepted.
bool accept_unfinished;
- /** Whether comments should be returned*/
+ /// Whether comments should be returned.
bool show_comments;
- /** Whether all blank lines are returned */
+ /// Whether all blank lines are returned.
bool show_blank_lines;
- /** Last error */
+ /// Last error.
tokenizer_error error;
- /** Last error offset, in "global" coordinates (relative to orig_buff) */
+ /// Last error offset, in "global" coordinates (relative to orig_buff).
size_t global_error_offset;
- /* Whether we are squashing errors */
+ /// Whether we are squashing errors.
bool squash_errors;
-
- /* Whether to continue the previous line after the comment */
+ /// Whether to continue the previous line after the comment.
bool continue_line_after_comment;
-
- void call_error(enum tokenizer_error error_type, const wchar_t *where, const wchar_t *error_message);
+
+ void call_error(enum tokenizer_error error_type, const wchar_t *where,
+ const wchar_t *error_message);
void read_string();
void read_comment();
void tok_next();
-
-public:
- /**
- Constructor for a tokenizer. b is the string that is to be
- tokenized. It is not copied, and should not be freed by the caller
- until after the tokenizer is destroyed.
-
- \param b The string to tokenize
- \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
- to accept incomplete tokens, such as a subshell without a closing
- parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
-
- */
+
+ public:
+ /// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
+ /// should not be freed by the caller until after the tokenizer is destroyed.
+ ///
+ /// \param b The string to tokenize
+ /// \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
+ /// to accept incomplete tokens, such as a subshell without a closing parenthesis, as a valid
+ /// token. Setting TOK_SHOW_COMMENTS will return comments as tokens
tokenizer_t(const wchar_t *b, tok_flags_t flags);
-
- /** Returns the next token by reference. Returns true if we got one, false if we're at the end. */
+
+ /// Returns the next token by reference. Returns true if we got one, false if we're at the end.
bool next(struct tok_t *result);
};
-
-/**
- Returns only the first token from the specified string. This is a
- convenience function, used to retrieve the first token of a
- string. This can be useful for error messages, etc.
-
- On failure, returns the empty string.
-*/
+/// Returns only the first token from the specified string. This is a convenience function, used to
+/// retrieve the first token of a string. This can be useful for error messages, etc. On failure,
+/// returns the empty string.
wcstring tok_first(const wcstring &str);
-/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid. Also returns the fd by reference. */
+/// Helper function to determine redirection type from a string, or TOK_NONE if the redirection is
+/// invalid. Also returns the fd by reference.
enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL);
-/* Helper function to determine which fd is redirected by a pipe */
+/// Helper function to determine which fd is redirected by a pipe.
int fd_redirected_by_pipe(const wcstring &str);
-/* Helper function to return oflags (as in open(2)) for a redirection type */
+/// Helper function to return oflags (as in open(2)) for a redirection type.
int oflags_for_redirection_type(enum token_type type);
-enum move_word_style_t
-{
- move_word_style_punctuation, //stop at punctuation
- move_word_style_path_components, //stops at path components
- move_word_style_whitespace // stops at whitespace
+enum move_word_style_t {
+ move_word_style_punctuation, // stop at punctuation
+ move_word_style_path_components, // stops at path components
+ move_word_style_whitespace // stops at whitespace
};
-/* Our state machine that implements "one word" movement or erasure. */
-class move_word_state_machine_t
-{
-private:
-
+/// Our state machine that implements "one word" movement or erasure.
+class move_word_state_machine_t {
+ private:
bool consume_char_punctuation(wchar_t c);
bool consume_char_path_components(wchar_t c);
bool is_path_component_character(wchar_t c);
@@ -194,12 +154,10 @@ private:
int state;
move_word_style_t style;
-public:
-
+ public:
explicit move_word_state_machine_t(move_word_style_t st);
bool consume_char(wchar_t c);
void reset();
};
-
#endif