From 790c7f80c7d0fa1c97380885201eb50a2abcce02 Mon Sep 17 00:00:00 2001 From: Aaron Gyes Date: Fri, 8 Apr 2016 10:18:58 +0800 Subject: Implement an --invert/-v for string match, like grep -v. Only lines that do not match the pattern are shown. --- doc_src/string.txt | 12 +++++-- src/builtin_string.cpp | 89 +++++++++++++++++++++++++++++++------------------- tests/string.in | 25 ++++++++++++++ tests/string.out | 17 ++++++++++ 4 files changed, 107 insertions(+), 36 deletions(-) diff --git a/doc_src/string.txt b/doc_src/string.txt index bae21dc9..58de93e4 100644 --- a/doc_src/string.txt +++ b/doc_src/string.txt @@ -12,7 +12,7 @@ string trim [(-l | --left)] [(-r | --right)] [(-c | --chars CHARS)] [(-q | --quiet)] [STRING...] string escape [(-n | --no-quoted)] [STRING...] string match [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)] - [(-n | --index)] [(-q | --quiet)] PATTERN [STRING...] + [(-n | --index)] [(-q | --quiet)] [(-v | --invert)] PATTERN [STRING...] string replace [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)] [(-q | --quiet)] PATTERN REPLACEMENT [STRING...] \endfish @@ -44,7 +44,7 @@ The following subcommands are available: - `escape` escapes each STRING such that it can be passed back to `eval` to produce the original argument again. By default, all special characters are escaped, and quotes are used to simplify the output when possible. If `-n` or `--no-quote` is given, the simplifying quoted format is not used. Exit status: 0 if at least one string was escaped, or 1 otherwise. -- `match` tests each STRING against PATTERN and prints matching substrings. Only the first match for each STRING is reported unless `-a` or `--all` is given, in which case all matches are reported. Matching can be made case-insensitive with `-i` or `--ignore-case`. If `-n` or `--index` is given, each match is reported as a 1-based start position and a length. By default, PATTERN is interpreted as a glob pattern matched against each entire STRING argument. 
If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression. For a regular expression containing capturing groups, multiple items will be reported for each match, one for the entire match and one for each capturing group. Exit status: 0 if at least one match was found, or 1 otherwise. +- `match` tests each STRING against PATTERN and prints matching substrings. Only the first match for each STRING is reported unless `-a` or `--all` is given, in which case all matches are reported. Matching can be made case-insensitive with `-i` or `--ignore-case`. If `-n` or `--index` is given, each match is reported as a 1-based start position and a length. By default, PATTERN is interpreted as a glob pattern matched against each entire STRING argument. If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression. For a regular expression containing capturing groups, multiple items will be reported for each match, one for the entire match and one for each capturing group. If `-v` or `--invert` is given, only the STRINGs that do not match the given glob pattern or regular expression are printed. Exit status: 0 if at least one match was found, or 1 otherwise. - `replace` is similar to `match` but replaces non-overlapping matching substrings with a replacement string and prints the result. By default, PATTERN is treated as a literal substring to be matched. If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression, and REPLACEMENT can contain C-style escape sequences like `\t` as well as references to capturing groups by number or name as `$n` or `${n}`. Exit status: 0 if at least one replacement was performed, or 1 otherwise. @@ -120,6 +120,14 @@ The following subcommands are available: >_ echo 'ok?' | string match '*\\?' >_ ok? 
+ +>_ string match -r -v "c.*[12]" {cat,dog}(seq 1 4) +dog1 +dog2 +cat3 +dog3 +cat4 +dog4 \endfish \subsection string-example-match-regex Match Regex Examples diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp index 8911d1ab..10508a71 100644 --- a/src/builtin_string.cpp +++ b/src/builtin_string.cpp @@ -304,9 +304,10 @@ struct match_options_t bool all; bool ignore_case; bool index; + bool invert_match; bool quiet; - match_options_t(): all(false), ignore_case(false), index(false), quiet(false) { } + match_options_t(): all(false), ignore_case(false), index(false), invert_match(false), quiet(false) { } }; class string_matcher_t @@ -328,17 +329,15 @@ public: class wildcard_matcher_t: public string_matcher_t { +private: wcstring wcpattern; - public: wildcard_matcher_t(const wchar_t * /*argv0*/, const wchar_t *pattern, const match_options_t &opts, io_streams_t &streams) - : string_matcher_t(opts, streams) + : string_matcher_t(opts, streams), wcpattern(parse_util_unescape_wildcards(pattern)) { - wcpattern = parse_util_unescape_wildcards(pattern); - if (opts.ignore_case) { - for (int i = 0; i < wcpattern.length(); i++) + for (size_t i = 0; i < wcpattern.length(); i++) { wcpattern[i] = towlower(wcpattern[i]); } @@ -352,10 +351,11 @@ public: // Note: --all is a no-op for glob matching since the pattern is always // matched against the entire argument bool match; + if (opts.ignore_case) { wcstring s = arg; - for (int i = 0; i < s.length(); i++) + for (size_t i = 0; i < s.length(); i++) { s[i] = towlower(s[i]); } @@ -365,13 +365,11 @@ public: { match = wildcard_match(arg, wcpattern, false); } - if (match) + if (match ^ opts.invert_match) { total_matched++; - } - if (!opts.quiet) - { - if (match) + + if (!opts.quiet) { if (opts.index) { @@ -458,42 +456,53 @@ class pcre2_matcher_t: public string_matcher_t // Return values: -1 = error, 0 = no match, 1 = match if (pcre2_rc == PCRE2_ERROR_NOMATCH) { - return 0; + if (opts.invert_match && !opts.quiet) + { + 
streams.out.append(arg); + streams.out.push_back(L'\n'); + } + + return opts.invert_match ? 1 : 0; } - if (pcre2_rc < 0) + else if (pcre2_rc < 0) { string_error(streams, _(L"%ls: Regular expression match error: %ls\n"), argv0, pcre2_strerror(pcre2_rc).c_str()); return -1; } - if (pcre2_rc == 0) + else if (pcre2_rc == 0) { // The output vector wasn't big enough. Should not happen. string_error(streams, _(L"%ls: Regular expression internal error\n"), argv0); return -1; } + + else if (opts.invert_match) + return 0; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match); + for (int j = 0; j < pcre2_rc; j++) { PCRE2_SIZE begin = ovector[2*j]; PCRE2_SIZE end = ovector[2*j + 1]; - if (!opts.quiet) + + if (begin != PCRE2_UNSET && end != PCRE2_UNSET && !opts.quiet) { - if (begin != PCRE2_UNSET && end != PCRE2_UNSET) + if (opts.index) { - if (opts.index) - { - streams.out.append_format(L"%lu %lu", (unsigned long)(begin + 1), (unsigned long)(end - begin)); - } - else if (end > begin) // may have end < begin if \K is used - { - streams.out.append(wcstring(&arg[begin], end - begin)); - } - streams.out.append(L'\n'); + streams.out.append_format(L"%lu %lu", (unsigned long)(begin + 1), (unsigned long)(end - begin)); + } + else if (end > begin) // may have end < begin if \K is used + { + streams.out.append(wcstring(&arg[begin], end - begin)); } + streams.out.push_back(L'\n'); } } - return 1; + + return opts.invert_match ? 
0 : 1; + } public: @@ -525,7 +534,7 @@ public: // pcre2 match error return false; } - if (rc == 0) + else if (rc == 0) { // no match return true; @@ -533,6 +542,11 @@ public: matched++; total_matched++; + if (opts.invert_match) + { + return true; + } + // Report any additional matches PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match); while (opts.all || matched == 0) @@ -573,12 +587,13 @@ public: static int string_match(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) { - const wchar_t *short_options = L"ainqr"; + const wchar_t *short_options = L"ainvqr"; const struct woption long_options[] = { { L"all", no_argument, 0, 'a'}, { L"ignore-case", no_argument, 0, 'i'}, { L"index", no_argument, 0, 'n'}, + { L"invert", no_argument, 0, 'v'}, { L"quiet", no_argument, 0, 'q'}, { L"regex", no_argument, 0, 'r'}, { 0, 0, 0, 0 } @@ -612,6 +627,10 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, wchar opts.index = true; break; + case 'v': + opts.invert_match = true; + break; + case 'q': opts.quiet = true; break; @@ -750,7 +769,7 @@ class regex_replacer_t: public string_replacer_t compiled_regex_t regex; wcstring replacement; - wcstring interpret_escapes(const wchar_t *orig) + static wcstring interpret_escapes(const wchar_t *orig) { wcstring result; @@ -782,6 +801,7 @@ public: bool replace_matches(const wchar_t *arg) { + // A return value of true means all is well (even if no replacements // were performed), false indicates an unrecoverable error. if (regex.code == 0) @@ -1064,7 +1084,7 @@ static int string_split(parser_t &parser, io_streams_t &streams, int argc, wchar string_error(streams, BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); return BUILTIN_STRING_ERROR; } - + wcstring_list_t splits; size_t arg_count = 0; wcstring storage; @@ -1091,9 +1111,9 @@ static int string_split(parser_t &parser, io_streams_t &streams, int argc, wchar // If we are from the right, split_about gave us reversed strings, in reversed order! 
if (right) { - for (size_t i=0; i < splits.size(); i++) + for (size_t j = 0; j < splits.size(); j++) { - std::reverse(splits[i].begin(), splits[i].end()); + std::reverse(splits[j].begin(), splits[j].end()); } std::reverse(splits.begin(), splits.end()); } @@ -1293,7 +1313,7 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, wchar_ string_error(streams, BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); return BUILTIN_STRING_ERROR; } - + /* if neither left or right is specified, we do both */ if (! do_left && ! do_right) { @@ -1339,6 +1359,7 @@ static const struct string_subcommand const wchar_t *name; int (*handler)(parser_t &, io_streams_t &, int argc, wchar_t **argv); } + string_subcommands[] = { { L"escape", &string_escape }, diff --git a/tests/string.in b/tests/string.in index 8afc97ff..ac858618 100644 --- a/tests/string.in +++ b/tests/string.in @@ -1,5 +1,26 @@ # tests for string builtin # mostly taken from examples + +string match -r -v "c.*" dog can cat diz; and echo "exit 0" + +string match -q -r -v "c.*" dog can cat diz; and echo "exit 0" + +string match -v "c*" dog can cat diz; and echo "exit 0" + +string match -q -v "c*" dog can cat diz; and echo "exit 0" + +string match -v "d*" dog dan dat diz; or echo "exit 1" + +string match -q -v "d*" dog dan dat diz; or echo "exit 1" + +string match -r -v x y; and echo "exit 0" + +string match -r -v x x; or echo "exit 1" + +string match -q -r -v x y; and echo "exit 0" + +string match -q -r -v x x; or echo "exit 1" + string length 'hello, world' string length -q ""; and echo not zero length @@ -63,3 +84,7 @@ string match -r '[' 'a[sd' 2>/dev/null; or echo "invalid expression error" string invalidarg 2>/dev/null; or echo "invalid argument error" string length 2>/dev/null; or echo "missing argument returns 0" + +string match -r -v "[dcantg].*" dog can cat diz; or echo "no regexp invert match" + +string match -v "???" 
dog can cat diz; or echo "no glob invert match" diff --git a/tests/string.out b/tests/string.out index 45ccb369..bd3fff45 100644 --- a/tests/string.out +++ b/tests/string.out @@ -1,3 +1,18 @@ +dog +diz +exit 0 +exit 0 +dog +diz +exit 0 +exit 0 +exit 1 +exit 1 +y +exit 0 +exit 1 +exit 0 +exit 1 12 ab bc @@ -45,3 +60,5 @@ aabb invalid expression error invalid argument error missing argument returns 0 +no regexp invert match +no glob invert match -- cgit v1.2.3