aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: Michael Steed <msteed@saltstack.com> 2015-09-12 12:59:40 -0700
committer: ridiculousfish <corydoras@ridiculousfish.com> 2015-09-21 16:41:25 -0700
commit: d83ef07ca76c03852366e4e810053edc19796761 (patch)
tree: 93f671c5fe577128fd14a1f013fff764e5c5abba /src
parent: e70ed961eab80dab41ebfda7b91d80d9ef041be7 (diff)
Merge new string builtin
This adds the new builtin 'string' which supports various string manipulation and matching algorithms, including PCRE based regular expressions. Fixes #2296 Squashed commit of the following: commit 4c3eaeb6e57d76463e9683c327142b0aeafb92b8 Author: ridiculousfish <corydoras@ridiculousfish.com> Date: Sat Sep 12 12:51:30 2015 -0700 Remove testdata and doc dirs from pcre2 source commit b2a8b4b50f2398b204fb72cfe4b5ba77ece2e1ab Merge: 11c8a47 7974aab Author: ridiculousfish <corydoras@ridiculousfish.com> Date: Sat Sep 12 12:32:40 2015 -0700 Merge branch 'string' of git://github.com/msteed/fish-shell into string-test commit 7974aab6d367f999f1140ab34c2535cef5cf3b00 Author: Michael Steed <msteed@saltstack.com> Date: Fri Sep 11 13:00:02 2015 -0600 build pcre2 lib only, no docs commit eb20b43d2d96b7e6d24618158ce71078de83c40b Merge: 1a09e70 5f519cb Author: Michael Steed <msteed68@gmail.com> Date: Thu Sep 10 20:00:47 2015 -0600 Merge branch 'string' of github.com:msteed/fish-shell into string commit 1a09e709d028393c9e9e6dc9a84278f399a15f3d Author: Michael Steed <msteed68@gmail.com> Date: Thu Sep 10 19:58:24 2015 -0600 rebase on master & address the fallout commit a0ec9772cd1a0a548a501a7633be05dab4e5ee46 Author: Michael Steed <msteed68@gmail.com> Date: Thu Sep 10 19:26:45 2015 -0600 use fish's wildcard_match() for glob matching commit 64c25a01e3f7234f220ba13545cf658a7492b1a4 Author: Michael Steed <msteed68@gmail.com> Date: Thu Aug 27 08:19:23 2015 -0600 some fixes from review - string_get_arg_stdin(): simplify and don't discard the argument when the trailing newline is absent - fix calls to pcre2 for e.g. 
string match -r -a 'a*' 'b' - correct test for args coming from stdin commit ece7f35ec5f4093763627d68d671b6c0c876896d Author: Michael Steed <msteed68@gmail.com> Date: Sat Aug 22 19:35:56 2015 -0600 fixes from review - Makefile.in: restore iwyu target - regex_replacer_t::replace_matches(): correct size passed to realloc() commit 9ff7477a926c4572e26171cab3cd42f8086be678 Author: Michael Steed <msteed68@gmail.com> Date: Thu Aug 20 13:08:33 2015 -0600 Minor doc improvements commit baf4e096b22dde3063b85b833795eb570d660ba7 Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 19 18:29:02 2015 -0600 another attempt to fix the ci build commit 896a2c2b279a419747bea26102229fbe84534a6f Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 19 18:03:49 2015 -0600 Updates after review comments - make match/replace without -a operate on the first match on each argument - use different exit codes for "no operation performed" and errors, as grep does - refactor regex compile code - use human-friendly error messages from pcre2 - improve error handling & reporting elsewhere - add a few tests - make some doc fixes - some simplification & cleanup - fix ci build failure (I hope) commit efd47dcbda2ca247d58bee56a7774cd75a1062fd Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 12 00:26:07 2015 -0600 fix dependencies for parallel make commit ed0850e2db467362066a3d94e3ececd17c1756cd Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 23:37:22 2015 -0600 Add missing pcre2 files + .gitignore commit 9492e7a7e929c03554336be1ddf80ca6b37f53c5 Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 22:44:05 2015 -0600 add pcre2-10.20 and update license.hdr commit 1a60b933718feb20c0bf7c9e257b8e495014ea1b Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 22:41:19 2015 -0600 add string builtin files - string builtin source, tests, & docs - changes to configure.ac & Makefile.in commit 5f519cb2a2c05213e0a88a7add7af288bc1c1352 Author: Michael Steed <msteed68@gmail.com> 
Date: Thu Sep 10 19:26:45 2015 -0600 use fish's wildcard_match() for glob matching commit 2ecd24f79500879e2de5bdf1b4c19dd44fc6ac85 Author: Michael Steed <msteed68@gmail.com> Date: Thu Aug 27 08:19:23 2015 -0600 some fixes from review - string_get_arg_stdin(): simplify and don't discard the argument when the trailing newline is absent - fix calls to pcre2 for e.g. string match -r -a 'a*' 'b' - correct test for args coming from stdin commit 45b777e4dc85c05cd4a186f4bdcae543c21aaf08 Author: Michael Steed <msteed68@gmail.com> Date: Sat Aug 22 19:35:56 2015 -0600 fixes from review - Makefile.in: restore iwyu target - regex_replacer_t::replace_matches(): correct size passed to realloc() commit 981cbb6ddf742a5fe8881af916e7b870b7e6422a Author: Michael Steed <msteed68@gmail.com> Date: Thu Aug 20 13:08:33 2015 -0600 Minor doc improvements commit ddb6a2a8fdb6aa31aad41e80d5481bb32c6ed8ff Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 19 18:29:02 2015 -0600 another attempt to fix the ci build commit 1e34e3191b028162863d263e9868052f75194aa5 Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 19 18:03:49 2015 -0600 Updates after review comments - make match/replace without -a operate on the first match on each argument - use different exit codes for "no operation performed" and errors, as grep does - refactor regex compile code - use human-friendly error messages from pcre2 - improve error handling & reporting elsewhere - add a few tests - make some doc fixes - some simplification & cleanup - fix ci build failure (I hope) commit 34232e152df17a3cfbf0a094dd51d148a4f04e6f Author: Michael Steed <msteed68@gmail.com> Date: Wed Aug 12 00:26:07 2015 -0600 fix dependencies for parallel make commit 00d7e781697f53454beb91c1d0fc4b2d28d6e034 Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 23:37:22 2015 -0600 Add missing pcre2 files + .gitignore commit 4498aa5f576e09634f7f619443e74d2f33c108e4 Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 22:44:05 2015 
-0600 add pcre2-10.20 and update license.hdr commit 290c58c72e22db644ccf6fa9088051644980ed0a Author: Michael Steed <msteed68@gmail.com> Date: Tue Aug 11 22:41:19 2015 -0600 add string builtin files - string builtin source, tests, & docs - changes to configure.ac & Makefile.in
Diffstat (limited to 'src')
-rw-r--r-- src/builtin.cpp 2
-rw-r--r-- src/builtin_string.cpp 1375
-rw-r--r-- src/common.cpp 2
-rw-r--r-- src/common.h 3
-rw-r--r-- src/fish_tests.cpp 286
5 files changed, 1667 insertions, 1 deletions
diff --git a/src/builtin.cpp b/src/builtin.cpp
index bc1b18de..a55916f7 100644
--- a/src/builtin.cpp
+++ b/src/builtin.cpp
@@ -399,6 +399,7 @@ static void builtin_missing_argument(parser_t &parser, const wchar_t *cmd, const
#include "builtin_jobs.cpp"
#include "builtin_set_color.cpp"
#include "builtin_printf.cpp"
+#include "builtin_string.cpp"
/* builtin_test lives in builtin_test.cpp */
int builtin_test(parser_t &parser, wchar_t **argv);
@@ -4123,6 +4124,7 @@ static const builtin_data_t builtin_datas[]=
{ L"set_color", &builtin_set_color, N_(L"Set the terminal color") },
{ L"source", &builtin_source, N_(L"Evaluate contents of file") },
{ L"status", &builtin_status, N_(L"Return status information about fish") },
+ { L"string", &builtin_string, N_(L"Manipulate strings") },
{ L"switch", &builtin_generic, N_(L"Conditionally execute a block of commands") },
{ L"test", &builtin_test, N_(L"Test a condition") },
{ L"true", &builtin_true, N_(L"Return a successful result") },
diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp
new file mode 100644
index 00000000..6a53e548
--- /dev/null
+++ b/src/builtin_string.cpp
@@ -0,0 +1,1375 @@
+/** \file builtin_string.cpp
+ Implementation of the string builtin.
+*/
+
+#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS
+#ifdef _WIN32
+#define PCRE2_STATIC
+#endif
+#include "pcre2.h"
+
+#include "wildcard.h"
+
+// Largest output buffer, counted in wchar_t units, that regex replacement is
+// allowed to grow to before giving up with an error.
+#define MAX_REPLACE_SIZE size_t(1048576) // pcre2_substitute maximum output size in wchar_t
+// Error message format used when a required argument is missing; %ls receives
+// the name of the command or subcommand.
+#define STRING_ERR_MISSING _(L"%ls: Expected argument\n")
+
+// Return codes shared by every string subcommand handler.
+enum
+{
+ // The subcommand performed its operation at least once
+ BUILTIN_STRING_OK = 0,
+ // No error, but nothing was done (e.g. nothing matched or was replaced)
+ BUILTIN_STRING_NONE = 1,
+ // Bad usage or a runtime failure; a message has been appended to stderr_buffer
+ BUILTIN_STRING_ERROR = 2
+};
+
+/**
+   Append an error message to stderr_buffer. The message is formatted
+   printf-style from fmt and the variadic arguments, and is prefixed with
+   "string " so that it reads "string <subcommand>: ...".
+*/
+static void string_error(const wchar_t *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    const wcstring message = vformat_string(fmt, args);
+    va_end(args);
+
+    stderr_buffer.append(L"string ");
+    stderr_buffer.append(message);
+}
+
+/**
+   Report an unrecognized option for the given subcommand, then append the
+   help text for the string builtin to stderr_buffer.
+*/
+static void string_unknown_option(parser_t &parser, const wchar_t *subcmd, const wchar_t *opt)
+{
+    // First the one-line complaint...
+    string_error(BUILTIN_ERR_UNKNOWN, subcmd, opt);
+
+    // ...then the usage text
+    builtin_print_help(parser, L"string", stderr_buffer);
+}
+
+/**
+   Decide where the subcommand's string arguments come from. Returns true
+   (read them from stdin) unless builtin_stdin is the process's standard
+   input and that descriptor is a terminal.
+*/
+static bool string_args_from_stdin()
+{
+    // isatty() is only consulted when builtin_stdin is the real stdin
+    const bool interactive_stdin = (builtin_stdin == STDIN_FILENO) && isatty(builtin_stdin);
+    return !interactive_stdin;
+}
+
+/**
+   Read the next argument (one line, without its trailing newline) from
+   builtin_stdin, one byte at a time.
+
+   Returns a pointer to the converted line, or 0 on a read failure or when
+   end-of-file is reached before any byte was read. A final line that lacks
+   a trailing newline is still returned. The result points into static
+   storage and is overwritten by the next call.
+*/
+static const wchar_t *string_get_arg_stdin()
+{
+    static wcstring warg;
+
+    std::string line;
+    bool line_complete = false;
+    while (!line_complete)
+    {
+        char ch = '\0';
+        const int nread = read_blocked(builtin_stdin, &ch, 1);
+
+        if (nread < 0)
+        {
+            // read failure
+            return 0;
+        }
+
+        if (nread == 0)
+        {
+            // end of input: nothing pending means there are no more arguments
+            if (line.empty())
+            {
+                return 0;
+            }
+            line_complete = true;
+        }
+        else if (ch == '\n')
+        {
+            line_complete = true;
+        }
+        else
+        {
+            line += ch;
+        }
+    }
+
+    warg = str2wcstring(line.c_str(), line.size());
+    return warg.c_str();
+}
+
+/**
+   Fetch the next argument from argv and advance *argidx past it. Returns 0,
+   leaving *argidx untouched, when argv is null or the current slot holds
+   the terminating null pointer.
+*/
+static const wchar_t *string_get_arg_argv(int *argidx, wchar_t **argv)
+{
+    if (argv == 0 || argv[*argidx] == 0)
+    {
+        return 0;
+    }
+
+    const wchar_t *arg = argv[*argidx];
+    ++*argidx;
+    return arg;
+}
+
+/**
+   Fetch the next string argument from whichever source is active: stdin
+   when string_args_from_stdin() says so, otherwise argv (advancing
+   *argidx). Returns 0 when there are no more arguments.
+*/
+static const wchar_t *string_get_arg(int *argidx, wchar_t **argv)
+{
+    return string_args_from_stdin()
+        ? string_get_arg_stdin()
+        : string_get_arg_argv(argidx, argv);
+}
+
+/**
+   'string escape': print each argument in escaped form, one per line.
+   -n/--no-quoted adds ESCAPE_NO_QUOTED to the escape flags.
+
+   Returns BUILTIN_STRING_OK if at least one argument was escaped,
+   BUILTIN_STRING_NONE if there were no arguments, and
+   BUILTIN_STRING_ERROR on a usage error.
+*/
+static int string_escape(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"n";
+    const struct woption long_options[] =
+    {
+        { L"no-quoted", no_argument, 0, 'n' },
+        { 0, 0, 0, 0 }
+    };
+
+    escape_flags_t flags = ESCAPE_ALL;
+    wgetopter_t w;
+    int opt;
+    while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        if (opt == 'n')
+        {
+            flags |= ESCAPE_NO_QUOTED;
+        }
+        else if (opt == '?')
+        {
+            string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+            return BUILTIN_STRING_ERROR;
+        }
+        // any other value (such as 0) needs no action
+    }
+
+    int argidx = w.woptind;
+    if (string_args_from_stdin() && argc > argidx)
+    {
+        // Arguments come from stdin, so none may be given on the command line
+        string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    int nesc = 0;
+    for (const wchar_t *arg = string_get_arg(&argidx, argv);
+         arg != 0;
+         arg = string_get_arg(&argidx, argv))
+    {
+        stdout_buffer += escape(arg, flags);
+        stdout_buffer += L'\n';
+        nesc++;
+    }
+
+    if (nesc > 0)
+    {
+        return BUILTIN_STRING_OK;
+    }
+    return BUILTIN_STRING_NONE;
+}
+
+/**
+   'string join SEP [ARG...]': print the arguments on a single line with
+   SEP between consecutive ones. -q/--quiet suppresses the output.
+
+   Returns BUILTIN_STRING_OK if more than one argument was seen (so a join
+   actually happened), BUILTIN_STRING_NONE otherwise, and
+   BUILTIN_STRING_ERROR on a usage error.
+*/
+static int string_join(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"q";
+    const struct woption long_options[] =
+    {
+        { L"quiet", no_argument, 0, 'q'},
+        { 0, 0, 0, 0 }
+    };
+
+    bool quiet = false;
+    wgetopter_t w;
+    int opt;
+    while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        if (opt == 'q')
+        {
+            quiet = true;
+        }
+        else if (opt == '?')
+        {
+            string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+            return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    // The separator always comes from the command line, never from stdin
+    int argidx = w.woptind;
+    const wchar_t *sep = string_get_arg_argv(&argidx, argv);
+    if (sep == 0)
+    {
+        string_error(STRING_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    if (string_args_from_stdin() && argc > argidx)
+    {
+        string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    int nargs = 0;
+    for (const wchar_t *arg = string_get_arg(&argidx, argv);
+         arg != 0;
+         arg = string_get_arg(&argidx, argv))
+    {
+        if (!quiet)
+        {
+            // The separator goes before every argument except the first
+            if (nargs > 0)
+            {
+                stdout_buffer += sep;
+            }
+            stdout_buffer += arg;
+        }
+        nargs++;
+    }
+    if (!quiet && nargs > 0)
+    {
+        stdout_buffer += L'\n';
+    }
+
+    return (nargs > 1) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+}
+
+/**
+   'string length [ARG...]': print the length in characters of each
+   argument, one per line. -q/--quiet suppresses the output.
+
+   Returns BUILTIN_STRING_OK if at least one argument was non-empty,
+   BUILTIN_STRING_NONE otherwise, and BUILTIN_STRING_ERROR on a usage error.
+*/
+static int string_length(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"q";
+    const struct woption long_options[] =
+    {
+        { L"quiet", no_argument, 0, 'q'},
+        { 0, 0, 0, 0 }
+    };
+
+    bool quiet = false;
+    wgetopter_t w;
+    int opt;
+    while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        if (opt == 'q')
+        {
+            quiet = true;
+        }
+        else if (opt == '?')
+        {
+            string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+            return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    int argidx = w.woptind;
+    if (string_args_from_stdin() && argc > argidx)
+    {
+        string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    int nnonempty = 0;
+    for (const wchar_t *arg = string_get_arg(&argidx, argv);
+         arg != 0;
+         arg = string_get_arg(&argidx, argv))
+    {
+        const size_t len = wcslen(arg);
+        if (len > 0)
+        {
+            nnonempty++;
+        }
+        if (quiet)
+        {
+            continue;
+        }
+        stdout_buffer += to_string(int(len));
+        stdout_buffer += L'\n';
+    }
+
+    if (nnonempty > 0)
+    {
+        return BUILTIN_STRING_OK;
+    }
+    return BUILTIN_STRING_NONE;
+}
+
+// Options accepted by 'string match'. All of them default to off.
+struct match_options_t
+{
+ // -a/--all: report every match in an argument rather than only the first
+ bool all;
+ // -i/--ignore-case: match case-insensitively
+ bool ignore_case;
+ // -n/--index: print the 1-based start position and the length of each
+ // match instead of the matched text
+ bool index;
+ // -q/--quiet: print nothing
+ bool quiet;
+
+ match_options_t(): all(false), ignore_case(false), index(false), quiet(false) { }
+};
+
+// Abstract base for the 'string match' back ends. A matcher is constructed
+// once per invocation and then fed every argument through report_matches().
+class string_matcher_t
+{
+protected:
+ // Copy of the options given on the command line
+ match_options_t opts;
+ // Running count of matches, incremented by the derived classes
+ int total_matched;
+
+public:
+ string_matcher_t(const match_options_t &opts_)
+ : opts(opts_), total_matched(0)
+ { }
+
+ virtual ~string_matcher_t() { }
+ // Match one argument and print the results unless quiet. Implementations
+ // return false only for an unrecoverable error; "no match" is still true.
+ virtual bool report_matches(const wchar_t *arg) = 0;
+ // Number of matches counted so far
+ int match_count() { return total_matched; }
+};
+
+/**
+   Matcher for glob patterns. The pattern is compared against each whole
+   argument with wildcard_match(); for case-insensitive matching both the
+   pattern and the argument are lowercased first.
+*/
+class wildcard_matcher_t: public string_matcher_t
+{
+    wcstring wcpattern;
+
+    // Lowercase every character of str in place
+    static void lowercase_in_place(wcstring &str)
+    {
+        for (int i = 0; i < str.length(); i++)
+        {
+            str[i] = towlower(str[i]);
+        }
+    }
+
+public:
+    wildcard_matcher_t(const wchar_t * /*argv0*/, const wchar_t *pattern, const match_options_t &opts)
+        : string_matcher_t(opts)
+    {
+        wcpattern = parse_util_unescape_wildcards(pattern);
+
+        if (opts.ignore_case)
+        {
+            lowercase_in_place(wcpattern);
+        }
+    }
+
+    virtual ~wildcard_matcher_t() { }
+
+    /**
+       Test arg against the pattern. On a match the counter is bumped and,
+       unless quiet, either "1 <length>" (with --index) or the argument
+       itself is printed. Always returns true: glob matching cannot fail.
+    */
+    bool report_matches(const wchar_t *arg)
+    {
+        // Note: --all is a no-op for glob matching since the pattern is always
+        // matched against the entire argument
+        bool match;
+        if (opts.ignore_case)
+        {
+            wcstring folded = arg;
+            lowercase_in_place(folded);
+            match = wildcard_match(folded, wcpattern, false);
+        }
+        else
+        {
+            match = wildcard_match(arg, wcpattern, false);
+        }
+
+        if (!match)
+        {
+            return true;
+        }
+
+        total_matched++;
+        if (opts.quiet)
+        {
+            return true;
+        }
+
+        if (opts.index)
+        {
+            stdout_buffer += L"1 ";
+            stdout_buffer += to_string(wcslen(arg));
+        }
+        else
+        {
+            stdout_buffer += arg;
+        }
+        stdout_buffer += L'\n';
+        return true;
+    }
+};
+
+/**
+   Return the pcre2 error message for err_code. The text is written into a
+   static 128-character buffer, so the result is only valid until the next
+   call.
+*/
+static const wchar_t *pcre2_strerror(int err_code)
+{
+    static wchar_t message[128];
+    const size_t capacity = sizeof(message) / sizeof(wchar_t);
+
+    pcre2_get_error_message(err_code, (PCRE2_UCHAR *)message, capacity);
+    return message;
+}
+
+/**
+   Owns a compiled pcre2 pattern together with the match data block sized
+   for it. Both are released by the destructor.
+
+   If compilation fails, an error (message, pattern, and a caret under the
+   offending position) is appended to stderr_buffer and 'code' is left null;
+   users must check 'code' before matching.
+*/
+struct compiled_regex_t
+{
+    pcre2_code *code;
+    pcre2_match_data *match;
+
+    compiled_regex_t(const wchar_t *argv0, const wchar_t *pattern, bool ignore_case)
+        : code(0), match(0)
+    {
+        // Disable some sequences that can lead to security problems
+        uint32_t options = PCRE2_NEVER_UTF;
+#if PCRE2_CODE_UNIT_WIDTH < 32
+        options |= PCRE2_NEVER_BACKSLASH_C;
+#endif
+
+        int err_code = 0;
+        PCRE2_SIZE err_offset = 0;
+
+        code = pcre2_compile(
+            PCRE2_SPTR(pattern),
+            PCRE2_ZERO_TERMINATED,
+            options | (ignore_case ? PCRE2_CASELESS : 0),
+            &err_code,
+            &err_offset,
+            0);
+        if (code == 0)
+        {
+            string_error(_(L"%ls: Regular expression compile error: %ls\n"),
+                         argv0, pcre2_strerror(err_code));
+            string_error(L"%ls: %ls\n", argv0, pattern);
+            // A '*' field width consumes an int from the argument list, so
+            // the size_t offset must be converted explicitly.
+            string_error(L"%ls: %*ls\n", argv0, static_cast<int>(err_offset), L"^");
+            return;
+        }
+
+        match = pcre2_match_data_create_from_pattern(code, 0);
+        if (match == 0)
+        {
+            DIE_MEM();
+        }
+    }
+
+    ~compiled_regex_t()
+    {
+        if (match != 0)
+        {
+            pcre2_match_data_free(match);
+        }
+        if (code != 0)
+        {
+            pcre2_code_free(code);
+        }
+    }
+
+private:
+    // Not copyable: a copy would free the same pcre2 handles twice.
+    // Declared but intentionally never defined.
+    compiled_regex_t(const compiled_regex_t &);
+    compiled_regex_t &operator=(const compiled_regex_t &);
+};
+
+// Matcher for PCRE2 regular expressions. The pattern is compiled once in the
+// constructor; report_matches() then runs it against each argument.
+class pcre2_matcher_t: public string_matcher_t
+{
+ // Subcommand name, used as the prefix of error messages
+ const wchar_t *argv0;
+ // Compiled pattern plus match data; regex.code is null if compilation failed
+ compiled_regex_t regex;
+
+ // Interpret the result of one pcre2_match() call on arg and, unless quiet,
+ // print one line for every ovector pair that is set: either the matched
+ // text or, with --index, "<1-based start> <length>".
+ int report_match(const wchar_t *arg, int pcre2_rc)
+ {
+ // Return values: -1 = error, 0 = no match, 1 = match
+ if (pcre2_rc == PCRE2_ERROR_NOMATCH)
+ {
+ return 0;
+ }
+ if (pcre2_rc < 0)
+ {
+ string_error(_(L"%ls: Regular expression match error: %ls\n"),
+ argv0, pcre2_strerror(pcre2_rc));
+ return -1;
+ }
+ if (pcre2_rc == 0)
+ {
+ // The output vector wasn't big enough. Should not happen.
+ string_error(_(L"%ls: Regular expression internal error\n"), argv0);
+ return -1;
+ }
+ // A positive result is used as the number of (begin, end) offset pairs
+ // to read from the ovector
+ PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
+ for (int j = 0; j < pcre2_rc; j++)
+ {
+ PCRE2_SIZE begin = ovector[2*j];
+ PCRE2_SIZE end = ovector[2*j + 1];
+ if (!opts.quiet)
+ {
+ // Pairs left unset produce no output line at all
+ if (begin != PCRE2_UNSET && end != PCRE2_UNSET)
+ {
+ if (opts.index)
+ {
+ stdout_buffer += to_string(begin + 1);
+ stdout_buffer += ' ';
+ stdout_buffer += to_string(end - begin);
+ }
+ else if (end > begin) // may have end < begin if \K is used
+ {
+ stdout_buffer += wcstring(&arg[begin], end - begin);
+ }
+ stdout_buffer += L'\n';
+ }
+ }
+ }
+ return 1;
+ }
+
+public:
+ pcre2_matcher_t(const wchar_t *argv0_, const wchar_t *pattern, const match_options_t &opts)
+ : string_matcher_t(opts),
+ argv0(argv0_),
+ regex(argv0_, pattern, opts.ignore_case)
+ { }
+
+ virtual ~pcre2_matcher_t() { }
+
+ // Report the first match in arg and, with --all, every following match.
+ bool report_matches(const wchar_t *arg)
+ {
+ // A return value of true means all is well (even if no matches were
+ // found), false indicates an unrecoverable error.
+ if (regex.code == 0)
+ {
+ // pcre2_compile() failed
+ return false;
+ }
+
+ int matched = 0;
+
+ // See pcre2demo.c for an explanation of this logic
+ PCRE2_SIZE arglen = wcslen(arg);
+ int rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg), arglen, 0, 0, regex.match, 0));
+ if (rc < 0)
+ {
+ // pcre2 match error
+ return false;
+ }
+ if (rc == 0)
+ {
+ // no match
+ return true;
+ }
+ matched++;
+ total_matched++;
+
+ // Report any additional matches
+ // (matched is already 1 here, so without --all the loop body never runs)
+ PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
+ while (opts.all || matched == 0)
+ {
+ uint32_t options = 0;
+ PCRE2_SIZE offset = ovector[1]; // Start at end of previous match
+
+ // The previous match was empty: stop if it sat at the end of the
+ // argument, otherwise retry at the same offset but require a
+ // non-empty match anchored there.
+ if (ovector[0] == ovector[1])
+ {
+ if (ovector[0] == arglen)
+ {
+ break;
+ }
+ options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ }
+
+ rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg), arglen, offset, options, regex.match, 0));
+ if (rc < 0)
+ {
+ return false;
+ }
+ if (rc == 0)
+ {
+ if (options == 0)
+ {
+ // All matches found
+ break;
+ }
+ // The anchored retry after an empty match failed: move the
+ // recorded end forward one character so the next iteration
+ // does a normal search from there.
+ ovector[1] = offset + 1;
+ continue;
+ }
+ matched++;
+ total_matched++;
+ }
+ return true;
+ }
+};
+
+/**
+   'string match [-a] [-i] [-n] [-q] [-r] PATTERN [ARG...]': test each
+   argument against PATTERN, which is a glob by default and a PCRE2 regular
+   expression with -r/--regex.
+
+   Returns BUILTIN_STRING_OK if anything matched, BUILTIN_STRING_NONE if
+   nothing did, and BUILTIN_STRING_ERROR on a usage or matching error.
+*/
+static int string_match(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"ainqr";
+    const struct woption long_options[] =
+    {
+        { L"all", no_argument, 0, 'a'},
+        { L"ignore-case", no_argument, 0, 'i'},
+        { L"index", no_argument, 0, 'n'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"regex", no_argument, 0, 'r'},
+        { 0, 0, 0, 0 }
+    };
+
+    match_options_t opts;
+    bool regex = false;
+    wgetopter_t w;
+    int opt;
+    while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        if (opt == 'a')
+        {
+            opts.all = true;
+        }
+        else if (opt == 'i')
+        {
+            opts.ignore_case = true;
+        }
+        else if (opt == 'n')
+        {
+            opts.index = true;
+        }
+        else if (opt == 'q')
+        {
+            opts.quiet = true;
+        }
+        else if (opt == 'r')
+        {
+            regex = true;
+        }
+        else if (opt == '?')
+        {
+            string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+            return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    // The pattern always comes from the command line, never from stdin
+    int argidx = w.woptind;
+    const wchar_t *pattern = string_get_arg_argv(&argidx, argv);
+    if (pattern == 0)
+    {
+        string_error(STRING_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    if (string_args_from_stdin() && argc > argidx)
+    {
+        string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    string_matcher_t *matcher;
+    if (regex)
+    {
+        matcher = new pcre2_matcher_t(argv[0], pattern, opts);
+    }
+    else
+    {
+        matcher = new wildcard_matcher_t(argv[0], pattern, opts);
+    }
+
+    // Stop at the first argument the matcher cannot process
+    bool failed = false;
+    const wchar_t *arg;
+    while ((arg = string_get_arg(&argidx, argv)) != 0)
+    {
+        if (!matcher->report_matches(arg))
+        {
+            failed = true;
+            break;
+        }
+    }
+
+    int rc;
+    if (failed)
+    {
+        rc = BUILTIN_STRING_ERROR;
+    }
+    else if (matcher->match_count() > 0)
+    {
+        rc = BUILTIN_STRING_OK;
+    }
+    else
+    {
+        rc = BUILTIN_STRING_NONE;
+    }
+    delete matcher;
+    return rc;
+}
+
+// Options accepted by 'string replace'. All of them default to off.
+struct replace_options_t
+{
+ // -a/--all: replace every occurrence rather than only the first
+ bool all;
+ // -i/--ignore-case: match case-insensitively
+ bool ignore_case;
+ // -q/--quiet: print nothing
+ bool quiet;
+
+ replace_options_t(): all(false), ignore_case(false), quiet(false) { }
+};
+
+// Abstract base for the 'string replace' back ends. A replacer is constructed
+// once per invocation and then fed every argument through replace_matches().
+class string_replacer_t
+{
+protected:
+ // Subcommand name, used as the prefix of error messages
+ const wchar_t *argv0;
+ // Copy of the options given on the command line
+ replace_options_t opts;
+ // Running count of replacements, incremented by the derived classes
+ int total_replaced;
+
+public:
+ string_replacer_t(const wchar_t *argv0_, const replace_options_t &opts_)
+ : argv0(argv0_), opts(opts_), total_replaced(0)
+ { }
+
+ virtual ~string_replacer_t() {}
+ // Process one argument and print the result unless quiet. Implementations
+ // return false only for an unrecoverable error.
+ virtual bool replace_matches(const wchar_t *arg) = 0;
+ // Number of replacements counted so far
+ int replace_count() { return total_replaced; }
+};
+
+/**
+   Replacer that treats the pattern as a literal substring. The pattern and
+   replacement pointers are borrowed, not copied.
+*/
+class literal_replacer_t: public string_replacer_t
+{
+    const wchar_t *pattern;
+    const wchar_t *replacement;
+    int patlen;
+
+    // True if the pattern occurs at the very start of text
+    bool matches_at(const wchar_t *text) const
+    {
+        const int cmp = opts.ignore_case
+            ? wcsncasecmp(text, pattern, patlen)
+            : wcsncmp(text, pattern, patlen);
+        return cmp == 0;
+    }
+
+public:
+    literal_replacer_t(const wchar_t *argv0, const wchar_t *pattern_, const wchar_t *replacement_,
+                       const replace_options_t &opts)
+        : string_replacer_t(argv0, opts),
+          pattern(pattern_), replacement(replacement_), patlen(wcslen(pattern))
+    { }
+
+    virtual ~literal_replacer_t() { }
+
+    /**
+       Copy arg to the output, substituting the replacement for the first
+       occurrence of the pattern (every occurrence with --all). An empty
+       pattern leaves the argument unchanged. Always returns true.
+    */
+    bool replace_matches(const wchar_t *arg)
+    {
+        wcstring result;
+        if (patlen == 0)
+        {
+            result = arg;
+        }
+        else
+        {
+            bool replaced_once = false;
+            for (const wchar_t *cur = arg; *cur != L'\0'; )
+            {
+                const bool may_replace = opts.all || !replaced_once;
+                if (may_replace && matches_at(cur))
+                {
+                    result += replacement;
+                    cur += patlen;
+                    replaced_once = true;
+                    total_replaced++;
+                }
+                else
+                {
+                    result += *cur;
+                    cur++;
+                }
+            }
+        }
+
+        if (!opts.quiet)
+        {
+            stdout_buffer += result;
+            stdout_buffer += L'\n';
+        }
+        return true;
+    }
+};
+
+/**
+   Replacer that uses a PCRE2 regular expression and pcre2_substitute().
+   Backslash escapes in the replacement text are expanded once, in the
+   constructor.
+*/
+class regex_replacer_t: public string_replacer_t
+{
+    compiled_regex_t regex;
+    wcstring replacement;
+
+    /**
+       Return a copy of orig with every backslash escape expanded by
+       read_unquoted_escape(). That function reports an invalid escape by
+       returning 0; in that case the backslash is copied through literally
+       so that the scan always advances.
+    */
+    wcstring interpret_escapes(const wchar_t *orig)
+    {
+        wcstring result;
+
+        while (*orig != L'\0')
+        {
+            if (*orig == L'\\')
+            {
+                size_t consumed = read_unquoted_escape(orig, &result, true, false);
+                if (consumed == 0)
+                {
+                    // Invalid escape: without this the loop would never
+                    // move past the backslash
+                    result += *orig;
+                    consumed = 1;
+                }
+                orig += consumed;
+            }
+            else
+            {
+                result += *orig;
+                orig++;
+            }
+        }
+
+        return result;
+    }
+
+public:
+    regex_replacer_t(const wchar_t *argv0, const wchar_t *pattern, const wchar_t *replacement_,
+                     const replace_options_t &opts)
+        : string_replacer_t(argv0, opts),
+          regex(argv0, pattern, opts.ignore_case),
+          replacement(interpret_escapes(replacement_))
+    { }
+
+    virtual ~regex_replacer_t() { }
+
+    /**
+       Run the substitution on arg and, unless quiet, print the result.
+       The number of substitutions is added to total_replaced.
+
+       The output buffer starts at twice the argument length (16 for an
+       empty argument) and is doubled, up to MAX_REPLACE_SIZE, for as long
+       as pcre2 reports that it is too small.
+    */
+    bool replace_matches(const wchar_t *arg)
+    {
+        // A return value of true means all is well (even if no replacements
+        // were performed), false indicates an unrecoverable error.
+        if (regex.code == 0)
+        {
+            // pcre2_compile() failed
+            return false;
+        }
+
+        const uint32_t options = opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0;
+        const size_t arglen = wcslen(arg);
+
+        // The capacity is tracked separately from outlen: outlen is an
+        // in/out parameter that pcre2_substitute may overwrite, so its value
+        // after a failed call must not be used to size the next attempt.
+        size_t bufsize = (arglen == 0) ? 16 : 2 * arglen;
+        wchar_t *output = (wchar_t *)malloc(sizeof(wchar_t) * bufsize);
+        if (output == 0)
+        {
+            DIE_MEM();
+        }
+
+        int pcre2_rc = 0;
+        for (;;)
+        {
+            PCRE2_SIZE outlen = bufsize;
+            pcre2_rc = pcre2_substitute(
+                regex.code,
+                PCRE2_SPTR(arg),
+                arglen,
+                0,  // start offset
+                options,
+                regex.match,
+                0,  // match context
+                PCRE2_SPTR(replacement.c_str()),
+                PCRE2_ZERO_TERMINATED,
+                (PCRE2_UCHAR *)output,
+                &outlen);
+
+            if (pcre2_rc != PCRE2_ERROR_NOMEMORY)
+            {
+                break;
+            }
+
+            // The buffer was too small
+            if (bufsize >= MAX_REPLACE_SIZE)
+            {
+                string_error(_(L"%ls: Replacement string too large\n"), argv0);
+                free(output);
+                return false;
+            }
+            bufsize = std::min(2 * bufsize, MAX_REPLACE_SIZE);
+            // Keep the old pointer until realloc is known to have succeeded
+            wchar_t *grown = (wchar_t *)realloc(output, sizeof(wchar_t) * bufsize);
+            if (grown == 0)
+            {
+                free(output);
+                DIE_MEM();
+            }
+            output = grown;
+        }
+
+        bool rc = true;
+        if (pcre2_rc < 0)
+        {
+            string_error(_(L"%ls: Regular expression substitute error: %ls\n"),
+                         argv0, pcre2_strerror(pcre2_rc));
+            rc = false;
+        }
+        else
+        {
+            if (!opts.quiet)
+            {
+                stdout_buffer += output;
+                stdout_buffer += L'\n';
+            }
+            // A non-negative result is the number of substitutions made
+            total_replaced += pcre2_rc;
+        }
+
+        free(output);
+        return rc;
+    }
+};
+
+/**
+   'string replace [-a] [-i] [-q] [-r] PATTERN REPLACEMENT [ARG...]':
+   replace PATTERN with REPLACEMENT in each argument. PATTERN is a literal
+   substring by default and a PCRE2 regular expression with -r/--regex.
+
+   Returns BUILTIN_STRING_OK if anything was replaced, BUILTIN_STRING_NONE
+   if nothing was, and BUILTIN_STRING_ERROR on a usage or replacement error.
+*/
+static int string_replace(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"aiqr";
+    const struct woption long_options[] =
+    {
+        { L"all", no_argument, 0, 'a'},
+        { L"ignore-case", no_argument, 0, 'i'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"regex", no_argument, 0, 'r'},
+        { 0, 0, 0, 0 }
+    };
+
+    replace_options_t opts;
+    bool regex = false;
+    wgetopter_t w;
+    int opt;
+    while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        if (opt == 'a')
+        {
+            opts.all = true;
+        }
+        else if (opt == 'i')
+        {
+            opts.ignore_case = true;
+        }
+        else if (opt == 'q')
+        {
+            opts.quiet = true;
+        }
+        else if (opt == 'r')
+        {
+            regex = true;
+        }
+        else if (opt == '?')
+        {
+            string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+            return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    // Pattern and replacement always come from the command line
+    int argidx = w.woptind;
+    const wchar_t *pattern = string_get_arg_argv(&argidx, argv);
+    if (pattern == 0)
+    {
+        string_error(STRING_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+    const wchar_t *replacement = string_get_arg_argv(&argidx, argv);
+    if (replacement == 0)
+    {
+        string_error(STRING_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    if (string_args_from_stdin() && argc > argidx)
+    {
+        string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    string_replacer_t *replacer;
+    if (regex)
+    {
+        replacer = new regex_replacer_t(argv[0], pattern, replacement, opts);
+    }
+    else
+    {
+        replacer = new literal_replacer_t(argv[0], pattern, replacement, opts);
+    }
+
+    // Stop at the first argument the replacer cannot process
+    bool failed = false;
+    const wchar_t *arg;
+    while ((arg = string_get_arg(&argidx, argv)) != 0)
+    {
+        if (!replacer->replace_matches(arg))
+        {
+            failed = true;
+            break;
+        }
+    }
+
+    int rc;
+    if (failed)
+    {
+        rc = BUILTIN_STRING_ERROR;
+    }
+    else if (replacer->replace_count() > 0)
+    {
+        rc = BUILTIN_STRING_OK;
+    }
+    else
+    {
+        rc = BUILTIN_STRING_NONE;
+    }
+    delete replacer;
+    return rc;
+}
+
+/**
+   'string split [-m MAX] [-q] [-r] SEP [ARG...]': split each argument at
+   every occurrence of SEP and print the pieces one per line. An empty SEP
+   splits into individual characters.
+
+   -m/--max    perform at most MAX splits per argument
+   -q/--quiet  print nothing
+   -r/--right  search for separators from the end of each argument, which
+               matters only together with --max
+
+   Pieces are always printed in the order of the arguments they came from,
+   and left to right within an argument.
+
+   Returns BUILTIN_STRING_OK if at least one split was made,
+   BUILTIN_STRING_NONE if none was, and BUILTIN_STRING_ERROR on a usage
+   error.
+*/
+static int string_split(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L":m:qr";
+    const struct woption long_options[] =
+    {
+        { L"max", required_argument, 0, 'm'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"right", no_argument, 0, 'r'},
+        { 0, 0, 0, 0 }
+    };
+
+    long max = LONG_MAX;
+    bool quiet = false;
+    bool right = false;
+    wgetopter_t w;
+    for (;;)
+    {
+        int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+        if (c == -1)
+        {
+            break;
+        }
+        switch (c)
+        {
+            case 0:
+                break;
+
+            case 'm':
+            {
+                errno = 0;
+                wchar_t *endptr = 0;
+                max = wcstol(w.woptarg, &endptr, 10);
+                if (*endptr != L'\0' || errno != 0)
+                {
+                    string_error(BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
+                    return BUILTIN_STRING_ERROR;
+                }
+                break;
+            }
+
+            case 'q':
+                quiet = true;
+                break;
+
+            case 'r':
+                right = true;
+                break;
+
+            case ':':
+                string_error(STRING_ERR_MISSING, argv[0]);
+                return BUILTIN_STRING_ERROR;
+
+            case '?':
+                string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    // The separator always comes from the command line, never from stdin
+    int i = w.woptind;
+    const wchar_t *sep;
+    if ((sep = string_get_arg_argv(&i, argv)) == 0)
+    {
+        string_error(STRING_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    if (string_args_from_stdin() && argc > i)
+    {
+        string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    std::list<wcstring> splits;
+    const size_t seplen = wcslen(sep);
+    int nsplit = 0;
+    const wchar_t *arg;
+    while ((arg = string_get_arg(&i, argv)) != 0)
+    {
+        const size_t arglen = wcslen(arg);
+        int nargsplit = 0;
+        if (right)
+        {
+            // Pieces are found back to front, so they are collected at the
+            // front of a per-argument list. Appending that list to 'splits'
+            // afterwards keeps the arguments themselves in their original
+            // order.
+            std::list<wcstring> pieces;
+            if (seplen == 0)
+            {
+                // Split to individual characters, peeling them off the end.
+                // 'remaining' is the length of the still-unsplit prefix.
+                size_t remaining = arglen;
+                while (remaining > 1 && nargsplit < max)
+                {
+                    pieces.push_front(wcstring(arg + remaining - 1, 1));
+                    remaining--;
+                    nargsplit++;
+                    nsplit++;
+                }
+                pieces.push_front(wcstring(arg, remaining));
+            }
+            else
+            {
+                // 'end' is the end of the still-unsplit prefix. 'cand_end'
+                // is one past the separator candidate being tested, so the
+                // candidate starts at cand_end - seplen. Working with
+                // offsets avoids forming a pointer before the start of arg.
+                size_t end = arglen;
+                size_t cand_end = arglen;
+                while (cand_end >= seplen && nargsplit < max)
+                {
+                    const size_t cand = cand_end - seplen;
+                    if (wcsncmp(arg + cand, sep, seplen) == 0)
+                    {
+                        pieces.push_front(wcstring(arg + cand_end, end - cand_end));
+                        end = cand;
+                        // The next candidate must end before this separator
+                        cand_end = cand;
+                        nargsplit++;
+                        nsplit++;
+                    }
+                    else
+                    {
+                        cand_end--;
+                    }
+                }
+                pieces.push_front(wcstring(arg, end));
+            }
+            splits.splice(splits.end(), pieces);
+        }
+        else
+        {
+            const wchar_t *cur = arg;
+            if (seplen == 0)
+            {
+                // Split to individual characters; whatever is left when the
+                // loop stops (at least the last character) is the final piece
+                const wchar_t *end = arg + arglen;
+                while (end - cur > 1 && nargsplit < max)
+                {
+                    splits.push_back(wcstring(cur, 1));
+                    cur++;
+                    nargsplit++;
+                    nsplit++;
+                }
+                splits.push_back(cur);
+            }
+            else
+            {
+                while (cur != 0)
+                {
+                    // Once the split limit is reached, stop looking for separators
+                    const wchar_t *ptr = (nargsplit < max) ? wcsstr(cur, sep) : 0;
+                    if (ptr == 0)
+                    {
+                        splits.push_back(cur);
+                        cur = 0;
+                    }
+                    else
+                    {
+                        splits.push_back(wcstring(cur, ptr - cur));
+                        cur = ptr + seplen;
+                        nargsplit++;
+                        nsplit++;
+                    }
+                }
+            }
+        }
+    }
+
+    if (!quiet)
+    {
+        for (std::list<wcstring>::const_iterator si = splits.begin(); si != splits.end(); ++si)
+        {
+            stdout_buffer += *si;
+            stdout_buffer += L'\n';
+        }
+    }
+
+    return (nsplit > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+}
+
+/**
+   'string sub [-s START] [-l LENGTH] [-q] [ARG...]': print a substring of
+   each argument.
+
+   -s/--start   1-based start position; a negative value counts from the
+                end of the argument; 0 is rejected
+   -l/--length  maximum number of characters; negative values are rejected
+   -q/--quiet   print nothing
+
+   Returns BUILTIN_STRING_OK if at least one argument was processed,
+   BUILTIN_STRING_NONE if there were none, and BUILTIN_STRING_ERROR on a
+   usage error.
+*/
+static int string_sub(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L":l:qs:";
+    const struct woption long_options[] =
+    {
+        { L"length", required_argument, 0, 'l'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"start", required_argument, 0, 's'},
+        { 0, 0, 0, 0 }
+    };
+
+    int start = 0;
+    int length = -1;
+    bool quiet = false;
+    wgetopter_t w;
+    int opt;
+    while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        if (opt == 'l' || opt == 's')
+        {
+            // Both numeric options share the same parsing step
+            wchar_t *endptr = 0;
+            errno = 0;
+            const int value = int(wcstol(w.woptarg, &endptr, 10));
+            if (*endptr != L'\0' || errno != 0)
+            {
+                string_error(BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
+                return BUILTIN_STRING_ERROR;
+            }
+
+            if (opt == 'l')
+            {
+                length = value;
+                if (length < 0)
+                {
+                    string_error(_(L"%ls: Invalid length value '%d'\n"), argv[0], length);
+                    return BUILTIN_STRING_ERROR;
+                }
+            }
+            else
+            {
+                start = value;
+                if (start == 0)
+                {
+                    string_error(_(L"%ls: Invalid start value '%d'\n"), argv[0], start);
+                    return BUILTIN_STRING_ERROR;
+                }
+            }
+        }
+        else if (opt == 'q')
+        {
+            quiet = true;
+        }
+        else if (opt == ':')
+        {
+            string_error(STRING_ERR_MISSING, argv[0]);
+            return BUILTIN_STRING_ERROR;
+        }
+        else if (opt == '?')
+        {
+            string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+            return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    int argidx = w.woptind;
+    if (string_args_from_stdin() && argc > argidx)
+    {
+        string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    int nsub = 0;
+    for (const wchar_t *arg = string_get_arg(&argidx, argv);
+         arg != 0;
+         arg = string_get_arg(&argidx, argv))
+    {
+        wcstring s(arg);
+        const wcstring::size_type len = s.length();
+
+        // Translate the 1-based (or from-the-end) start into an offset,
+        // clamped to the bounds of the string
+        wcstring::size_type pos = 0;
+        if (start > 0)
+        {
+            pos = start - 1;
+        }
+        else if (start < 0)
+        {
+            wcstring::size_type n = -start;
+            pos = n > len ? 0 : len - n;
+        }
+        if (pos > len)
+        {
+            pos = len;
+        }
+
+        // No length, or one that runs past the end, means "to the end"
+        wcstring::size_type count = wcstring::npos;
+        if (length >= 0)
+        {
+            count = length;
+        }
+        if (pos + count > len)
+        {
+            count = wcstring::npos;
+        }
+
+        if (!quiet)
+        {
+            stdout_buffer += s.substr(pos, count);
+            stdout_buffer += L'\n';
+        }
+        nsub++;
+    }
+
+    if (nsub > 0)
+    {
+        return BUILTIN_STRING_OK;
+    }
+    return BUILTIN_STRING_NONE;
+}
+
+/**
+   'string trim [-l] [-r] [-c CHARS] [-q] [ARG...]': strip leading and
+   trailing characters from each argument and print the result.
+
+   -c/--chars  set of characters to strip (default: space, \f, \n, \r, \t)
+   -l/--left   strip the left side only
+   -r/--right  strip the right side only (both flags, or neither, strip
+               both sides)
+   -q/--quiet  print nothing
+
+   Returns BUILTIN_STRING_OK if at least one character was stripped,
+   BUILTIN_STRING_NONE otherwise, and BUILTIN_STRING_ERROR on a usage error.
+*/
+static int string_trim(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L":c:lqr";
+    const struct woption long_options[] =
+    {
+        { L"chars", required_argument, 0, 'c'},
+        { L"left", no_argument, 0, 'l'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"right", no_argument, 0, 'r'},
+        { 0, 0, 0, 0 }
+    };
+
+    bool left_requested = false;
+    bool right_requested = false;
+    bool quiet = false;
+    wcstring chars = L" \f\n\r\t";
+    wgetopter_t w;
+    int opt;
+    while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        if (opt == 'c')
+        {
+            chars = w.woptarg;
+        }
+        else if (opt == 'l')
+        {
+            left_requested = true;
+        }
+        else if (opt == 'q')
+        {
+            quiet = true;
+        }
+        else if (opt == 'r')
+        {
+            right_requested = true;
+        }
+        else if (opt == ':')
+        {
+            string_error(STRING_ERR_MISSING, argv[0]);
+            return BUILTIN_STRING_ERROR;
+        }
+        else if (opt == '?')
+        {
+            string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+            return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    // A side is trimmed when it was asked for, or when no side was named
+    const bool trim_left = left_requested || !right_requested;
+    const bool trim_right = right_requested || !left_requested;
+
+    int argidx = w.woptind;
+    if (string_args_from_stdin() && argc > argidx)
+    {
+        string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    int ntrim = 0;
+    for (const wchar_t *arg = string_get_arg(&argidx, argv);
+         arg != 0;
+         arg = string_get_arg(&argidx, argv))
+    {
+        const wchar_t *begin = arg;
+        const wchar_t *end = arg + wcslen(arg);
+        if (trim_left)
+        {
+            while (begin != end && chars.find(*begin) != wcstring::npos)
+            {
+                begin++;
+                ntrim++;
+            }
+        }
+        if (trim_right)
+        {
+            while (begin != end && chars.find(end[-1]) != wcstring::npos)
+            {
+                end--;
+                ntrim++;
+            }
+        }
+        if (!quiet)
+        {
+            stdout_buffer += wcstring(begin, end - begin);
+            stdout_buffer += L'\n';
+        }
+    }
+
+    if (ntrim > 0)
+    {
+        return BUILTIN_STRING_OK;
+    }
+    return BUILTIN_STRING_NONE;
+}
+
+// Dispatch table mapping each subcommand name to its handler. The handler is
+// called with argv[0] being the subcommand name itself. The table ends with
+// an all-null entry, which the lookup in builtin_string() uses as its
+// "not found" sentinel.
+static const struct string_subcommand
+{
+ // Subcommand name as typed by the user
+ const wchar_t *name;
+ // Handler; returns one of the BUILTIN_STRING_* codes
+ int (*handler)(parser_t &, int argc, wchar_t **argv);
+}
+string_subcommands[] =
+{
+ { L"escape", &string_escape },
+ { L"join", &string_join },
+ { L"length", &string_length },
+ { L"match", &string_match },
+ { L"replace", &string_replace },
+ { L"split", &string_split },
+ { L"sub", &string_sub },
+ { L"trim", &string_trim },
+ { 0, 0 }
+};
+
+/**
+   Entry point of the string builtin. argv[1] selects the subcommand, whose
+   handler is then called with the argument vector shifted by one so that
+   it sees its own name as argv[0].
+
+   A missing or unknown subcommand prints an error followed by the help
+   text and returns BUILTIN_STRING_ERROR. -h/--help as the first argument
+   prints the help text and returns BUILTIN_STRING_OK. Otherwise the
+   handler's return value is passed through.
+*/
+/*static*/ int builtin_string(parser_t &parser, wchar_t **argv)
+{
+    const int argc = builtin_count_args(argv);
+    if (argc <= 1)
+    {
+        string_error(STRING_ERR_MISSING, argv[0]);
+        builtin_print_help(parser, L"string", stderr_buffer);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    const wchar_t *subcmd_name = argv[1];
+    const bool wants_help = wcscmp(subcmd_name, L"-h") == 0 || wcscmp(subcmd_name, L"--help") == 0;
+    if (wants_help)
+    {
+        builtin_print_help(parser, L"string", stderr_buffer);
+        return BUILTIN_STRING_OK;
+    }
+
+    // Walk the table until the name matches or the null sentinel is reached
+    const string_subcommand *subcmd = string_subcommands;
+    for (; subcmd->name != 0; subcmd++)
+    {
+        if (wcscmp(subcmd->name, subcmd_name) == 0)
+        {
+            break;
+        }
+    }
+    if (subcmd->handler == 0)
+    {
+        string_error(_(L"%ls: Unknown subcommand '%ls'\n"), argv[0], subcmd_name);
+        builtin_print_help(parser, L"string", stderr_buffer);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    // Drop "string" so that the handler's argv[0] is the subcommand name
+    return subcmd->handler(parser, argc - 1, argv + 1);
+}
diff --git a/src/common.cpp b/src/common.cpp
index 78d0e238..50cb1102 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -1106,7 +1106,7 @@ static wint_t string_last_char(const wcstring &str)
}
/* Given a null terminated string starting with a backslash, read the escape as if it is unquoted, appending to result. Return the number of characters consumed, or 0 on error */
-static size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, bool unescape_special)
+size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, bool unescape_special)
{
if (input[0] != L'\\')
{
diff --git a/src/common.h b/src/common.h
index e27968fd..88bbf480 100644
--- a/src/common.h
+++ b/src/common.h
@@ -825,6 +825,9 @@ wcstring escape_string(const wcstring &in, escape_flags_t flags);
character set.
*/
+/** Given a null terminated string starting with a backslash, read the escape as if it is unquoted, appending to result. Return the number of characters consumed, or 0 on error */
+size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, bool unescape_special);
+
/** Unescapes a string in-place. A true result indicates the string was unescaped, a false result indicates the string was unmodified. */
bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special);
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 3b36b577..1b314799 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4017,6 +4017,291 @@ static void test_wcstring_tok(void)
}
}
+int builtin_string(parser_t &parser, wchar_t **argv);
+extern wcstring stdout_buffer;
+
+/**
+ Run the 'string' builtin once and check its result.
+
+ stdout_buffer is cleared, builtin_string is called with argv, and then the
+ return code and the contents of stdout_buffer are compared against the
+ expected values. A mismatch is reported through err(); the return code is
+ checked first and the output is only compared when the return code matches.
+
+ \param argv null-terminated argument vector, starting with the builtin name
+ \param expected_rc the return code builtin_string should produce
+ \param expected_out the exact text expected in stdout_buffer afterwards
+
+ Note: __LINE__ expands to a line inside this helper, so the reported line
+ number does not identify the failing table row; the escaped argument list
+ in the message is what identifies the test.
+*/
+static void run_one_string_test(const wchar_t **argv, int expected_rc, const wchar_t *expected_out)
+{
+    parser_t parser(PARSER_TYPE_GENERAL, true);
+    wcstring &out = stdout_buffer;
+    out.clear();
+    int rc = builtin_string(parser, const_cast<wchar_t**>(argv));
+
+    /* Build a printable, space-separated rendition of argv for error messages.
+       The separator goes in front of every argument but the first, so an
+       empty argv yields an empty string and nothing has to be trimmed. */
+    wcstring args;
+    for (int i = 0; argv[i] != 0; i++)
+    {
+        if (i > 0)
+        {
+            args += L' ';
+        }
+        args += escape_string(argv[i], ESCAPE_ALL);
+    }
+
+    if (rc != expected_rc)
+    {
+        /* __LINE__ is an int; cast it so it matches the %lu conversion */
+        err(L"Test failed on line %lu: [%ls]: expected return code %d but got %d",
+            (unsigned long)__LINE__, args.c_str(), expected_rc, rc);
+    }
+    else if (out != expected_out)
+    {
+        err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]",
+            (unsigned long)__LINE__, args.c_str(),
+            escape_string(expected_out, ESCAPE_ALL).c_str(),
+            escape_string(out, ESCAPE_ALL).c_str());
+    }
+}
+
+/**
+ Table-driven test of the 'string' builtin.
+
+ Each row of string_tests holds an argument vector, the return code expected
+ from the builtin and the output it is expected to produce. Rows are handed
+ to run_one_string_test one at a time, and the table ends at the row whose
+ argv[0] is null.
+
+ argv has room for 15 pointers and unused slots are zero-initialized, so
+ rows that omit the explicit trailing 0 (the 'trim' rows) are still
+ null-terminated.
+*/
+static void test_string(void)
+{
+    static struct string_test
+    {
+        const wchar_t *argv[15];
+        int expected_rc;
+        const wchar_t *expected_out;
+    }
+    string_tests[] =
+    {
+        { {L"string", L"escape", 0}, 1, L"" },
+        { {L"string", L"escape", L"", 0}, 0, L"''\n" },
+        { {L"string", L"escape", L"-n", L"", 0}, 0, L"\n" },
+        { {L"string", L"escape", L"a", 0}, 0, L"a\n" },
+        { {L"string", L"escape", L"\x07", 0}, 0, L"\\cg\n" },
+        { {L"string", L"escape", L"\"x\"", 0}, 0, L"'\"x\"'\n" },
+        { {L"string", L"escape", L"hello world", 0}, 0, L"'hello world'\n" },
+        { {L"string", L"escape", L"-n", L"hello world", 0}, 0, L"hello\\ world\n" },
+        { {L"string", L"escape", L"hello", L"world", 0}, 0, L"hello\nworld\n" },
+        { {L"string", L"escape", L"-n", L"~", 0}, 0, L"\\~\n" },
+
+        { {L"string", L"join", 0}, 2, L"" },
+        { {L"string", L"join", L"", 0}, 1, L"" },
+        { {L"string", L"join", L"", L"", L"", L"", 0}, 0, L"\n" },
+        { {L"string", L"join", L"", L"a", L"b", L"c", 0}, 0, L"abc\n" },
+        { {L"string", L"join", L".", L"fishshell", L"com", 0}, 0, L"fishshell.com\n" },
+        { {L"string", L"join", L"/", L"usr", 0}, 1, L"usr\n" },
+        { {L"string", L"join", L"/", L"usr", L"local", L"bin", 0}, 0, L"usr/local/bin\n" },
+        { {L"string", L"join", L"...", L"3", L"2", L"1", 0}, 0, L"3...2...1\n" },
+        { {L"string", L"join", L"-q", 0}, 2, L"" },
+        { {L"string", L"join", L"-q", L".", 0}, 1, L"" },
+        { {L"string", L"join", L"-q", L".", L".", 0}, 1, L"" },
+
+        { {L"string", L"length", 0}, 1, L"" },
+        { {L"string", L"length", L"", 0}, 1, L"0\n" },
+        { {L"string", L"length", L"", L"", L"", 0}, 1, L"0\n0\n0\n" },
+        { {L"string", L"length", L"a", 0}, 0, L"1\n" },
+        { {L"string", L"length", L"\U0002008A", 0}, 0, L"1\n" },
+        { {L"string", L"length", L"um", L"dois", L"três", 0}, 0, L"2\n4\n4\n" },
+        { {L"string", L"length", L"um", L"dois", L"três", 0}, 0, L"2\n4\n4\n" },
+        { {L"string", L"length", L"-q", 0}, 1, L"" },
+        { {L"string", L"length", L"-q", L"", 0}, 1, L"" },
+        { {L"string", L"length", L"-q", L"a", 0}, 0, L"" },
+
+        { {L"string", L"match", 0}, 2, L"" },
+        { {L"string", L"match", L"", 0}, 1, L"" },
+        { {L"string", L"match", L"", L"", 0}, 0, L"\n" },
+        { {L"string", L"match", L"?", L"a", 0}, 0, L"a\n" },
+        { {L"string", L"match", L"*", L"", 0}, 0, L"\n" },
+        { {L"string", L"match", L"**", L"", 0}, 0, L"\n" },
+        { {L"string", L"match", L"*", L"xyzzy", 0}, 0, L"xyzzy\n" },
+        { {L"string", L"match", L"**", L"plugh", 0}, 0, L"plugh\n" },
+        { {L"string", L"match", L"a*b", L"axxb", 0}, 0, L"axxb\n" },
+        { {L"string", L"match", L"a??b", L"axxb", 0}, 0, L"axxb\n" },
+        { {L"string", L"match", L"-i", L"a??B", L"axxb", 0}, 0, L"axxb\n" },
+        { {L"string", L"match", L"-i", L"a??b", L"Axxb", 0}, 0, L"Axxb\n" },
+        { {L"string", L"match", L"a*", L"axxb", 0}, 0, L"axxb\n" },
+        { {L"string", L"match", L"*a", L"xxa", 0}, 0, L"xxa\n" },
+        { {L"string", L"match", L"*a*", L"axa", 0}, 0, L"axa\n" },
+        { {L"string", L"match", L"*a*", L"xax", 0}, 0, L"xax\n" },
+        { {L"string", L"match", L"*a*", L"bxa", 0}, 0, L"bxa\n" },
+        { {L"string", L"match", L"*a", L"a", 0}, 0, L"a\n" },
+        { {L"string", L"match", L"a*", L"a", 0}, 0, L"a\n" },
+        { {L"string", L"match", L"a*b*c", L"axxbyyc", 0}, 0, L"axxbyyc\n" },
+        { {L"string", L"match", L"a*b?c", L"axxbyc", 0}, 0, L"axxbyc\n" },
+        { {L"string", L"match", L"*?", L"a", 0}, 0, L"a\n" },
+        { {L"string", L"match", L"*?", L"ab", 0}, 0, L"ab\n" },
+        { {L"string", L"match", L"?*", L"a", 0}, 0, L"a\n" },
+        { {L"string", L"match", L"?*", L"ab", 0}, 0, L"ab\n" },
+        { {L"string", L"match", L"\\*", L"*", 0}, 0, L"*\n" },
+        { {L"string", L"match", L"a*\\", L"abc\\", 0}, 0, L"abc\\\n" },
+        { {L"string", L"match", L"a*\\?", L"abc?", 0}, 0, L"abc?\n" },
+
+        { {L"string", L"match", L"?", L"", 0}, 1, L"" },
+        { {L"string", L"match", L"?", L"ab", 0}, 1, L"" },
+        { {L"string", L"match", L"??", L"a", 0}, 1, L"" },
+        { {L"string", L"match", L"?a", L"a", 0}, 1, L"" },
+        { {L"string", L"match", L"a?", L"a", 0}, 1, L"" },
+        { {L"string", L"match", L"a??B", L"axxb", 0}, 1, L"" },
+        { {L"string", L"match", L"a*b", L"axxbc", 0}, 1, L"" },
+        { {L"string", L"match", L"*b", L"bbba", 0}, 1, L"" },
+        { {L"string", L"match", L"0x[0-9a-fA-F][0-9a-fA-F]", L"0xbad", 0}, 1, L"" },
+
+        { {L"string", L"match", L"-a", L"*", L"ab", L"cde", 0}, 0, L"ab\ncde\n" },
+        { {L"string", L"match", L"*", L"ab", L"cde", 0}, 0, L"ab\ncde\n" },
+        { {L"string", L"match", L"-n", L"*d*", L"cde", 0}, 0, L"1 3\n" },
+        { {L"string", L"match", L"-n", L"*x*", L"cde", 0}, 1, L"" },
+        { {L"string", L"match", L"-q", L"a*", L"b", L"c", 0}, 1, L"" },
+        { {L"string", L"match", L"-q", L"a*", L"b", L"a", 0}, 0, L"" },
+
+        { {L"string", L"match", L"-r", 0}, 2, L"" },
+        { {L"string", L"match", L"-r", L"", 0}, 1, L"" },
+        { {L"string", L"match", L"-r", L"", L"", 0}, 0, L"\n" },
+        { {L"string", L"match", L"-r", L".", L"a", 0}, 0, L"a\n" },
+        { {L"string", L"match", L"-r", L".*", L"", 0}, 0, L"\n" },
+        { {L"string", L"match", L"-r", L"a*b", L"b", 0}, 0, L"b\n" },
+        { {L"string", L"match", L"-r", L"a*b", L"aab", 0}, 0, L"aab\n" },
+        { {L"string", L"match", L"-r", L"-i", L"a*b", L"Aab", 0}, 0, L"Aab\n" },
+        { {L"string", L"match", L"-r", L"-a", L"a[bc]", L"abadac", 0}, 0, L"ab\nac\n" },
+        { {L"string", L"match", L"-r", L"a", L"xaxa", L"axax", 0}, 0, L"a\na\n" },
+        { {L"string", L"match", L"-r", L"-a", L"a", L"xaxa", L"axax", 0}, 0, L"a\na\na\na\n" },
+        { {L"string", L"match", L"-r", L"a[bc]", L"abadac", 0}, 0, L"ab\n" },
+        { {L"string", L"match", L"-r", L"-q", L"a[bc]", L"abadac", 0}, 0, L"" },
+        { {L"string", L"match", L"-r", L"-q", L"a[bc]", L"ad", 0}, 1, L"" },
+        { {L"string", L"match", L"-r", L"(a+)b(c)", L"aabc", 0}, 0, L"aabc\naa\nc\n" },
+        { {L"string", L"match", L"-r", L"-a", L"(a)b(c)", L"abcabc", 0}, 0, L"abc\na\nc\nabc\na\nc\n" },
+        { {L"string", L"match", L"-r", L"(a)b(c)", L"abcabc", 0}, 0, L"abc\na\nc\n" },
+        { {L"string", L"match", L"-r", L"(a|(z))(bc)", L"abc", 0}, 0, L"abc\na\nbc\n" },
+        { {L"string", L"match", L"-r", L"-n", L"a", L"ada", L"dad", 0}, 0, L"1 1\n2 1\n" },
+        { {L"string", L"match", L"-r", L"-n", L"-a", L"a", L"bacadae", 0}, 0, L"2 1\n4 1\n6 1\n" },
+        { {L"string", L"match", L"-r", L"-n", L"(a).*(b)", L"a---b", 0}, 0, L"1 5\n1 1\n5 1\n" },
+        { {L"string", L"match", L"-r", L"-n", L"(a)(b)", L"ab", 0}, 0, L"1 2\n1 1\n2 1\n" },
+        { {L"string", L"match", L"-r", L"-n", L"(a)(b)", L"abab", 0}, 0, L"1 2\n1 1\n2 1\n" },
+        { {L"string", L"match", L"-r", L"-n", L"-a", L"(a)(b)", L"abab", 0}, 0, L"1 2\n1 1\n2 1\n3 2\n3 1\n4 1\n" },
+        { {L"string", L"match", L"-r", L"*", L"", 0}, 2, L"" },
+        { {L"string", L"match", L"-r", L"-a", L"a*", L"b", 0}, 0, L"\n\n" },
+        { {L"string", L"match", L"-r", L"foo\\Kbar", L"foobar", 0}, 0, L"bar\n" },
+        { {L"string", L"match", L"-r", L"(foo)\\Kbar", L"foobar", 0}, 0, L"bar\nfoo\n" },
+        { {L"string", L"match", L"-r", L"(?=ab\\K)", L"ab", 0}, 0, L"\n" },
+        { {L"string", L"match", L"-r", L"(?=ab\\K)..(?=cd\\K)", L"abcd", 0}, 0, L"\n" },
+
+        { {L"string", L"replace", 0}, 2, L"" },
+        { {L"string", L"replace", L"", 0}, 2, L"" },
+        { {L"string", L"replace", L"", L"", 0}, 1, L"" },
+        { {L"string", L"replace", L"", L"", L"", 0}, 1, L"\n" },
+        { {L"string", L"replace", L"", L"", L" ", 0}, 1, L" \n" },
+        { {L"string", L"replace", L"a", L"b", L"", 0}, 1, L"\n" },
+        { {L"string", L"replace", L"a", L"b", L"a", 0}, 0, L"b\n" },
+        { {L"string", L"replace", L"a", L"b", L"xax", 0}, 0, L"xbx\n" },
+        { {L"string", L"replace", L"a", L"b", L"xax", L"axa", 0}, 0, L"xbx\nbxa\n" },
+        { {L"string", L"replace", L"bar", L"x", L"red barn", 0}, 0, L"red xn\n" },
+        { {L"string", L"replace", L"x", L"bar", L"red xn", 0}, 0, L"red barn\n" },
+        { {L"string", L"replace", L"--", L"x", L"-", L"xyz", 0}, 0, L"-yz\n" },
+        { {L"string", L"replace", L"--", L"y", L"-", L"xyz", 0}, 0, L"x-z\n" },
+        { {L"string", L"replace", L"--", L"z", L"-", L"xyz", 0}, 0, L"xy-\n" },
+        { {L"string", L"replace", L"-i", L"z", L"X", L"_Z_", 0}, 0, L"_X_\n" },
+        { {L"string", L"replace", L"-a", L"a", L"A", L"aaa", 0}, 0, L"AAA\n" },
+        { {L"string", L"replace", L"-i", L"a", L"z", L"AAA", 0}, 0, L"zAA\n" },
+        { {L"string", L"replace", L"-q", L"x", L">x<", L"x", 0}, 0, L"" },
+        { {L"string", L"replace", L"-a", L"x", L"", L"xxx", 0}, 0, L"\n" },
+        { {L"string", L"replace", L"-a", L"***", L"_", L"*****", 0}, 0, L"_**\n" },
+        { {L"string", L"replace", L"-a", L"***", L"***", L"******", 0}, 0, L"******\n" },
+        { {L"string", L"replace", L"-a", L"a", L"b", L"xax", L"axa", 0}, 0, L"xbx\nbxb\n" },
+
+        { {L"string", L"replace", L"-r", 0}, 2, L"" },
+        { {L"string", L"replace", L"-r", L"", 0}, 2, L"" },
+        { {L"string", L"replace", L"-r", L"", L"", 0}, 1, L"" },
+        { {L"string", L"replace", L"-r", L"", L"", L"", 0}, 0, L"\n" }, // pcre2 behavior
+        { {L"string", L"replace", L"-r", L"", L"", L" ", 0}, 0, L" \n" }, // pcre2 behavior
+        { {L"string", L"replace", L"-r", L"a", L"b", L"", 0}, 1, L"\n" },
+        { {L"string", L"replace", L"-r", L"a", L"b", L"a", 0}, 0, L"b\n" },
+        { {L"string", L"replace", L"-r", L".", L"x", L"abc", 0}, 0, L"xbc\n" },
+        { {L"string", L"replace", L"-r", L".", L"", L"abc", 0}, 0, L"bc\n" },
+        { {L"string", L"replace", L"-r", L"(\\w)(\\w)", L"$2$1", L"ab", 0}, 0, L"ba\n" },
+        { {L"string", L"replace", L"-r", L"(\\w)", L"$1$1", L"ab", 0}, 0, L"aab\n" },
+        { {L"string", L"replace", L"-r", L"-a", L".", L"x", L"abc", 0}, 0, L"xxx\n" },
+        { {L"string", L"replace", L"-r", L"-a", L"(\\w)", L"$1$1", L"ab", 0}, 0, L"aabb\n" },
+        { {L"string", L"replace", L"-r", L"-a", L".", L"", L"abc", 0}, 0, L"\n" },
+        { {L"string", L"replace", L"-r", L"a", L"x", L"bc", L"cd", L"de", 0}, 1, L"bc\ncd\nde\n" },
+        { {L"string", L"replace", L"-r", L"a", L"x", L"aba", L"caa", 0}, 0, L"xba\ncxa\n" },
+        { {L"string", L"replace", L"-r", L"-a", L"a", L"x", L"aba", L"caa", 0}, 0, L"xbx\ncxx\n" },
+        { {L"string", L"replace", L"-r", L"-i", L"A", L"b", L"xax", 0}, 0, L"xbx\n" },
+        { {L"string", L"replace", L"-r", L"-i", L"[a-z]", L".", L"1A2B", 0}, 0, L"1.2B\n" },
+        { {L"string", L"replace", L"-r", L"A", L"b", L"xax", 0}, 1, L"xax\n" },
+        { {L"string", L"replace", L"-r", L"a", L"$1", L"a", 0}, 2, L"" },
+        { {L"string", L"replace", L"-r", L"(a)", L"$2", L"a", 0}, 2, L"" },
+        { {L"string", L"replace", L"-r", L"*", L".", L"a", 0}, 2, L"" },
+        { {L"string", L"replace", L"-r", L"^(.)", L"\t$1", L"abc", L"x", 0}, 0, L"\tabc\n\tx\n" },
+
+        { {L"string", L"split", 0}, 2, L"" },
+        { {L"string", L"split", L":", 0}, 1, L"" },
+        { {L"string", L"split", L".", L"www.ch.ic.ac.uk", 0}, 0, L"www\nch\nic\nac\nuk\n" },
+        { {L"string", L"split", L"..", L"....", 0}, 0, L"\n\n\n" },
+        { {L"string", L"split", L"-m", L"x", L"..", L"....", 0}, 2, L"" },
+        { {L"string", L"split", L"-m1", L"..", L"....", 0}, 0, L"\n..\n" },
+        { {L"string", L"split", L"-m0", L"/", L"/usr/local/bin/fish", 0}, 1, L"/usr/local/bin/fish\n" },
+        { {L"string", L"split", L"-m2", L":", L"a:b:c:d", L"e:f:g:h", 0}, 0, L"a\nb\nc:d\ne\nf\ng:h\n" },
+        { {L"string", L"split", L"-m1", L"-r", L"/", L"/usr/local/bin/fish", 0}, 0, L"/usr/local/bin\nfish\n" },
+        { {L"string", L"split", L"-r", L".", L"www.ch.ic.ac.uk", 0}, 0, L"www\nch\nic\nac\nuk\n" },
+        { {L"string", L"split", L"--", L"--", L"a--b---c----d", 0}, 0, L"a\nb\n-c\n\nd\n" },
+        { {L"string", L"split", L"-r", L"..", L"....", 0}, 0, L"\n\n\n" },
+        { {L"string", L"split", L"-r", L"--", L"--", L"a--b---c----d", 0}, 0, L"a\nb-\nc\n\nd\n" },
+        { {L"string", L"split", L"", L"", 0}, 1, L"\n" },
+        { {L"string", L"split", L"", L"a", 0}, 1, L"a\n" },
+        { {L"string", L"split", L"", L"ab", 0}, 0, L"a\nb\n" },
+        { {L"string", L"split", L"", L"abc", 0}, 0, L"a\nb\nc\n" },
+        { {L"string", L"split", L"-m1", L"", L"abc", 0}, 0, L"a\nbc\n" },
+        { {L"string", L"split", L"-r", L"", L"", 0}, 1, L"\n" },
+        { {L"string", L"split", L"-r", L"", L"a", 0}, 1, L"a\n" },
+        { {L"string", L"split", L"-r", L"", L"ab", 0}, 0, L"a\nb\n" },
+        { {L"string", L"split", L"-r", L"", L"abc", 0}, 0, L"a\nb\nc\n" },
+        { {L"string", L"split", L"-r", L"-m1", L"", L"abc", 0}, 0, L"ab\nc\n" },
+        { {L"string", L"split", L"-q", 0}, 2, L"" },
+        { {L"string", L"split", L"-q", L":", 0}, 1, L"" },
+        { {L"string", L"split", L"-q", L"x", L"axbxc", 0}, 0, L"" },
+
+        { {L"string", L"sub", 0}, 1, L"" },
+        { {L"string", L"sub", L"abcde", 0}, 0, L"abcde\n"},
+        { {L"string", L"sub", L"-l", L"x", L"abcde", 0}, 2, L""},
+        { {L"string", L"sub", L"-s", L"x", L"abcde", 0}, 2, L""},
+        { {L"string", L"sub", L"-l0", L"abcde", 0}, 0, L"\n"},
+        { {L"string", L"sub", L"-l2", L"abcde", 0}, 0, L"ab\n"},
+        { {L"string", L"sub", L"-l5", L"abcde", 0}, 0, L"abcde\n"},
+        { {L"string", L"sub", L"-l6", L"abcde", 0}, 0, L"abcde\n"},
+        { {L"string", L"sub", L"-l-1", L"abcde", 0}, 2, L""},
+        { {L"string", L"sub", L"-s0", L"abcde", 0}, 2, L""},
+        { {L"string", L"sub", L"-s1", L"abcde", 0}, 0, L"abcde\n"},
+        { {L"string", L"sub", L"-s5", L"abcde", 0}, 0, L"e\n"},
+        { {L"string", L"sub", L"-s6", L"abcde", 0}, 0, L"\n"},
+        { {L"string", L"sub", L"-s-1", L"abcde", 0}, 0, L"e\n"},
+        { {L"string", L"sub", L"-s-5", L"abcde", 0}, 0, L"abcde\n"},
+        { {L"string", L"sub", L"-s-6", L"abcde", 0}, 0, L"abcde\n"},
+        { {L"string", L"sub", L"-s1", L"-l0", L"abcde", 0}, 0, L"\n"},
+        { {L"string", L"sub", L"-s1", L"-l1", L"abcde", 0}, 0, L"a\n"},
+        { {L"string", L"sub", L"-s2", L"-l2", L"abcde", 0}, 0, L"bc\n"},
+        { {L"string", L"sub", L"-s-1", L"-l1", L"abcde", 0}, 0, L"e\n"},
+        { {L"string", L"sub", L"-s-1", L"-l2", L"abcde", 0}, 0, L"e\n"},
+        { {L"string", L"sub", L"-s-3", L"-l2", L"abcde", 0}, 0, L"cd\n"},
+        { {L"string", L"sub", L"-s-3", L"-l4", L"abcde", 0}, 0, L"cde\n"},
+        { {L"string", L"sub", L"-q", 0}, 1, L"" },
+        { {L"string", L"sub", L"-q", L"abcde", 0}, 0, L""},
+
+        { {L"string", L"trim", 0}, 1, L""},
+        { {L"string", L"trim", L""}, 1, L"\n"},
+        { {L"string", L"trim", L" "}, 0, L"\n"},
+        { {L"string", L"trim", L" \f\n\r\t"}, 0, L"\n"},
+        { {L"string", L"trim", L" a"}, 0, L"a\n"},
+        { {L"string", L"trim", L"a "}, 0, L"a\n"},
+        { {L"string", L"trim", L" a "}, 0, L"a\n"},
+        { {L"string", L"trim", L"-l", L" a"}, 0, L"a\n"},
+        { {L"string", L"trim", L"-l", L"a "}, 1, L"a \n"},
+        { {L"string", L"trim", L"-l", L" a "}, 0, L"a \n"},
+        { {L"string", L"trim", L"-r", L" a"}, 1, L" a\n"},
+        { {L"string", L"trim", L"-r", L"a "}, 0, L"a\n"},
+        { {L"string", L"trim", L"-r", L" a "}, 0, L" a\n"},
+        { {L"string", L"trim", L"-c", L".", L" a"}, 1, L" a\n"},
+        { {L"string", L"trim", L"-c", L".", L"a "}, 1, L"a \n"},
+        { {L"string", L"trim", L"-c", L".", L" a "}, 1, L" a \n"},
+        { {L"string", L"trim", L"-c", L".", L".a"}, 0, L"a\n"},
+        { {L"string", L"trim", L"-c", L".", L"a."}, 0, L"a\n"},
+        { {L"string", L"trim", L"-c", L".", L".a."}, 0, L"a\n"},
+        { {L"string", L"trim", L"-c", L"\\/", L"/a\\"}, 0, L"a\n"},
+        { {L"string", L"trim", L"-c", L"\\/", L"a/"}, 0, L"a\n"},
+        { {L"string", L"trim", L"-c", L"\\/", L"\\a/"}, 0, L"a\n"},
+        { {L"string", L"trim", L"-c", L"", L".a."}, 1, L".a.\n"},
+
+        { {0}, 0, 0 }
+    };
+
+    /* Feed every row to the checker; the all-null row ends the walk */
+    for (struct string_test *test_case = string_tests; test_case->argv[0] != 0; test_case++)
+    {
+        run_one_string_test(test_case->argv, test_case->expected_rc, test_case->expected_out);
+    }
+}
+
/**
Main test
*/
@@ -4108,6 +4393,7 @@ int main(int argc, char **argv)
if (should_test_function("history_races")) history_tests_t::test_history_races();
if (should_test_function("history_formats")) history_tests_t::test_history_formats();
//history_tests_t::test_history_speed();
+ if (should_test_function("string")) test_string();
say(L"Encountered %d errors in low-level tests", err_count);
if (s_test_run_count == 0)