aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/builtin_string.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/builtin_string.cpp')
-rw-r--r--src/builtin_string.cpp1375
1 files changed, 1375 insertions, 0 deletions
diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp
new file mode 100644
index 00000000..6a53e548
--- /dev/null
+++ b/src/builtin_string.cpp
@@ -0,0 +1,1375 @@
+/** \file builtin_string.cpp
+ Implementation of the string builtin.
+*/
+
+#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS
+#ifdef _WIN32
+#define PCRE2_STATIC
+#endif
+#include "pcre2.h"
+
+#include "wildcard.h"
+
+#define MAX_REPLACE_SIZE size_t(1048576) // pcre2_substitute maximum output size in wchar_t
+#define STRING_ERR_MISSING _(L"%ls: Expected argument\n")
+
+enum
+{
+ BUILTIN_STRING_OK = 0,
+ BUILTIN_STRING_NONE = 1,
+ BUILTIN_STRING_ERROR = 2
+};
+
+static void string_error(const wchar_t *fmt, ...)
+{
+ va_list va;
+ va_start(va, fmt);
+ wcstring errstr = vformat_string(fmt, va);
+ va_end(va);
+
+ stderr_buffer += L"string ";
+ stderr_buffer += errstr;
+}
+
+static void string_unknown_option(parser_t &parser, const wchar_t *subcmd, const wchar_t *opt)
+{
+ string_error(BUILTIN_ERR_UNKNOWN, subcmd, opt);
+ builtin_print_help(parser, L"string", stderr_buffer);
+}
+
+static bool string_args_from_stdin()
+{
+ return builtin_stdin != STDIN_FILENO || !isatty(builtin_stdin);
+}
+
+static const wchar_t *string_get_arg_stdin()
+{
+ static wcstring warg;
+
+ std::string arg;
+ for (;;)
+ {
+ char ch = '\0';
+ int rc = read_blocked(builtin_stdin, &ch, 1);
+
+ if (rc < 0)
+ {
+ // failure
+ return 0;
+ }
+
+ if (rc == 0)
+ {
+ // eof
+ if (arg.empty())
+ {
+ return 0;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if (ch == '\n')
+ {
+ break;
+ }
+
+ arg += ch;
+ }
+
+ warg = str2wcstring(arg.c_str(), arg.size());
+ return warg.c_str();
+}
+
+static const wchar_t *string_get_arg_argv(int *argidx, wchar_t **argv)
+{
+ return (argv && argv[*argidx]) ? argv[(*argidx)++] : 0;
+}
+
+static const wchar_t *string_get_arg(int *argidx, wchar_t **argv)
+{
+ if (string_args_from_stdin())
+ {
+ return string_get_arg_stdin();
+ }
+ else
+ {
+ return string_get_arg_argv(argidx, argv);
+ }
+}
+
+static int string_escape(parser_t &parser, int argc, wchar_t **argv)
+{
+ const wchar_t *short_options = L"n";
+ const struct woption long_options[] =
+ {
+ { L"no-quoted", no_argument, 0, 'n' },
+ { 0, 0, 0, 0 }
+ };
+
+ escape_flags_t flags = ESCAPE_ALL;
+ wgetopter_t w;
+ for (;;)
+ {
+ int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+ if (c == -1)
+ {
+ break;
+ }
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case 'n':
+ flags |= ESCAPE_NO_QUOTED;
+ break;
+
+ case '?':
+ string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int i = w.woptind;
+ if (string_args_from_stdin() && argc > i)
+ {
+ string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ int nesc = 0;
+ const wchar_t *arg;
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ stdout_buffer += escape(arg, flags);
+ stdout_buffer += L'\n';
+ nesc++;
+ }
+
+ return (nesc > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+}
+
+static int string_join(parser_t &parser, int argc, wchar_t **argv)
+{
+ const wchar_t *short_options = L"q";
+ const struct woption long_options[] =
+ {
+ { L"quiet", no_argument, 0, 'q'},
+ { 0, 0, 0, 0 }
+ };
+
+ bool quiet = false;
+ wgetopter_t w;
+ for (;;)
+ {
+ int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+ if (c == -1)
+ {
+ break;
+ }
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case 'q':
+ quiet = true;
+ break;
+
+ case '?':
+ string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int i = w.woptind;
+ const wchar_t *sep;
+ if ((sep = string_get_arg_argv(&i, argv)) == 0)
+ {
+ string_error(STRING_ERR_MISSING, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ if (string_args_from_stdin() && argc > i)
+ {
+ string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ int nargs = 0;
+ const wchar_t *arg;
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ if (!quiet)
+ {
+ stdout_buffer += arg;
+ stdout_buffer += sep;
+ }
+ nargs++;
+ }
+ if (nargs > 0 && !quiet)
+ {
+ stdout_buffer.resize(stdout_buffer.length() - wcslen(sep));
+ stdout_buffer += L'\n';
+ }
+
+ return (nargs > 1) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+}
+
+static int string_length(parser_t &parser, int argc, wchar_t **argv)
+{
+ const wchar_t *short_options = L"q";
+ const struct woption long_options[] =
+ {
+ { L"quiet", no_argument, 0, 'q'},
+ { 0, 0, 0, 0 }
+ };
+
+ bool quiet = false;
+ wgetopter_t w;
+ for (;;)
+ {
+ int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+ if (c == -1)
+ {
+ break;
+ }
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case 'q':
+ quiet = true;
+ break;
+
+ case '?':
+ string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int i = w.woptind;
+ if (string_args_from_stdin() && argc > i)
+ {
+ string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ const wchar_t *arg;
+ int nnonempty = 0;
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ size_t n = wcslen(arg);
+ if (n > 0)
+ {
+ nnonempty++;
+ }
+ if (!quiet)
+ {
+ stdout_buffer += to_string(int(n));
+ stdout_buffer += L'\n';
+ }
+ }
+
+ return (nnonempty > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+}
+
+struct match_options_t
+{
+ bool all;
+ bool ignore_case;
+ bool index;
+ bool quiet;
+
+ match_options_t(): all(false), ignore_case(false), index(false), quiet(false) { }
+};
+
+class string_matcher_t
+{
+protected:
+ match_options_t opts;
+ int total_matched;
+
+public:
+ string_matcher_t(const match_options_t &opts_)
+ : opts(opts_), total_matched(0)
+ { }
+
+ virtual ~string_matcher_t() { }
+ virtual bool report_matches(const wchar_t *arg) = 0;
+ int match_count() { return total_matched; }
+};
+
+class wildcard_matcher_t: public string_matcher_t
+{
+ wcstring wcpattern;
+
+public:
+ wildcard_matcher_t(const wchar_t * /*argv0*/, const wchar_t *pattern, const match_options_t &opts)
+ : string_matcher_t(opts)
+ {
+ wcpattern = parse_util_unescape_wildcards(pattern);
+
+ if (opts.ignore_case)
+ {
+ for (int i = 0; i < wcpattern.length(); i++)
+ {
+ wcpattern[i] = towlower(wcpattern[i]);
+ }
+ }
+ }
+
+ virtual ~wildcard_matcher_t() { }
+
+ bool report_matches(const wchar_t *arg)
+ {
+ // Note: --all is a no-op for glob matching since the pattern is always
+ // matched against the entire argument
+ bool match;
+ if (opts.ignore_case)
+ {
+ wcstring s = arg;
+ for (int i = 0; i < s.length(); i++)
+ {
+ s[i] = towlower(s[i]);
+ }
+ match = wildcard_match(s, wcpattern, false);
+ }
+ else
+ {
+ match = wildcard_match(arg, wcpattern, false);
+ }
+ if (match)
+ {
+ total_matched++;
+ }
+ if (!opts.quiet)
+ {
+ if (match)
+ {
+ if (opts.index)
+ {
+ stdout_buffer += L"1 ";
+ stdout_buffer += to_string(wcslen(arg));
+ stdout_buffer += L'\n';
+ }
+ else
+ {
+ stdout_buffer += arg;
+ stdout_buffer += L'\n';
+ }
+ }
+ }
+ return true;
+ }
+};
+
+static const wchar_t *pcre2_strerror(int err_code)
+{
+ static wchar_t buf[128];
+ pcre2_get_error_message(err_code, (PCRE2_UCHAR *)buf, sizeof(buf) / sizeof(wchar_t));
+ return buf;
+}
+
+struct compiled_regex_t
+{
+ pcre2_code *code;
+ pcre2_match_data *match;
+
+ compiled_regex_t(const wchar_t *argv0, const wchar_t *pattern, bool ignore_case)
+ : code(0), match(0)
+ {
+ // Disable some sequences that can lead to security problems
+ uint32_t options = PCRE2_NEVER_UTF;
+#if PCRE2_CODE_UNIT_WIDTH < 32
+ options |= PCRE2_NEVER_BACKSLASH_C;
+#endif
+
+ int err_code = 0;
+ PCRE2_SIZE err_offset = 0;
+
+ code = pcre2_compile(
+ PCRE2_SPTR(pattern),
+ PCRE2_ZERO_TERMINATED,
+ options | (ignore_case ? PCRE2_CASELESS : 0),
+ &err_code,
+ &err_offset,
+ 0);
+ if (code == 0)
+ {
+ string_error(_(L"%ls: Regular expression compile error: %ls\n"),
+ argv0, pcre2_strerror(err_code));
+ string_error(L"%ls: %ls\n", argv0, pattern);
+ string_error(L"%ls: %*ls\n", argv0, err_offset, L"^");
+ return;
+ }
+
+ match = pcre2_match_data_create_from_pattern(code, 0);
+ if (match == 0)
+ {
+ DIE_MEM();
+ }
+ }
+
+ ~compiled_regex_t()
+ {
+ if (match != 0)
+ {
+ pcre2_match_data_free(match);
+ }
+ if (code != 0)
+ {
+ pcre2_code_free(code);
+ }
+ }
+};
+
+class pcre2_matcher_t: public string_matcher_t
+{
+ const wchar_t *argv0;
+ compiled_regex_t regex;
+
+ int report_match(const wchar_t *arg, int pcre2_rc)
+ {
+ // Return values: -1 = error, 0 = no match, 1 = match
+ if (pcre2_rc == PCRE2_ERROR_NOMATCH)
+ {
+ return 0;
+ }
+ if (pcre2_rc < 0)
+ {
+ string_error(_(L"%ls: Regular expression match error: %ls\n"),
+ argv0, pcre2_strerror(pcre2_rc));
+ return -1;
+ }
+ if (pcre2_rc == 0)
+ {
+ // The output vector wasn't big enough. Should not happen.
+ string_error(_(L"%ls: Regular expression internal error\n"), argv0);
+ return -1;
+ }
+ PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
+ for (int j = 0; j < pcre2_rc; j++)
+ {
+ PCRE2_SIZE begin = ovector[2*j];
+ PCRE2_SIZE end = ovector[2*j + 1];
+ if (!opts.quiet)
+ {
+ if (begin != PCRE2_UNSET && end != PCRE2_UNSET)
+ {
+ if (opts.index)
+ {
+ stdout_buffer += to_string(begin + 1);
+ stdout_buffer += ' ';
+ stdout_buffer += to_string(end - begin);
+ }
+ else if (end > begin) // may have end < begin if \K is used
+ {
+ stdout_buffer += wcstring(&arg[begin], end - begin);
+ }
+ stdout_buffer += L'\n';
+ }
+ }
+ }
+ return 1;
+ }
+
+public:
+ pcre2_matcher_t(const wchar_t *argv0_, const wchar_t *pattern, const match_options_t &opts)
+ : string_matcher_t(opts),
+ argv0(argv0_),
+ regex(argv0_, pattern, opts.ignore_case)
+ { }
+
+ virtual ~pcre2_matcher_t() { }
+
+ bool report_matches(const wchar_t *arg)
+ {
+ // A return value of true means all is well (even if no matches were
+ // found), false indicates an unrecoverable error.
+ if (regex.code == 0)
+ {
+ // pcre2_compile() failed
+ return false;
+ }
+
+ int matched = 0;
+
+ // See pcre2demo.c for an explanation of this logic
+ PCRE2_SIZE arglen = wcslen(arg);
+ int rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg), arglen, 0, 0, regex.match, 0));
+ if (rc < 0)
+ {
+ // pcre2 match error
+ return false;
+ }
+ if (rc == 0)
+ {
+ // no match
+ return true;
+ }
+ matched++;
+ total_matched++;
+
+ // Report any additional matches
+ PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
+ while (opts.all || matched == 0)
+ {
+ uint32_t options = 0;
+ PCRE2_SIZE offset = ovector[1]; // Start at end of previous match
+
+ if (ovector[0] == ovector[1])
+ {
+ if (ovector[0] == arglen)
+ {
+ break;
+ }
+ options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ }
+
+ rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg), arglen, offset, options, regex.match, 0));
+ if (rc < 0)
+ {
+ return false;
+ }
+ if (rc == 0)
+ {
+ if (options == 0)
+ {
+ // All matches found
+ break;
+ }
+ ovector[1] = offset + 1;
+ continue;
+ }
+ matched++;
+ total_matched++;
+ }
+ return true;
+ }
+};
+
+static int string_match(parser_t &parser, int argc, wchar_t **argv)
+{
+ const wchar_t *short_options = L"ainqr";
+ const struct woption long_options[] =
+ {
+ { L"all", no_argument, 0, 'a'},
+ { L"ignore-case", no_argument, 0, 'i'},
+ { L"index", no_argument, 0, 'n'},
+ { L"quiet", no_argument, 0, 'q'},
+ { L"regex", no_argument, 0, 'r'},
+ { 0, 0, 0, 0 }
+ };
+
+ match_options_t opts;
+ bool regex = false;
+ wgetopter_t w;
+ for (;;)
+ {
+ int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+ if (c == -1)
+ {
+ break;
+ }
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case 'a':
+ opts.all = true;
+ break;
+
+ case 'i':
+ opts.ignore_case = true;
+ break;
+
+ case 'n':
+ opts.index = true;
+ break;
+
+ case 'q':
+ opts.quiet = true;
+ break;
+
+ case 'r':
+ regex = true;
+ break;
+
+ case '?':
+ string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int i = w.woptind;
+ const wchar_t *pattern;
+ if ((pattern = string_get_arg_argv(&i, argv)) == 0)
+ {
+ string_error(STRING_ERR_MISSING, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ if (string_args_from_stdin() && argc > i)
+ {
+ string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ string_matcher_t *matcher;
+ if (regex)
+ {
+ matcher = new pcre2_matcher_t(argv[0], pattern, opts);
+ }
+ else
+ {
+ matcher = new wildcard_matcher_t(argv[0], pattern, opts);
+ }
+
+ const wchar_t *arg;
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ if (!matcher->report_matches(arg))
+ {
+ delete matcher;
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int rc = matcher->match_count() > 0 ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+ delete matcher;
+ return rc;
+}
+
+struct replace_options_t
+{
+ bool all;
+ bool ignore_case;
+ bool quiet;
+
+ replace_options_t(): all(false), ignore_case(false), quiet(false) { }
+};
+
+class string_replacer_t
+{
+protected:
+ const wchar_t *argv0;
+ replace_options_t opts;
+ int total_replaced;
+
+public:
+ string_replacer_t(const wchar_t *argv0_, const replace_options_t &opts_)
+ : argv0(argv0_), opts(opts_), total_replaced(0)
+ { }
+
+ virtual ~string_replacer_t() {}
+ virtual bool replace_matches(const wchar_t *arg) = 0;
+ int replace_count() { return total_replaced; }
+};
+
+class literal_replacer_t: public string_replacer_t
+{
+ const wchar_t *pattern;
+ const wchar_t *replacement;
+ int patlen;
+
+public:
+ literal_replacer_t(const wchar_t *argv0, const wchar_t *pattern_, const wchar_t *replacement_,
+ const replace_options_t &opts)
+ : string_replacer_t(argv0, opts),
+ pattern(pattern_), replacement(replacement_), patlen(wcslen(pattern))
+ { }
+
+ virtual ~literal_replacer_t() { }
+
+ bool replace_matches(const wchar_t *arg)
+ {
+ wcstring result;
+ if (patlen == 0)
+ {
+ result = arg;
+ }
+ else
+ {
+ int replaced = 0;
+ const wchar_t *cur = arg;
+ while (*cur != L'\0')
+ {
+ if ((opts.all || replaced == 0) &&
+ (opts.ignore_case ? wcsncasecmp(cur, pattern, patlen) : wcsncmp(cur, pattern, patlen)) == 0)
+ {
+ result += replacement;
+ cur += patlen;
+ replaced++;
+ total_replaced++;
+ }
+ else
+ {
+ result += *cur;
+ cur++;
+ }
+ }
+ }
+ if (!opts.quiet)
+ {
+ stdout_buffer += result;
+ stdout_buffer += L'\n';
+ }
+ return true;
+ }
+};
+
+class regex_replacer_t: public string_replacer_t
+{
+ compiled_regex_t regex;
+ wcstring replacement;
+
+ wcstring interpret_escapes(const wchar_t *orig)
+ {
+ wcstring result;
+
+ while (*orig != L'\0')
+ {
+ if (*orig == L'\\')
+ {
+ orig += read_unquoted_escape(orig, &result, true, false);
+ }
+ else
+ {
+ result += *orig;
+ orig++;
+ }
+ }
+
+ return result;
+ }
+
+public:
+ regex_replacer_t(const wchar_t *argv0, const wchar_t *pattern, const wchar_t *replacement_,
+ const replace_options_t &opts)
+ : string_replacer_t(argv0, opts),
+ regex(argv0, pattern, opts.ignore_case),
+ replacement(interpret_escapes(replacement_))
+ { }
+
+ virtual ~regex_replacer_t() { }
+
+ bool replace_matches(const wchar_t *arg)
+ {
+ // A return value of true means all is well (even if no replacements
+ // were performed), false indicates an unrecoverable error.
+ if (regex.code == 0)
+ {
+ // pcre2_compile() failed
+ return false;
+ }
+
+ uint32_t options = opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0;
+ int arglen = wcslen(arg);
+ PCRE2_SIZE outlen = (arglen == 0) ? 16 : 2 * arglen;
+ wchar_t *output = (wchar_t *)malloc(sizeof(wchar_t) * outlen);
+ if (output == 0)
+ {
+ DIE_MEM();
+ }
+ int pcre2_rc = 0;
+ for (;;)
+ {
+ pcre2_rc = pcre2_substitute(
+ regex.code,
+ PCRE2_SPTR(arg),
+ arglen,
+ 0, // start offset
+ options,
+ regex.match,
+ 0, // match context
+ PCRE2_SPTR(replacement.c_str()),
+ PCRE2_ZERO_TERMINATED,
+ (PCRE2_UCHAR *)output,
+ &outlen);
+
+ if (pcre2_rc == PCRE2_ERROR_NOMEMORY)
+ {
+ if (outlen < MAX_REPLACE_SIZE)
+ {
+ outlen = std::min(2 * outlen, MAX_REPLACE_SIZE);
+ output = (wchar_t *)realloc(output, sizeof(wchar_t) * outlen);
+ if (output == 0)
+ {
+ DIE_MEM();
+ }
+ continue;
+ }
+ string_error(_(L"%ls: Replacement string too large\n"), argv0);
+ free(output);
+ return false;
+ }
+ break;
+ }
+
+ bool rc = true;
+ if (pcre2_rc < 0)
+ {
+ string_error(_(L"%ls: Regular expression substitute error: %ls\n"),
+ argv0, pcre2_strerror(pcre2_rc));
+ rc = false;
+ }
+ else
+ {
+ if (!opts.quiet)
+ {
+ stdout_buffer += output;
+ stdout_buffer += L'\n';
+ }
+ total_replaced += pcre2_rc;
+ }
+
+ free(output);
+ return rc;
+ }
+};
+
+static int string_replace(parser_t &parser, int argc, wchar_t **argv)
+{
+ const wchar_t *short_options = L"aiqr";
+ const struct woption long_options[] =
+ {
+ { L"all", no_argument, 0, 'a'},
+ { L"ignore-case", no_argument, 0, 'i'},
+ { L"quiet", no_argument, 0, 'q'},
+ { L"regex", no_argument, 0, 'r'},
+ { 0, 0, 0, 0 }
+ };
+
+ replace_options_t opts;
+ bool regex = false;
+ wgetopter_t w;
+ for (;;)
+ {
+ int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+ if (c == -1)
+ {
+ break;
+ }
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case 'a':
+ opts.all = true;
+ break;
+
+ case 'i':
+ opts.ignore_case = true;
+ break;
+
+ case 'q':
+ opts.quiet = true;
+ break;
+
+ case 'r':
+ regex = true;
+ break;
+
+ case '?':
+ string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int i = w.woptind;
+ const wchar_t *pattern, *replacement;
+ if ((pattern = string_get_arg_argv(&i, argv)) == 0)
+ {
+ string_error(STRING_ERR_MISSING, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+ if ((replacement = string_get_arg_argv(&i, argv)) == 0)
+ {
+ string_error(STRING_ERR_MISSING, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ if (string_args_from_stdin() && argc > i)
+ {
+ string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ string_replacer_t *replacer;
+ if (regex)
+ {
+ replacer = new regex_replacer_t(argv[0], pattern, replacement, opts);
+ }
+ else
+ {
+ replacer = new literal_replacer_t(argv[0], pattern, replacement, opts);
+ }
+
+ const wchar_t *arg;
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ if (!replacer->replace_matches(arg))
+ {
+ delete replacer;
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int rc = replacer->replace_count() > 0 ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+ delete replacer;
+ return rc;
+}
+
+static int string_split(parser_t &parser, int argc, wchar_t **argv)
+{
+ const wchar_t *short_options = L":m:qr";
+ const struct woption long_options[] =
+ {
+ { L"max", required_argument, 0, 'm'},
+ { L"quiet", no_argument, 0, 'q'},
+ { L"right", no_argument, 0, 'r'},
+ { 0, 0, 0, 0 }
+ };
+
+ long max = LONG_MAX;
+ bool quiet = false;
+ bool right = false;
+ wgetopter_t w;
+ for (;;)
+ {
+ int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+ if (c == -1)
+ {
+ break;
+ }
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case 'm':
+ {
+ errno = 0;
+ wchar_t *endptr = 0;
+ max = wcstol(w.woptarg, &endptr, 10);
+ if (*endptr != L'\0' || errno != 0)
+ {
+ string_error(BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
+ return BUILTIN_STRING_ERROR;
+ }
+ break;
+ }
+
+ case 'q':
+ quiet = true;
+ break;
+
+ case 'r':
+ right = true;
+ break;
+
+ case ':':
+ string_error(STRING_ERR_MISSING, argv[0]);
+ return BUILTIN_STRING_ERROR;
+
+ case '?':
+ string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int i = w.woptind;
+ const wchar_t *sep;
+ if ((sep = string_get_arg_argv(&i, argv)) == 0)
+ {
+ string_error(STRING_ERR_MISSING, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ if (string_args_from_stdin() && argc > i)
+ {
+ string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ std::list<wcstring> splits;
+ int seplen = wcslen(sep);
+ int nsplit = 0;
+ const wchar_t *arg;
+ if (right)
+ {
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ int nargsplit = 0;
+ if (seplen == 0)
+ {
+ // Split to individual characters
+ const wchar_t *cur = arg + wcslen(arg) - 1;
+ while (cur > arg && nargsplit < max)
+ {
+ splits.push_front(wcstring(cur, 1));
+ cur--;
+ nargsplit++;
+ nsplit++;
+ }
+ splits.push_front(wcstring(arg, cur - arg + 1));
+ }
+ else
+ {
+ const wchar_t *end = arg + wcslen(arg);
+ const wchar_t *cur = end - seplen;
+ while (cur >= arg && nargsplit < max)
+ {
+ if (wcsncmp(cur, sep, seplen) == 0)
+ {
+ splits.push_front(wcstring(cur + seplen, end - cur - seplen));
+ end = cur;
+ cur -= seplen;
+ nargsplit++;
+ nsplit++;
+ }
+ else
+ {
+ cur--;
+ }
+ }
+ splits.push_front(wcstring(arg, end - arg));
+ }
+ }
+ }
+ else
+ {
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ const wchar_t *cur = arg;
+ int nargsplit = 0;
+ if (seplen == 0)
+ {
+ // Split to individual characters
+ const wchar_t *last = arg + wcslen(arg) - 1;
+ while (cur < last && nargsplit < max)
+ {
+ splits.push_back(wcstring(cur, 1));
+ cur++;
+ nargsplit++;
+ nsplit++;
+ }
+ splits.push_back(cur);
+ }
+ else
+ {
+ while (cur != 0)
+ {
+ const wchar_t *ptr = (nargsplit < max) ? wcsstr(cur, sep) : 0;
+ if (ptr == 0)
+ {
+ splits.push_back(cur);
+ cur = 0;
+ }
+ else
+ {
+ splits.push_back(wcstring(cur, ptr - cur));
+ cur = ptr + seplen;
+ nargsplit++;
+ nsplit++;
+ }
+ }
+ }
+ }
+ }
+
+ if (!quiet)
+ {
+ std::list<wcstring>::const_iterator si = splits.begin();
+ while (si != splits.end())
+ {
+ stdout_buffer += *si;
+ stdout_buffer += L'\n';
+ si++;
+ }
+ }
+
+ return (nsplit > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+}
+
+static int string_sub(parser_t &parser, int argc, wchar_t **argv)
+{
+ const wchar_t *short_options = L":l:qs:";
+ const struct woption long_options[] =
+ {
+ { L"length", required_argument, 0, 'l'},
+ { L"quiet", no_argument, 0, 'q'},
+ { L"start", required_argument, 0, 's'},
+ { 0, 0, 0, 0 }
+ };
+
+ int start = 0;
+ int length = -1;
+ bool quiet = false;
+ wgetopter_t w;
+ wchar_t *endptr = 0;
+ for (;;)
+ {
+ int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+ if (c == -1)
+ {
+ break;
+ }
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case 'l':
+ errno = 0;
+ length = int(wcstol(w.woptarg, &endptr, 10));
+ if (*endptr != L'\0' || errno != 0)
+ {
+ string_error(BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
+ return BUILTIN_STRING_ERROR;
+ }
+ if (length < 0)
+ {
+ string_error(_(L"%ls: Invalid length value '%d'\n"), argv[0], length);
+ return BUILTIN_STRING_ERROR;
+ }
+ break;
+
+ case 'q':
+ quiet = true;
+ break;
+
+ case 's':
+ errno = 0;
+ start = int(wcstol(w.woptarg, &endptr, 10));
+ if (*endptr != L'\0' || errno != 0)
+ {
+ string_error(BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
+ return BUILTIN_STRING_ERROR;
+ }
+ if (start == 0)
+ {
+ string_error(_(L"%ls: Invalid start value '%d'\n"), argv[0], start);
+ return BUILTIN_STRING_ERROR;
+ }
+ break;
+
+ case ':':
+ string_error(STRING_ERR_MISSING, argv[0]);
+ return BUILTIN_STRING_ERROR;
+
+ case '?':
+ string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int i = w.woptind;
+ if (string_args_from_stdin() && argc > i)
+ {
+ string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ int nsub = 0;
+ const wchar_t *arg;
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ wcstring::size_type pos = 0;
+ wcstring::size_type count = wcstring::npos;
+ wcstring s(arg);
+ if (start > 0)
+ {
+ pos = start - 1;
+ }
+ else if (start < 0)
+ {
+ wcstring::size_type n = -start;
+ pos = n > s.length() ? 0 : s.length() - n;
+ }
+ if (pos > s.length())
+ {
+ pos = s.length();
+ }
+
+ if (length >= 0)
+ {
+ count = length;
+ }
+ if (pos + count > s.length())
+ {
+ count = wcstring::npos;
+ }
+
+ if (!quiet)
+ {
+ stdout_buffer += s.substr(pos, count);
+ stdout_buffer += L'\n';
+ }
+ nsub++;
+ }
+
+ return (nsub > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+}
+
+static int string_trim(parser_t &parser, int argc, wchar_t **argv)
+{
+ const wchar_t *short_options = L":c:lqr";
+ const struct woption long_options[] =
+ {
+ { L"chars", required_argument, 0, 'c'},
+ { L"left", no_argument, 0, 'l'},
+ { L"quiet", no_argument, 0, 'q'},
+ { L"right", no_argument, 0, 'r'},
+ { 0, 0, 0, 0 }
+ };
+
+ int leftright = 0;
+ bool quiet = false;
+ wcstring chars = L" \f\n\r\t";
+ wgetopter_t w;
+ for (;;)
+ {
+ int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+ if (c == -1)
+ {
+ break;
+ }
+ switch (c)
+ {
+ case 0:
+ break;
+
+ case 'c':
+ chars = w.woptarg;
+ break;
+
+ case 'l':
+ leftright |= 1;
+ break;
+
+ case 'q':
+ quiet = true;
+ break;
+
+ case 'r':
+ leftright |= 2;
+ break;
+
+ case ':':
+ string_error(STRING_ERR_MISSING, argv[0]);
+ return BUILTIN_STRING_ERROR;
+
+ case '?':
+ string_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+ return BUILTIN_STRING_ERROR;
+ }
+ }
+
+ int i = w.woptind;
+ if (string_args_from_stdin() && argc > i)
+ {
+ string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ const wchar_t *arg;
+ int ntrim = 0;
+ while ((arg = string_get_arg(&i, argv)) != 0)
+ {
+ const wchar_t *begin = arg;
+ const wchar_t *end = arg + wcslen(arg);
+ if (!leftright || (leftright & 1))
+ {
+ while (begin != end && chars.find_first_of(begin, 0, 1) != wcstring::npos)
+ {
+ begin++;
+ ntrim++;
+ }
+ }
+ if (!leftright || (leftright & 2))
+ {
+ while (begin != end && chars.find_first_of(end - 1, 0, 1) != wcstring::npos)
+ {
+ end--;
+ ntrim++;
+ }
+ }
+ if (!quiet)
+ {
+ stdout_buffer += wcstring(begin, end - begin);
+ stdout_buffer += L'\n';
+ }
+ }
+
+ return (ntrim > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE;
+}
+
+static const struct string_subcommand
+{
+ const wchar_t *name;
+ int (*handler)(parser_t &, int argc, wchar_t **argv);
+}
+string_subcommands[] =
+{
+ { L"escape", &string_escape },
+ { L"join", &string_join },
+ { L"length", &string_length },
+ { L"match", &string_match },
+ { L"replace", &string_replace },
+ { L"split", &string_split },
+ { L"sub", &string_sub },
+ { L"trim", &string_trim },
+ { 0, 0 }
+};
+
+/**
+ The string builtin, for manipulating strings.
+*/
+/*static*/ int builtin_string(parser_t &parser, wchar_t **argv)
+{
+ int argc = builtin_count_args(argv);
+ if (argc <= 1)
+ {
+ string_error(STRING_ERR_MISSING, argv[0]);
+ builtin_print_help(parser, L"string", stderr_buffer);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ if (wcscmp(argv[1], L"-h") == 0 || wcscmp(argv[1], L"--help") == 0)
+ {
+ builtin_print_help(parser, L"string", stderr_buffer);
+ return BUILTIN_STRING_OK;
+ }
+
+ const string_subcommand *subcmd = &string_subcommands[0];
+ while (subcmd->name != 0 && wcscmp(subcmd->name, argv[1]) != 0)
+ {
+ subcmd++;
+ }
+ if (subcmd->handler == 0)
+ {
+ string_error(_(L"%ls: Unknown subcommand '%ls'\n"), argv[0], argv[1]);
+ builtin_print_help(parser, L"string", stderr_buffer);
+ return BUILTIN_STRING_ERROR;
+ }
+
+ argc--;
+ argv++;
+ return subcmd->handler(parser, argc, argv);
+}