diff options
Diffstat (limited to 'src/builtin_string.cpp')
-rw-r--r-- | src/builtin_string.cpp | 1375 |
1 files changed, 1375 insertions, 0 deletions
diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp new file mode 100644 index 00000000..6a53e548 --- /dev/null +++ b/src/builtin_string.cpp @@ -0,0 +1,1375 @@ +/** \file builtin_string.cpp + Implementation of the string builtin. +*/ + +#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS +#ifdef _WIN32 +#define PCRE2_STATIC +#endif +#include "pcre2.h" + +#include "wildcard.h" + +#define MAX_REPLACE_SIZE size_t(1048576) // pcre2_substitute maximum output size in wchar_t +#define STRING_ERR_MISSING _(L"%ls: Expected argument\n") + +enum +{ + BUILTIN_STRING_OK = 0, + BUILTIN_STRING_NONE = 1, + BUILTIN_STRING_ERROR = 2 +}; + +static void string_error(const wchar_t *fmt, ...) +{ + va_list va; + va_start(va, fmt); + wcstring errstr = vformat_string(fmt, va); + va_end(va); + + stderr_buffer += L"string "; + stderr_buffer += errstr; +} + +static void string_unknown_option(parser_t &parser, const wchar_t *subcmd, const wchar_t *opt) +{ + string_error(BUILTIN_ERR_UNKNOWN, subcmd, opt); + builtin_print_help(parser, L"string", stderr_buffer); +} + +static bool string_args_from_stdin() +{ + return builtin_stdin != STDIN_FILENO || !isatty(builtin_stdin); +} + +static const wchar_t *string_get_arg_stdin() +{ + static wcstring warg; + + std::string arg; + for (;;) + { + char ch = '\0'; + int rc = read_blocked(builtin_stdin, &ch, 1); + + if (rc < 0) + { + // failure + return 0; + } + + if (rc == 0) + { + // eof + if (arg.empty()) + { + return 0; + } + else + { + break; + } + } + + if (ch == '\n') + { + break; + } + + arg += ch; + } + + warg = str2wcstring(arg.c_str(), arg.size()); + return warg.c_str(); +} + +static const wchar_t *string_get_arg_argv(int *argidx, wchar_t **argv) +{ + return (argv && argv[*argidx]) ? argv[(*argidx)++] : 0; +} + +static const wchar_t *string_get_arg(int *argidx, wchar_t **argv) +{ + if (string_args_from_stdin()) + { + return string_get_arg_stdin(); + } + else + { + return string_get_arg_argv(argidx, argv); + } +} + +static int string_escape(parser_t &parser, int argc, wchar_t **argv) +{ + const wchar_t *short_options = L"n"; + const struct woption long_options[] = + { + { L"no-quoted", no_argument, 0, 'n' }, + { 0, 0, 0, 0 } + }; + + escape_flags_t flags = ESCAPE_ALL; + wgetopter_t w; + for (;;) + { + int c = w.wgetopt_long(argc, argv, short_options, long_options, 0); + + if (c == -1) + { + break; + } + switch (c) + { + case 0: + break; + + case 'n': + flags |= ESCAPE_NO_QUOTED; + break; + + case '?': + string_unknown_option(parser, argv[0], argv[w.woptind - 1]); + return BUILTIN_STRING_ERROR; + } + } + + int i = w.woptind; + if (string_args_from_stdin() && argc > i) + { + string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); + return BUILTIN_STRING_ERROR; + } + + int nesc = 0; + const wchar_t *arg; + while ((arg = string_get_arg(&i, argv)) != 0) + { + stdout_buffer += escape(arg, flags); + stdout_buffer += L'\n'; + nesc++; + } + + return (nesc > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE; +} + +static int string_join(parser_t &parser, int argc, wchar_t **argv) +{ + const wchar_t *short_options = L"q"; + const struct woption long_options[] = + { + { L"quiet", no_argument, 0, 'q'}, + { 0, 0, 0, 0 } + }; + + bool quiet = false; + wgetopter_t w; + for (;;) + { + int c = w.wgetopt_long(argc, argv, short_options, long_options, 0); + + if (c == -1) + { + break; + } + switch (c) + { + case 0: + break; + + case 'q': + quiet = true; + break; + + case '?': + string_unknown_option(parser, argv[0], argv[w.woptind - 1]); + return BUILTIN_STRING_ERROR; + } + } + + int i = w.woptind; + const wchar_t *sep; + if ((sep = string_get_arg_argv(&i, argv)) == 0) + { + string_error(STRING_ERR_MISSING, argv[0]); + return BUILTIN_STRING_ERROR; + } + + if (string_args_from_stdin() && argc > i) + { + string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); + return BUILTIN_STRING_ERROR; + } + + int nargs = 0; + const wchar_t *arg; + while ((arg = string_get_arg(&i, argv)) != 0) + { + if (!quiet) + { + stdout_buffer += arg; + stdout_buffer += sep; + } + nargs++; + } + if (nargs > 0 && !quiet) + { + stdout_buffer.resize(stdout_buffer.length() - wcslen(sep)); + stdout_buffer += L'\n'; + } + + return (nargs > 1) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE; +} + +static int string_length(parser_t &parser, int argc, wchar_t **argv) +{ + const wchar_t *short_options = L"q"; + const struct woption long_options[] = + { + { L"quiet", no_argument, 0, 'q'}, + { 0, 0, 0, 0 } + }; + + bool quiet = false; + wgetopter_t w; + for (;;) + { + int c = w.wgetopt_long(argc, argv, short_options, long_options, 0); + + if (c == -1) + { + break; + } + switch (c) + { + case 0: + break; + + case 'q': + quiet = true; + break; + + case '?': + string_unknown_option(parser, argv[0], argv[w.woptind - 1]); + return BUILTIN_STRING_ERROR; + } + } + + int i = w.woptind; + if (string_args_from_stdin() && argc > i) + { + string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); + return BUILTIN_STRING_ERROR; + } + + const wchar_t *arg; + int nnonempty = 0; + while ((arg = string_get_arg(&i, argv)) != 0) + { + size_t n = wcslen(arg); + if (n > 0) + { + nnonempty++; + } + if (!quiet) + { + stdout_buffer += to_string(int(n)); + stdout_buffer += L'\n'; + } + } + + return (nnonempty > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE; +} + +struct match_options_t +{ + bool all; + bool ignore_case; + bool index; + bool quiet; + + match_options_t(): all(false), ignore_case(false), index(false), quiet(false) { } +}; + +class string_matcher_t +{ +protected: + match_options_t opts; + int total_matched; + +public: + string_matcher_t(const match_options_t &opts_) + : opts(opts_), total_matched(0) + { } + + virtual ~string_matcher_t() { } + virtual bool report_matches(const wchar_t *arg) = 0; + int match_count() { return total_matched; } +}; + +class wildcard_matcher_t: public string_matcher_t +{ + wcstring wcpattern; + +public: + wildcard_matcher_t(const wchar_t * /*argv0*/, const wchar_t *pattern, const match_options_t &opts) + : string_matcher_t(opts) + { + wcpattern = parse_util_unescape_wildcards(pattern); + + if (opts.ignore_case) + { + for (int i = 0; i < wcpattern.length(); i++) + { + wcpattern[i] = towlower(wcpattern[i]); + } + } + } + + virtual ~wildcard_matcher_t() { } + + bool report_matches(const wchar_t *arg) + { + // Note: --all is a no-op for glob matching since the pattern is always + // matched against the entire argument + bool match; + if (opts.ignore_case) + { + wcstring s = arg; + for (int i = 0; i < s.length(); i++) + { + s[i] = towlower(s[i]); + } + match = wildcard_match(s, wcpattern, false); + } + else + { + match = wildcard_match(arg, wcpattern, false); + } + if (match) + { + total_matched++; + } + if (!opts.quiet) + { + if (match) + { + if (opts.index) + { + stdout_buffer += L"1 "; + stdout_buffer += to_string(wcslen(arg)); + stdout_buffer += L'\n'; + } + else + { + stdout_buffer += arg; + stdout_buffer += L'\n'; + } + } + } + return true; + } +}; + +static const wchar_t *pcre2_strerror(int err_code) +{ + static wchar_t buf[128]; + pcre2_get_error_message(err_code, (PCRE2_UCHAR *)buf, sizeof(buf) / sizeof(wchar_t)); + return buf; +} + +struct compiled_regex_t +{ + pcre2_code *code; + pcre2_match_data *match; + + compiled_regex_t(const wchar_t *argv0, const wchar_t *pattern, bool ignore_case) + : code(0), match(0) + { + // Disable some sequences that can lead to security problems + uint32_t options = PCRE2_NEVER_UTF; +#if PCRE2_CODE_UNIT_WIDTH < 32 + options |= PCRE2_NEVER_BACKSLASH_C; +#endif + + int err_code = 0; + PCRE2_SIZE err_offset = 0; + + code = pcre2_compile( + PCRE2_SPTR(pattern), + PCRE2_ZERO_TERMINATED, + options | (ignore_case ? PCRE2_CASELESS : 0), + &err_code, + &err_offset, + 0); + if (code == 0) + { + string_error(_(L"%ls: Regular expression compile error: %ls\n"), + argv0, pcre2_strerror(err_code)); + string_error(L"%ls: %ls\n", argv0, pattern); + string_error(L"%ls: %*ls\n", argv0, err_offset, L"^"); + return; + } + + match = pcre2_match_data_create_from_pattern(code, 0); + if (match == 0) + { + DIE_MEM(); + } + } + + ~compiled_regex_t() + { + if (match != 0) + { + pcre2_match_data_free(match); + } + if (code != 0) + { + pcre2_code_free(code); + } + } +}; + +class pcre2_matcher_t: public string_matcher_t +{ + const wchar_t *argv0; + compiled_regex_t regex; + + int report_match(const wchar_t *arg, int pcre2_rc) + { + // Return values: -1 = error, 0 = no match, 1 = match + if (pcre2_rc == PCRE2_ERROR_NOMATCH) + { + return 0; + } + if (pcre2_rc < 0) + { + string_error(_(L"%ls: Regular expression match error: %ls\n"), + argv0, pcre2_strerror(pcre2_rc)); + return -1; + } + if (pcre2_rc == 0) + { + // The output vector wasn't big enough. Should not happen. + string_error(_(L"%ls: Regular expression internal error\n"), argv0); + return -1; + } + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match); + for (int j = 0; j < pcre2_rc; j++) + { + PCRE2_SIZE begin = ovector[2*j]; + PCRE2_SIZE end = ovector[2*j + 1]; + if (!opts.quiet) + { + if (begin != PCRE2_UNSET && end != PCRE2_UNSET) + { + if (opts.index) + { + stdout_buffer += to_string(begin + 1); + stdout_buffer += ' '; + stdout_buffer += to_string(end - begin); + } + else if (end > begin) // may have end < begin if \K is used + { + stdout_buffer += wcstring(&arg[begin], end - begin); + } + stdout_buffer += L'\n'; + } + } + } + return 1; + } + +public: + pcre2_matcher_t(const wchar_t *argv0_, const wchar_t *pattern, const match_options_t &opts) + : string_matcher_t(opts), + argv0(argv0_), + regex(argv0_, pattern, opts.ignore_case) + { } + + virtual ~pcre2_matcher_t() { } + + bool report_matches(const wchar_t *arg) + { + // A return value of true means all is well (even if no matches were + // found), false indicates an unrecoverable error. + if (regex.code == 0) + { + // pcre2_compile() failed + return false; + } + + int matched = 0; + + // See pcre2demo.c for an explanation of this logic + PCRE2_SIZE arglen = wcslen(arg); + int rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg), arglen, 0, 0, regex.match, 0)); + if (rc < 0) + { + // pcre2 match error + return false; + } + if (rc == 0) + { + // no match + return true; + } + matched++; + total_matched++; + + // Report any additional matches + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match); + while (opts.all || matched == 0) + { + uint32_t options = 0; + PCRE2_SIZE offset = ovector[1]; // Start at end of previous match + + if (ovector[0] == ovector[1]) + { + if (ovector[0] == arglen) + { + break; + } + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + + rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg), arglen, offset, options, regex.match, 0)); + if (rc < 0) + { + return false; + } + if (rc == 0) + { + if (options == 0) + { + // All matches found + break; + } + ovector[1] = offset + 1; + continue; + } + matched++; + total_matched++; + } + return true; + } +}; + +static int string_match(parser_t &parser, int argc, wchar_t **argv) +{ + const wchar_t *short_options = L"ainqr"; + const struct woption long_options[] = + { + { L"all", no_argument, 0, 'a'}, + { L"ignore-case", no_argument, 0, 'i'}, + { L"index", no_argument, 0, 'n'}, + { L"quiet", no_argument, 0, 'q'}, + { L"regex", no_argument, 0, 'r'}, + { 0, 0, 0, 0 } + }; + + match_options_t opts; + bool regex = false; + wgetopter_t w; + for (;;) + { + int c = w.wgetopt_long(argc, argv, short_options, long_options, 0); + + if (c == -1) + { + break; + } + switch (c) + { + case 0: + break; + + case 'a': + opts.all = true; + break; + + case 'i': + opts.ignore_case = true; + break; + + case 'n': + opts.index = true; + break; + + case 'q': + opts.quiet = true; + break; + + case 'r': + regex = true; + break; + + case '?': + string_unknown_option(parser, argv[0], argv[w.woptind - 1]); + return BUILTIN_STRING_ERROR; + } + } + + int i = w.woptind; + const wchar_t *pattern; + if ((pattern = string_get_arg_argv(&i, argv)) == 0) + { + string_error(STRING_ERR_MISSING, argv[0]); + return BUILTIN_STRING_ERROR; + } + + if (string_args_from_stdin() && argc > i) + { + string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); + return BUILTIN_STRING_ERROR; + } + + string_matcher_t *matcher; + if (regex) + { + matcher = new pcre2_matcher_t(argv[0], pattern, opts); + } + else + { + matcher = new wildcard_matcher_t(argv[0], pattern, opts); + } + + const wchar_t *arg; + while ((arg = string_get_arg(&i, argv)) != 0) + { + if (!matcher->report_matches(arg)) + { + delete matcher; + return BUILTIN_STRING_ERROR; + } + } + + int rc = matcher->match_count() > 0 ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE; + delete matcher; + return rc; +} + +struct replace_options_t +{ + bool all; + bool ignore_case; + bool quiet; + + replace_options_t(): all(false), ignore_case(false), quiet(false) { } +}; + +class string_replacer_t +{ +protected: + const wchar_t *argv0; + replace_options_t opts; + int total_replaced; + +public: + string_replacer_t(const wchar_t *argv0_, const replace_options_t &opts_) + : argv0(argv0_), opts(opts_), total_replaced(0) + { } + + virtual ~string_replacer_t() {} + virtual bool replace_matches(const wchar_t *arg) = 0; + int replace_count() { return total_replaced; } +}; + +class literal_replacer_t: public string_replacer_t +{ + const wchar_t *pattern; + const wchar_t *replacement; + int patlen; + +public: + literal_replacer_t(const wchar_t *argv0, const wchar_t *pattern_, const wchar_t *replacement_, + const replace_options_t &opts) + : string_replacer_t(argv0, opts), + pattern(pattern_), replacement(replacement_), patlen(wcslen(pattern)) + { } + + virtual ~literal_replacer_t() { } + + bool replace_matches(const wchar_t *arg) + { + wcstring result; + if (patlen == 0) + { + result = arg; + } + else + { + int replaced = 0; + const wchar_t *cur = arg; + while (*cur != L'\0') + { + if ((opts.all || replaced == 0) && + (opts.ignore_case ? wcsncasecmp(cur, pattern, patlen) : wcsncmp(cur, pattern, patlen)) == 0) + { + result += replacement; + cur += patlen; + replaced++; + total_replaced++; + } + else + { + result += *cur; + cur++; + } + } + } + if (!opts.quiet) + { + stdout_buffer += result; + stdout_buffer += L'\n'; + } + return true; + } +}; + +class regex_replacer_t: public string_replacer_t +{ + compiled_regex_t regex; + wcstring replacement; + + wcstring interpret_escapes(const wchar_t *orig) + { + wcstring result; + + while (*orig != L'\0') + { + if (*orig == L'\\') + { + orig += read_unquoted_escape(orig, &result, true, false); + } + else + { + result += *orig; + orig++; + } + } + + return result; + } + +public: + regex_replacer_t(const wchar_t *argv0, const wchar_t *pattern, const wchar_t *replacement_, + const replace_options_t &opts) + : string_replacer_t(argv0, opts), + regex(argv0, pattern, opts.ignore_case), + replacement(interpret_escapes(replacement_)) + { } + + virtual ~regex_replacer_t() { } + + bool replace_matches(const wchar_t *arg) + { + // A return value of true means all is well (even if no replacements + // were performed), false indicates an unrecoverable error. + if (regex.code == 0) + { + // pcre2_compile() failed + return false; + } + + uint32_t options = opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0; + int arglen = wcslen(arg); + PCRE2_SIZE outlen = (arglen == 0) ? 16 : 2 * arglen; + wchar_t *output = (wchar_t *)malloc(sizeof(wchar_t) * outlen); + if (output == 0) + { + DIE_MEM(); + } + int pcre2_rc = 0; + for (;;) + { + pcre2_rc = pcre2_substitute( + regex.code, + PCRE2_SPTR(arg), + arglen, + 0, // start offset + options, + regex.match, + 0, // match context + PCRE2_SPTR(replacement.c_str()), + PCRE2_ZERO_TERMINATED, + (PCRE2_UCHAR *)output, + &outlen); + + if (pcre2_rc == PCRE2_ERROR_NOMEMORY) + { + if (outlen < MAX_REPLACE_SIZE) + { + outlen = std::min(2 * outlen, MAX_REPLACE_SIZE); + output = (wchar_t *)realloc(output, sizeof(wchar_t) * outlen); + if (output == 0) + { + DIE_MEM(); + } + continue; + } + string_error(_(L"%ls: Replacement string too large\n"), argv0); + free(output); + return false; + } + break; + } + + bool rc = true; + if (pcre2_rc < 0) + { + string_error(_(L"%ls: Regular expression substitute error: %ls\n"), + argv0, pcre2_strerror(pcre2_rc)); + rc = false; + } + else + { + if (!opts.quiet) + { + stdout_buffer += output; + stdout_buffer += L'\n'; + } + total_replaced += pcre2_rc; + } + + free(output); + return rc; + } +}; + +static int string_replace(parser_t &parser, int argc, wchar_t **argv) +{ + const wchar_t *short_options = L"aiqr"; + const struct woption long_options[] = + { + { L"all", no_argument, 0, 'a'}, + { L"ignore-case", no_argument, 0, 'i'}, + { L"quiet", no_argument, 0, 'q'}, + { L"regex", no_argument, 0, 'r'}, + { 0, 0, 0, 0 } + }; + + replace_options_t opts; + bool regex = false; + wgetopter_t w; + for (;;) + { + int c = w.wgetopt_long(argc, argv, short_options, long_options, 0); + + if (c == -1) + { + break; + } + switch (c) + { + case 0: + break; + + case 'a': + opts.all = true; + break; + + case 'i': + opts.ignore_case = true; + break; + + case 'q': + opts.quiet = true; + break; + + case 'r': + regex = true; + break; + + case '?': + string_unknown_option(parser, argv[0], argv[w.woptind - 1]); + return BUILTIN_STRING_ERROR; + } + } + + int i = w.woptind; + const wchar_t *pattern, *replacement; + if ((pattern = string_get_arg_argv(&i, argv)) == 0) + { + string_error(STRING_ERR_MISSING, argv[0]); + return BUILTIN_STRING_ERROR; + } + if ((replacement = string_get_arg_argv(&i, argv)) == 0) + { + string_error(STRING_ERR_MISSING, argv[0]); + return BUILTIN_STRING_ERROR; + } + + if (string_args_from_stdin() && argc > i) + { + string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); + return BUILTIN_STRING_ERROR; + } + + string_replacer_t *replacer; + if (regex) + { + replacer = new regex_replacer_t(argv[0], pattern, replacement, opts); + } + else + { + replacer = new literal_replacer_t(argv[0], pattern, replacement, opts); + } + + const wchar_t *arg; + while ((arg = string_get_arg(&i, argv)) != 0) + { + if (!replacer->replace_matches(arg)) + { + delete replacer; + return BUILTIN_STRING_ERROR; + } + } + + int rc = replacer->replace_count() > 0 ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE; + delete replacer; + return rc; +} + +static int string_split(parser_t &parser, int argc, wchar_t **argv) +{ + const wchar_t *short_options = L":m:qr"; + const struct woption long_options[] = + { + { L"max", required_argument, 0, 'm'}, + { L"quiet", no_argument, 0, 'q'}, + { L"right", no_argument, 0, 'r'}, + { 0, 0, 0, 0 } + }; + + long max = LONG_MAX; + bool quiet = false; + bool right = false; + wgetopter_t w; + for (;;) + { + int c = w.wgetopt_long(argc, argv, short_options, long_options, 0); + + if (c == -1) + { + break; + } + switch (c) + { + case 0: + break; + + case 'm': + { + errno = 0; + wchar_t *endptr = 0; + max = wcstol(w.woptarg, &endptr, 10); + if (*endptr != L'\0' || errno != 0) + { + string_error(BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); + return BUILTIN_STRING_ERROR; + } + break; + } + + case 'q': + quiet = true; + break; + + case 'r': + right = true; + break; + + case ':': + string_error(STRING_ERR_MISSING, argv[0]); + return BUILTIN_STRING_ERROR; + + case '?': + string_unknown_option(parser, argv[0], argv[w.woptind - 1]); + return BUILTIN_STRING_ERROR; + } + } + + int i = w.woptind; + const wchar_t *sep; + if ((sep = string_get_arg_argv(&i, argv)) == 0) + { + string_error(STRING_ERR_MISSING, argv[0]); + return BUILTIN_STRING_ERROR; + } + + if (string_args_from_stdin() && argc > i) + { + string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); + return BUILTIN_STRING_ERROR; + } + + std::list<wcstring> splits; + int seplen = wcslen(sep); + int nsplit = 0; + const wchar_t *arg; + if (right) + { + while ((arg = string_get_arg(&i, argv)) != 0) + { + int nargsplit = 0; + if (seplen == 0) + { + // Split to individual characters + const wchar_t *cur = arg + wcslen(arg) - 1; + while (cur > arg && nargsplit < max) + { + splits.push_front(wcstring(cur, 1)); + cur--; + nargsplit++; + nsplit++; + } + splits.push_front(wcstring(arg, cur - arg + 1)); + } + else + { + const wchar_t *end = arg + wcslen(arg); + const wchar_t *cur = end - seplen; + while (cur >= arg && nargsplit < max) + { + if (wcsncmp(cur, sep, seplen) == 0) + { + splits.push_front(wcstring(cur + seplen, end - cur - seplen)); + end = cur; + cur -= seplen; + nargsplit++; + nsplit++; + } + else + { + cur--; + } + } + splits.push_front(wcstring(arg, end - arg)); + } + } + } + else + { + while ((arg = string_get_arg(&i, argv)) != 0) + { + const wchar_t *cur = arg; + int nargsplit = 0; + if (seplen == 0) + { + // Split to individual characters + const wchar_t *last = arg + wcslen(arg) - 1; + while (cur < last && nargsplit < max) + { + splits.push_back(wcstring(cur, 1)); + cur++; + nargsplit++; + nsplit++; + } + splits.push_back(cur); + } + else + { + while (cur != 0) + { + const wchar_t *ptr = (nargsplit < max) ? wcsstr(cur, sep) : 0; + if (ptr == 0) + { + splits.push_back(cur); + cur = 0; + } + else + { + splits.push_back(wcstring(cur, ptr - cur)); + cur = ptr + seplen; + nargsplit++; + nsplit++; + } + } + } + } + } + + if (!quiet) + { + std::list<wcstring>::const_iterator si = splits.begin(); + while (si != splits.end()) + { + stdout_buffer += *si; + stdout_buffer += L'\n'; + si++; + } + } + + return (nsplit > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE; +} + +static int string_sub(parser_t &parser, int argc, wchar_t **argv) +{ + const wchar_t *short_options = L":l:qs:"; + const struct woption long_options[] = + { + { L"length", required_argument, 0, 'l'}, + { L"quiet", no_argument, 0, 'q'}, + { L"start", required_argument, 0, 's'}, + { 0, 0, 0, 0 } + }; + + int start = 0; + int length = -1; + bool quiet = false; + wgetopter_t w; + wchar_t *endptr = 0; + for (;;) + { + int c = w.wgetopt_long(argc, argv, short_options, long_options, 0); + + if (c == -1) + { + break; + } + switch (c) + { + case 0: + break; + + case 'l': + errno = 0; + length = int(wcstol(w.woptarg, &endptr, 10)); + if (*endptr != L'\0' || errno != 0) + { + string_error(BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); + return BUILTIN_STRING_ERROR; + } + if (length < 0) + { + string_error(_(L"%ls: Invalid length value '%d'\n"), argv[0], length); + return BUILTIN_STRING_ERROR; + } + break; + + case 'q': + quiet = true; + break; + + case 's': + errno = 0; + start = int(wcstol(w.woptarg, &endptr, 10)); + if (*endptr != L'\0' || errno != 0) + { + string_error(BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); + return BUILTIN_STRING_ERROR; + } + if (start == 0) + { + string_error(_(L"%ls: Invalid start value '%d'\n"), argv[0], start); + return BUILTIN_STRING_ERROR; + } + break; + + case ':': + string_error(STRING_ERR_MISSING, argv[0]); + return BUILTIN_STRING_ERROR; + + case '?': + string_unknown_option(parser, argv[0], argv[w.woptind - 1]); + return BUILTIN_STRING_ERROR; + } + } + + int i = w.woptind; + if (string_args_from_stdin() && argc > i) + { + string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); + return BUILTIN_STRING_ERROR; + } + + int nsub = 0; + const wchar_t *arg; + while ((arg = string_get_arg(&i, argv)) != 0) + { + wcstring::size_type pos = 0; + wcstring::size_type count = wcstring::npos; + wcstring s(arg); + if (start > 0) + { + pos = start - 1; + } + else if (start < 0) + { + wcstring::size_type n = -start; + pos = n > s.length() ? 0 : s.length() - n; + } + if (pos > s.length()) + { + pos = s.length(); + } + + if (length >= 0) + { + count = length; + } + if (pos + count > s.length()) + { + count = wcstring::npos; + } + + if (!quiet) + { + stdout_buffer += s.substr(pos, count); + stdout_buffer += L'\n'; + } + nsub++; + } + + return (nsub > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE; +} + +static int string_trim(parser_t &parser, int argc, wchar_t **argv) +{ + const wchar_t *short_options = L":c:lqr"; + const struct woption long_options[] = + { + { L"chars", required_argument, 0, 'c'}, + { L"left", no_argument, 0, 'l'}, + { L"quiet", no_argument, 0, 'q'}, + { L"right", no_argument, 0, 'r'}, + { 0, 0, 0, 0 } + }; + + int leftright = 0; + bool quiet = false; + wcstring chars = L" \f\n\r\t"; + wgetopter_t w; + for (;;) + { + int c = w.wgetopt_long(argc, argv, short_options, long_options, 0); + + if (c == -1) + { + break; + } + switch (c) + { + case 0: + break; + + case 'c': + chars = w.woptarg; + break; + + case 'l': + leftright |= 1; + break; + + case 'q': + quiet = true; + break; + + case 'r': + leftright |= 2; + break; + + case ':': + string_error(STRING_ERR_MISSING, argv[0]); + return BUILTIN_STRING_ERROR; + + case '?': + string_unknown_option(parser, argv[0], argv[w.woptind - 1]); + return BUILTIN_STRING_ERROR; + } + } + + int i = w.woptind; + if (string_args_from_stdin() && argc > i) + { + string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); + return BUILTIN_STRING_ERROR; + } + + const wchar_t *arg; + int ntrim = 0; + while ((arg = string_get_arg(&i, argv)) != 0) + { + const wchar_t *begin = arg; + const wchar_t *end = arg + wcslen(arg); + if (!leftright || (leftright & 1)) + { + while (begin != end && chars.find_first_of(begin, 0, 1) != wcstring::npos) + { + begin++; + ntrim++; + } + } + if (!leftright || (leftright & 2)) + { + while (begin != end && chars.find_first_of(end - 1, 0, 1) != wcstring::npos) + { + end--; + ntrim++; + } + } + if (!quiet) + { + stdout_buffer += wcstring(begin, end - begin); + stdout_buffer += L'\n'; + } + } + + return (ntrim > 0) ? BUILTIN_STRING_OK : BUILTIN_STRING_NONE; +} + +static const struct string_subcommand +{ + const wchar_t *name; + int (*handler)(parser_t &, int argc, wchar_t **argv); +} +string_subcommands[] = +{ + { L"escape", &string_escape }, + { L"join", &string_join }, + { L"length", &string_length }, + { L"match", &string_match }, + { L"replace", &string_replace }, + { L"split", &string_split }, + { L"sub", &string_sub }, + { L"trim", &string_trim }, + { 0, 0 } +}; + +/** + The string builtin, for manipulating strings. +*/ +/*static*/ int builtin_string(parser_t &parser, wchar_t **argv) +{ + int argc = builtin_count_args(argv); + if (argc <= 1) + { + string_error(STRING_ERR_MISSING, argv[0]); + builtin_print_help(parser, L"string", stderr_buffer); + return BUILTIN_STRING_ERROR; + } + + if (wcscmp(argv[1], L"-h") == 0 || wcscmp(argv[1], L"--help") == 0) + { + builtin_print_help(parser, L"string", stderr_buffer); + return BUILTIN_STRING_OK; + } + + const string_subcommand *subcmd = &string_subcommands[0]; + while (subcmd->name != 0 && wcscmp(subcmd->name, argv[1]) != 0) + { + subcmd++; + } + if (subcmd->handler == 0) + { + string_error(_(L"%ls: Unknown subcommand '%ls'\n"), argv[0], argv[1]); + builtin_print_help(parser, L"string", stderr_buffer); + return BUILTIN_STRING_ERROR; + } + + argc--; + argv++; + return subcmd->handler(parser, argc, argv); +} |