diff options
author | Kevin Ballard <kevin@sb.org> | 2014-09-21 19:18:56 -0700 |
---|---|---|
committer | Kevin Ballard <kevin@sb.org> | 2014-09-21 19:27:26 -0700 |
commit | 8f8c4cdd176fda4d93a2d1d4b0ae6321d5706e5f (patch) | |
tree | 68c2f6d4eb2bbd7a4b975b7901cc0db95f83b740 | |
parent | f889ad0fda9bf8d1f354cad37d508e0c4205af48 (diff) |
Implement new `read --null` flag
The `--null` flag to `read` makes it split incoming lines on NUL instead
of newlines. This is intended for processing the output of a command
that uses NUL separators (such as `find -print0`).
Fixes #1694.
-rw-r--r-- | Makefile.in | 14 | ||||
-rw-r--r-- | builtin.cpp | 80 | ||||
-rw-r--r-- | doc_src/read.txt | 2 | ||||
-rw-r--r-- | fish_tests.cpp | 33 | ||||
-rw-r--r-- | tests/read.in | 25 | ||||
-rw-r--r-- | tests/read.out | 12 | ||||
-rw-r--r-- | wcstringutil.cpp | 40 | ||||
-rw-r--r-- | wcstringutil.h | 29 |
8 files changed, 192 insertions, 43 deletions
diff --git a/Makefile.in b/Makefile.in index fea9ae5b..d6d1d09d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -91,7 +91,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ builtin_test.o parse_tree.o parse_productions.o parse_execution.o \ - pager.o utf8.o fish_version.o + pager.o utf8.o fish_version.o wcstringutil.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o fish_version.o @@ -843,9 +843,10 @@ builtin.o: io.h function.h event.h complete.h proc.h parse_tree.h tokenizer.h builtin.o: parse_constants.h parser.h reader.h highlight.h env.h color.h builtin.o: wgetopt.h sanity.h wildcard.h expand.h input_common.h input.h builtin.o: intern.h exec.h parse_util.h autoload.h lru.h parser_keywords.h -builtin.o: path.h history.h builtin_set.cpp builtin_commandline.cpp -builtin.o: builtin_complete.cpp builtin_ulimit.cpp builtin_jobs.cpp -builtin.o: builtin_set_color.cpp output.h screen.h builtin_printf.cpp +builtin.o: path.h history.h wcstringutil.h builtin_set.cpp +builtin.o: builtin_commandline.cpp builtin_complete.cpp builtin_ulimit.cpp +builtin.o: builtin_jobs.cpp builtin_set_color.cpp output.h screen.h +builtin.o: builtin_printf.cpp builtin_commandline.o: config.h signal.h fallback.h util.h wutil.h common.h builtin_commandline.o: builtin.h io.h wgetopt.h reader.h complete.h builtin_commandline.o: highlight.h env.h color.h proc.h parse_tree.h @@ -860,6 +861,7 @@ builtin_jobs.o: config.h fallback.h signal.h util.h wutil.h common.h builtin_jobs.o: builtin.h io.h proc.h parse_tree.h tokenizer.h builtin_jobs.o: parse_constants.h parser.h event.h function.h wgetopt.h builtin_printf.o: common.h util.h +builtin_scripts.o: builtin_scripts.h builtin_set.o: config.h signal.h fallback.h util.h wutil.h common.h builtin.h builtin_set.o: io.h env.h expand.h parse_constants.h wgetopt.h proc.h builtin_set.o: parse_tree.h tokenizer.h parser.h event.h function.h @@ -911,6 +913,7 @@ fish_tests.o: highlight.h env.h color.h builtin.h function.h event.h fish_tests.o: autoload.h lru.h wutil.h expand.h parser.h output.h screen.h fish_tests.o: exec.h path.h history.h iothread.h postfork.h parse_util.h fish_tests.o: pager.h input.h input_common.h utf8.h env_universal_common.h +fish_tests.o: wcstringutil.h fish_version.o: fish_version.h function.o: config.h signal.h wutil.h common.h util.h fallback.h function.h function.o: event.h proc.h io.h parse_tree.h tokenizer.h parse_constants.h @@ -930,7 +933,7 @@ input.o: complete.h highlight.h env.h color.h proc.h parse_tree.h tokenizer.h input.o: parse_constants.h sanity.h input_common.h input.h parser.h event.h input.o: function.h expand.h output.h screen.h intern.h input_common.o: config.h fallback.h signal.h util.h common.h wutil.h -input_common.o: input_common.h iothread.h +input_common.o: input_common.h env_universal_common.h env.h iothread.h intern.o: config.h fallback.h signal.h util.h wutil.h common.h intern.h io.o: config.h fallback.h signal.h util.h wutil.h common.h exec.h proc.h io.h io.o: parse_tree.h tokenizer.h parse_constants.h @@ -993,6 +996,7 @@ signal.o: parse_tree.h tokenizer.h parse_constants.h tokenizer.o: config.h fallback.h signal.h util.h wutil.h common.h tokenizer.h utf8.o: utf8.h util.o: config.h fallback.h signal.h util.h common.h wutil.h +wcstringutil.o: config.h wcstringutil.h common.h util.h wgetopt.o: config.h wgetopt.h wutil.h common.h util.h fallback.h signal.h wildcard.o: config.h fallback.h signal.h util.h wutil.h common.h complete.h wildcard.o: wildcard.h expand.h parse_constants.h reader.h io.h highlight.h diff --git a/builtin.cpp b/builtin.cpp index 3ab3bb48..4604190c 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -65,6 +65,7 @@ #include "path.h" #include "history.h" #include "parse_tree.h" +#include "wcstringutil.h" /** The default prompt for the read command @@ -2305,10 +2306,9 @@ static int builtin_random(parser_t &parser, wchar_t **argv) */ static int builtin_read(parser_t &parser, wchar_t **argv) { - wchar_t *buff=0; + wcstring buff; int i, argc = builtin_count_args(argv); int place = ENV_USER; - wchar_t *nxt; const wchar_t *prompt = DEFAULT_READ_PROMPT; const wchar_t *commandline = L""; int exit_res=STATUS_BUILTIN_OK; @@ -2317,6 +2317,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv) wchar_t *end; int shell = 0; int array = 0; + bool split_null = false; woptind=0; @@ -2370,6 +2371,10 @@ static int builtin_read(parser_t &parser, wchar_t **argv) } , { + L"null", no_argument, 0, 'z' + } + , + { L"help", no_argument, 0, 'h' } , @@ -2383,7 +2388,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv) int opt = wgetopt_long(argc, argv, - L"xglUup:c:hm:n:sa", + L"xglUup:c:hm:n:saz", long_options, &opt_index); if (opt == -1) @@ -2468,6 +2473,10 @@ static int builtin_read(parser_t &parser, wchar_t **argv) array = 1; break; + case L'z': + split_null = true; + break; + case 'h': builtin_print_help(parser, argv[0], stdout_buffer); return STATUS_BUILTIN_OK; @@ -2541,7 +2550,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv) /* Check if we should read interactively using \c reader_readline() */ - if (isatty(0) && builtin_stdin == 0) + if (isatty(0) && builtin_stdin == 0 && !split_null) { const wchar_t *line; @@ -2572,13 +2581,11 @@ static int builtin_read(parser_t &parser, wchar_t **argv) // note: we're deliberately throwing away the tail of the commandline. // It shouldn't be unread because it was produced with `commandline -i`, // not typed. - buff = (wchar_t *)malloc(((size_t)nchars + 1) * sizeof(wchar_t)); - wmemcpy(buff, line, (size_t)nchars); - buff[nchars] = 0; + buff = wcstring(line, nchars); } else { - buff = wcsdup(line); + buff = wcstring(line); } } else @@ -2591,7 +2598,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv) { int eof=0; - wcstring sb; + buff.clear(); while (1) { @@ -2621,7 +2628,6 @@ static int builtin_read(parser_t &parser, wchar_t **argv) case (size_t)(-2): break; case 0: - eof=1; finished = 1; break; @@ -2635,44 +2641,43 @@ static int builtin_read(parser_t &parser, wchar_t **argv) if (eof) break; - if (res == L'\n') + if (!split_null && res == L'\n') break; - sb.push_back(res); + if (split_null && res == L'\0') + break; + + buff.push_back(res); - if (0 < nchars && (size_t)nchars <= sb.size()) + if (0 < nchars && (size_t)nchars <= buff.size()) { break; } } - if (sb.size() < 2 && eof) + if (buff.size() < 2 && eof) { exit_res = 1; } - - buff = wcsdup(sb.c_str()); } if (i != argc && !exit_res) { - - wchar_t *state; - env_var_t ifs = env_get_string(L"IFS"); - if (ifs.missing_or_empty()) { /* Every character is a separate token */ - size_t bufflen = wcslen(buff); + size_t bufflen = buff.size(); if (array) { if (bufflen > 0) { wcstring chars(bufflen+(bufflen-1), ARRAY_SEP); - for (size_t j=0; j<bufflen; ++j) + wcstring::iterator out = chars.begin(); + for (wcstring::const_iterator it = buff.begin(), end = buff.end(); it != end; ++it) { - chars[j*2] = buff[j]; + *out = *it; + out += 2; } env_set(argv[i], chars.c_str(), place); } @@ -2686,14 +2691,15 @@ static int builtin_read(parser_t &parser, wchar_t **argv) size_t j = 0; for (; i+1 < argc; ++i) { - if (j < bufflen) { - wchar_t buffer[2] = {buff[j], 0}; + if (j < bufflen) + { + wchar_t buffer[2] = {buff[j++], 0}; env_set(argv[i], buffer, place); } - else { + else + { env_set(argv[i], L"", place); } - if (j < bufflen) ++j; } if (i < argc) env_set(argv[i], &buff[j], place); } @@ -2701,34 +2707,32 @@ static int builtin_read(parser_t &parser, wchar_t **argv) else if (array) { wcstring tokens; - tokens.reserve(wcslen(buff)); + tokens.reserve(buff.size()); bool empty = true; - for (nxt = wcstok(buff, ifs.c_str(), &state); nxt != 0; nxt = wcstok(0, ifs.c_str(), &state)) + for (wcstring_range loc = wcstring_tok(buff, ifs); loc.first != wcstring::npos; loc = wcstring_tok(buff, ifs, loc)) { - if (! tokens.empty()) tokens.push_back(ARRAY_SEP); - tokens.append(nxt); + if (!empty) tokens.push_back(ARRAY_SEP); + tokens.append(buff, loc.first, loc.second); empty = false; } env_set(argv[i], empty ? NULL : tokens.c_str(), place); } else { - nxt = wcstok(buff, (i<argc-1)?ifs.c_str():L"", &state); + wcstring_range loc = wcstring_range(0,0); while (i<argc) { - env_set(argv[i], nxt != 0 ? nxt: L"", place); + loc = wcstring_tok(buff, (i+1<argc) ? ifs : L"", loc); + env_set(argv[i], loc.first == wcstring::npos ? L"" : &buff.c_str()[loc.first], place); - i++; - if (nxt != 0) - nxt = wcstok(0, (i<argc-1)?ifs.c_str():L"", &state); + ++i; } + } } - free(buff); - return exit_res; } diff --git a/doc_src/read.txt b/doc_src/read.txt index aee3e85b..79335a79 100644 --- a/doc_src/read.txt +++ b/doc_src/read.txt @@ -33,6 +33,8 @@ The following options are available: - `-a` or `--array` stores the result as an array. +- `-z` or `--null` reads up to NUL instead of newline. Disables interactive mode. + `read` reads a single line of input from stdin, breaks it into tokens based on the `IFS` shell variable, and then assigns one token to each variable specified in `VARIABLES`. If there are more tokens than variables, the complete remainder is assigned to the last variable. As a special case, if `IFS` is set to the empty string, each character of the input is considered a separate token. If `-a` or `--array` is provided, only one variable name is allowed and the tokens are stored as an array in this variable. diff --git a/fish_tests.cpp b/fish_tests.cpp index 744da79d..7222065f 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -65,6 +65,7 @@ #include "input.h" #include "utf8.h" #include "env_universal_common.h" +#include "wcstringutil.h" static const char * const * s_arguments; static int s_test_run_count = 0; @@ -3629,6 +3630,37 @@ static void test_highlighting(void) } } +static void test_wcstring_tok(void) +{ + say(L"Testing wcstring_tok"); + wcstring buff = L"hello world"; + wcstring needle = L" \t\n"; + wcstring_range loc = wcstring_tok(buff, needle); + if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"hello") + { + err(L"Wrong results from first wcstring_tok(): {%zu, %zu}", loc.first, loc.second); + } + loc = wcstring_tok(buff, needle, loc); + if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world") + { + err(L"Wrong results from second wcstring_tok(): {%zu, %zu}", loc.first, loc.second); + } + loc = wcstring_tok(buff, needle, loc); + if (loc.first != wcstring::npos) + { + err(L"Wrong results from third wcstring_tok(): {%zu, %zu}", loc.first, loc.second); + } + + buff = L"hello world"; + loc = wcstring_tok(buff, needle); + // loc is "hello" again + loc = wcstring_tok(buff, L"", loc); + if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world") + { + err(L"Wrong results from wcstring_tok with empty needle: {%zu, %zu}", loc.first, loc.second); + } +} + /** Main test */ @@ -3709,6 +3741,7 @@ int main(int argc, char **argv) if (should_test_function("autosuggestion_ignores")) test_autosuggestion_ignores(); if (should_test_function("autosuggestion_combining")) test_autosuggestion_combining(); if (should_test_function("autosuggest_suggest_special")) test_autosuggest_suggest_special(); + if (should_test_function("wcstring_tok")) test_wcstring_tok(); if (should_test_function("history")) history_tests_t::test_history(); if (should_test_function("history_merge")) history_tests_t::test_history_merge(); if (should_test_function("history_races")) history_tests_t::test_history_races(); diff --git a/tests/read.in b/tests/read.in index f864287f..7cf09112 100644 --- a/tests/read.in +++ b/tests/read.in @@ -1,3 +1,4 @@ +# vim: set filetype=fish: # # Test read builtin and IFS # @@ -35,6 +36,8 @@ echo '' | read -l one two print_vars one two echo 'test' | read -l one two three print_vars one two three +echo 'foo bar baz' | read -l one two three +print_vars one two three echo set -l IFS @@ -91,3 +94,25 @@ echo $foo echo $bar echo 'test' | read -n 1 foo echo $foo + +# read -0 tests + +echo +echo '# read -z tests' +echo -n 'testing' | read -lz foo +echo $foo +echo -n 'test ing' | read -lz foo +echo $foo +echo 'newline' | read -lz foo +echo $foo +echo -n 'test ing' | read -lz foo bar +print_vars foo bar +echo -ne 'test\0ing' | read -lz foo bar +print_vars foo bar +echo -ne 'foo\nbar' | read -lz foo bar +print_vars foo bar +echo -ne 'foo\nbar\0baz\nquux' | while read -lza foo + print_vars foo +end + +true diff --git a/tests/read.out b/tests/read.out index 0de98831..d8cf948d 100644 --- a/tests/read.out +++ b/tests/read.out @@ -16,6 +16,7 @@ two 1 '' 1 '' 1 '' 1 'test' 1 '' 1 '' +1 'foo' 1 'bar' 1 ' baz' 1 'hello' 1 'h' 1 'ello' @@ -42,3 +43,14 @@ test tes tin t + +# read -z tests +testing +test ing +newline + +1 'test' 1 'ing' +1 'test' 1 '' +1 'foo' 1 'bar' +2 'foo' 'bar' +2 'baz' 'quux' diff --git a/wcstringutil.cpp b/wcstringutil.cpp new file mode 100644 index 00000000..51ec1a1c --- /dev/null +++ b/wcstringutil.cpp @@ -0,0 +1,40 @@ +/** \file wcstringutil.cpp + +Helper functions for working with wcstring +*/ + +#include "config.h" + +#include "wcstringutil.h" + +typedef wcstring::size_type size_type; + +wcstring_range wcstring_tok(wcstring& str, const wcstring &needle, wcstring_range last) +{ + size_type pos = last.second == wcstring::npos ? wcstring::npos : last.first; + if (pos != wcstring::npos && last.second != wcstring::npos) pos += last.second; + if (pos != wcstring::npos && pos != 0) ++pos; + if (pos == wcstring::npos || pos >= str.size()) + { + return std::make_pair(wcstring::npos, wcstring::npos); + } + + if (needle.empty()) + { + return std::make_pair(pos, wcstring::npos); + } + + pos = str.find_first_not_of(needle, pos); + if (pos == wcstring::npos) return std::make_pair(wcstring::npos, wcstring::npos); + + size_type next_pos = str.find_first_of(needle, pos); + if (next_pos == wcstring::npos) + { + return std::make_pair(pos, wcstring::npos); + } + else + { + str[next_pos] = L'\0'; + return std::make_pair(pos, next_pos - pos); + } +} diff --git a/wcstringutil.h b/wcstringutil.h new file mode 100644 index 00000000..73ca7ac6 --- /dev/null +++ b/wcstringutil.h @@ -0,0 +1,29 @@ +/** \file wcstringutil.h + +Helper functions for working with wcstring +*/ + +#ifndef FISH_WCSTRINGUTIL_H +#define FISH_WCSTRINGUTIL_H + +#include <utility> +#include "common.h" + +/** + typedef that represents a range in a wcstring. + The first element is the location, the second is the count. +*/ +typedef std::pair<wcstring::size_type, wcstring::size_type> wcstring_range; + +/** + wcstring equivalent of wcstok(). Supports NUL. + For convenience and wcstok() compatibility, the first character of each + token separator is replaced with NUL. + Returns a pair of (pos, count). + Returns (npos, npos) when it's done. + Returns (pos, npos) when the token is already known to be the final token. + Note that the final token may not necessarily return (pos, npos). +*/ +wcstring_range wcstring_tok(wcstring& str, const wcstring &needle, wcstring_range last = wcstring_range(0,0)); + +#endif |