aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Kevin Ballard <kevin@sb.org>2014-09-21 19:18:56 -0700
committerGravatar Kevin Ballard <kevin@sb.org>2014-09-21 19:27:26 -0700
commit8f8c4cdd176fda4d93a2d1d4b0ae6321d5706e5f (patch)
tree68c2f6d4eb2bbd7a4b975b7901cc0db95f83b740
parentf889ad0fda9bf8d1f354cad37d508e0c4205af48 (diff)
Implement new `read --null` flag
The `--null` flag to `read` makes it split incoming lines on NUL instead of newlines. This is intended for processing the output of a command that uses NUL separators (such as `find -print0`). Fixes #1694.
-rw-r--r--Makefile.in14
-rw-r--r--builtin.cpp80
-rw-r--r--doc_src/read.txt2
-rw-r--r--fish_tests.cpp33
-rw-r--r--tests/read.in25
-rw-r--r--tests/read.out12
-rw-r--r--wcstringutil.cpp40
-rw-r--r--wcstringutil.h29
8 files changed, 192 insertions, 43 deletions
diff --git a/Makefile.in b/Makefile.in
index fea9ae5b..d6d1d09d 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -91,7 +91,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \
signal.o io.o parse_util.o common.o screen.o path.o autoload.o \
parser_keywords.o iothread.o color.o postfork.o \
builtin_test.o parse_tree.o parse_productions.o parse_execution.o \
- pager.o utf8.o fish_version.o
+ pager.o utf8.o fish_version.o wcstringutil.o
FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \
parser_keywords.o wutil.o tokenizer.o fish_version.o
@@ -843,9 +843,10 @@ builtin.o: io.h function.h event.h complete.h proc.h parse_tree.h tokenizer.h
builtin.o: parse_constants.h parser.h reader.h highlight.h env.h color.h
builtin.o: wgetopt.h sanity.h wildcard.h expand.h input_common.h input.h
builtin.o: intern.h exec.h parse_util.h autoload.h lru.h parser_keywords.h
-builtin.o: path.h history.h builtin_set.cpp builtin_commandline.cpp
-builtin.o: builtin_complete.cpp builtin_ulimit.cpp builtin_jobs.cpp
-builtin.o: builtin_set_color.cpp output.h screen.h builtin_printf.cpp
+builtin.o: path.h history.h wcstringutil.h builtin_set.cpp
+builtin.o: builtin_commandline.cpp builtin_complete.cpp builtin_ulimit.cpp
+builtin.o: builtin_jobs.cpp builtin_set_color.cpp output.h screen.h
+builtin.o: builtin_printf.cpp
builtin_commandline.o: config.h signal.h fallback.h util.h wutil.h common.h
builtin_commandline.o: builtin.h io.h wgetopt.h reader.h complete.h
builtin_commandline.o: highlight.h env.h color.h proc.h parse_tree.h
@@ -860,6 +861,7 @@ builtin_jobs.o: config.h fallback.h signal.h util.h wutil.h common.h
builtin_jobs.o: builtin.h io.h proc.h parse_tree.h tokenizer.h
builtin_jobs.o: parse_constants.h parser.h event.h function.h wgetopt.h
builtin_printf.o: common.h util.h
+builtin_scripts.o: builtin_scripts.h
builtin_set.o: config.h signal.h fallback.h util.h wutil.h common.h builtin.h
builtin_set.o: io.h env.h expand.h parse_constants.h wgetopt.h proc.h
builtin_set.o: parse_tree.h tokenizer.h parser.h event.h function.h
@@ -911,6 +913,7 @@ fish_tests.o: highlight.h env.h color.h builtin.h function.h event.h
fish_tests.o: autoload.h lru.h wutil.h expand.h parser.h output.h screen.h
fish_tests.o: exec.h path.h history.h iothread.h postfork.h parse_util.h
fish_tests.o: pager.h input.h input_common.h utf8.h env_universal_common.h
+fish_tests.o: wcstringutil.h
fish_version.o: fish_version.h
function.o: config.h signal.h wutil.h common.h util.h fallback.h function.h
function.o: event.h proc.h io.h parse_tree.h tokenizer.h parse_constants.h
@@ -930,7 +933,7 @@ input.o: complete.h highlight.h env.h color.h proc.h parse_tree.h tokenizer.h
input.o: parse_constants.h sanity.h input_common.h input.h parser.h event.h
input.o: function.h expand.h output.h screen.h intern.h
input_common.o: config.h fallback.h signal.h util.h common.h wutil.h
-input_common.o: input_common.h iothread.h
+input_common.o: input_common.h env_universal_common.h env.h iothread.h
intern.o: config.h fallback.h signal.h util.h wutil.h common.h intern.h
io.o: config.h fallback.h signal.h util.h wutil.h common.h exec.h proc.h io.h
io.o: parse_tree.h tokenizer.h parse_constants.h
@@ -993,6 +996,7 @@ signal.o: parse_tree.h tokenizer.h parse_constants.h
tokenizer.o: config.h fallback.h signal.h util.h wutil.h common.h tokenizer.h
utf8.o: utf8.h
util.o: config.h fallback.h signal.h util.h common.h wutil.h
+wcstringutil.o: config.h wcstringutil.h common.h util.h
wgetopt.o: config.h wgetopt.h wutil.h common.h util.h fallback.h signal.h
wildcard.o: config.h fallback.h signal.h util.h wutil.h common.h complete.h
wildcard.o: wildcard.h expand.h parse_constants.h reader.h io.h highlight.h
diff --git a/builtin.cpp b/builtin.cpp
index 3ab3bb48..4604190c 100644
--- a/builtin.cpp
+++ b/builtin.cpp
@@ -65,6 +65,7 @@
#include "path.h"
#include "history.h"
#include "parse_tree.h"
+#include "wcstringutil.h"
/**
The default prompt for the read command
@@ -2305,10 +2306,9 @@ static int builtin_random(parser_t &parser, wchar_t **argv)
*/
static int builtin_read(parser_t &parser, wchar_t **argv)
{
- wchar_t *buff=0;
+ wcstring buff;
int i, argc = builtin_count_args(argv);
int place = ENV_USER;
- wchar_t *nxt;
const wchar_t *prompt = DEFAULT_READ_PROMPT;
const wchar_t *commandline = L"";
int exit_res=STATUS_BUILTIN_OK;
@@ -2317,6 +2317,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
wchar_t *end;
int shell = 0;
int array = 0;
+ bool split_null = false;
woptind=0;
@@ -2370,6 +2371,10 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
}
,
{
+ L"null", no_argument, 0, 'z'
+ }
+ ,
+ {
L"help", no_argument, 0, 'h'
}
,
@@ -2383,7 +2388,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
int opt = wgetopt_long(argc,
argv,
- L"xglUup:c:hm:n:sa",
+ L"xglUup:c:hm:n:saz",
long_options,
&opt_index);
if (opt == -1)
@@ -2468,6 +2473,10 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
array = 1;
break;
+ case L'z':
+ split_null = true;
+ break;
+
case 'h':
builtin_print_help(parser, argv[0], stdout_buffer);
return STATUS_BUILTIN_OK;
@@ -2541,7 +2550,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
/*
Check if we should read interactively using \c reader_readline()
*/
- if (isatty(0) && builtin_stdin == 0)
+ if (isatty(0) && builtin_stdin == 0 && !split_null)
{
const wchar_t *line;
@@ -2572,13 +2581,11 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
// note: we're deliberately throwing away the tail of the commandline.
// It shouldn't be unread because it was produced with `commandline -i`,
// not typed.
- buff = (wchar_t *)malloc(((size_t)nchars + 1) * sizeof(wchar_t));
- wmemcpy(buff, line, (size_t)nchars);
- buff[nchars] = 0;
+ buff = wcstring(line, nchars);
}
else
{
- buff = wcsdup(line);
+ buff = wcstring(line);
}
}
else
@@ -2591,7 +2598,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
{
int eof=0;
- wcstring sb;
+ buff.clear();
while (1)
{
@@ -2621,7 +2628,6 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
case (size_t)(-2):
break;
case 0:
- eof=1;
finished = 1;
break;
@@ -2635,44 +2641,43 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
if (eof)
break;
- if (res == L'\n')
+ if (!split_null && res == L'\n')
break;
- sb.push_back(res);
+ if (split_null && res == L'\0')
+ break;
+
+ buff.push_back(res);
- if (0 < nchars && (size_t)nchars <= sb.size())
+ if (0 < nchars && (size_t)nchars <= buff.size())
{
break;
}
}
- if (sb.size() < 2 && eof)
+ if (buff.size() < 2 && eof)
{
exit_res = 1;
}
-
- buff = wcsdup(sb.c_str());
}
if (i != argc && !exit_res)
{
-
- wchar_t *state;
-
env_var_t ifs = env_get_string(L"IFS");
-
if (ifs.missing_or_empty())
{
/* Every character is a separate token */
- size_t bufflen = wcslen(buff);
+ size_t bufflen = buff.size();
if (array)
{
if (bufflen > 0)
{
wcstring chars(bufflen+(bufflen-1), ARRAY_SEP);
- for (size_t j=0; j<bufflen; ++j)
+ wcstring::iterator out = chars.begin();
+ for (wcstring::const_iterator it = buff.begin(), end = buff.end(); it != end; ++it)
{
- chars[j*2] = buff[j];
+ *out = *it;
+ out += 2;
}
env_set(argv[i], chars.c_str(), place);
}
@@ -2686,14 +2691,15 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
size_t j = 0;
for (; i+1 < argc; ++i)
{
- if (j < bufflen) {
- wchar_t buffer[2] = {buff[j], 0};
+ if (j < bufflen)
+ {
+ wchar_t buffer[2] = {buff[j++], 0};
env_set(argv[i], buffer, place);
}
- else {
+ else
+ {
env_set(argv[i], L"", place);
}
- if (j < bufflen) ++j;
}
if (i < argc) env_set(argv[i], &buff[j], place);
}
@@ -2701,34 +2707,32 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
else if (array)
{
wcstring tokens;
- tokens.reserve(wcslen(buff));
+ tokens.reserve(buff.size());
bool empty = true;
- for (nxt = wcstok(buff, ifs.c_str(), &state); nxt != 0; nxt = wcstok(0, ifs.c_str(), &state))
+ for (wcstring_range loc = wcstring_tok(buff, ifs); loc.first != wcstring::npos; loc = wcstring_tok(buff, ifs, loc))
{
- if (! tokens.empty()) tokens.push_back(ARRAY_SEP);
- tokens.append(nxt);
+ if (!empty) tokens.push_back(ARRAY_SEP);
+ tokens.append(buff, loc.first, loc.second);
empty = false;
}
env_set(argv[i], empty ? NULL : tokens.c_str(), place);
}
else
{
- nxt = wcstok(buff, (i<argc-1)?ifs.c_str():L"", &state);
+ wcstring_range loc = wcstring_range(0,0);
while (i<argc)
{
- env_set(argv[i], nxt != 0 ? nxt: L"", place);
+ loc = wcstring_tok(buff, (i+1<argc) ? ifs : L"", loc);
+ env_set(argv[i], loc.first == wcstring::npos ? L"" : &buff.c_str()[loc.first], place);
- i++;
- if (nxt != 0)
- nxt = wcstok(0, (i<argc-1)?ifs.c_str():L"", &state);
+ ++i;
}
+
}
}
- free(buff);
-
return exit_res;
}
diff --git a/doc_src/read.txt b/doc_src/read.txt
index aee3e85b..79335a79 100644
--- a/doc_src/read.txt
+++ b/doc_src/read.txt
@@ -33,6 +33,8 @@ The following options are available:
- `-a` or `--array` stores the result as an array.
+- `-z` or `--null` reads up to NUL instead of newline. Disables interactive mode.
+
`read` reads a single line of input from stdin, breaks it into tokens based on the `IFS` shell variable, and then assigns one token to each variable specified in `VARIABLES`. If there are more tokens than variables, the complete remainder is assigned to the last variable. As a special case, if `IFS` is set to the empty string, each character of the input is considered a separate token.
If `-a` or `--array` is provided, only one variable name is allowed and the tokens are stored as an array in this variable.
diff --git a/fish_tests.cpp b/fish_tests.cpp
index 744da79d..7222065f 100644
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@@ -65,6 +65,7 @@
#include "input.h"
#include "utf8.h"
#include "env_universal_common.h"
+#include "wcstringutil.h"
static const char * const * s_arguments;
static int s_test_run_count = 0;
@@ -3629,6 +3630,37 @@ static void test_highlighting(void)
}
}
+static void test_wcstring_tok(void) // Checks wcstring_tok() on a two-word string, with a whitespace needle and with an empty needle.
+{
+ say(L"Testing wcstring_tok");
+ wcstring buff = L"hello world";
+ wcstring needle = L" \t\n";
+ wcstring_range loc = wcstring_tok(buff, needle); // first call: the default `last` starts at the beginning of buff
+ if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"hello")
+ {
+ err(L"Wrong results from first wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
+ }
+ loc = wcstring_tok(buff, needle, loc); // passing the previous range resumes after that token
+ if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world")
+ {
+ err(L"Wrong results from second wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
+ }
+ loc = wcstring_tok(buff, needle, loc); // no tokens are left, so the position must be npos
+ if (loc.first != wcstring::npos)
+ {
+ err(L"Wrong results from third wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
+ }
+
+ buff = L"hello world"; // reset: the earlier calls overwrote the separator in buff with NUL
+ loc = wcstring_tok(buff, needle);
+ // loc is "hello" again
+ loc = wcstring_tok(buff, L"", loc); // empty needle: the rest of the string after the previous token is one token
+ if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world")
+ {
+ err(L"Wrong results from wcstring_tok with empty needle: {%zu, %zu}", loc.first, loc.second);
+ }
+}
+
/**
Main test
*/
@@ -3709,6 +3741,7 @@ int main(int argc, char **argv)
if (should_test_function("autosuggestion_ignores")) test_autosuggestion_ignores();
if (should_test_function("autosuggestion_combining")) test_autosuggestion_combining();
if (should_test_function("autosuggest_suggest_special")) test_autosuggest_suggest_special();
+ if (should_test_function("wcstring_tok")) test_wcstring_tok();
if (should_test_function("history")) history_tests_t::test_history();
if (should_test_function("history_merge")) history_tests_t::test_history_merge();
if (should_test_function("history_races")) history_tests_t::test_history_races();
diff --git a/tests/read.in b/tests/read.in
index f864287f..7cf09112 100644
--- a/tests/read.in
+++ b/tests/read.in
@@ -1,3 +1,4 @@
+# vim: set filetype=fish:
#
# Test read builtin and IFS
#
@@ -35,6 +36,8 @@ echo '' | read -l one two
print_vars one two
echo 'test' | read -l one two three
print_vars one two three
+echo 'foo bar baz' | read -l one two three
+print_vars one two three
echo
set -l IFS
@@ -91,3 +94,25 @@ echo $foo
echo $bar
echo 'test' | read -n 1 foo
echo $foo
+
+# read -z tests
+
+echo
+echo '# read -z tests'
+echo -n 'testing' | read -lz foo
+echo $foo
+echo -n 'test ing' | read -lz foo
+echo $foo
+echo 'newline' | read -lz foo
+echo $foo
+echo -n 'test ing' | read -lz foo bar
+print_vars foo bar
+echo -ne 'test\0ing' | read -lz foo bar
+print_vars foo bar
+echo -ne 'foo\nbar' | read -lz foo bar
+print_vars foo bar
+echo -ne 'foo\nbar\0baz\nquux' | while read -lza foo
+ print_vars foo
+end
+
+true
diff --git a/tests/read.out b/tests/read.out
index 0de98831..d8cf948d 100644
--- a/tests/read.out
+++ b/tests/read.out
@@ -16,6 +16,7 @@ two
1 ''
1 '' 1 ''
1 'test' 1 '' 1 ''
+1 'foo' 1 'bar' 1 ' baz'
1 'hello'
1 'h' 1 'ello'
@@ -42,3 +43,14 @@ test
tes
tin
t
+
+# read -z tests
+testing
+test ing
+newline
+
+1 'test' 1 'ing'
+1 'test' 1 ''
+1 'foo' 1 'bar'
+2 'foo' 'bar'
+2 'baz' 'quux'
diff --git a/wcstringutil.cpp b/wcstringutil.cpp
new file mode 100644
index 00000000..51ec1a1c
--- /dev/null
+++ b/wcstringutil.cpp
@@ -0,0 +1,40 @@
+/** \file wcstringutil.cpp
+
+Helper functions for working with wcstring
+*/
+
+#include "config.h"
+
+#include "wcstringutil.h"
+
+typedef wcstring::size_type size_type;
+
+wcstring_range wcstring_tok(wcstring& str, const wcstring &needle, wcstring_range last) // Returns the (pos, count) range of the next token in str that follows the range `last`.
+{
+ size_type pos = last.second == wcstring::npos ? wcstring::npos : last.first; // a previous count of npos marked the final token, so nothing is left to scan
+ if (pos != wcstring::npos && last.second != wcstring::npos) pos += last.second; // move to the character just past the previous token
+ if (pos != wcstring::npos && pos != 0) ++pos; // step over the separator that was overwritten with NUL; pos is 0 only for the initial (0,0) range
+ if (pos == wcstring::npos || pos >= str.size()) // previous token was final, or we ran off the end of str
+ {
+ return std::make_pair(wcstring::npos, wcstring::npos);
+ }
+
+ if (needle.empty()) // no separators: the remainder of str is returned as-is as the final token
+ {
+ return std::make_pair(pos, wcstring::npos);
+ }
+
+ pos = str.find_first_not_of(needle, pos); // skip any leading separator characters
+ if (pos == wcstring::npos) return std::make_pair(wcstring::npos, wcstring::npos); // only separators remained
+
+ size_type next_pos = str.find_first_of(needle, pos); // first separator after the token, if any
+ if (next_pos == wcstring::npos) // token runs to the end of str, so it is known to be the last one
+ {
+ return std::make_pair(pos, wcstring::npos);
+ }
+ else
+ {
+ str[next_pos] = L'\0'; // terminate the token in place, as wcstok() would
+ return std::make_pair(pos, next_pos - pos);
+ }
+}
diff --git a/wcstringutil.h b/wcstringutil.h
new file mode 100644
index 00000000..73ca7ac6
--- /dev/null
+++ b/wcstringutil.h
@@ -0,0 +1,29 @@
+/** \file wcstringutil.h
+
+Helper functions for working with wcstring
+*/
+
+#ifndef FISH_WCSTRINGUTIL_H
+#define FISH_WCSTRINGUTIL_H
+
+#include <utility>
+#include "common.h"
+
+/**
+ typedef that represents a range in a wcstring.
+ The first element is the location, the second is the count.
+*/
+typedef std::pair<wcstring::size_type, wcstring::size_type> wcstring_range;
+
+/**
+ wcstring equivalent of wcstok(). Supports NUL.
+ For convenience and wcstok() compatibility, the first character of each
+ token separator is replaced with NUL, so \c str is modified in place.
+
+ \param str the string to tokenize
+ \param needle the set of separator characters. If it is empty, the rest of
+ \c str following the previous token is returned unchanged as a single
+ final token.
+ \param last the range returned by the previous call. Omit it (or pass
+ (0,0)) to start tokenizing at the beginning of \c str.
+
+ Returns a pair of (pos, count).
+ Returns (npos, npos) when it's done.
+ Returns (pos, npos) when the token is already known to be the final token.
+ Note that the final token may not necessarily return (pos, npos): if it is
+ followed by trailing separators it is returned as (pos, count), and only
+ the next call returns (npos, npos).
+*/
+wcstring_range wcstring_tok(wcstring& str, const wcstring &needle, wcstring_range last = wcstring_range(0,0));
+
+#endif