From c2f1df1d4af0c7e633528cb4c8caa79ef04b0b5a Mon Sep 17 00:00:00 2001
From: Kurtis Rader
Date: Thu, 10 Mar 2016 18:17:39 -0800
Subject: fix handling of non-ASCII chars in C locale

The relevant standards allow the mbtowc/mbrtowc functions to reject
non-ASCII characters (i.e., chars with the high bit set) when the locale
is C or POSIX. The BSD libraries (e.g., on OS X) don't do this but the
GNU libraries (e.g., on Linux) do. Like most programs we need the C/POSIX
locales to allow arbitrary bytes. So explicitly check if we're in a
single-byte locale (which would also include ISO-8859 variants) and
simply pass-thru the chars without encoding or decoding.

Fixes #2802.
---
 src/builtin.cpp | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

(limited to 'src/builtin.cpp')

diff --git a/src/builtin.cpp b/src/builtin.cpp
index 4597b18e..f4de72c7 100644
--- a/src/builtin.cpp
+++ b/src/builtin.cpp
@@ -1907,18 +1907,18 @@ static int builtin_echo(parser_t &parser, io_streams_t &streams, wchar_t **argv)
     return STATUS_BUILTIN_OK;
 }
 
-/** The pwd builtin. We don't respect -P to resolve symbolic links because we try to always resolve them. */
+// The pwd builtin. We don't respect -P to resolve symbolic links because we
+// try to always resolve them.
 static int builtin_pwd(parser_t &parser, io_streams_t &streams, wchar_t **argv)
 {
-    wchar_t dir_path[4096];
-    wchar_t *res = wgetcwd(dir_path, 4096);
-    if (res == NULL)
+    wcstring res = wgetcwd();
+    if (res.empty())
     {
         return STATUS_BUILTIN_ERROR;
     }
     else
     {
-        streams.out.append(dir_path);
+        streams.out.append(res);
         streams.out.push_back(L'\n');
         return STATUS_BUILTIN_OK;
     }
@@ -2699,9 +2699,8 @@ static int builtin_read(parser_t &parser, io_streams_t &streams, wchar_t **argv)
 
         while (1)
         {
-            int finished=0;
-
-            wchar_t res=0;
+            int finished = 0;
+            wchar_t res = 0;
             mbstate_t state = {};
 
             while (!finished)
@@ -2713,24 +2712,26 @@ static int builtin_read(parser_t &parser, io_streams_t &streams, wchar_t **argv)
                     break;
                 }
 
-                size_t sz = mbrtowc(&res, &b, 1, &state);
-
-                switch (sz)
+                if (MB_CUR_MAX == 1) // single-byte locale
                 {
-                    case (size_t)(-1):
-                        memset(&state, '\0', sizeof(state));
-                        break;
-
-                    case (size_t)(-2):
-                        break;
-                    case 0:
-                        finished = 1;
-                        break;
+                    res = (unsigned char)b;
+                    finished = 1;
+                }
+                else {
+                    size_t sz = mbrtowc(&res, &b, 1, &state);
+                    switch (sz)
+                    {
+                        case (size_t)-1:
+                            memset(&state, 0, sizeof(state));
+                            break;
 
-                    default:
-                        finished=1;
-                        break;
+                        case (size_t)-2:
+                            break;
 
+                        default:
+                            finished = 1;
+                            break;
+                    }
                 }
             }
 
-- 
cgit v1.2.3