diff options
author | 2016-05-18 17:46:13 -0700 | |
---|---|---|
committer | 2016-05-19 18:42:34 -0700 | |
commit | 46be5ac468db923bd3d19c55638b723aa760cd4e (patch) | |
tree | 44726156923b2243b946e11101242e543656de76 /src/common.cpp | |
parent | 30ea7cc3f8a5d56ad30dc749ea374363c15f312a (diff) |
make fish buildable on OS X Snow Leopard
I noticed that the `test_convert()` function was randomly failing when
run on OS X Snow Leopard. I tracked it down to the `mbrtowc()` function on
that OS being broken. Explicitly testing for UTF-8 prefixes that identify
a sequence longer than four bytes (which the Unicode standard made illegal
long ago) keeps us from having encoding errors on those OSes.
This also makes the errors reported by the `test_convert()` function actually
useful and readable.
Lastly, it makes it possible to build fish on OS X Snow Leopard.
Diffstat (limited to 'src/common.cpp')
-rw-r--r-- | src/common.cpp | 46 |
1 files changed, 29 insertions, 17 deletions
diff --git a/src/common.cpp b/src/common.cpp index 7bd90915..b8270ad1 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -109,8 +109,12 @@ void __attribute__((noinline)) show_stackframe(const wchar_t msg_level, int fram int skip_levels) { ASSERT_IS_NOT_FORKED_CHILD(); + // TODO: Decide if this is still needed. I'm commenting it out because it caused me some grief + // while trying to debug a test failure. And the tests run just fine without spurious failures + // if this check is not done. + // // Hack to avoid showing backtraces in the tester. - if (program_name && !wcscmp(program_name, L"(ignore)")) return; + // if (program_name && !wcscmp(program_name, L"(ignore)")) return; if (frame_count < 1) frame_count = 999; debug_shared(msg_level, L"Backtrace:"); @@ -177,24 +181,32 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) { mbstate_t state = {}; while (in_pos < in_len) { + bool use_encode_direct = false; + size_t ret; wchar_t wc = 0; - size_t ret = mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state); - // Determine whether to encode this characters with our crazy scheme. - bool use_encode_direct = false; - if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) { - use_encode_direct = true; - } else if (wc == INTERNAL_SEPARATOR) { - use_encode_direct = true; - } else if (ret == (size_t)-2) { - // Incomplete sequence. - use_encode_direct = true; - } else if (ret == (size_t)-1) { - // Invalid data. - use_encode_direct = true; - } else if (ret > in_len - in_pos) { - // Other error codes? Terrifying, should never happen. + if ((in[in_pos] & 0xF8) == 0xF8) { + // Protect against broken mbrtowc() implementations which attempt to encode UTF-8 + // sequences longer than four bytes (e.g., OS X Snow Leopard). use_encode_direct = true; + } else { + ret = mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state); + + // Determine whether to encode this characters with our crazy scheme. 
+ if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) { + use_encode_direct = true; + } else if (wc == INTERNAL_SEPARATOR) { + use_encode_direct = true; + } else if (ret == (size_t)-2) { + // Incomplete sequence. + use_encode_direct = true; + } else if (ret == (size_t)-1) { + // Invalid data. + use_encode_direct = true; + } else if (ret > in_len - in_pos) { + // Other error codes? Terrifying, should never happen. + use_encode_direct = true; + } } if (use_encode_direct) { @@ -221,7 +233,7 @@ wcstring str2wcstring(const char *in, size_t len) { return str2wcs_internal(in, wcstring str2wcstring(const char *in) { return str2wcs_internal(in, strlen(in)); } wcstring str2wcstring(const std::string &in) { - /* Handles embedded nulls! */ + // Handles embedded nulls! return str2wcs_internal(in.data(), in.size()); } |