diff options
author | Kurtis Rader <krader@skepticism.us> | 2016-05-18 17:46:13 -0700 |
---|---|---|
committer | Kurtis Rader <krader@skepticism.us> | 2016-05-19 18:42:34 -0700 |
commit | 46be5ac468db923bd3d19c55638b723aa760cd4e (patch) | |
tree | 44726156923b2243b946e11101242e543656de76 /src | |
parent | 30ea7cc3f8a5d56ad30dc749ea374363c15f312a (diff) |
make fish buildable on OS X Snow Leopard
I noticed that the `test_convert()` function was randomly failing when
run on OS X Snow Leopard. I tracked it down to the `mbrtowc()` function on
that OS being broken. Explicitly testing for UTF-8 prefixes that identify
a sequence longer than four bytes (which the Unicode standard made illegal
long ago) keeps us from having encoding errors on such OSes.
This also makes the errors reported by the `test_convert()` function actually
useful and readable.
Lastly, it makes it possible to build fish on OS X Snow Leopard.
Diffstat (limited to 'src')
-rw-r--r-- | src/common.cpp | 46 | ||||
-rw-r--r-- | src/fallback.cpp | 11 | ||||
-rw-r--r-- | src/fallback.h | 9 | ||||
-rw-r--r-- | src/fish_tests.cpp | 24 | ||||
-rw-r--r-- | src/wutil.cpp | 6 |
5 files changed, 71 insertions, 25 deletions
diff --git a/src/common.cpp b/src/common.cpp index 7bd90915..b8270ad1 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -109,8 +109,12 @@ void __attribute__((noinline)) show_stackframe(const wchar_t msg_level, int fram int skip_levels) { ASSERT_IS_NOT_FORKED_CHILD(); + // TODO: Decide if this is still needed. I'm commenting it out because it caused me some grief + // while trying to debug a test failure. And the tests run just fine without spurious failures + // if this check is not done. + // // Hack to avoid showing backtraces in the tester. - if (program_name && !wcscmp(program_name, L"(ignore)")) return; + // if (program_name && !wcscmp(program_name, L"(ignore)")) return; if (frame_count < 1) frame_count = 999; debug_shared(msg_level, L"Backtrace:"); @@ -177,24 +181,32 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) { mbstate_t state = {}; while (in_pos < in_len) { + bool use_encode_direct = false; + size_t ret; wchar_t wc = 0; - size_t ret = mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state); - // Determine whether to encode this characters with our crazy scheme. - bool use_encode_direct = false; - if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) { - use_encode_direct = true; - } else if (wc == INTERNAL_SEPARATOR) { - use_encode_direct = true; - } else if (ret == (size_t)-2) { - // Incomplete sequence. - use_encode_direct = true; - } else if (ret == (size_t)-1) { - // Invalid data. - use_encode_direct = true; - } else if (ret > in_len - in_pos) { - // Other error codes? Terrifying, should never happen. + if ((in[in_pos] & 0xF8) == 0xF8) { + // Protect against broken mbrtowc() implementations which attempt to encode UTF-8 + // sequences longer than four bytes (e.g., OS X Snow Leopard). use_encode_direct = true; + } else { + ret = mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state); + + // Determine whether to encode this characters with our crazy scheme. 
+ if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) { + use_encode_direct = true; + } else if (wc == INTERNAL_SEPARATOR) { + use_encode_direct = true; + } else if (ret == (size_t)-2) { + // Incomplete sequence. + use_encode_direct = true; + } else if (ret == (size_t)-1) { + // Invalid data. + use_encode_direct = true; + } else if (ret > in_len - in_pos) { + // Other error codes? Terrifying, should never happen. + use_encode_direct = true; + } } if (use_encode_direct) { @@ -221,7 +233,7 @@ wcstring str2wcstring(const char *in, size_t len) { return str2wcs_internal(in, wcstring str2wcstring(const char *in) { return str2wcs_internal(in, strlen(in)); } wcstring str2wcstring(const std::string &in) { - /* Handles embedded nulls! */ + // Handles embedded nulls! return str2wcs_internal(in.data(), in.size()); } diff --git a/src/fallback.cpp b/src/fallback.cpp index 7a829124..96fa417b 100644 --- a/src/fallback.cpp +++ b/src/fallback.cpp @@ -131,7 +131,8 @@ __attribute__((unused)) static int wcsncasecmp_fallback(const wchar_t *a, const return wcsncasecmp_fallback(a + 1, b + 1, count - 1); } -#if __APPLE__ && __DARWIN_C_LEVEL >= 200809L +#if __APPLE__ +#if __DARWIN_C_LEVEL >= 200809L // Note parens avoid the macro expansion. 
wchar_t *wcsdup_use_weak(const wchar_t *a) { if (&wcsdup != NULL) return (wcsdup)(a); @@ -147,8 +148,12 @@ int wcsncasecmp_use_weak(const wchar_t *s1, const wchar_t *s2, size_t n) { if (&wcsncasecmp != NULL) return (wcsncasecmp)(s1, s2, n); return wcsncasecmp_fallback(s1, s2, n); } - -#endif //__APPLE__ +#else // __DARWIN_C_LEVEL >= 200809L +wchar_t *wcsdup(const wchar_t *in) { return wcsdup_fallback(in); } +int wcscasecmp(const wchar_t *a, const wchar_t *b) { return wcscasecmp_fallback(a, b); } +int wcsncasecmp(const wchar_t *a, const wchar_t *b, size_t n) { return wcsncasecmp_fallback(a, b, n); } +#endif // __DARWIN_C_LEVEL >= 200809L +#endif // __APPLE__ #ifndef HAVE_WCSNDUP wchar_t *wcsndup(const wchar_t *in, size_t c) { diff --git a/src/fallback.h b/src/fallback.h index b28e7b17..a3143f09 100644 --- a/src/fallback.h +++ b/src/fallback.h @@ -62,13 +62,20 @@ char *tparm_solaris_kludge(char *str, ...); /// these functions only exist on 10.7+. /// /// On other platforms, use what's detected at build time. -#if __APPLE__ && __DARWIN_C_LEVEL >= 200809L +#if __APPLE__ +#if __DARWIN_C_LEVEL >= 200809L wchar_t *wcsdup_use_weak(const wchar_t *); int wcscasecmp_use_weak(const wchar_t *, const wchar_t *); int wcsncasecmp_use_weak(const wchar_t *s1, const wchar_t *s2, size_t n); #define wcsdup(a) wcsdup_use_weak((a)) #define wcscasecmp(a, b) wcscasecmp_use_weak((a), (b)) #define wcsncasecmp(a, b, c) wcsncasecmp_use_weak((a), (b), (c)) +#else +wchar_t *wcsdup(const wchar_t *in); +int wcscasecmp(const wchar_t *a, const wchar_t *b); +int wcsncasecmp(const wchar_t *s1, const wchar_t *s2, size_t n); +wchar_t *wcsndup(const wchar_t *in, size_t c); +#endif #endif //__APPLE__ #ifndef HAVE_WCSNDUP diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 7f1c0a7d..2457cee4 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -258,6 +258,18 @@ static void test_format(void) { do_test(!strcmp(buff1, buff2)); } +/// Helper to convert a narrow string to a sequence of hex digits. 
+static char *str2hex(const char *input) { + char *output = (char *)malloc(5 * strlen(input) + 1); + char *p = output; + for (; *input; input++) { + sprintf(p, "0x%02X ", (int)*input & 0xFF); + p += 5; + } + *p = '\0'; + return output; +} + /// Test wide/narrow conversion by creating random strings and verifying that the original string /// comes back thorugh double conversion. static void test_convert() { @@ -318,8 +330,13 @@ static void test_convert() { } if (strcmp(o, n)) { - err(L"Line %d - %d: Conversion cycle of string %s produced different string %s", - __LINE__, i, o, n); + char *o2 = str2hex(o); + char *n2 = str2hex(n); + err(L"Line %d - %d: Conversion cycle of string:\n%4d chars: %s\n" + L"produced different string:\n%4d chars: %s", + __LINE__, i, strlen(o), o2, strlen(n), n2); + free(o2); + free(n2); } free((void *)n); } @@ -3882,8 +3899,7 @@ int main(int argc, char **argv) { } } - setlocale(LC_ALL, ""); - // srand(time(0)); + srand(time(0)); configure_thread_assertions_for_testing(); program_name = L"(ignore)"; diff --git a/src/wutil.cpp b/src/wutil.cpp index 4299956a..9f975fab 100644 --- a/src/wutil.cpp +++ b/src/wutil.cpp @@ -343,7 +343,13 @@ wchar_t *wrealpath(const wcstring &pathname, wchar_t *resolved_path) { res = wcsdup(wide_res.c_str()); } +#if __APPLE__ && __DARWIN_C_LEVEL < 200809L + // OS X Snow Leopard is broken with respect to the dynamically allocated buffer returned by + // realpath(). It's not dynamically allocated so attempting to free that buffer triggers a + // malloc/free error. Thus we don't attempt the free in this case. +#else free(narrow_res); +#endif return res; } |