make fish buildable on OS X Snow Leopard

I noticed that the `test_convert()` function was randomly failing when run on OS X Snow Leopard. I tracked it down to the `mbrtowc()` function on that OS being broken. Explicitly testing for UTF-8 prefixes that identify a sequence longer than four bytes (which the Unicode standard made illegal long ago) keeps us from having encoding errors on such OSes. This also makes the errors reported by the `test_convert()` function actually useful and readable. Lastly, it makes it possible to build fish on OS X Snow Leopard.
author: Kurtis Rader <krader@skepticism.us> 2016-05-18 17:46:13 -0700
committer: Kurtis Rader <krader@skepticism.us> 2016-05-19 18:42:34 -0700
commit: 46be5ac468db923bd3d19c55638b723aa760cd4e (patch)
tree: 44726156923b2243b946e11101242e543656de76 /src/common.cpp
parent: 30ea7cc3f8a5d56ad30dc749ea374363c15f312a (diff)
1 files changed, 29 insertions, 17 deletions
diff --git a/src/common.cpp b/src/common.cpp
index 7bd90915..b8270ad1 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -109,8 +109,12 @@ void __attribute__((noinline)) show_stackframe(const wchar_t msg_level, int fram
                                                int skip_levels) {
     ASSERT_IS_NOT_FORKED_CHILD();
 
+    // TODO: Decide if this is still needed. I'm commenting it out because it caused me some grief
+    // while trying to debug a test failure. And the tests run just fine without spurious failures
+    // if this check is not done.
+    //
     // Hack to avoid showing backtraces in the tester.
-    if (program_name && !wcscmp(program_name, L"(ignore)")) return;
+    // if (program_name && !wcscmp(program_name, L"(ignore)")) return;
 
     if (frame_count < 1) frame_count = 999;
     debug_shared(msg_level, L"Backtrace:");
@@ -177,24 +181,32 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) {
 
     mbstate_t state = {};
     while (in_pos < in_len) {
+        bool use_encode_direct = false;
+        size_t ret;
         wchar_t wc = 0;
-        size_t ret = mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state);
 
-        // Determine whether to encode this characters with our crazy scheme.
-        bool use_encode_direct = false;
-        if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
-            use_encode_direct = true;
-        } else if (wc == INTERNAL_SEPARATOR) {
-            use_encode_direct = true;
-        } else if (ret == (size_t)-2) {
-            // Incomplete sequence.
-            use_encode_direct = true;
-        } else if (ret == (size_t)-1) {
-            // Invalid data.
-            use_encode_direct = true;
-        } else if (ret > in_len - in_pos) {
-            // Other error codes? Terrifying, should never happen.
+        if ((in[in_pos] & 0xF8) == 0xF8) {
+            // Protect against broken mbrtowc() implementations which attempt to encode UTF-8
+            // sequences longer than four bytes (e.g., OS X Snow Leopard).
             use_encode_direct = true;
+        } else {
+            ret = mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state);
+
+            // Determine whether to encode this characters with our crazy scheme.
+            if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
+                use_encode_direct = true;
+            } else if (wc == INTERNAL_SEPARATOR) {
+                use_encode_direct = true;
+            } else if (ret == (size_t)-2) {
+                // Incomplete sequence.
+                use_encode_direct = true;
+            } else if (ret == (size_t)-1) {
+                // Invalid data.
+                use_encode_direct = true;
+            } else if (ret > in_len - in_pos) {
+                // Other error codes? Terrifying, should never happen.
+                use_encode_direct = true;
+            }
         }
 
         if (use_encode_direct) {
@@ -221,7 +233,7 @@ wcstring str2wcstring(const char *in, size_t len) { return str2wcs_internal(in,
 wcstring str2wcstring(const char *in) { return str2wcs_internal(in, strlen(in)); }
 
 wcstring str2wcstring(const std::string &in) {
-    /* Handles embedded nulls! */
+    // Handles embedded nulls!
     return str2wcs_internal(in.data(), in.size());
 }
author	Kurtis Rader <krader@skepticism.us>	2016-05-18 17:46:13 -0700
committer	Kurtis Rader <krader@skepticism.us>	2016-05-19 18:42:34 -0700
commit	46be5ac468db923bd3d19c55638b723aa760cd4e (patch)
tree	44726156923b2243b946e11101242e543656de76 /src/common.cpp
parent	30ea7cc3f8a5d56ad30dc749ea374363c15f312a (diff)